about summary refs log tree commit diff
diff options
context:
space:
mode:
author: June McEnroe <june@causal.agency> 2019-12-06 17:49:10 -0500
committer: June McEnroe <june@causal.agency> 2019-12-06 17:49:10 -0500
commit: 8aa2dcecc48a10d85b52f10046cf2b414e4bfa31 (patch)
tree: 928afa796efd4a87e4dcb5922f022b276209e3aa
parent: Fold networks into contexts (diff)
download: litterbox-8aa2dcecc48a10d85b52f10046cf2b414e4bfa31.tar.gz
litterbox-8aa2dcecc48a10d85b52f10046cf2b414e4bfa31.zip
Use named parameters in unscoop matchers
-rw-r--r-- unscoop.c 191
1 files changed, 93 insertions, 98 deletions
diff --git a/unscoop.c b/unscoop.c
index fa3a5a2..782ce7f 100644
--- a/unscoop.c
+++ b/unscoop.c
@@ -31,122 +31,118 @@
 
 #define ARRAY_LEN(a) (sizeof(a) / sizeof((a)[0]))
 
+enum { ParamCap = 8 }; // Length of Matcher.params: the most capture groups one pattern can map.
+
 struct Matcher {
 	const char *pattern;
-	regex_t regex;
 	enum Type type;
-	size_t time;
-	size_t nick;
-	size_t user;
-	size_t host;
-	size_t target;
-	size_t message;
+	const char *params[ParamCap]; // params[i]: SQL parameter name bound to capture group 1 + i; NULL leaves that group unbound.
 };
 
 #define WS "[[:blank:]]"
 #define P1_TIME "[[]([^]]+)[]]"
 #define P0_MODE "[!~&@%+ ]?"
 
-static struct Matcher Generic[] = {
+static const struct Matcher Generic[] = { // Matchers used by the "generic" entry of Formats.
 	{
 		"^" P1_TIME WS "<" P0_MODE "([^>]+)" ">" WS "(.+)",
-		.type = Privmsg, .time = 1, .nick = 2, .message = 3,
+		Privmsg, { "$time", "$nick", "$message" }, // capture groups 1, 2, 3 in order
 	},
 	{
 		"^" P1_TIME WS "-" P0_MODE "([^-]+)" "-" WS "(.+)",
-		.type = Notice, .time = 1, .nick = 2, .message = 3,
+		Notice, { "$time", "$nick", "$message" },
 	},
 	{
 		"^" P1_TIME WS "[*]" WS P0_MODE "([^[:blank:]]+)" WS "(.+)",
-		.type = Action, .time = 1, .nick = 2, .message = 3,
+		Action, { "$time", "$nick", "$message" },
 	},
 };
 
 #define P2_USERHOST "[(]([^@]+)@([^)]+)[)]"
 #define P2_MESSAGE "( [(]([^)]+)[)])?"
-static struct Matcher Textual[] = {
+static const struct Matcher Textual[] = { // Each entry lists parameter names in capture-group order, starting at group 1.
 	{
 		"^" P1_TIME " <" P0_MODE "([^>]+)> (.+)",
-		.type = Privmsg, .time = 1, .nick = 2, .message = 3,
+		Privmsg, { "$time", "$nick", "$message" },
 	},
 	{
 		"^" P1_TIME " -" P0_MODE "([^-]+)- (.+)",
-		.type = Notice, .time = 1, .nick = 2, .message = 3,
+		Notice, { "$time", "$nick", "$message" },
 	},
 	{
 		"^" P1_TIME " • ([^:]+): (.+)",
-		.type = Action, .time = 1, .nick = 2, .message = 3,
+		Action, { "$time", "$nick", "$message" },
 	},
 	{
 		"^" P1_TIME " ([^ ]+) " P2_USERHOST " joined the channel",
-		.type = Join, .time = 1, .nick = 2, .user = 3, .host = 4,
+		Join, { "$time", "$nick", "$user", "$host" }, // P2_USERHOST contributes groups 3 and 4
 	},
 	{
 		"^" P1_TIME " ([^ ]+) " P2_USERHOST " left the channel" P2_MESSAGE,
-		.type = Part, .time = 1, .nick = 2, .user = 3, .host = 4, .message = 6,
+		Part, { "$time", "$nick", "$user", "$host", NULL, "$message" }, // NULL skips group 5, the optional outer group of P2_MESSAGE; the text inside the parentheses is group 6
 	},
 	{
 		"^" P1_TIME " ([^ ]+) kicked ([^ ]+) from the channel" P2_MESSAGE,
-		.type = Kick, .time = 1, .nick = 2, .target = 3, .message = 5,
+		Kick, { "$time", "$nick", "$target", NULL, "$message" }, // NULL skips group 4, the outer group of P2_MESSAGE
 	},
 	{
 		"^" P1_TIME " ([^ ]+) " P2_USERHOST " left IRC" P2_MESSAGE,
-		.type = Quit, .time = 1, .nick = 2, .user = 3, .host = 4, .message = 6,
+		Quit, { "$time", "$nick", "$user", "$host", NULL, "$message" },
 	},
 	{
 		"^" P1_TIME " ([^ ]+) is now known as ([^ ]+)",
-		.type = Nick, .time = 1, .nick = 2, .target = 3,
+		Nick, { "$time", "$nick", "$target" },
 	},
 	{
 		"^" P1_TIME " ([^ ]+) changed the topic to (.+)",
-		.type = Topic, .time = 1, .nick = 2, .message = 3,
+		Topic, { "$time", "$nick", "$message" },
 	},
 };
 
 #undef P2_MESSAGE
 #define P2_MESSAGE "(, \"([^\"]+)\")?"
-static struct Matcher Catgirl[] = {
+static const struct Matcher Catgirl[] = { // Each entry lists parameter names in capture-group order, starting at group 1.
 	{
 		"^" P1_TIME " <([^>]+)> (.+)",
-		.type = Privmsg, .time = 1, .nick = 2, .message = 3,
+		Privmsg, { "$time", "$nick", "$message" },
 	},
 	{
 		"^" P1_TIME " -([^-]+)- (.+)",
-		.type = Notice, .time = 1, .nick = 2, .message = 3,
+		Notice, { "$time", "$nick", "$message" },
 	},
 	{
 		"^" P1_TIME " [*] ([^ ]+) (.+)",
-		.type = Action, .time = 1, .nick = 2, .message = 3,
+		Action, { "$time", "$nick", "$message" },
 	},
 	{
 		"^" P1_TIME " ([^ ]+) arrives",
-		.type = Join, .time = 1, .nick = 2,
+		Join, { "$time", "$nick" },
 	},
 	{
 		"^" P1_TIME " ([^ ]+) leaves [^,]+" P2_MESSAGE,
-		.type = Part, .time = 1, .nick = 2, .message = 4,
+		Part, { "$time", "$nick", NULL, "$message" }, // NULL skips group 3, the optional outer group of P2_MESSAGE; the quoted text is group 4
 	},
 	{
 		"^" P1_TIME " ([^ ]+) kicks ([^ ]+) out of [^,]+" P2_MESSAGE,
-		.type = Kick, .time = 1, .nick = 2, .target = 3, .message = 5,
+		Kick, { "$time", "$nick", "$target", NULL, "$message" },
 	},
 	{
 		"^" P1_TIME " ([^ ]+) leaves" P2_MESSAGE,
-		.type = Quit, .time = 1, .nick = 2, .message = 4,
+		Quit, { "$time", "$nick", NULL, "$message" },
 	},
 	{
 		"^" P1_TIME " ([^ ]+) is now known as ([^ ]+)",
-		.type = Nick, .time = 1, .nick = 2, .target = 3,
+		Nick, { "$time", "$nick", "$target" },
 	},
 	{
 		"^" P1_TIME " ([^ ]+) places a new sign in [^,]+" P2_MESSAGE,
-		.type = Topic, .time = 1, .nick = 2, .message = 3,
+		Topic, { "$time", "$nick", NULL, "$message" }, // same P2_MESSAGE layout as Part: group 3 is the wrapper, the quoted text is group 4
 	},
 };
 
 static const struct Format {
 	const char *name;
-	struct Matcher *matchers;
+	const struct Matcher *matchers;
 	size_t len;
 } Formats[] = {
 	{ "generic", Generic, ARRAY_LEN(Generic) },
@@ -170,6 +166,64 @@ bindMatch(sqlite3_stmt *stmt, int param, const char *str, regmatch_t match) {
 	}
 }
 
+static sqlite3_stmt *insertName; // set by prepareInsert(): INSERT OR IGNORE INTO names
+static sqlite3_stmt *insertEvent; // set by prepareInsert(): INSERT INTO events
+
+static void prepareInsert(sqlite3 *db) { // Prepare the file-scope insertName and insertEvent statements on db.
+	insertName = dbPrepare(
+		db, SQLITE_PREPARE_PERSISTENT,
+		"INSERT OR IGNORE INTO names (nick, user, host)"
+		"VALUES ($nick, coalesce($user, '*'), coalesce($host, '*'));" // NULL user/host are stored as '*'
+	);
+
+	// SQLite expects a colon in the timezone, but ISO8601 does not.
+	insertEvent = dbPrepare(
+		db, SQLITE_PREPARE_PERSISTENT,
+		"INSERT INTO events (context, type, time, name, target, message)"
+		"SELECT"
+		" $context, $type,"
+		" datetime(substr($time, 1, 22) || ':' || substr($time, -2))," // first 22 chars + ':' + last 2 chars
+		" name, $target, $message"
+		" FROM names" // selects the names row matching the nick/user/host below
+		" WHERE nick = $nick"
+		" AND user = coalesce($user, '*')" // same '*' default as insertName, so the lookup matches
+		" AND host = coalesce($host, '*');"
+	);
+}
+
+static void matchLine( // Try each matcher of format on line; every match inserts a name row and an event row.
+	int64_t context, const struct Format *format, // context is bound to $context of insertEvent
+	const regex_t *regex, const char *line // regex[i] is the compiled regex tried for format->matchers[i]
+) {
+	for (size_t i = 0; i < format->len; ++i) {
+		const struct Matcher *matcher = &format->matchers[i];
+		regmatch_t match[1 + ParamCap]; // slot 0 is the whole match; params[j] reads slot 1 + j, so ParamCap + 1 slots are needed
+		int error = regexec(&regex[i], line, ARRAY_LEN(match), match, 0);
+		if (error) continue;
+
+		sqlite3_clear_bindings(insertName); // reset every parameter to NULL so groups this matcher lacks stay NULL
+		sqlite3_clear_bindings(insertEvent);
+
+		dbBindInt(insertEvent, 1, context); // $context and $type are the 1st and 2nd parameters of insertEvent
+		dbBindInt(insertEvent, 2, matcher->type);
+
+		for (size_t j = 0; j < ARRAY_LEN(matcher->params); ++j) {
+			const char *param = matcher->params[j];
+			if (!param) continue;
+			int p = sqlite3_bind_parameter_index(insertName, param); // 0 when insertName has no such parameter
+			if (p) bindMatch(insertName, p, line, match[1 + j]);
+			p = sqlite3_bind_parameter_index(insertEvent, param);
+			if (!p) errx(EX_SOFTWARE, "no such parameter %s", param);
+			bindMatch(insertEvent, p, line, match[1 + j]);
+		}
+
+		dbStep(insertName);
+		dbStep(insertEvent);
+		sqlite3_reset(insertName);
+		sqlite3_reset(insertEvent);
+	}
+}
+
 int main(int argc, char *argv[]) {
 	char *path = NULL;
 	bool dedup = false;
@@ -222,14 +276,15 @@ int main(int argc, char *argv[]) {
 		return EX_OK;
 	}
 
+	regex_t regex[format->len];
 	for (size_t i = 0; i < format->len; ++i) {
-		struct Matcher *matcher = &format->matchers[i];
+		const struct Matcher *matcher = &format->matchers[i];
 		int error = regcomp(
-			&matcher->regex, matcher->pattern, REG_EXTENDED | REG_NEWLINE
+			&regex[i], matcher->pattern, REG_EXTENDED | REG_NEWLINE
 		);
 		if (!error) continue;
 		char buf[256];
-		regerror(error, &matcher->regex, buf, sizeof(buf));
+		regerror(error, &regex[i], buf, sizeof(buf));
 		errx(EX_SOFTWARE, "regcomp: %s: %s", buf, matcher->pattern);
 	}
 
@@ -244,7 +299,6 @@ int main(int argc, char *argv[]) {
 	dbStep(insertContext);
 	sqlite3_finalize(insertContext);
 
-	int64_t contextID;
 	sqlite3_stmt *selectContext = dbPrepare(
 		db, 0,
 		"SELECT context FROM contexts"
@@ -253,25 +307,10 @@ int main(int argc, char *argv[]) {
 	dbBindText(selectContext, 1, network, -1);
 	dbBindText(selectContext, 2, context, -1);
 	assert(SQLITE_ROW == dbStep(selectContext));
-	contextID = sqlite3_column_int64(selectContext, 0);
+	int64_t id = sqlite3_column_int64(selectContext, 0);
 	sqlite3_finalize(selectContext);
 
-	sqlite3_stmt *insertName = dbPrepare(
-		db, SQLITE_PREPARE_PERSISTENT,
-		"INSERT OR IGNORE INTO names (nick, user, host)"
-		"VALUES ($nick, $user, $host);"
-	);
-	// SQLite expects a colon in the timezone, but ISO8601 does not.
-	sqlite3_stmt *insertEvent = dbPrepare(
-		db, SQLITE_PREPARE_PERSISTENT,
-		"INSERT INTO events (context, type, time, name, target, message)"
-		"SELECT"
-		" $context, $type,"
-		" datetime(substr($time, 1, 22) || ':' || substr($time, -2)),"
-		" name, $target, $message"
-		" FROM names WHERE nick = $nick AND user = $user AND host = $host;"
-	);
-	dbBindInt(insertEvent, 1, contextID);
+	prepareInsert(db);
 
 	size_t sizeTotal = 0;
 	for (int i = optind; i < argc; ++i) {
@@ -280,7 +319,6 @@ int main(int argc, char *argv[]) {
 		if (error) err(EX_NOINPUT, "%s", argv[i]);
 		sizeTotal += st.st_size;
 	}
-
 	size_t sizeRead = 0;
 	size_t sizePercent = 101;
 
@@ -289,53 +327,10 @@ int main(int argc, char *argv[]) {
 	for (int i = optind; i < argc; ++i) {
 		FILE *file = fopen(argv[i], "r");
 		if (!file) err(EX_NOINPUT, "%s", argv[i]);
-
 		dbBegin(db);
 		ssize_t len;
 		while (0 < (len = getline(&line, &cap, file))) {
-			for (size_t i = 0; i < format->len; ++i) {
-				const struct Matcher *matcher = &format->matchers[i];
-				regmatch_t match[8];
-				int error = regexec(
-					&matcher->regex, line, ARRAY_LEN(match), match, 0
-				);
-				if (error) continue;
-
-				dbBindInt(insertEvent, 2, matcher->type);
-				bindMatch(insertEvent, 3, line, match[matcher->time]);
-				if (matcher->target) {
-					bindMatch(insertEvent, 4, line, match[matcher->target]);
-				} else {
-					dbBindText(insertEvent, 4, NULL, -1);
-				}
-				if (matcher->message) {
-					bindMatch(insertEvent, 5, line, match[matcher->message]);
-				} else {
-					dbBindText(insertEvent, 5, NULL, -1);
-				}
-				bindMatch(insertEvent, 6, line, match[matcher->nick]);
-				bindMatch(insertName, 1, line, match[matcher->nick]);
-				if (matcher->user) {
-					bindMatch(insertEvent, 7, line, match[matcher->user]);
-					bindMatch(insertName, 2, line, match[matcher->user]);
-				} else {
-					dbBindText(insertEvent, 7, "*", -1);
-					dbBindText(insertName, 2, "*", -1);
-				}
-				if (matcher->host) {
-					bindMatch(insertEvent, 8, line, match[matcher->host]);
-					bindMatch(insertName, 3, line, match[matcher->host]);
-				} else {
-					dbBindText(insertEvent, 8, "*", -1);
-					dbBindText(insertName, 3, "*", -1);
-				}
-
-				dbStep(insertName);
-				dbStep(insertEvent);
-				sqlite3_reset(insertName);
-				sqlite3_reset(insertEvent);
-			}
-
+			matchLine(id, format, regex, line);
 			sizeRead += len;
 			if (100 * sizeRead / sizeTotal != sizePercent) {
 				sizePercent = 100 * sizeRead / sizeTotal;