diff options
-rw-r--r-- | unscoop.c | 191 |
1 files changed, 93 insertions, 98 deletions
diff --git a/unscoop.c b/unscoop.c index fa3a5a2..782ce7f 100644 --- a/unscoop.c +++ b/unscoop.c @@ -31,122 +31,118 @@ #define ARRAY_LEN(a) (sizeof(a) / sizeof((a)[0])) +enum { ParamCap = 8 }; + struct Matcher { const char *pattern; - regex_t regex; enum Type type; - size_t time; - size_t nick; - size_t user; - size_t host; - size_t target; - size_t message; + const char *params[ParamCap]; }; #define WS "[[:blank:]]" #define P1_TIME "[[]([^]]+)[]]" #define P0_MODE "[!~&@%+ ]?" -static struct Matcher Generic[] = { +static const struct Matcher Generic[] = { { "^" P1_TIME WS "<" P0_MODE "([^>]+)" ">" WS "(.+)", - .type = Privmsg, .time = 1, .nick = 2, .message = 3, + Privmsg, { "$time", "$nick", "$message" }, }, { "^" P1_TIME WS "-" P0_MODE "([^-]+)" "-" WS "(.+)", - .type = Notice, .time = 1, .nick = 2, .message = 3, + Notice, { "$time", "$nick", "$message" }, }, { "^" P1_TIME WS "[*]" WS P0_MODE "([^[:blank:]]+)" WS "(.+)", - .type = Action, .time = 1, .nick = 2, .message = 3, + Action, { "$time", "$nick", "$message" }, }, }; #define P2_USERHOST "[(]([^@]+)@([^)]+)[)]" #define P2_MESSAGE "( [(]([^)]+)[)])?" 
-static struct Matcher Textual[] = { +static const struct Matcher Textual[] = { { "^" P1_TIME " <" P0_MODE "([^>]+)> (.+)", - .type = Privmsg, .time = 1, .nick = 2, .message = 3, + Privmsg, { "$time", "$nick", "$message" }, }, { "^" P1_TIME " -" P0_MODE "([^-]+)- (.+)", - .type = Notice, .time = 1, .nick = 2, .message = 3, + Notice, { "$time", "$nick", "$message" }, }, { "^" P1_TIME " • ([^:]+): (.+)", - .type = Action, .time = 1, .nick = 2, .message = 3, + Action, { "$time", "$nick", "$message" }, }, { "^" P1_TIME " ([^ ]+) " P2_USERHOST " joined the channel", - .type = Join, .time = 1, .nick = 2, .user = 3, .host = 4, + Join, { "$time", "$nick", "$user", "$host" }, }, { "^" P1_TIME " ([^ ]+) " P2_USERHOST " left the channel" P2_MESSAGE, - .type = Part, .time = 1, .nick = 2, .user = 3, .host = 4, .message = 6, + Part, { "$time", "$nick", "$user", "$host", NULL, "$message" }, }, { "^" P1_TIME " ([^ ]+) kicked ([^ ]+) from the channel" P2_MESSAGE, - .type = Kick, .time = 1, .nick = 2, .target = 3, .message = 5, + Kick, { "$time", "$nick", "$target", NULL, "$message" }, }, { "^" P1_TIME " ([^ ]+) " P2_USERHOST " left IRC" P2_MESSAGE, - .type = Quit, .time = 1, .nick = 2, .user = 3, .host = 4, .message = 6, + Quit, { "$time", "$nick", "$user", "$host", NULL, "$message" }, }, { "^" P1_TIME " ([^ ]+) is now known as ([^ ]+)", - .type = Nick, .time = 1, .nick = 2, .target = 3, + Nick, { "$time", "$nick", "$target" }, }, { "^" P1_TIME " ([^ ]+) changed the topic to (.+)", - .type = Topic, .time = 1, .nick = 2, .message = 3, + Topic, { "$time", "$nick", "$message" }, }, }; #undef P2_MESSAGE #define P2_MESSAGE "(, \"([^\"]+)\")?" 
-static struct Matcher Catgirl[] = { +static const struct Matcher Catgirl[] = { { "^" P1_TIME " <([^>]+)> (.+)", - .type = Privmsg, .time = 1, .nick = 2, .message = 3, + Privmsg, { "$time", "$nick", "$message" }, }, { "^" P1_TIME " -([^-]+)- (.+)", - .type = Notice, .time = 1, .nick = 2, .message = 3, + Notice, { "$time", "$nick", "$message" }, }, { "^" P1_TIME " [*] ([^ ]+) (.+)", - .type = Action, .time = 1, .nick = 2, .message = 3, + Action, { "$time", "$nick", "$message" }, }, { "^" P1_TIME " ([^ ]+) arrives", - .type = Join, .time = 1, .nick = 2, + Join, { "$time", "$nick" }, }, { "^" P1_TIME " ([^ ]+) leaves [^,]+" P2_MESSAGE, - .type = Part, .time = 1, .nick = 2, .message = 4, + Part, { "$time", "$nick", NULL, "$message" }, }, { "^" P1_TIME " ([^ ]+) kicks ([^ ]+) out of [^,]+" P2_MESSAGE, - .type = Kick, .time = 1, .nick = 2, .target = 3, .message = 5, + Kick, { "$time", "$nick", "$target", NULL, "$message" }, }, { "^" P1_TIME " ([^ ]+) leaves" P2_MESSAGE, - .type = Quit, .time = 1, .nick = 2, .message = 4, + Quit, { "$time", "$nick", NULL, "$message" }, }, { "^" P1_TIME " ([^ ]+) is now known as ([^ ]+)", - .type = Nick, .time = 1, .nick = 2, .target = 3, + Nick, { "$time", "$nick", "$target" }, }, { "^" P1_TIME " ([^ ]+) places a new sign in [^,]+" P2_MESSAGE, - .type = Topic, .time = 1, .nick = 2, .message = 3, + Topic, { "$time", "$nick", "$message" }, }, }; static const struct Format { const char *name; - struct Matcher *matchers; + const struct Matcher *matchers; size_t len; } Formats[] = { { "generic", Generic, ARRAY_LEN(Generic) }, @@ -170,6 +166,64 @@ bindMatch(sqlite3_stmt *stmt, int param, const char *str, regmatch_t match) { } } +static sqlite3_stmt *insertName; +static sqlite3_stmt *insertEvent; + +static void prepareInsert(sqlite3 *db) { + insertName = dbPrepare( + db, SQLITE_PREPARE_PERSISTENT, + "INSERT OR IGNORE INTO names (nick, user, host)" + "VALUES ($nick, coalesce($user, '*'), coalesce($host, '*'));" + ); + + // SQLite expects a colon 
in the timezone, but ISO8601 does not. + insertEvent = dbPrepare( + db, SQLITE_PREPARE_PERSISTENT, + "INSERT INTO events (context, type, time, name, target, message)" + "SELECT" + " $context, $type," + " datetime(substr($time, 1, 22) || ':' || substr($time, -2))," + " name, $target, $message" + " FROM names" + " WHERE nick = $nick" + " AND user = coalesce($user, '*')" + " AND host = coalesce($host, '*');" + ); +} + +static void matchLine( + int64_t context, const struct Format *format, + const regex_t *regex, const char *line +) { + for (size_t i = 0; i < format->len; ++i) { + const struct Matcher *matcher = &format->matchers[i]; + regmatch_t match[ParamCap]; + int error = regexec(&regex[i], line, ParamCap, match, 0); + if (error) continue; + + sqlite3_clear_bindings(insertName); + sqlite3_clear_bindings(insertEvent); + + dbBindInt(insertEvent, 1, context); + dbBindInt(insertEvent, 2, matcher->type); + + for (size_t i = 0; i < ARRAY_LEN(matcher->params); ++i) { + const char *param = matcher->params[i]; + if (!param) continue; + int p = sqlite3_bind_parameter_index(insertName, param); + if (p) bindMatch(insertName, p, line, match[1 + i]); + p = sqlite3_bind_parameter_index(insertEvent, param); + if (!p) errx(EX_SOFTWARE, "no such parameter %s", param); + bindMatch(insertEvent, p, line, match[1 + i]); + } + + dbStep(insertName); + dbStep(insertEvent); + sqlite3_reset(insertName); + sqlite3_reset(insertEvent); + } +} + int main(int argc, char *argv[]) { char *path = NULL; bool dedup = false; @@ -222,14 +276,15 @@ int main(int argc, char *argv[]) { return EX_OK; } + regex_t regex[format->len]; for (size_t i = 0; i < format->len; ++i) { - struct Matcher *matcher = &format->matchers[i]; + const struct Matcher *matcher = &format->matchers[i]; int error = regcomp( - &matcher->regex, matcher->pattern, REG_EXTENDED | REG_NEWLINE + &regex[i], matcher->pattern, REG_EXTENDED | REG_NEWLINE ); if (!error) continue; char buf[256]; - regerror(error, &matcher->regex, buf, sizeof(buf)); + 
regerror(error, &regex[i], buf, sizeof(buf)); errx(EX_SOFTWARE, "regcomp: %s: %s", buf, matcher->pattern); } @@ -244,7 +299,6 @@ int main(int argc, char *argv[]) { dbStep(insertContext); sqlite3_finalize(insertContext); - int64_t contextID; sqlite3_stmt *selectContext = dbPrepare( db, 0, "SELECT context FROM contexts" @@ -253,25 +307,10 @@ int main(int argc, char *argv[]) { dbBindText(selectContext, 1, network, -1); dbBindText(selectContext, 2, context, -1); assert(SQLITE_ROW == dbStep(selectContext)); - contextID = sqlite3_column_int64(selectContext, 0); + int64_t id = sqlite3_column_int64(selectContext, 0); sqlite3_finalize(selectContext); - sqlite3_stmt *insertName = dbPrepare( - db, SQLITE_PREPARE_PERSISTENT, - "INSERT OR IGNORE INTO names (nick, user, host)" - "VALUES ($nick, $user, $host);" - ); - // SQLite expects a colon in the timezone, but ISO8601 does not. - sqlite3_stmt *insertEvent = dbPrepare( - db, SQLITE_PREPARE_PERSISTENT, - "INSERT INTO events (context, type, time, name, target, message)" - "SELECT" - " $context, $type," - " datetime(substr($time, 1, 22) || ':' || substr($time, -2))," - " name, $target, $message" - " FROM names WHERE nick = $nick AND user = $user AND host = $host;" - ); - dbBindInt(insertEvent, 1, contextID); + prepareInsert(db); size_t sizeTotal = 0; for (int i = optind; i < argc; ++i) { @@ -280,7 +319,6 @@ int main(int argc, char *argv[]) { if (error) err(EX_NOINPUT, "%s", argv[i]); sizeTotal += st.st_size; } - size_t sizeRead = 0; size_t sizePercent = 101; @@ -289,53 +327,10 @@ int main(int argc, char *argv[]) { for (int i = optind; i < argc; ++i) { FILE *file = fopen(argv[i], "r"); if (!file) err(EX_NOINPUT, "%s", argv[i]); - dbBegin(db); ssize_t len; while (0 < (len = getline(&line, &cap, file))) { - for (size_t i = 0; i < format->len; ++i) { - const struct Matcher *matcher = &format->matchers[i]; - regmatch_t match[8]; - int error = regexec( - &matcher->regex, line, ARRAY_LEN(match), match, 0 - ); - if (error) continue; - - 
dbBindInt(insertEvent, 2, matcher->type); - bindMatch(insertEvent, 3, line, match[matcher->time]); - if (matcher->target) { - bindMatch(insertEvent, 4, line, match[matcher->target]); - } else { - dbBindText(insertEvent, 4, NULL, -1); - } - if (matcher->message) { - bindMatch(insertEvent, 5, line, match[matcher->message]); - } else { - dbBindText(insertEvent, 5, NULL, -1); - } - bindMatch(insertEvent, 6, line, match[matcher->nick]); - bindMatch(insertName, 1, line, match[matcher->nick]); - if (matcher->user) { - bindMatch(insertEvent, 7, line, match[matcher->user]); - bindMatch(insertName, 2, line, match[matcher->user]); - } else { - dbBindText(insertEvent, 7, "*", -1); - dbBindText(insertName, 2, "*", -1); - } - if (matcher->host) { - bindMatch(insertEvent, 8, line, match[matcher->host]); - bindMatch(insertName, 3, line, match[matcher->host]); - } else { - dbBindText(insertEvent, 8, "*", -1); - dbBindText(insertName, 3, "*", -1); - } - - dbStep(insertName); - dbStep(insertEvent); - sqlite3_reset(insertName); - sqlite3_reset(insertEvent); - } - + matchLine(id, format, regex, line); sizeRead += len; if (100 * sizeRead / sizeTotal != sizePercent) { sizePercent = 100 * sizeRead / sizeTotal; |