diff options
Diffstat (limited to 'unscoop.c')
-rw-r--r-- | unscoop.c | 161 |
1 files changed, 136 insertions, 25 deletions
diff --git a/unscoop.c b/unscoop.c index 153841c..a290831 100644 --- a/unscoop.c +++ b/unscoop.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2019 C. McEnroe <june@causal.agency> +/* Copyright (C) 2019 June McEnroe <june@causal.agency> * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -49,6 +49,8 @@ struct Matcher { #define P0_MODE "[!~&@%+ ]?" #define P1_TIME "^[[]([^]]+)[]][ \t]" +#define P2_USERHOST "[(]([^@]+)@([^)]+)[)]" +#define P2_MESSAGE "( [(]([^)]+)[)])?" static const struct Matcher Catgirl[] = { { @@ -147,8 +149,6 @@ static const struct Matcher IRC[] = { #undef P2_TAGS #undef P3_ORIGIN -#define P2_USERHOST "[(]([^@]+)@([^)]+)[)]" -#define P2_MESSAGE "( [(]([^)]+)[)])?" static const struct Matcher Textual[] = { { P1_TIME "<" P0_MODE "([^>]+)> (.+)", @@ -185,8 +185,86 @@ static const struct Matcher Textual[] = { Unban, { ":time", ":nick", ":target" }, } }; -#undef P2_USERHOST -#undef P2_MESSAGE + +static const struct Matcher WeeChat[] = { + { + "([^\t]+)\t-->\t([^ ]+) " P2_USERHOST " has joined", + Join, { ":time", ":nick", ":user", ":host" }, + }, { + "([^\t]+)\t<--\t([^ ]+) " P2_USERHOST " has left [^ ]+" P2_MESSAGE, + Part, { ":time", ":nick", ":user", ":host", NULL, ":message" }, + }, { + "([^\t]+)\t<--\t([^ ]+) has kicked ([^ ]+)" P2_MESSAGE, + Kick, { ":time", ":nick", ":target", NULL, ":message" }, + }, { + "([^\t]+)\t<--\t([^ ]+) " P2_USERHOST " has quit" P2_MESSAGE, + Quit, { ":time", ":nick", ":user", ":host", NULL, ":message" }, + }, { + "([^\t]+)\t--\t([^ ]+) is now known as ([^ ]+)", + Nick, { ":time", ":nick", ":target" }, + }, { + "([^\t]+)\t--\t([^ ]+) has changed topic for [^ ]+ to \"(.+)\"", + Topic, { ":time", ":nick", ":message" }, + }, { + "([^\t]+)\t--\t([^ ]+) has unset topic", + Topic, { ":time", ":nick" }, + }, { + "([^\t]+)\t--\tMode [^ ]+ [[][+]b+ ([^]]+)[]] by ([^ ]+)", + Ban, { ":time", ":target", ":nick" }, + }, { + "([^\t]+)\t--\tMode [^ ]+ 
[[][-]b+ ([^]]+)[]] by ([^ ]+)", + Unban, { ":time", ":target", ":nick" }, + }, { + "([^\t]+)\t--\tNotice[(]([^)]+)[)]: (.+)", + Notice, { ":time", ":nick", ":message" }, + }, { + "([^\t]+)\t--\t([^ :]+): (.+)", + Notice, { ":time", ":nick", ":message" }, + }, { + "([^\t]+)\t [*]\t([^ ]+) (.+)", + Action, { ":time", ":nick", ":message" }, + }, { + "([^\t]+)\t" P0_MODE "([^-][^\t]*)\t(.+)", + Privmsg, { ":time", ":nick", ":message" }, + } +}; + +static const struct Matcher ZNC[] = { + { + P1_TIME "<([^>]+)> (.+)", + Privmsg, { ":time", ":nick", ":message" }, + }, { + P1_TIME "-([^-]+)- (.+)", + Notice, { ":time", ":nick", ":message" }, + }, { + P1_TIME "[*] ([^ ]+) (.+)", + Action, { ":time", ":nick", ":message" }, + }, { + P1_TIME "[*]{3} Joins: ([^ ]+) " P2_USERHOST, + Join, { ":time", ":nick", ":user", ":host" }, + }, { + P1_TIME "[*]{3} Parts: ([^ ]+) " P2_USERHOST " [(](.*)[)]", + Part, { ":time", ":nick", ":user", ":host", ":message" }, + }, { + P1_TIME "[*]{3} ([^ ]+) was kicked by ([^ ]+) [(](.*)[)]", + Kick, { ":time", ":target", ":nick", ":message" }, + }, { + P1_TIME "[*]{3} Quits: ([^ ]+) " P2_USERHOST " [(](.*)[)]", + Quit, { ":time", ":nick", ":user", ":host", ":message" }, + }, { + P1_TIME "[*]{3} ([^ ]+) is now known as ([^ ]+)", + Nick, { ":time", ":nick", ":target" }, + }, { + P1_TIME "[*]{3} ([^ ]+) changes topic to '(.*)'", + Topic, { ":time", ":nick", ":message" }, + }, { + P1_TIME "[*]{3} ([^ ]+) sets mode: [+]b+ (.+)", + Ban, { ":time", ":nick", ":target" }, + }, { + P1_TIME "[*]{3} ([^ ]+) sets mode: [-]b+ (.+)", + Unban, { ":time", ":nick", ":target" }, + } +}; static const struct Format { const char *name; @@ -195,18 +273,20 @@ static const struct Format { const char *pattern; size_t network; size_t context; + size_t date; + bool local; } Formats[] = { { "generic", Generic, ARRAY_LEN(Generic), - "([^/]+)/([^/]+)/[^/]+$", 1, 2, + "([^/]+)/([^/]+)/[^/]+$", 1, 2, 0, false, }, { "catgirl", Catgirl, ARRAY_LEN(Catgirl), - 
"([^/]+)/([^/]+)/[0-9-]+[.]log$", 1, 2, + "([^/]+)/([^/]+)/[0-9-]+[.]log$", 1, 2, 0, false, }, { "irc", IRC, ARRAY_LEN(IRC), - "^$", 0, 0, + "^$", 0, 0, 0, false, }, { "textual", Textual, ARRAY_LEN(Textual), @@ -216,14 +296,25 @@ static const struct Format { "([^/]+)/" "[0-9-]+[.]txt$" ), - 1, 4, + 1, 4, 0, false, + }, + { + "weechat", WeeChat, ARRAY_LEN(WeeChat), + "irc[.](.+)[.]([^.]+)[.]weechatlog$", 1, 2, 0, true, + }, + { + "znc", ZNC, ARRAY_LEN(ZNC), + "([^/]+)/(moddata/log/)?([^/]+)/([0-9-]+)[.]log$", 1, 3, 4, true, }, }; static const struct Format *formatParse(const char *name) { + bool list = !strcmp(name, "?"); for (size_t i = 0; i < ARRAY_LEN(Formats); ++i) { if (!strcmp(name, Formats[i].name)) return &Formats[i]; + if (list) printf("%s\n", Formats[i].name); } + if (list) exit(EX_OK); errx(EX_USAGE, "no such format %s", name); } @@ -250,6 +341,7 @@ static sqlite3_stmt *insertName; static sqlite3_stmt *insertEvent; static int paramNetwork; static int paramContext; +static int paramDate; static void prepareInsert(void) { const char *InsertName = SQL( @@ -262,9 +354,13 @@ static void prepareInsert(void) { INSERT INTO events (time, type, context, name, target, message) SELECT // SQLite expects a colon in the timezine, but ISO8601 does not. 
- CASE WHEN :time LIKE '%Z' - THEN strftime('%s', :time) - ELSE strftime('%s', substr(:time, 1, 22) || ':' || substr(:time, -2)) + CASE + WHEN :time LIKE '%+____' OR :time LIKE '%-____' THEN + strftime('%s', substr(:time, 1, 22) || ':' || substr(:time, -2)) + WHEN :local THEN + strftime('%s', coalesce(:date || ' ', "") || :time, 'utc') + ELSE + strftime('%s', coalesce(:date || ' ', "") || :time) END, :type, context, names.name, :target, :message FROM contexts, names @@ -277,21 +373,24 @@ static void prepareInsert(void) { dbPersist(&insertEvent, InsertEvent); paramNetwork = dbParam(insertEvent, ":network"); paramContext = dbParam(insertEvent, ":context"); + paramDate = dbParam(insertEvent, ":date"); } static void matchLine(const struct Format *format, const regex_t *regex, const char *line) { for (size_t i = 0; i < format->len; ++i) { const struct Matcher *matcher = &format->matchers[i]; - regmatch_t match[ParamCap]; - if (regexec(&regex[i], line, ParamCap, match, 0)) continue; + regmatch_t match[1 + ParamCap]; + if (regexec(&regex[i], line, 1 + ParamCap, match, 0)) continue; sqlite3_clear_bindings(insertName); for (int i = 1; i <= sqlite3_bind_parameter_count(insertEvent); ++i) { - if (i == paramNetwork || i == paramContext) continue; - sqlite3_bind_null(insertEvent, i); + if (i != paramNetwork && i != paramContext && i != paramDate) { + sqlite3_bind_null(insertEvent, i); + } } + dbBindInt(insertEvent, ":local", format->local); dbBindInt(insertEvent, ":type", matcher->type); for (size_t i = 0; i < ARRAY_LEN(matcher->params); ++i) { const char *param = matcher->params[i]; @@ -349,7 +448,8 @@ int main(int argc, char *argv[]) { } regex_t pathRegex = compile(format->pattern); - regex_t regex[format->len]; + regex_t *regex = calloc(format->len, sizeof(*regex)); + if (!regex) err(EX_OSERR, "calloc"); for (size_t i = 0; i < format->len; ++i) { regex[i] = compile(format->matchers[i].pattern); } @@ -384,11 +484,13 @@ int main(int argc, char *argv[]) { size_t sizeTotal = 0; size_t 
sizeRead = 0; - size_t sizePercent = -1; - regmatch_t match[argc][ParamCap]; + size_t sizePercent = 0; + struct { + regmatch_t match[ParamCap]; + } *paths = calloc(argc, sizeof(*paths)); for (int i = optind; i < argc; ++i) { - int error = regexec(&pathRegex, argv[i], ParamCap, match[i], 0); + int error = regexec(&pathRegex, argv[i], ParamCap, paths[i].match, 0); if (error && (!network || !context)) { warnx("skipping %s", argv[i]); argv[i] = NULL; @@ -405,13 +507,15 @@ int main(int argc, char *argv[]) { size_t cap = 0; for (int i = optind; i < argc; ++i) { if (!argv[i]) continue; + printf("%s\n", argv[i]); + fprintf(stderr, " %3zu%%\r", sizePercent); FILE *file = fopen(argv[i], "r"); if (!file) err(EX_NOINPUT, "%s", argv[i]); dbExec(SQL(BEGIN TRANSACTION;)); - regmatch_t pathNetwork = match[i][format->network]; - regmatch_t pathContext = match[i][format->context]; + regmatch_t pathNetwork = paths[i].match[format->network]; + regmatch_t pathContext = paths[i].match[format->context]; if (!network) { bindMatch(insertContext, ":network", argv[i], pathNetwork); bindMatch(insertEvent, ":network", argv[i], pathNetwork); @@ -422,13 +526,20 @@ int main(int argc, char *argv[]) { } dbRun(insertContext); + if (format->date) { + bindMatch( + insertEvent, ":date", argv[i], paths[i].match[format->date] + ); + } + for (ssize_t len; 0 < (len = getline(&line, &cap, file));) { + if (len >= 1 && line[len-1] == '\n') line[len-1] = '\0'; + if (len >= 2 && line[len-2] == '\r') line[len-2] = '\0'; matchLine(format, regex, line); sizeRead += len; if (100 * sizeRead / sizeTotal != sizePercent) { sizePercent = 100 * sizeRead / sizeTotal; - printf("\r%3zu%%", sizePercent); - fflush(stdout); + fprintf(stderr, " %3zu%%\r", sizePercent); } } if (ferror(file)) err(EX_IOERR, "%s", argv[i]); @@ -436,7 +547,7 @@ int main(int argc, char *argv[]) { fclose(file); dbExec(SQL(COMMIT TRANSACTION;)); } - printf("\n"); + fprintf(stderr, "\n"); dbClose(); } |