diff options
-rw-r--r-- | database.h | 42 | ||||
-rw-r--r-- | litterbox.1 | 22 | ||||
-rw-r--r-- | litterbox.c | 16 | ||||
-rw-r--r-- | unscoop.c | 110 |
4 files changed, 88 insertions, 102 deletions
diff --git a/database.h b/database.h index 7d61507..1299ab0 100644 --- a/database.h +++ b/database.h @@ -14,7 +14,6 @@ * along with this program. If not, see <https://www.gnu.org/licenses/>. */ -#include <assert.h> #include <err.h> #include <errno.h> #include <limits.h> @@ -26,24 +25,29 @@ #include <sys/stat.h> #include <sysexits.h> -#define DATABASE_PATH "litterbox/litterbox.sqlite" - #define SQL(...) #__VA_ARGS__ +#define DATABASE_PATH "litterbox/litterbox.sqlite" + enum { DatabaseVersion = 0 }; enum Type { Privmsg, - Action, Notice, + Action, Join, Part, - Kick, Quit, + Kick, Nick, Topic, }; +static inline void dbExec(sqlite3 *db, const char *sql) { + int error = sqlite3_exec(db, sql, NULL, NULL, NULL); + if (error) errx(EX_SOFTWARE, "%s: %s", sqlite3_errmsg(db), sql); +} + static inline sqlite3 *dbOpen(char *path, int flags) { char *base = strrchr(path, '/'); if (flags & SQLITE_OPEN_CREATE && base) { @@ -62,9 +66,7 @@ static inline sqlite3 *dbOpen(char *path, int flags) { if (error) errx(EX_NOINPUT, "%s: %s", path, sqlite3_errmsg(db)); sqlite3_busy_timeout(db, 1000); - - error = sqlite3_exec(db, SQL(PRAGMA foreign_keys = true;), NULL, NULL, NULL); - if (error) errx(EX_SOFTWARE, "sqlite3_exec: %s", sqlite3_errmsg(db)); + dbExec(db, SQL(PRAGMA foreign_keys = true;)); return db; } @@ -96,21 +98,11 @@ static inline sqlite3 *dbFind(int flags) { return NULL; } -static inline void dbBegin(sqlite3 *db) { - int error = sqlite3_exec(db, SQL(BEGIN TRANSACTION;), NULL, NULL, NULL); - if (error) errx(EX_SOFTWARE, "sqlite3_exec: %s", sqlite3_errmsg(db)); -} - -static inline void dbCommit(sqlite3 *db) { - int error = sqlite3_exec(db, SQL(COMMIT TRANSACTION;), NULL, NULL, NULL); - if (error) errx(EX_SOFTWARE, "sqlite3_exec: %s", sqlite3_errmsg(db)); -} - static inline sqlite3_stmt * dbPrepare(sqlite3 *db, unsigned flags, const char *sql) { sqlite3_stmt *stmt; int error = sqlite3_prepare_v3(db, sql, -1, flags, &stmt, NULL); - if (error) errx(EX_SOFTWARE, "sqlite3_prepare_v3: 
%s", sqlite3_errmsg(db)); + if (error) errx(EX_SOFTWARE, "%s: %s", sqlite3_errmsg(db), sql); return stmt; } @@ -137,14 +129,14 @@ static inline int dbStep(sqlite3_stmt *stmt) { int error = sqlite3_step(stmt); if (error == SQLITE_ROW || error == SQLITE_DONE) return error; errx( - EX_SOFTWARE, "sqlite3_step: %s", - sqlite3_errmsg(sqlite3_db_handle(stmt)) + EX_SOFTWARE, "%s: %s", + sqlite3_errmsg(sqlite3_db_handle(stmt)), sqlite3_expanded_sql(stmt) ); } static inline int dbVersion(sqlite3 *db) { sqlite3_stmt *stmt = dbPrepare(db, 0, SQL(PRAGMA user_version;)); - assert(SQLITE_ROW == dbStep(stmt)); + dbStep(stmt); int version = sqlite3_column_int(stmt, 0); sqlite3_finalize(stmt); return version; @@ -214,8 +206,7 @@ static const char *InitSQL = SQL( ); static inline void dbInit(sqlite3 *db) { - int error = sqlite3_exec(db, InitSQL, NULL, NULL, NULL); - if (error) errx(EX_SOFTWARE, "sqlite3_exec: %s", sqlite3_errmsg(db)); + dbExec(db, InitSQL); } static const char *MigrationSQL[] = { @@ -224,7 +215,6 @@ static const char *MigrationSQL[] = { static inline void dbMigrate(sqlite3 *db) { for (int version = dbVersion(db); version < DatabaseVersion; ++version) { - int error = sqlite3_exec(db, MigrationSQL[version], NULL, NULL, NULL); - if (error) errx(EX_SOFTWARE, "sqlite3_exec: %s", sqlite3_errmsg(db)); + dbExec(db, MigrationSQL[version]); } } diff --git a/litterbox.1 b/litterbox.1 index d7163da..d1c9b57 100644 --- a/litterbox.1 +++ b/litterbox.1 @@ -1,4 +1,4 @@ -.Dd December 1, 2019 +.Dd December 13, 2019 .Dt LITTERBOX 1 .Os . @@ -8,17 +8,17 @@ . .Sh SYNOPSIS .Nm +.Op Fl d Ar path .Op Fl h Ar host .Op Fl j Ar join .Op Fl n Ar nick .Op Fl p Ar port .Op Fl u Ar user .Op Fl w Ar pass -.Op Ar path . .Nm .Fl i | m -.Op Ar path +.Op Fl d Ar path . .Sh DESCRIPTION The @@ -31,6 +31,14 @@ which may be queried with The arguments are as follows: . .Bl -tag -width "-h host" +.It Fl d Ar path +Set the path to the database file. +The database must be initialized with +.Fl i . 
+See +.Sx FILES +for the default path. +. .It Fl h Ar host Connect to .Ar host . @@ -64,14 +72,6 @@ The default username is the same as the nickname. .It Fl w Ar pass Log in with the server password .Ar pass . -. -.It Ar path -The path to the database file. -The database must be initialized with -.Fl i . -See -.Sx FILES -for the default path. .El . .Sh FILES diff --git a/litterbox.c b/litterbox.c index ca59df0..2fa52fb 100644 --- a/litterbox.c +++ b/litterbox.c @@ -25,12 +25,14 @@ #include "database.h" int main(int argc, char *argv[]) { + char *path = NULL; bool init = false; bool migrate = false; int opt; - while (0 < (opt = getopt(argc, argv, "im"))) { + while (0 < (opt = getopt(argc, argv, "d:im"))) { switch (opt) { + break; case 'd': path = optarg; break; case 'i': init = true; break; case 'm': migrate = true; break; default: return EX_USAGE; @@ -40,12 +42,7 @@ int main(int argc, char *argv[]) { int flags = SQLITE_OPEN_READWRITE; if (init) flags |= SQLITE_OPEN_CREATE; - sqlite3 *db; - if (optind < argc) { - db = dbOpen(argv[optind], flags); - } else { - db = dbFind(flags); - } + sqlite3 *db = (path ? dbOpen(path, flags) : dbFind(flags)); if (!db) errx(EX_NOINPUT, "database not found"); if (init) { @@ -57,8 +54,7 @@ int main(int argc, char *argv[]) { return EX_OK; } - int version = dbVersion(db); - if (version != DatabaseVersion) { - errx(EX_CONFIG, "database needs migration"); + if (dbVersion(db) != DatabaseVersion) { + errx(EX_CONFIG, "database out of date; migrate with -m"); } } diff --git a/unscoop.c b/unscoop.c index 0dc255b..f2d79cb 100644 --- a/unscoop.c +++ b/unscoop.c @@ -14,7 +14,6 @@ * along with this program. If not, see <https://www.gnu.org/licenses/>. 
*/ -#include <assert.h> #include <err.h> #include <regex.h> #include <sqlite3.h> @@ -171,27 +170,25 @@ static const struct Format { } Formats[] = { { "generic", Generic, ARRAY_LEN(Generic), - .pattern = "([^/]+)/([^/]+)/[^/]+$", - .network = 1, .context = 2, + "([^/]+)/([^/]+)/[^/]+$", 1, 2, }, { "catgirl", Catgirl, ARRAY_LEN(Catgirl), - .pattern = "([^/]+)/([^/]+)/[0-9-]+[.]log$", - .network = 1, .context = 2, + "([^/]+)/([^/]+)/[0-9-]+[.]log$", 1, 2, }, { "irc", IRC, ARRAY_LEN(IRC), - .pattern = "^$", + "^$", 0, 0, }, { "textual", Textual, ARRAY_LEN(Textual), - .pattern = ( + ( "(([^ /]| [^(])+) [(][0-9A-F]+[)]/" "(Channels|Queries)/" "([^/]+)/" "[0-9-]+[.]txt$" ), - .network = 1, .context = 4, + 1, 4, }, }; @@ -222,19 +219,21 @@ bindMatch(sqlite3_stmt *stmt, int param, const char *str, regmatch_t match) { static sqlite3_stmt *insertName; static sqlite3_stmt *insertEvent; -static int paramNetwork, paramContext; +static int paramNetwork; +static int paramContext; +static int paramType; static void prepareInsert(sqlite3 *db) { - static const char *InsertName = SQL( + const char *InsertName = SQL( INSERT OR IGNORE INTO names (nick, user, host) VALUES (:nick, coalesce(:user, '*'), coalesce(:host, '*')); ); insertName = dbPrepare(db, SQLITE_PREPARE_PERSISTENT, InsertName); - // SQLite expects a colon in the timezone, but ISO8601 does not. - static const char *InsertEvent = SQL( + const char *InsertEvent = SQL( INSERT INTO events (time, type, context, name, target, message) SELECT + // SQLite expects a colon in the timezine, but ISO8601 does not. 
CASE WHEN :time LIKE '%Z' THEN datetime(:time) ELSE datetime(substr(:time, 1, 22) || ':' || substr(:time, -2)) @@ -250,6 +249,7 @@ static void prepareInsert(sqlite3 *db) { insertEvent = dbPrepare(db, SQLITE_PREPARE_PERSISTENT, InsertEvent); paramNetwork = sqlite3_bind_parameter_index(insertEvent, ":network"); paramContext = sqlite3_bind_parameter_index(insertEvent, ":context"); + paramType = sqlite3_bind_parameter_index(insertEvent, ":type"); } static void @@ -257,20 +257,15 @@ matchLine(const struct Format *format, const regex_t *regex, const char *line) { for (size_t i = 0; i < format->len; ++i) { const struct Matcher *matcher = &format->matchers[i]; regmatch_t match[ParamCap]; - int error = regexec(&regex[i], line, ParamCap, match, 0); - if (error) continue; + if (regexec(&regex[i], line, ParamCap, match, 0)) continue; sqlite3_clear_bindings(insertName); for (int i = 1; i <= sqlite3_bind_parameter_count(insertEvent); ++i) { if (i == paramNetwork || i == paramContext) continue; - dbBindText(insertEvent, i, NULL, -1); + sqlite3_bind_null(insertEvent, i); } - dbBindInt( - insertEvent, - sqlite3_bind_parameter_index(insertEvent, ":type"), - matcher->type - ); + dbBindInt(insertEvent, paramType, matcher->type); for (size_t i = 0; i < ARRAY_LEN(matcher->params); ++i) { const char *param = matcher->params[i]; if (!param) continue; @@ -289,6 +284,25 @@ matchLine(const struct Format *format, const regex_t *regex, const char *line) { } } +static void dedupEvents(sqlite3 *db) { + if (sqlite3_libversion_number() < 3025000) { + errx(EX_CONFIG, "SQLite version 3.25.0 or newer required"); + } + const char *Delete = SQL( + WITH potentials (event, diff) AS ( + SELECT event, event - first_value(event) OVER matching + FROM events JOIN names USING (name) + WINDOW matching ( + PARTITION BY time, type, context, nick, target, message + ORDER BY event + ) + ), duplicates AS (SELECT event FROM potentials WHERE diff > 50) + DELETE FROM events WHERE event IN duplicates; + ); + dbExec(db, 
Delete); + printf("deleted %d events\n", sqlite3_changes(db)); +} + int main(int argc, char *argv[]) { char *path = NULL; bool dedup = false; @@ -313,42 +327,27 @@ int main(int argc, char *argv[]) { if (!db) errx(EX_NOINPUT, "database not found"); if (dbVersion(db) != DatabaseVersion) { - errx(EX_CONFIG, "database needs migration"); + errx(EX_CONFIG, "database out of date; migrate with litterbox -m"); } if (dedup) { - if (sqlite3_libversion_number() < 3025000) { - errx(EX_CONFIG, "SQLite version 3.25.0 or newer required"); - } - static const char *Dedup = SQL( - WITH potentials (event, diff) AS ( - SELECT event, event - first_value(event) OVER ( - PARTITION BY time, type, context, nick, target, message - ORDER BY event - ) - FROM events JOIN names USING (name) - ), duplicates AS (SELECT event FROM potentials WHERE diff > 50) - DELETE FROM events WHERE event IN duplicates; - ); - int error = sqlite3_exec(db, Dedup, NULL, NULL, NULL); - if (error) { - errx(EX_SOFTWARE, "sqlite3_exec: %s", sqlite3_errmsg(db)); - } - printf("deleted %d events\n", sqlite3_changes(db)); + dedupEvents(db); + sqlite3_close(db); return EX_OK; } + regex_t pathRegex = compile(format->pattern); regex_t regex[format->len]; for (size_t i = 0; i < format->len; ++i) { regex[i] = compile(format->matchers[i].pattern); } - regex_t pathRegex = compile(format->pattern); - static const char *InsertContext = SQL( + const char *InsertContext = SQL( INSERT OR IGNORE INTO contexts (network, name, query) - SELECT + VALUES ( :network, :context, - NOT (:context LIKE '#%' OR :context LIKE '&%'); + NOT (:context LIKE '#%' OR :context LIKE '&%') + ); ); sqlite3_stmt *insertContext = dbPrepare( db, SQLITE_PREPARE_PERSISTENT, InsertContext @@ -361,7 +360,10 @@ int main(int argc, char *argv[]) { dbBindText(insertEvent, paramContext, context, -1); size_t sizeTotal = 0; + size_t sizeRead = 0; + size_t sizePercent = -1; regmatch_t match[argc][ParamCap]; + for (int i = optind; i < argc; ++i) { int error = 
regexec(&pathRegex, argv[i], ParamCap, match[i], 0); if (error && (!network || !context)) { @@ -374,28 +376,26 @@ int main(int argc, char *argv[]) { if (error) err(EX_NOINPUT, "%s", argv[i]); sizeTotal += st.st_size; } - size_t sizeRead = 0; - size_t sizePercent = 101; + if (!sizeTotal) errx(EX_NOINPUT, "no input files"); char *line = NULL; size_t cap = 0; for (int i = optind; i < argc; ++i) { if (!argv[i]) continue; + FILE *file = fopen(argv[i], "r"); if (!file) err(EX_NOINPUT, "%s", argv[i]); - dbBegin(db); + dbExec(db, SQL(BEGIN TRANSACTION;)); + regmatch_t pathNetwork = match[i][format->network]; + regmatch_t pathContext = match[i][format->context]; if (!network) { - bindMatch(insertContext, 1, argv[i], match[i][format->network]); - bindMatch( - insertEvent, paramNetwork, argv[i], match[i][format->network] - ); + bindMatch(insertContext, 1, argv[i], pathNetwork); + bindMatch(insertEvent, paramNetwork, argv[i], pathNetwork); } if (!context) { - bindMatch(insertContext, 2, argv[i], match[i][format->context]); - bindMatch( - insertEvent, paramContext, argv[i], match[i][format->context] - ); + bindMatch(insertContext, 2, argv[i], pathContext); + bindMatch(insertEvent, paramContext, argv[i], pathContext); } dbStep(insertContext); sqlite3_reset(insertContext); @@ -413,7 +413,7 @@ int main(int argc, char *argv[]) { if (ferror(file)) err(EX_IOERR, "%s", argv[i]); fclose(file); - dbCommit(db); + dbExec(db, SQL(COMMIT TRANSACTION;)); } printf("\n"); |