From 9b09a5ff483aef05dc5b4d9ab0fd0243d21cb1d3 Mon Sep 17 00:00:00 2001 From: "C. McEnroe" Date: Sat, 12 Jun 2021 19:20:47 -0400 Subject: Use SEARCH for a subset of thread roots This does way less duplicate work by fetching all threads and all thread root envelopes once, then doing searches for subsets of thread roots. --- archive.c | 76 ++++++++++++++++++------------------------------------- archive.h | 34 +++++++++++++++---------- atom.c | 12 ++++----- concat.c | 86 +++++++++++++++++++++++++++++++++++++++++++++------------------ html.c | 75 +++++++++++++++++++++++++++++-------------------------- imap.h | 1 + 6 files changed, 154 insertions(+), 130 deletions(-) diff --git a/archive.c b/archive.c index 74491a1..20ea5d0 100644 --- a/archive.c +++ b/archive.c @@ -80,12 +80,7 @@ static void createDirs(void) { createDir("thread"); } -static struct Search { - size_t cap; - size_t len; - char **names; - char **exprs; -} search; +struct Search search; static void searchAdd(const char *name, const char *expr) { if (search.len == search.cap) { @@ -134,51 +129,24 @@ static void searchDefault(void) { searchAdd("index", "ALL"); } -static const char *algo = "REFERENCES"; - -static void -searchThreads(struct IMAP *imap, const char *name, const char *expr) { +static void searchThreads( + struct IMAP *imap, struct List threads, const struct Envelope *envelopes, + const char *name, const char *expr +) { struct Resp resp; - struct List threads = {0}; - struct List envelopeItems = {0}; - struct Envelope *envelopes = NULL; - - enum Atom thread = atom("thread"); - fprintf( - imap->w, "%s UID THREAD %s UTF-8 %s\r\n", - Atoms[thread], algo, expr - ); - for (; resp = respOk(imapResp(imap)), resp.tag != thread; respFree(resp)) { - if (resp.resp != AtomThread) continue; - threads = resp.data; - resp.data = (struct List) {0}; // prevent freeing threads with resp + struct List roots = {0}; + + enum Atom search = atom("search"); + concatSearch(imap->w, search, threads, expr); + for (; resp = respOk(imapResp(imap)), resp.tag != search; respFree(resp)) { + if (resp.resp != AtomSearch) continue; + roots = resp.data; + resp.data = (struct List) {0}; // prevent freeing roots with resp } respFree(resp); - if (!threads.len) goto concat; - enum Atom concat = atom("concat"); - envelopes = calloc(threads.len, sizeof(*envelopes)); - if (!envelopes) err(EX_OSERR, "calloc"); - concatFetch(imap->w, concat, threads); - for (; resp = respOk(imapResp(imap)), resp.tag != concat; respFree(resp)) { - if (resp.resp != AtomFetch) continue; - if (!resp.data.len) errx(EX_PROTOCOL, "missing FETCH data"); - // Prevent freeing data in envelopes with resp: - struct Data items = dataTake(&resp.data.ptr[0]); - concatData(threads, envelopes, dataCheck(items, List).list); - listPush(&envelopeItems, items); - } - respFree(resp); - -concat: - concatSearch(name, threads, envelopes, search.names, search.len); - - for (size_t i = 0; i < threads.len; ++i) { - envelopeFree(envelopes[i]); - } - free(envelopes); - listFree(envelopeItems); - listFree(threads); + concatIndex(name, roots, threads, envelopes); + listFree(roots); } int main(int argc, char *argv[]) { @@ -191,6 +159,7 @@ int main(int argc, char *argv[]) { bool idle = false; const char *mailbox = "Archive"; + const char *algo = "REFERENCES"; const char *searchPath = NULL; for ( @@ -198,7 +167,7 @@ int main(int argc, char *argv[]) { 0 < (opt = getopt(argc, argv, "A:C:H:S:T:a:h:im:p:qs:u:vw:y:")); ) { switch (opt) { - break; case 'A': concatSearchEntries = strtoul(optarg, NULL, 10); + break; case 'A': concatIndexEntries = strtoul(optarg, NULL, 10); break; case 'C': { int error = chdir(optarg); if (error) err(EX_NOINPUT, "%s", optarg); @@ -362,6 +331,13 @@ concat:; respFree(resp); concatThreads(threads, envelopes); + for (size_t i = 0; i < search.len; ++i) { + searchThreads( + &imap, threads, envelopes, + search.names[i], search.exprs[i] + ); + } + for (size_t i = 0; i < threads.len; ++i) { envelopeFree(envelopes[i]); } @@ -369,10 +345,6 @@ concat:; listFree(envelopeItems); listFree(threads); - for (size_t i = 0; i < search.len; ++i) { - searchThreads(&imap, search.names[i], search.exprs[i]); - } - fflush(stdout); uidWrite("UIDNEXT", uidNext); if (!idle) goto logout; diff --git a/archive.h b/archive.h index 7740b98..d6b9608 100644 --- a/archive.h +++ b/archive.h @@ -44,7 +44,7 @@ #define PATH_THREAD "thread/[messageID].[type]" #define PATH_ATTACHMENT \ "attachment/[messageID]/[section]/[name][disposition][.][subtype]" -#define PATH_SEARCH "[name].[type]" +#define PATH_INDEX "[name].[type]" #define MBOX_HEADERS \ "Date Subject From Sender Reply-To To Cc Bcc " \ @@ -60,6 +60,13 @@ extern const char *baseMailto; extern const char *baseSubscribe; extern const char *baseStylesheet; +extern struct Search { + size_t cap; + size_t len; + char **names; + char **exprs; +} search; + static inline struct U32 { char s[sizeof("4294967295")]; } u32(uint32_t u) { @@ -215,15 +222,18 @@ bool exportFetch(FILE *imap, enum Atom tag, struct List threads); bool exportData(FILE *imap, enum Atom tag, struct List items); extern const char *concatHead; -extern size_t concatSearchEntries; +extern size_t concatIndexEntries; void concatFetch(FILE *imap, enum Atom tag, struct List threads); +void concatSearch( + FILE *imap, enum Atom tag, struct List threads, const char *expr +); void concatData( struct List threads, struct Envelope *envelopes, struct List items ); void concatThreads(struct List threads, const struct Envelope *envelopes); -void concatSearch( - const char *name, struct List threads, const struct Envelope *envelopes, - char *searches[const], size_t len +void concatIndex( + const char *name, struct List roots, + struct List threads, const struct Envelope *envelopes ); int mboxFrom(FILE *file); @@ -235,8 +245,8 @@ int atomContent(FILE *file, const char *content); int atomEntryClose(FILE *file); int atomThreadOpen(FILE *file, const struct Envelope *envelope); int atomThreadClose(FILE *file); -int atomSearchOpen(FILE *file, const char *name); -int atomSearchClose(FILE *file); +int atomIndexOpen(FILE *file, const char *name); +int atomIndexClose(FILE *file); int htmlMessageOpen(FILE *file, const struct Envelope *envelope, bool nested); int htmlInline(FILE *file, const struct BodyPart *part, const char *content); @@ -251,11 +261,9 @@ int htmlThreadOpen(FILE *file, const struct Envelope *envelope); int htmlSubthreadOpen(FILE *file, struct List thread); int htmlSubthreadClose(FILE *file); int htmlThreadClose(FILE *file); -int htmlSearchHead(FILE *file, const char *name); -int htmlSearchOpen( - FILE *file, const char *name, char *searches[const], size_t len -); -int htmlSearchThread( +int htmlIndexHead(FILE *file, const char *name); +int htmlIndexOpen(FILE *file, const char *name); +int htmlIndexThread( FILE *file, const struct Envelope *envelope, struct List thread ); -int htmlSearchClose(FILE *file); +int htmlIndexClose(FILE *file); diff --git a/atom.c b/atom.c index a58f71c..b49a2b7 100644 --- a/atom.c +++ b/atom.c @@ -163,18 +163,18 @@ int atomThreadClose(FILE *file) { return templateRender(file, Q(), NULL, NULL); } -static char *atomSearchURL(const char *name, const char *type) { +static char *atomIndexURL(const char *name, const char *type) { struct Variable vars[] = { { "name", name }, { "type", type }, {0}, }; - return templateString("/" PATH_SEARCH, vars, escapeURL); + return templateString("/" PATH_INDEX, vars, escapeURL); } -int atomSearchOpen(FILE *file, const char *name) { - char *atom = atomSearchURL(name, "atom"); - char *html = atomSearchURL(name, "html"); +int atomIndexOpen(FILE *file, const char *name) { + char *atom = atomIndexURL(name, "atom"); + char *html = atomIndexURL(name, "html"); const char *template = XML_DECL Q( bubger @@ -200,6 +200,6 @@ int atomSearchOpen(FILE *file, const char *name) { return error; } -int atomSearchClose(FILE *file) { +int atomIndexClose(FILE *file) { return templateRender(file, Q(), NULL, NULL); } diff --git a/concat.c b/concat.c index 800a7a2..54a49a7 100644 --- a/concat.c +++ b/concat.c @@ -27,6 +27,7 @@ #include #include +#include #include #include #include @@ -54,6 +55,17 @@ void concatFetch(FILE *imap, enum Atom tag, struct List threads) { fprintf(imap, " (UID ENVELOPE)\r\n"); } +void concatSearch( + FILE *imap, enum Atom tag, struct List threads, const char *expr +) { + fprintf(imap, "%s UID SEARCH CHARSET UTF-8 UID ", Atoms[tag]); + for (size_t i = 0; i < threads.len; ++i) { + uint32_t root = threadRoot(dataCheck(threads.ptr[i], List).list); + fprintf(imap, "%s%" PRIu32, (i ? "," : ""), root); + } + fprintf(imap, " %s\r\n", expr); +} + void concatData( struct List threads, struct Envelope *envelopes, struct List items ) { @@ -230,13 +242,13 @@ void concatThreads(struct List threads, const struct Envelope *envelopes) { } } -static char *searchPath(const char *name, const char *type) { +static char *indexPath(const char *name, const char *type) { struct Variable vars[] = { { "name", name }, { "type", type }, {0}, }; - return templateString(PATH_SEARCH, vars, escapePath); + return templateString(PATH_INDEX, vars, escapePath); } static int numberCompare(const void *_a, const void *_b) { @@ -261,23 +273,43 @@ static int sortCompare(const void *_a, const void *_b) { } } -size_t concatSearchEntries = 20; +size_t concatIndexEntries = 20; -void concatSearch( - const char *name, struct List threads, const struct Envelope *envelopes, - char *searches[const], size_t len +void concatIndex( + const char *name, struct List roots, + struct List threads, const struct Envelope *envelopes ) { - char *path = searchPath(name, "atom"); + bool *bitmap = calloc(threads.len, sizeof(*bitmap)); + if (!bitmap) err(EX_OSERR, "calloc"); + + for (size_t i = 0; i < roots.len; ++i) { + dataCheck(roots.ptr[i], Number); + } + for (size_t i = 0; i < threads.len; ++i) { + uint32_t root = threadRoot(dataCheck(threads.ptr[i], List).list); + for (size_t j = 0; j < roots.len; ++j) { + if (root == roots.ptr[j].number) { + bitmap[i] = true; + break; + } + } + } + + char *path = indexPath(name, "atom"); FILE *file = fopen(path, "w"); if (!file) err(EX_CANTCREAT, "%s", path); - int error = atomSearchOpen(file, name); + int error = atomIndexOpen(file, name); if (error) err(EX_IOERR, "%s", path); struct List flat = {0}; - listFlatten(&flat, threads); + for (size_t i = 0; i < threads.len; ++i) { + if (!bitmap[i]) continue; + listFlatten(&flat, threads.ptr[i].list); + } qsort(flat.ptr, flat.len, sizeof(*flat.ptr), numberCompare); - for (size_t i = 0; i < flat.len && i < concatSearchEntries; ++i) { + + for (size_t i = 0; i < flat.len && i < concatIndexEntries; ++i) { uint32_t uid = dataCheck(flat.ptr[i], Number).number; char *src = uidPath(uid, "atom"); error = concatFile(file, src); @@ -286,30 +318,37 @@ void concatSearch( } listFree(flat); - error = atomSearchClose(file) || fclose(file); + error = atomIndexClose(file) || fclose(file); if (error) err(EX_IOERR, "%s", path); if (!quiet) printf("%s\n", path); free(path); - - struct Sort *order = calloc(threads.len, sizeof(*order)); + + size_t len = 0; + struct Sort *order = calloc(roots.len, sizeof(*order)); if (!order) err(EX_OSERR, "calloc"); for (size_t i = 0; i < threads.len; ++i) { - order[i].index = i; - order[i].created = envelopes[i].time; + if (!bitmap[i]) continue; struct stat status; char *path = threadPath(envelopes[i].messageID, "html"); - if (!stat(path, &status)) order[i].updated = status.st_mtime; + error = stat(path, &status); + if (error) err(EX_DATAERR, "%s", path); free(path); + + order[len].index = i; + order[len].created = envelopes[i].time; + order[len].updated = status.st_mtime; + len++; } - qsort(order, threads.len, sizeof(*order), sortCompare); + qsort(order, len, sizeof(*order), sortCompare); + free(bitmap); - path = searchPath(name, "html"); + path = indexPath(name, "html"); file = fopen(path, "w"); if (!file) err(EX_CANTCREAT, "%s", path); - error = htmlSearchHead(file, name); + error = htmlIndexHead(file, name); if (error) err(EX_IOERR, "%s", path); if (concatHead) { @@ -317,19 +356,18 @@ void concatSearch( if (error) err(EX_IOERR, "%s", path); } - error = htmlSearchOpen(file, name, searches, len); + error = htmlIndexOpen(file, name); if (error) err(EX_IOERR, "%s", path); - for (size_t i = threads.len - 1; i < threads.len; --i) { - if (!order[i].updated) continue; + for (size_t i = len - 1; i < len; --i) { const struct Envelope *envelope = &envelopes[order[i].index]; struct List thread = dataCheck(threads.ptr[order[i].index], List).list; - error = htmlSearchThread(file, envelope, thread); + error = htmlIndexThread(file, envelope, thread); if (error) err(EX_IOERR, "%s", path); } free(order); - error = htmlSearchClose(file) || fclose(file); + error = htmlIndexClose(file) || fclose(file); if (error) err(EX_IOERR, "%s", path); if (!quiet) printf("%s\n", path); free(path); diff --git a/html.c b/html.c index 6b25996..1dc531c 100644 --- a/html.c +++ b/html.c @@ -512,17 +512,17 @@ int htmlThreadClose(FILE *file) { || htmlFooter(file); } -static char *htmlSearchURL(const char *name, const char *type) { +static char *htmlIndexURL(const char *name, const char *type) { struct Variable vars[] = { { "name", name }, { "type", type }, {0}, }; - return templateString(PATH_SEARCH, vars, escapeURL); + return templateString(PATH_INDEX, vars, escapeURL); } -int htmlSearchHead(FILE *file, const char *name) { - char *atom = htmlSearchURL(name, "atom"); +int htmlIndexHead(FILE *file, const char *name) { + char *atom = htmlIndexURL(name, "atom"); const char *template = Q( @@ -545,40 +545,47 @@ int htmlSearchHead(FILE *file, const char *name) { return error; } -static int htmlSearchNav( - FILE *file, const char *name, char *searches[const], size_t len -) { - if (len < 2 || strcmp(name, "index")) return 0; - int error = templateRender(file, Q(