From 0583e4c0c36ac3ebd4479c674ebd1dd0d5e8fe0d Mon Sep 17 00:00:00 2001 From: "C. McEnroe" Date: Sun, 12 Apr 2020 14:09:34 -0400 Subject: Fetch and parse BODYSTRUCTURE --- archive.c | 7 ++-- archive.h | 51 +++++++++++++++++++++++++- export.c | 87 +++++++++++++++++++++++++------------------- imap.h | 1 + parse.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 229 insertions(+), 39 deletions(-) diff --git a/archive.c b/archive.c index c84b74e..69d3c77 100644 --- a/archive.c +++ b/archive.c @@ -112,6 +112,7 @@ int main(int argc, char *argv[]) { Concat, Logout, } state = Ready; + size_t exportTags = 0; enum Atom login = atom("login"); enum Atom examine = atom("examine"); @@ -185,6 +186,7 @@ int main(int argc, char *argv[]) { threads = resp.data; resp.data = (struct List) {0}; if (exportFetch(imap, export, threads)) { + exportTags = 1; state = Export; } else { concatFetch(imap, concat, threads); @@ -195,9 +197,10 @@ int main(int argc, char *argv[]) { break; case Export: { if (resp.resp == AtomFetch) { if (!resp.data.len) errx(EX_PROTOCOL, "missing FETCH data"); - exportData(dataCheck(resp.data.ptr[0], List).list); + struct List items = dataCheck(resp.data.ptr[0], List).list; + if (exportData(imap, export, items)) exportTags++; } - if (resp.tag != export) break; + if (resp.tag != export || --exportTags) break; concatFetch(imap, concat, threads); state = Concat; } diff --git a/archive.h b/archive.h index 85428fe..820ca37 100644 --- a/archive.h +++ b/archive.h @@ -64,10 +64,59 @@ static inline void envelopeFree(struct Envelope envelope) { free(envelope.bcc.addrs); } +struct BodyPart { + bool multipart; + union { + const char *type; + struct { + size_t len; + struct BodyPart *ptr; + } parts; + }; + const char *subtype; + struct List params; + const char *id; + const char *description; + const char *encoding; + uint32_t size; + struct { + struct Envelope *envelope; + struct BodyPart *structure; + uint32_t lines; + } message; + struct { + uint32_t lines; + } text; + const char *md5; + struct { + const char *type; + struct List params; + } disposition; + struct List language; + struct List location; +}; + +static inline void bodyPartFree(struct BodyPart part) { + if (part.multipart) { + for (size_t i = 0; i < part.parts.len; ++i) { + bodyPartFree(part.parts.ptr[i]); + } + } + if (part.message.envelope) { + envelopeFree(*part.message.envelope); + free(part.message.envelope); + } + if (part.message.structure) { + bodyPartFree(*part.message.structure); + free(part.message.structure); + } +} + void parseEnvelope(struct Envelope *envelope, struct List list); +void parseBodyPart(struct BodyPart *part, struct List list); bool exportFetch(FILE *imap, enum Atom tag, struct List threads); -void exportData(struct List items); +bool exportData(FILE *imap, enum Atom tag, struct List items); void concatFetch(FILE *imap, enum Atom tag, struct List threads); void concatData(struct List threads, struct List items); diff --git a/export.c b/export.c index 0f35602..8fc82d8 100644 --- a/export.c +++ b/export.c @@ -60,15 +60,57 @@ bool exportFetch(FILE *imap, enum Atom tag, struct List threads) { } fprintf( imap, - " (UID ENVELOPE" + " (UID ENVELOPE BODYSTRUCTURE" " BODY[HEADER.FIELDS (" MBOX_HEADERS ")] BODY[TEXT])\r\n" ); return true; } -void exportData(struct List items) { +static void exportEnvelope( + uint32_t uid, struct Envelope *envelope, char *header, char *body +) { + const char *path; + FILE *file; + int error; + + path = uidPath(uid, "mbox"); + file = fopen(path, "w"); + if (!file) err(EX_CANTCREAT, "%s", path); + error = 0 + || mboxFrom(file) + || mboxHeader(file, header) + || mboxBody(file, body) + || fclose(file); + if (error) err(EX_IOERR, "%s", path); + + const char *dest = messagePath(envelope->messageID, "mbox"); + unlink(dest); + error = link(path, dest); + if (error) err(EX_CANTCREAT, "%s", dest); + + path = uidPath(uid, "html"); + file = fopen(path, "w"); + if (!file) err(EX_CANTCREAT, "%s", path); + error = 0 + || htmlMessageHead(file, envelope) + || htmlMessageTail(file) + || fclose(file); + if (error) err(EX_IOERR, "%s", path); + + path = uidPath(uid, "atom"); + file = fopen(path, "w"); + if (!file) err(EX_CANTCREAT, "%s", path); + error = 0 + || atomEntryHead(file, envelope) + || atomEntryTail(file) + || fclose(file); + if (error) err(EX_IOERR, "%s", path); +} + +bool exportData(FILE *imap, enum Atom tag, struct List items) { uint32_t uid = 0; struct Envelope envelope = {0}; + struct BodyPart structure = {0}; char *header = NULL; char *body = NULL; @@ -85,7 +127,7 @@ void exportData(struct List items) { } else { errx(EX_PROTOCOL, "invalid data item name"); } - + struct Data data = items.ptr[i + 1]; switch (name) { break; case AtomBody: @@ -94,6 +136,8 @@ void exportData(struct List items) { uid = dataCheck(data, Number).number; break; case AtomEnvelope: parseEnvelope(&envelope, dataCheck(data, List).list); + break; case AtomBodyStructure: + parseBodyPart(&structure, dataCheck(data, List).list); break; case AtomHeaderFields: header = dataCheck(data, String).string; break; case AtomText: @@ -107,38 +151,9 @@ void exportData(struct List items) { if (!header) errx(EX_PROTOCOL, "missing BODY[HEADER.FIELDS] data item"); if (!body) errx(EX_PROTOCOL, "missing BODY[TEXT] data item"); - const char *path; - FILE *file; - int error; - - path = uidPath(uid, "mbox"); - file = fopen(path, "w"); - if (!file) err(EX_CANTCREAT, "%s", path); - error = mboxFrom(file) - || mboxHeader(file, header) - || mboxBody(file, body) - || fclose(file); - if (error) err(EX_IOERR, "%s", path); - - const char *dest = messagePath(envelope.messageID, "mbox"); - error = link(path, dest); - if (error) err(EX_CANTCREAT, "%s", dest); - - path = uidPath(uid, "html"); - file = fopen(path, "w"); - if (!file) err(EX_CANTCREAT, "%s", path); - error = htmlMessageHead(file, &envelope) - || htmlMessageTail(file) - || fclose(file); - if (error) err(EX_IOERR, "%s", path); - - path = uidPath(uid, "atom"); - file = fopen(path, "w"); - if (!file) err(EX_CANTCREAT, "%s", path); - error = atomEntryHead(file, &envelope) - || atomEntryTail(file) - || fclose(file); - if (error) err(EX_IOERR, "%s", path); - + exportEnvelope(uid, &envelope, header, body); envelopeFree(envelope); + bodyPartFree(structure); + + return false; } diff --git a/imap.h b/imap.h index 3a30d5a..a364111 100644 --- a/imap.h +++ b/imap.h @@ -55,6 +55,7 @@ X(AtomThread, "THREAD") \ X(AtomUID, "UID") \ X(AtomEnvelope, "ENVELOPE") \ + X(AtomBodyStructure, "BODYSTRUCTURE") \ X(AtomBody, "BODY") \ X(AtomHeaderFields, "HEADER.FIELDS") \ X(AtomText, "TEXT") \ diff --git a/parse.c b/parse.c index d3c907f..33acd5b 100644 --- a/parse.c +++ b/parse.c @@ -102,3 +102,125 @@ void parseEnvelope(struct Envelope *envelope, struct List list) { } envelope->messageID = parseID(dataCheck(list.ptr[MessageID], String).string); } + +static void parseDisposition(struct BodyPart *part, struct List list) { + if (list.len < 2) errx(EX_PROTOCOL, "missing disposition fields"); + part->disposition.type = dataCheck(list.ptr[0], String).string; + if (list.ptr[1].type == List) { + part->disposition.params = list.ptr[1].list; + } +} + +static void parseNonMultipart(struct BodyPart *part, struct List list) { + enum { Type, Subtype, Params, ID, Description, Encoding, Size, BasicLen }; + if (list.len < BasicLen) errx(EX_PROTOCOL, "missing body part fields"); + + part->multipart = false; + part->type = dataCheck(list.ptr[Type], String).string; + part->subtype = dataCheck(list.ptr[Subtype], String).string; + if (list.ptr[Params].type == List) { + part->params = list.ptr[Params].list; + } + if (list.ptr[ID].type == String) { + part->id = list.ptr[ID].string; + } + if (list.ptr[Description].type == String) { + part->description = list.ptr[Description].string; + } + part->encoding = dataCheck(list.ptr[Encoding], String).string; + part->size = dataCheck(list.ptr[Size], Number).number; + + list.len -= BasicLen; + list.ptr += BasicLen; + + if (!strcmp(part->type, "MESSAGE") && !strcmp(part->subtype, "RFC822")) { + enum { Envelope, BodyStructure, Lines, MessageLen }; + if (list.len < MessageLen) { + errx(EX_PROTOCOL, "missing body part message fields"); + } + part->message.envelope = calloc(1, sizeof(*part->message.envelope)); + part->message.structure = calloc(1, sizeof(*part->message.structure)); + if (!part->message.envelope || !part->message.structure) { + err(EX_OSERR, "calloc"); + } + + parseEnvelope( + part->message.envelope, + dataCheck(list.ptr[Envelope], List).list + ); + parseBodyPart( + part->message.structure, + dataCheck(list.ptr[BodyStructure], List).list + ); + part->message.lines = dataCheck(list.ptr[Lines], Number).number; + + list.len -= MessageLen; + list.ptr += MessageLen; + } + + if (!strcmp(part->type, "TEXT")) { + if (!list.len) errx(EX_PROTOCOL, "missing body part text lines"); + part->text.lines = dataCheck(list.ptr[0], Number).number; + list.len--; + list.ptr++; + } + + enum { MD5, Disposition, Language, Location }; + if (MD5 < list.len && list.ptr[MD5].type == String) { + part->md5 = list.ptr[MD5].string; + } + if (Disposition < list.len && list.ptr[Disposition].type == List) { + parseDisposition(part, list.ptr[Disposition].list); + } + if (Language < list.len && list.ptr[Language].type == List) { + part->language = list.ptr[Language].list; + } + if (Location < list.len && list.ptr[Location].type == List) { + part->location = list.ptr[Location].list; + } +} + +static void parseMultipart(struct BodyPart *part, struct List list) { + part->multipart = true; + for ( + part->parts.len = 0; + part->parts.len < list.len && list.ptr[part->parts.len].type == List; + part->parts.len++ + ); + part->parts.ptr = calloc(part->parts.len, sizeof(*part->parts.ptr)); + if (!part->parts.ptr) err(EX_OSERR, "calloc"); + + for (size_t i = 0; i < part->parts.len; ++i) { + parseBodyPart(&part->parts.ptr[i], list.ptr[i].list); + } + list.len -= part->parts.len; + list.ptr += part->parts.len; + + if (!list.len) errx(EX_PROTOCOL, "missing multipart subtype"); + part->subtype = dataCheck(list.ptr[0], String).string; + list.len--; + list.ptr++; + + enum { Params, Disposition, Language, Location }; + if (Params < list.len && list.ptr[Params].type == List) { + part->params = list.ptr[Params].list; + } + if (Disposition < list.len && list.ptr[Disposition].type == List) { + parseDisposition(part, list.ptr[Disposition].list); + } + if (Language < list.len && list.ptr[Language].type == List) { + part->language = list.ptr[Language].list; + } + if (Location < list.len && list.ptr[Location].type == List) { + part->location = list.ptr[Location].list; + } +} + +void parseBodyPart(struct BodyPart *part, struct List list) { + if (!list.len) errx(EX_PROTOCOL, "empty body part"); + if (list.ptr[0].type != List) { + parseNonMultipart(part, list); + } else { + parseMultipart(part, list); + } +} -- cgit 1.4.1