about summary refs log tree commit diff
diff options
context:
space:
mode:
author June McEnroe <june@causal.agency> 2020-04-12 14:09:34 -0400
committer June McEnroe <june@causal.agency> 2020-04-12 14:13:43 -0400
commit 0583e4c0c36ac3ebd4479c674ebd1dd0d5e8fe0d (patch)
tree eddd00889239bf88eb495d1f31fa735257f9f320
parent Do not use <h1> for subject lines (diff)
download bubger-0583e4c0c36ac3ebd4479c674ebd1dd0d5e8fe0d.tar.gz
         bubger-0583e4c0c36ac3ebd4479c674ebd1dd0d5e8fe0d.zip
Fetch and parse BODYSTRUCTURE
-rw-r--r-- archive.c 7
-rw-r--r-- archive.h 51
-rw-r--r-- export.c 87
-rw-r--r-- imap.h 1
-rw-r--r-- parse.c 122
5 files changed, 229 insertions, 39 deletions
diff --git a/archive.c b/archive.c
index c84b74e..69d3c77 100644
--- a/archive.c
+++ b/archive.c
@@ -112,6 +112,7 @@ int main(int argc, char *argv[]) {
 		Concat,
 		Logout,
 	} state = Ready;
+	size_t exportTags = 0;
 
 	enum Atom login = atom("login");
 	enum Atom examine = atom("examine");
@@ -185,6 +186,7 @@ int main(int argc, char *argv[]) {
 				threads = resp.data;
 				resp.data = (struct List) {0};
 				if (exportFetch(imap, export, threads)) {
+					exportTags = 1;
 					state = Export;
 				} else {
 					concatFetch(imap, concat, threads);
@@ -195,9 +197,10 @@ int main(int argc, char *argv[]) {
 			break; case Export: {
 				if (resp.resp == AtomFetch) {
 					if (!resp.data.len) errx(EX_PROTOCOL, "missing FETCH data");
-					exportData(dataCheck(resp.data.ptr[0], List).list);
+					struct List items = dataCheck(resp.data.ptr[0], List).list;
+					if (exportData(imap, export, items)) exportTags++;
 				}
-				if (resp.tag != export) break;
+				if (resp.tag != export || --exportTags) break;
 				concatFetch(imap, concat, threads);
 				state = Concat;
 			}
diff --git a/archive.h b/archive.h
index 85428fe..820ca37 100644
--- a/archive.h
+++ b/archive.h
@@ -64,10 +64,59 @@ static inline void envelopeFree(struct Envelope envelope) {
 	free(envelope.bcc.addrs);
 }
 
+struct BodyPart {
+	bool multipart;
+	union {
+		const char *type;
+		struct {
+			size_t len;
+			struct BodyPart *ptr;
+		} parts;
+	};
+	const char *subtype;
+	struct List params;
+	const char *id;
+	const char *description;
+	const char *encoding;
+	uint32_t size;
+	struct {
+		struct Envelope *envelope;
+		struct BodyPart *structure;
+		uint32_t lines;
+	} message;
+	struct {
+		uint32_t lines;
+	} text;
+	const char *md5;
+	struct {
+		const char *type;
+		struct List params;
+	} disposition;
+	struct List language;
+	struct List location;
+};
+
+static inline void bodyPartFree(struct BodyPart part) {
+	if (part.multipart) {
+		for (size_t i = 0; i < part.parts.len; ++i) {
+			bodyPartFree(part.parts.ptr[i]);
+		}
+	}
+	if (part.message.envelope) {
+		envelopeFree(*part.message.envelope);
+		free(part.message.envelope);
+	}
+	if (part.message.structure) {
+		bodyPartFree(*part.message.structure);
+		free(part.message.structure);
+	}
+}
+
 void parseEnvelope(struct Envelope *envelope, struct List list);
+void parseBodyPart(struct BodyPart *part, struct List list);
 
 bool exportFetch(FILE *imap, enum Atom tag, struct List threads);
-void exportData(struct List items);
+bool exportData(FILE *imap, enum Atom tag, struct List items);
 
 void concatFetch(FILE *imap, enum Atom tag, struct List threads);
 void concatData(struct List threads, struct List items);
diff --git a/export.c b/export.c
index 0f35602..8fc82d8 100644
--- a/export.c
+++ b/export.c
@@ -60,15 +60,57 @@ bool exportFetch(FILE *imap, enum Atom tag, struct List threads) {
 	}
 	fprintf(
 		imap,
-		" (UID ENVELOPE"
+		" (UID ENVELOPE BODYSTRUCTURE"
 		" BODY[HEADER.FIELDS (" MBOX_HEADERS ")] BODY[TEXT])\r\n"
 	);
 	return true;
 }
 
-void exportData(struct List items) {
+static void exportEnvelope(
+	uint32_t uid, struct Envelope *envelope, char *header, char *body
+) {
+	const char *path;
+	FILE *file;
+	int error;
+
+	path = uidPath(uid, "mbox");
+	file = fopen(path, "w");
+	if (!file) err(EX_CANTCREAT, "%s", path);
+	error = 0
+		|| mboxFrom(file)
+		|| mboxHeader(file, header)
+		|| mboxBody(file, body)
+		|| fclose(file);
+	if (error) err(EX_IOERR, "%s", path);
+
+	const char *dest = messagePath(envelope->messageID, "mbox");
+	unlink(dest);
+	error = link(path, dest);
+	if (error) err(EX_CANTCREAT, "%s", dest);
+
+	path = uidPath(uid, "html");
+	file = fopen(path, "w");
+	if (!file) err(EX_CANTCREAT, "%s", path);
+	error = 0
+		|| htmlMessageHead(file, envelope)
+		|| htmlMessageTail(file)
+		|| fclose(file);
+	if (error) err(EX_IOERR, "%s", path);
+
+	path = uidPath(uid, "atom");
+	file = fopen(path, "w");
+	if (!file) err(EX_CANTCREAT, "%s", path);
+	error = 0
+		|| atomEntryHead(file, envelope)
+		|| atomEntryTail(file)
+		|| fclose(file);
+	if (error) err(EX_IOERR, "%s", path);
+}
+
+bool exportData(FILE *imap, enum Atom tag, struct List items) {
 	uint32_t uid = 0;
 	struct Envelope envelope = {0};
+	struct BodyPart structure = {0};
 	char *header = NULL;
 	char *body = NULL;
 
@@ -85,7 +127,7 @@ void exportData(struct List items) {
 		} else {
 			errx(EX_PROTOCOL, "invalid data item name");
 		}
-		
+
 		struct Data data = items.ptr[i + 1];
 		switch (name) {
 			break; case AtomBody:
@@ -94,6 +136,8 @@ void exportData(struct List items) {
 				uid = dataCheck(data, Number).number;
 			break; case AtomEnvelope:
 				parseEnvelope(&envelope, dataCheck(data, List).list);
+			break; case AtomBodyStructure:
+				parseBodyPart(&structure, dataCheck(data, List).list);
 			break; case AtomHeaderFields:
 				header = dataCheck(data, String).string;
 			break; case AtomText:
@@ -107,38 +151,9 @@ void exportData(struct List items) {
 	if (!header) errx(EX_PROTOCOL, "missing BODY[HEADER.FIELDS] data item");
 	if (!body) errx(EX_PROTOCOL, "missing BODY[TEXT] data item");
 
-	const char *path;
-	FILE *file;
-	int error;
-
-	path = uidPath(uid, "mbox");
-	file = fopen(path, "w");
-	if (!file) err(EX_CANTCREAT, "%s", path);
-	error = mboxFrom(file)
-		|| mboxHeader(file, header)
-		|| mboxBody(file, body)
-		|| fclose(file);
-	if (error) err(EX_IOERR, "%s", path);
-
-	const char *dest = messagePath(envelope.messageID, "mbox");
-	error = link(path, dest);
-	if (error) err(EX_CANTCREAT, "%s", dest);
-
-	path = uidPath(uid, "html");
-	file = fopen(path, "w");
-	if (!file) err(EX_CANTCREAT, "%s", path);
-	error = htmlMessageHead(file, &envelope)
-		|| htmlMessageTail(file)
-		|| fclose(file);
-	if (error) err(EX_IOERR, "%s", path);
-
-	path = uidPath(uid, "atom");
-	file = fopen(path, "w");
-	if (!file) err(EX_CANTCREAT, "%s", path);
-	error = atomEntryHead(file, &envelope)
-		|| atomEntryTail(file)
-		|| fclose(file);
-	if (error) err(EX_IOERR, "%s", path);
-
+	exportEnvelope(uid, &envelope, header, body);
 	envelopeFree(envelope);
+	bodyPartFree(structure);
+
+	return false;
 }
diff --git a/imap.h b/imap.h
index 3a30d5a..a364111 100644
--- a/imap.h
+++ b/imap.h
@@ -55,6 +55,7 @@
 	X(AtomThread, "THREAD") \
 	X(AtomUID, "UID") \
 	X(AtomEnvelope, "ENVELOPE") \
+	X(AtomBodyStructure, "BODYSTRUCTURE") \
 	X(AtomBody, "BODY") \
 	X(AtomHeaderFields, "HEADER.FIELDS") \
 	X(AtomText, "TEXT") \
diff --git a/parse.c b/parse.c
index d3c907f..33acd5b 100644
--- a/parse.c
+++ b/parse.c
@@ -102,3 +102,125 @@ void parseEnvelope(struct Envelope *envelope, struct List list) {
 	}
 	envelope->messageID = parseID(dataCheck(list.ptr[MessageID], String).string);
 }
+
+static void parseDisposition(struct BodyPart *part, struct List list) {
+	if (list.len < 2) errx(EX_PROTOCOL, "missing disposition fields");
+	part->disposition.type = dataCheck(list.ptr[0], String).string;
+	if (list.ptr[1].type == List) {
+		part->disposition.params = list.ptr[1].list;
+	}
+}
+
+static void parseNonMultipart(struct BodyPart *part, struct List list) {
+	enum { Type, Subtype, Params, ID, Description, Encoding, Size, BasicLen };
+	if (list.len < BasicLen) errx(EX_PROTOCOL, "missing body part fields");
+
+	part->multipart = false;
+	part->type = dataCheck(list.ptr[Type], String).string;
+	part->subtype = dataCheck(list.ptr[Subtype], String).string;
+	if (list.ptr[Params].type == List) {
+		part->params = list.ptr[Params].list;
+	}
+	if (list.ptr[ID].type == String) {
+		part->id = list.ptr[ID].string;
+	}
+	if (list.ptr[Description].type == String) {
+		part->description = list.ptr[Description].string;
+	}
+	part->encoding = dataCheck(list.ptr[Encoding], String).string;
+	part->size = dataCheck(list.ptr[Size], Number).number;
+
+	list.len -= BasicLen;
+	list.ptr += BasicLen;
+
+	if (!strcmp(part->type, "MESSAGE") && !strcmp(part->subtype, "RFC822")) {
+		enum { Envelope, BodyStructure, Lines, MessageLen };
+		if (list.len < MessageLen) {
+			errx(EX_PROTOCOL, "missing body part message fields");
+		}
+		part->message.envelope = calloc(1, sizeof(*part->message.envelope));
+		part->message.structure = calloc(1, sizeof(*part->message.structure));
+		if (!part->message.envelope || !part->message.structure) {
+			err(EX_OSERR, "calloc");
+		}
+
+		parseEnvelope(
+			part->message.envelope,
+			dataCheck(list.ptr[Envelope], List).list
+		);
+		parseBodyPart(
+			part->message.structure,
+			dataCheck(list.ptr[BodyStructure], List).list
+		);
+		part->message.lines = dataCheck(list.ptr[Lines], Number).number;
+
+		list.len -= MessageLen;
+		list.ptr += MessageLen;
+	}
+
+	if (!strcmp(part->type, "TEXT")) {
+		if (!list.len) errx(EX_PROTOCOL, "missing body part text lines");
+		part->text.lines = dataCheck(list.ptr[0], Number).number;
+		list.len--;
+		list.ptr++;
+	}
+
+	enum { MD5, Disposition, Language, Location };
+	if (MD5 < list.len && list.ptr[MD5].type == String) {
+		part->md5 = list.ptr[MD5].string;
+	}
+	if (Disposition < list.len && list.ptr[Disposition].type == List) {
+		parseDisposition(part, list.ptr[Disposition].list);
+	}
+	if (Language < list.len && list.ptr[Language].type == List) {
+		part->language = list.ptr[Language].list;
+	}
+	if (Location < list.len && list.ptr[Location].type == List) {
+		part->location = list.ptr[Location].list;
+	}
+}
+
+static void parseMultipart(struct BodyPart *part, struct List list) {
+	part->multipart = true;
+	for (
+		part->parts.len = 0;
+		part->parts.len < list.len && list.ptr[part->parts.len].type == List;
+		part->parts.len++
+	);
+	part->parts.ptr = calloc(part->parts.len, sizeof(*part->parts.ptr));
+	if (!part->parts.ptr) err(EX_OSERR, "calloc");
+
+	for (size_t i = 0; i < part->parts.len; ++i) {
+		parseBodyPart(&part->parts.ptr[i], list.ptr[i].list);
+	}
+	list.len -= part->parts.len;
+	list.ptr += part->parts.len;
+
+	if (!list.len) errx(EX_PROTOCOL, "missing multipart subtype");
+	part->subtype = dataCheck(list.ptr[0], String).string;
+	list.len--;
+	list.ptr++;
+
+	enum { Params, Disposition, Language, Location };
+	if (Params < list.len && list.ptr[Params].type == List) {
+		part->params = list.ptr[Params].list;
+	}
+	if (Disposition < list.len && list.ptr[Disposition].type == List) {
+		parseDisposition(part, list.ptr[Disposition].list);
+	}
+	if (Language < list.len && list.ptr[Language].type == List) {
+		part->language = list.ptr[Language].list;
+	}
+	if (Location < list.len && list.ptr[Location].type == List) {
+		part->location = list.ptr[Location].list;
+	}
+}
+
+void parseBodyPart(struct BodyPart *part, struct List list) {
+	if (!list.len) errx(EX_PROTOCOL, "empty body part");
+	if (list.ptr[0].type != List) {
+		parseNonMultipart(part, list);
+	} else {
+		parseMultipart(part, list);
+	}
+}