Decode quoted-printable and 7bit/8bit

author: June McEnroe <june@causal.agency> 2020-04-16 20:03:12 -0400
committer: June McEnroe <june@causal.agency> 2020-04-16 20:03:12 -0400
commit: 6b0d631a171192d4d9d5ccc6acabb307fee5fa22 (patch)
tree: 1d406ef9428199bdd56c7a5dcd26bb0d90cd9fab
parent: Decode Q encoding (diff)
download: bubger-6b0d631a171192d4d9d5ccc6acabb307fee5fa22.tar.gz bubger-6b0d631a171192d4d9d5ccc6acabb307fee5fa22.zip
6 files changed, 108 insertions, 60 deletions
diff --git a/archive.h b/archive.h
index 8b991cb..a7377ab 100644
--- a/archive.h
+++ b/archive.h
@@ -143,7 +143,6 @@ struct Variable {
 
 typedef int EscapeFn(FILE *file, const char *str);
 
-int escapeNull(FILE *file, const char *str);
 int escapeURL(FILE *file, const char *str);
 int escapeXML(FILE *file, const char *str);
 
@@ -154,10 +153,7 @@ int templateRender(
 char *templateURL(const char *template, const struct Variable vars[]);
 
 char *decodeHeader(const char *header);
-int decodeContent(
-	FILE *file, EscapeFn *escape,
-	const struct BodyPart *part, const char *content
-);
+char *decodeToString(const struct BodyPart *part, const char *content);
 
 static inline const char *pathUID(uint32_t uid, const char *type) {
 	static char buf[PATH_MAX];
@@ -198,16 +194,14 @@ int mboxBody(FILE *file, const char *body);
 
 extern const char *atomBaseURL;
 int atomEntryOpen(FILE *file, const struct Envelope *envelope);
-int atomContentOpen(FILE *file);
-int atomContentClose(FILE *file);
+int atomContent(FILE *file, const char *content);
 int atomEntryClose(FILE *file);
 int atomFeedOpen(FILE *file, const struct Envelope *envelope);
 int atomFeedClose(FILE *file);
 
 extern const char *htmlTitle;
 int htmlMessageOpen(FILE *file, const struct Envelope *envelope);
-int htmlInlineOpen(FILE *file, const struct BodyPart *part);
-int htmlInlineClose(FILE *file);
+int htmlInline(FILE *file, const struct BodyPart *part, const char *content);
 int htmlMessageClose(FILE *file);
 int htmlThreadHead(FILE *file, const struct Envelope *envelope);
 int htmlThreadOpen(FILE *file, const struct Envelope *envelope);
diff --git a/atom.c b/atom.c
index 5f7e3c7..1c4d0de 100644
--- a/atom.c
+++ b/atom.c
@@ -85,12 +85,15 @@ int atomEntryOpen(FILE *file, const struct Envelope *envelope) {
 	return error;
 }
 
-int atomContentOpen(FILE *file) {
-	return templateRender(file, TEMPLATE(<content type="text">), NULL, NULL);
-}
-
-int atomContentClose(FILE *file) {
-	return templateRender(file, TEMPLATE(</content>), NULL, NULL);
+int atomContent(FILE *file, const char *content) {
+	const char *template = TEMPLATE(
+		<content type="text">[content]</content>
+	);
+	struct Variable vars[] = {
+		{ "content", content },
+		{0},
+	};
+	return templateRender(file, template, vars, escapeXML);
 }
 
 int atomEntryClose(FILE *file) {
diff --git a/decode.c b/decode.c
index 1645a66..abee7f5 100644
--- a/decode.c
+++ b/decode.c
@@ -85,46 +85,96 @@ static void convertCharset(
 	iconv_close(conv);
 }
 
-static void decodeQ(struct Buffer *dst, const char *src, size_t len) {
-	while (len) {
-		if (*src == '=') {
-			if (len < 3) break;
-			*bufferDest(dst, 1) = strtoul(&src[1], NULL, 16);
-			len -= 3;
+static char unhex(char ch) {
+	if (ch <= '9') return ch - '0';
+	if (ch <= 'F') return 0xA + ch - 'A';
+	if (ch <= 'f') return 0xA + ch - 'a';
+	return 0;
+}
+
+static void decodeQ(struct Buffer *dst, const char *src) {
+	while (*src) {
+		if (src[0] == '=' && src[1] && src[2]) {
+			*bufferDest(dst, 1) = 0x10 * unhex(src[1]) + unhex(src[2]);
+			src += 3;
+		} else if (*src == '=') {
+			src++;
+		} else if (*src == '_') {
+			*bufferDest(dst, 1) = ' ';
+			src++;
+		} else {
+			size_t len = strcspn(src, "=_");
+			bufferCopy(dst, src, len);
+			src += len;
+		}
+	}
+}
+
+static void decodeQuotedPrintable(struct Buffer *dst, const char *src) {
+	while (*src) {
+		if (src[0] == '=' && src[1] == '\r' && src[2] == '\n') {
+			src += 3;
+		} else if (src[0] == '=' && src[1] && src[2]) {
+			*bufferDest(dst, 1) = 0x10 * unhex(src[1]) + unhex(src[2]);
 			src += 3;
+		} else if (src[0] == '=') {
+			src++;
+		} else if (src[0] == '\r' && src[1] == '\n') {
+			*bufferDest(dst, 1) = '\n';
+			src += 2;
+		} else if (src[0] == '\r') {
+			src++;
 		} else {
-			*bufferDest(dst, 1) = (*src == '_' ? ' ' : *src);
-			len--;
+			size_t len = strcspn(src, "=\r");
+			bufferCopy(dst, src, len);
+			src += len;
+		}
+	}
+}
+
+static void decode8Bit(struct Buffer *dst, const char *src) {
+	while (*src) {
+		if (src[0] == '\r' && src[1] == '\n') {
+			*bufferDest(dst, 1) = '\n';
+			src += 2;
+		} else if (src[0] == '\r') {
 			src++;
+		} else {
+			size_t len = strcspn(src, "\r");
+			bufferCopy(dst, src, len);
+			src += len;
 		}
 	}
 }
 
-static void decodeEncoding(
-	struct Buffer *dst, const char *encoding, const char *src, size_t len
-) {
+static void
+decodeEncoding(struct Buffer *dst, const char *encoding, const char *src) {
 	if (!strcasecmp(encoding, "Q")) {
-		decodeQ(dst, src, len);
+		decodeQ(dst, src);
+	} else if (!strcasecmp(encoding, "quoted-printable")) {
+		decodeQuotedPrintable(dst, src);
+	} else if (!strcasecmp(encoding, "7bit") || !strcasecmp(encoding, "8bit")) {
+		decode8Bit(dst, src);
+	} else if (!strcasecmp(encoding, "binary")) {
+		bufferCopy(dst, src, strlen(src));
 	} else {
-		// TODO
-		bufferCopy(dst, src, len);
+		warnx("unknown encoding %s", encoding);
 	}
 }
 
 static void decode(
-	struct Buffer *dst, const char *encoding, const char *charset,
-	const char *src, size_t len
+	struct Buffer *dst,
+	const char *encoding, const char *charset, const char *src
 ) {
 	if (
 		!charset ||
 		!strcasecmp(charset, "us-ascii") ||
 		!strcasecmp(charset, "utf-8")
 	) {
-		decodeEncoding(dst, encoding, src, len);
+		decodeEncoding(dst, encoding, src);
 	} else {
-		// TODO: Avoid copying if encoding is 8bit.
-		struct Buffer decoded = bufferAlloc(len);
-		decodeEncoding(&decoded, encoding, src, len);
+		struct Buffer decoded = bufferAlloc(strlen(src));
+		decodeEncoding(&decoded, encoding, src);
 		convertCharset(dst, charset, decoded.ptr, decoded.len);
 		free(decoded.ptr);
 	}
@@ -141,7 +191,7 @@ static void decodeWord(struct Buffer *dst, const char *src, size_t len) {
 	char *encoded = strsep(&ptr, "?");
 
 	if (charset && encoding && encoded && ptr && *ptr == '=') {
-		decode(dst, encoding, charset, encoded, strlen(encoded));
+		decode(dst, encoding, charset, encoded);
 	} else {
 		bufferCopy(dst, src, len);
 	}
@@ -164,11 +214,14 @@ char *decodeHeader(const char *header) {
 	return bufferString(&buf);
 }
 
-int decodeContent(
-	FILE *file, EscapeFn *escape,
-	const struct BodyPart *part, const char *content
-) {
-	if (!escape) escape = escapeNull;
-	// TODO
-	return escape(file, content);
+char *decodeToString(const struct BodyPart *part, const char *src) {
+	struct Buffer dst = bufferAlloc(strlen(src) + 1);
+	const char *charset = NULL;
+	for (size_t i = 0; i + 1 < part->params.len; i += 2) {
+		const char *key = dataCheck(part->params.ptr[i], String).string;
+		if (strcasecmp(key, "charset")) continue;
+		charset = dataCheck(part->params.ptr[i + 1], String).string;
+	}
+	decode(&dst, part->encoding, charset, src);
+	return bufferString(&dst);
 }
diff --git a/export.c b/export.c
index 1e07acd..ed594de 100644
--- a/export.c
+++ b/export.c
@@ -99,12 +99,10 @@ static void exportAtom(
 		}
 	}
 	if (bodyPartType(part, "text", "plain")) {
-		const char *content = dataCheck(body, String).string;
-		error = 0
-			|| atomContentOpen(file)
-			|| decodeContent(file, escapeXML, structure, content)
-			|| atomContentClose(file);
+		char *content = decodeToString(part, dataCheck(body, String).string);
+		error = atomContent(file, content);
 		if (error) err(EX_IOERR, "%s", path);
+		free(content);
 	}
 
 	error = atomEntryClose(file) || fclose(file);
@@ -135,11 +133,11 @@ static int exportHTMLBody(
 			|| htmlMessageClose(file);
 	} else if (bodyPartType(structure, "text", "plain")) {
 		// TODO: Check if not inline.
-		const char *content = dataCheck(body, String).string;
-		error = 0
-			|| htmlInlineOpen(file, structure)
-			|| decodeContent(file, escapeXML, structure, content)
-			|| htmlInlineClose(file);
+		char *content = decodeToString(
+			structure, dataCheck(body, String).string
+		);
+		error = htmlInline(file, structure, content);
+		free(content);
 	}
 	return error;
 }
diff --git a/html.c b/html.c
index 11bc244..677b4aa 100644
--- a/html.c
+++ b/html.c
@@ -143,10 +143,13 @@ int htmlMessageOpen(FILE *file, const struct Envelope *envelope) {
 	return error;
 }
 
-int htmlInlineOpen(FILE *file, const struct BodyPart *part) {
+int htmlInline(FILE *file, const struct BodyPart *part, const char *content) {
 	// TODO: Include Content-Id as id?
+	// TODO: format=flowed.
+	// TODO: Process quoting.
+	// TODO: Highlight patches.
 	const char *template = TEMPLATE(
-		<pre lang="[lang]">
+		<pre lang="[lang]">[content]</pre>
 	);
 	const char *lang = "";
 	// FIXME: part->language should be more structured.
@@ -155,15 +158,12 @@ int htmlInlineOpen(FILE *file, const struct BodyPart *part) {
 	}
 	struct Variable vars[] = {
 		{ "lang", lang },
+		{ "content", content },
 		{0},
 	};
 	return templateRender(file, template, vars, escapeXML);
 }
 
-int htmlInlineClose(FILE *file) {
-	return templateRender(file, TEMPLATE(</pre>), NULL, NULL);
-}
-
 int htmlMessageClose(FILE *file) {
 	return templateRender(file, TEMPLATE(</article>), NULL, NULL);
 }
diff --git a/template.c b/template.c
index e88b50f..8653769 100644
--- a/template.c
+++ b/template.c
@@ -24,7 +24,7 @@
 
 #include "archive.h"
 
-int escapeNull(FILE *file, const char *str) {
+static int escapeNull(FILE *file, const char *str) {
 	size_t n = fwrite(str, strlen(str), 1, file);
 	return (n ? 0 : -1);
 }
author	June McEnroe <june@causal.agency>	2020-04-16 20:03:12 -0400
committer	June McEnroe <june@causal.agency>	2020-04-16 20:03:12 -0400
commit	6b0d631a171192d4d9d5ccc6acabb307fee5fa22 (patch)
tree	1d406ef9428199bdd56c7a5dcd26bb0d90cd9fab
parent	Decode Q encoding (diff)
download	bubger-6b0d631a171192d4d9d5ccc6acabb307fee5fa22.tar.gz bubger-6b0d631a171192d4d9d5ccc6acabb307fee5fa22.zip