about summary refs log tree commit diff
path: root/decode.c
diff options
context:
space:
mode:
author June McEnroe <june@causal.agency> 2020-04-16 20:03:12 -0400
committer June McEnroe <june@causal.agency> 2020-04-16 20:03:12 -0400
commit 6b0d631a171192d4d9d5ccc6acabb307fee5fa22 (patch)
tree 1d406ef9428199bdd56c7a5dcd26bb0d90cd9fab /decode.c
parent Decode Q encoding (diff)
download bubger-6b0d631a171192d4d9d5ccc6acabb307fee5fa22.tar.gz
bubger-6b0d631a171192d4d9d5ccc6acabb307fee5fa22.zip
Decode quoted-printable and 7bit/8bit
Diffstat (limited to 'decode.c')
-rw-r--r-- decode.c 109
1 files changed, 81 insertions, 28 deletions
diff --git a/decode.c b/decode.c
index 1645a66..abee7f5 100644
--- a/decode.c
+++ b/decode.c
@@ -85,46 +85,96 @@ static void convertCharset(
 	iconv_close(conv);
 }
 
-static void decodeQ(struct Buffer *dst, const char *src, size_t len) {
-	while (len) {
-		if (*src == '=') {
-			if (len < 3) break;
-			*bufferDest(dst, 1) = strtoul(&src[1], NULL, 16);
-			len -= 3;
+static char unhex(char ch) {
+	if (ch <= '9') return ch - '0';
+	if (ch <= 'F') return 0xA + ch - 'A';
+	if (ch <= 'f') return 0xA + ch - 'a';
+	return 0;
+}
+
+static void decodeQ(struct Buffer *dst, const char *src) {
+	while (*src) {
+		if (src[0] == '=' && src[1] && src[2]) {
+			*bufferDest(dst, 1) = 0x10 * unhex(src[1]) + unhex(src[2]);
+			src += 3;
+		} else if (*src == '=') {
+			src++;
+		} else if (*src == '_') {
+			*bufferDest(dst, 1) = ' ';
+			src++;
+		} else {
+			size_t len = strcspn(src, "=_");
+			bufferCopy(dst, src, len);
+			src += len;
+		}
+	}
+}
+
+static void decodeQuotedPrintable(struct Buffer *dst, const char *src) {
+	while (*src) {
+		if (src[0] == '=' && src[1] == '\r' && src[2] == '\n') {
+			src += 3;
+		} else if (src[0] == '=' && src[1] && src[2]) {
+			*bufferDest(dst, 1) = 0x10 * unhex(src[1]) + unhex(src[2]);
 			src += 3;
+		} else if (src[0] == '=') {
+			src++;
+		} else if (src[0] == '\r' && src[1] == '\n') {
+			*bufferDest(dst, 1) = '\n';
+			src += 2;
+		} else if (src[0] == '\r') {
+			src++;
 		} else {
-			*bufferDest(dst, 1) = (*src == '_' ? ' ' : *src);
-			len--;
+			size_t len = strcspn(src, "=\r");
+			bufferCopy(dst, src, len);
+			src += len;
+		}
+	}
+}
+
+static void decode8Bit(struct Buffer *dst, const char *src) {
+	while (*src) {
+		if (src[0] == '\r' && src[1] == '\n') {
+			*bufferDest(dst, 1) = '\n';
+			src += 2;
+		} else if (src[0] == '\r') {
 			src++;
+		} else {
+			size_t len = strcspn(src, "\r");
+			bufferCopy(dst, src, len);
+			src += len;
 		}
 	}
 }
 
-static void decodeEncoding(
-	struct Buffer *dst, const char *encoding, const char *src, size_t len
-) {
+static void
+decodeEncoding(struct Buffer *dst, const char *encoding, const char *src) {
 	if (!strcasecmp(encoding, "Q")) {
-		decodeQ(dst, src, len);
+		decodeQ(dst, src);
+	} else if (!strcasecmp(encoding, "quoted-printable")) {
+		decodeQuotedPrintable(dst, src);
+	} else if (!strcasecmp(encoding, "7bit") || !strcasecmp(encoding, "8bit")) {
+		decode8Bit(dst, src);
+	} else if (!strcasecmp(encoding, "binary")) {
+		bufferCopy(dst, src, strlen(src));
 	} else {
-		// TODO
-		bufferCopy(dst, src, len);
+		warnx("unknown encoding %s", encoding);
 	}
 }
 
 static void decode(
-	struct Buffer *dst, const char *encoding, const char *charset,
-	const char *src, size_t len
+	struct Buffer *dst,
+	const char *encoding, const char *charset, const char *src
 ) {
 	if (
 		!charset ||
 		!strcasecmp(charset, "us-ascii") ||
 		!strcasecmp(charset, "utf-8")
 	) {
-		decodeEncoding(dst, encoding, src, len);
+		decodeEncoding(dst, encoding, src);
 	} else {
-		// TODO: Avoid copying if encoding is 8bit.
-		struct Buffer decoded = bufferAlloc(len);
-		decodeEncoding(&decoded, encoding, src, len);
+		struct Buffer decoded = bufferAlloc(strlen(src));
+		decodeEncoding(&decoded, encoding, src);
 		convertCharset(dst, charset, decoded.ptr, decoded.len);
 		free(decoded.ptr);
 	}
@@ -141,7 +191,7 @@ static void decodeWord(struct Buffer *dst, const char *src, size_t len) {
 	char *encoded = strsep(&ptr, "?");
 
 	if (charset && encoding && encoded && ptr && *ptr == '=') {
-		decode(dst, encoding, charset, encoded, strlen(encoded));
+		decode(dst, encoding, charset, encoded);
 	} else {
 		bufferCopy(dst, src, len);
 	}
@@ -164,11 +214,14 @@ char *decodeHeader(const char *header) {
 	return bufferString(&buf);
 }
 
-int decodeContent(
-	FILE *file, EscapeFn *escape,
-	const struct BodyPart *part, const char *content
-) {
-	if (!escape) escape = escapeNull;
-	// TODO
-	return escape(file, content);
+char *decodeToString(const struct BodyPart *part, const char *src) {
+	struct Buffer dst = bufferAlloc(strlen(src) + 1);
+	const char *charset = NULL;
+	for (size_t i = 0; i + 1 < part->params.len; i += 2) {
+		const char *key = dataCheck(part->params.ptr[i], String).string;
+		if (strcasecmp(key, "charset")) continue;
+		charset = dataCheck(part->params.ptr[i + 1], String).string;
+	}
+	decode(&dst, part->encoding, charset, src);
+	return bufferString(&dst);
 }