From 6b0d631a171192d4d9d5ccc6acabb307fee5fa22 Mon Sep 17 00:00:00 2001 From: "C. McEnroe" Date: Thu, 16 Apr 2020 20:03:12 -0400 Subject: Decode quoted-printable and 7bit/8bit --- archive.h | 12 ++----- atom.c | 15 +++++---- decode.c | 109 +++++++++++++++++++++++++++++++++++++++++++++---------------- export.c | 18 +++++----- html.c | 12 +++---- template.c | 2 +- 6 files changed, 108 insertions(+), 60 deletions(-) diff --git a/archive.h b/archive.h index 8b991cb..a7377ab 100644 --- a/archive.h +++ b/archive.h @@ -143,7 +143,6 @@ struct Variable { typedef int EscapeFn(FILE *file, const char *str); -int escapeNull(FILE *file, const char *str); int escapeURL(FILE *file, const char *str); int escapeXML(FILE *file, const char *str); @@ -154,10 +153,7 @@ int templateRender( char *templateURL(const char *template, const struct Variable vars[]); char *decodeHeader(const char *header); -int decodeContent( - FILE *file, EscapeFn *escape, - const struct BodyPart *part, const char *content -); +char *decodeToString(const struct BodyPart *part, const char *content); static inline const char *pathUID(uint32_t uid, const char *type) { static char buf[PATH_MAX]; @@ -198,16 +194,14 @@ int mboxBody(FILE *file, const char *body); extern const char *atomBaseURL; int atomEntryOpen(FILE *file, const struct Envelope *envelope); -int atomContentOpen(FILE *file); -int atomContentClose(FILE *file); +int atomContent(FILE *file, const char *content); int atomEntryClose(FILE *file); int atomFeedOpen(FILE *file, const struct Envelope *envelope); int atomFeedClose(FILE *file); extern const char *htmlTitle; int htmlMessageOpen(FILE *file, const struct Envelope *envelope); -int htmlInlineOpen(FILE *file, const struct BodyPart *part); -int htmlInlineClose(FILE *file); +int htmlInline(FILE *file, const struct BodyPart *part, const char *content); int htmlMessageClose(FILE *file); int htmlThreadHead(FILE *file, const struct Envelope *envelope); int htmlThreadOpen(FILE *file, const struct Envelope *envelope); diff --git a/atom.c b/atom.c index 5f7e3c7..1c4d0de 100644 --- a/atom.c +++ b/atom.c @@ -85,12 +85,15 @@ int atomEntryOpen(FILE *file, const struct Envelope *envelope) { return error; } -int atomContentOpen(FILE *file) { - return templateRender(file, TEMPLATE(), NULL, NULL); -} - -int atomContentClose(FILE *file) { - return templateRender(file, TEMPLATE(), NULL, NULL); +int atomContent(FILE *file, const char *content) { + const char *template = TEMPLATE( + [content] + ); + struct Variable vars[] = { + { "content", content }, + {0}, + }; + return templateRender(file, template, vars, escapeXML); } int atomEntryClose(FILE *file) { diff --git a/decode.c b/decode.c index 1645a66..abee7f5 100644 --- a/decode.c +++ b/decode.c @@ -85,46 +85,96 @@ static void convertCharset( iconv_close(conv); } -static void decodeQ(struct Buffer *dst, const char *src, size_t len) { - while (len) { - if (*src == '=') { - if (len < 3) break; - *bufferDest(dst, 1) = strtoul(&src[1], NULL, 16); - len -= 3; +static char unhex(char ch) { + if (ch <= '9') return ch - '0'; + if (ch <= 'F') return 0xA + ch - 'A'; + if (ch <= 'f') return 0xA + ch - 'a'; + return 0; +} + +static void decodeQ(struct Buffer *dst, const char *src) { + while (*src) { + if (src[0] == '=' && src[1] && src[2]) { + *bufferDest(dst, 1) = 0x10 * unhex(src[1]) + unhex(src[2]); + src += 3; + } else if (*src == '=') { + src++; + } else if (*src == '_') { + *bufferDest(dst, 1) = ' '; + src++; + } else { + size_t len = strcspn(src, "=_"); + bufferCopy(dst, src, len); + src += len; + } + } +} + +static void decodeQuotedPrintable(struct Buffer *dst, const char *src) { + while (*src) { + if (src[0] == '=' && src[1] == '\r' && src[2] == '\n') { + src += 3; + } else if (src[0] == '=' && src[1] && src[2]) { + *bufferDest(dst, 1) = 0x10 * unhex(src[1]) + unhex(src[2]); src += 3; + } else if (src[0] == '=') { + src++; + } else if (src[0] == '\r' && src[1] == '\n') { + *bufferDest(dst, 1) = '\n'; + src += 2; + } else if (src[0] == '\r') { + src++; } else { - *bufferDest(dst, 1) = (*src == '_' ? ' ' : *src); - len--; + size_t len = strcspn(src, "=\r"); + bufferCopy(dst, src, len); + src += len; + } + } +} + +static void decode8Bit(struct Buffer *dst, const char *src) { + while (*src) { + if (src[0] == '\r' && src[1] == '\n') { + *bufferDest(dst, 1) = '\n'; + src += 2; + } else if (src[0] == '\r') { src++; + } else { + size_t len = strcspn(src, "\r"); + bufferCopy(dst, src, len); + src += len; } } } -static void decodeEncoding( - struct Buffer *dst, const char *encoding, const char *src, size_t len -) { +static void +decodeEncoding(struct Buffer *dst, const char *encoding, const char *src) { if (!strcasecmp(encoding, "Q")) { - decodeQ(dst, src, len); + decodeQ(dst, src); + } else if (!strcasecmp(encoding, "quoted-printable")) { + decodeQuotedPrintable(dst, src); + } else if (!strcasecmp(encoding, "7bit") || !strcasecmp(encoding, "8bit")) { + decode8Bit(dst, src); + } else if (!strcasecmp(encoding, "binary")) { + bufferCopy(dst, src, strlen(src)); } else { - // TODO - bufferCopy(dst, src, len); + warnx("unknown encoding %s", encoding); } } static void decode( - struct Buffer *dst, const char *encoding, const char *charset, - const char *src, size_t len + struct Buffer *dst, + const char *encoding, const char *charset, const char *src ) { if ( !charset || !strcasecmp(charset, "us-ascii") || !strcasecmp(charset, "utf-8") ) { - decodeEncoding(dst, encoding, src, len); + decodeEncoding(dst, encoding, src); } else { - // TODO: Avoid copying if encoding is 8bit. - struct Buffer decoded = bufferAlloc(len); - decodeEncoding(&decoded, encoding, src, len); + struct Buffer decoded = bufferAlloc(strlen(src)); + decodeEncoding(&decoded, encoding, src); convertCharset(dst, charset, decoded.ptr, decoded.len); free(decoded.ptr); } @@ -141,7 +191,7 @@ static void decodeWord(struct Buffer *dst, const char *src, size_t len) { char *encoded = strsep(&ptr, "?"); if (charset && encoding && encoded && ptr && *ptr == '=') { - decode(dst, encoding, charset, encoded, strlen(encoded)); + decode(dst, encoding, charset, encoded); } else { bufferCopy(dst, src, len); } @@ -164,11 +214,14 @@ char *decodeHeader(const char *header) { return bufferString(&buf); } -int decodeContent( - FILE *file, EscapeFn *escape, - const struct BodyPart *part, const char *content -) { - if (!escape) escape = escapeNull; - // TODO - return escape(file, content); +char *decodeToString(const struct BodyPart *part, const char *src) { + struct Buffer dst = bufferAlloc(strlen(src) + 1); + const char *charset = NULL; + for (size_t i = 0; i + 1 < part->params.len; i += 2) { + const char *key = dataCheck(part->params.ptr[i], String).string; + if (strcasecmp(key, "charset")) continue; + charset = dataCheck(part->params.ptr[i + 1], String).string; + } + decode(&dst, part->encoding, charset, src); + return bufferString(&dst); } diff --git a/export.c b/export.c index 1e07acd..ed594de 100644 --- a/export.c +++ b/export.c @@ -99,12 +99,10 @@ static void exportAtom( } } if (bodyPartType(part, "text", "plain")) { - const char *content = dataCheck(body, String).string; - error = 0 - || atomContentOpen(file) - || decodeContent(file, escapeXML, structure, content) - || atomContentClose(file); + char *content = decodeToString(part, dataCheck(body, String).string); + error = atomContent(file, content); if (error) err(EX_IOERR, "%s", path); + free(content); } error = atomEntryClose(file) || fclose(file); @@ -135,11 +133,11 @@ static int exportHTMLBody( || htmlMessageClose(file); } else if (bodyPartType(structure, "text", "plain")) { // TODO: Check if not inline. - const char *content = dataCheck(body, String).string; - error = 0 - || htmlInlineOpen(file, structure) - || decodeContent(file, escapeXML, structure, content) - || htmlInlineClose(file); + char *content = decodeToString( + structure, dataCheck(body, String).string + ); + error = htmlInline(file, structure, content); + free(content); } return error; } diff --git a/html.c b/html.c index 11bc244..677b4aa 100644 --- a/html.c +++ b/html.c @@ -143,10 +143,13 @@ int htmlMessageOpen(FILE *file, const struct Envelope *envelope) { return error; } -int htmlInlineOpen(FILE *file, const struct BodyPart *part) { +int htmlInline(FILE *file, const struct BodyPart *part, const char *content) { // TODO: Include Content-Id as id? + // TODO: format=flowed. + // TODO: Process quoting. + // TODO: Highlight patches. const char *template = TEMPLATE( -
+		
[content]
); const char *lang = ""; // FIXME: part->language should be more structured. @@ -155,15 +158,12 @@ int htmlInlineOpen(FILE *file, const struct BodyPart *part) { } struct Variable vars[] = { { "lang", lang }, + { "content", content }, {0}, }; return templateRender(file, template, vars, escapeXML); } -int htmlInlineClose(FILE *file) { - return templateRender(file, TEMPLATE(
), NULL, NULL); -} - int htmlMessageClose(FILE *file) { return templateRender(file, TEMPLATE(), NULL, NULL); } diff --git a/template.c b/template.c index e88b50f..8653769 100644 --- a/template.c +++ b/template.c @@ -24,7 +24,7 @@ #include "archive.h" -int escapeNull(FILE *file, const char *str) { +static int escapeNull(FILE *file, const char *str) { size_t n = fwrite(str, strlen(str), 1, file); return (n ? 0 : -1); } -- cgit 1.4.1