From 6b0d631a171192d4d9d5ccc6acabb307fee5fa22 Mon Sep 17 00:00:00 2001
From: "C. McEnroe"
Date: Thu, 16 Apr 2020 20:03:12 -0400
Subject: Decode quoted-printable and 7bit/8bit

---
 decode.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 81 insertions(+), 28 deletions(-)

(limited to 'decode.c')

diff --git a/decode.c b/decode.c
index 1645a66..abee7f5 100644
--- a/decode.c
+++ b/decode.c
@@ -85,46 +85,96 @@ static void convertCharset(
 	iconv_close(conv);
 }
 
-static void decodeQ(struct Buffer *dst, const char *src, size_t len) {
-	while (len) {
-		if (*src == '=') {
-			if (len < 3) break;
-			*bufferDest(dst, 1) = strtoul(&src[1], NULL, 16);
-			len -= 3;
+static char unhex(char ch) {
+	if (ch <= '9') return ch - '0';
+	if (ch <= 'F') return 0xA + ch - 'A';
+	if (ch <= 'f') return 0xA + ch - 'a';
+	return 0;
+}
+
+static void decodeQ(struct Buffer *dst, const char *src) {
+	while (*src) {
+		if (src[0] == '=' && src[1] && src[2]) {
+			*bufferDest(dst, 1) = 0x10 * unhex(src[1]) + unhex(src[2]);
+			src += 3;
+		} else if (*src == '=') {
+			src++;
+		} else if (*src == '_') {
+			*bufferDest(dst, 1) = ' ';
+			src++;
+		} else {
+			size_t len = strcspn(src, "=_");
+			bufferCopy(dst, src, len);
+			src += len;
+		}
+	}
+}
+
+static void decodeQuotedPrintable(struct Buffer *dst, const char *src) {
+	while (*src) {
+		if (src[0] == '=' && src[1] == '\r' && src[2] == '\n') {
+			src += 3;
+		} else if (src[0] == '=' && src[1] && src[2]) {
+			*bufferDest(dst, 1) = 0x10 * unhex(src[1]) + unhex(src[2]);
 			src += 3;
+		} else if (src[0] == '=') {
+			src++;
+		} else if (src[0] == '\r' && src[1] == '\n') {
+			*bufferDest(dst, 1) = '\n';
+			src += 2;
+		} else if (src[0] == '\r') {
+			src++;
 		} else {
-			*bufferDest(dst, 1) = (*src == '_' ? ' ' : *src);
-			len--;
+			size_t len = strcspn(src, "=\r");
+			bufferCopy(dst, src, len);
+			src += len;
+		}
+	}
+}
+
+static void decode8Bit(struct Buffer *dst, const char *src) {
+	while (*src) {
+		if (src[0] == '\r' && src[1] == '\n') {
+			*bufferDest(dst, 1) = '\n';
+			src += 2;
+		} else if (src[0] == '\r') {
 			src++;
+		} else {
+			size_t len = strcspn(src, "\r");
+			bufferCopy(dst, src, len);
+			src += len;
 		}
 	}
 }
 
-static void decodeEncoding(
-	struct Buffer *dst, const char *encoding, const char *src, size_t len
-) {
+static void
+decodeEncoding(struct Buffer *dst, const char *encoding, const char *src) {
 	if (!strcasecmp(encoding, "Q")) {
-		decodeQ(dst, src, len);
+		decodeQ(dst, src);
+	} else if (!strcasecmp(encoding, "quoted-printable")) {
+		decodeQuotedPrintable(dst, src);
+	} else if (!strcasecmp(encoding, "7bit") || !strcasecmp(encoding, "8bit")) {
+		decode8Bit(dst, src);
+	} else if (!strcasecmp(encoding, "binary")) {
+		bufferCopy(dst, src, strlen(src));
 	} else {
-		// TODO
-		bufferCopy(dst, src, len);
+		warnx("unknown encoding %s", encoding);
 	}
 }
 
 static void decode(
-	struct Buffer *dst, const char *encoding, const char *charset,
-	const char *src, size_t len
+	struct Buffer *dst,
+	const char *encoding, const char *charset, const char *src
 ) {
 	if (
 		!charset
 		|| !strcasecmp(charset, "us-ascii")
 		|| !strcasecmp(charset, "utf-8")
 	) {
-		decodeEncoding(dst, encoding, src, len);
+		decodeEncoding(dst, encoding, src);
 	} else {
-		// TODO: Avoid copying if encoding is 8bit.
-		struct Buffer decoded = bufferAlloc(len);
-		decodeEncoding(&decoded, encoding, src, len);
+		struct Buffer decoded = bufferAlloc(strlen(src));
+		decodeEncoding(&decoded, encoding, src);
 		convertCharset(dst, charset, decoded.ptr, decoded.len);
 		free(decoded.ptr);
 	}
@@ -141,7 +191,7 @@ static void decodeWord(struct Buffer *dst, const char *src, size_t len) {
 	char *encoded = strsep(&ptr, "?");
 
 	if (charset && encoding && encoded && ptr && *ptr == '=') {
-		decode(dst, encoding, charset, encoded, strlen(encoded));
+		decode(dst, encoding, charset, encoded);
 	} else {
 		bufferCopy(dst, src, len);
 	}
@@ -164,11 +214,14 @@ char *decodeHeader(const char *header) {
 	return bufferString(&buf);
 }
 
-int decodeContent(
-	FILE *file, EscapeFn *escape,
-	const struct BodyPart *part, const char *content
-) {
-	if (!escape) escape = escapeNull;
-	// TODO
-	return escape(file, content);
+char *decodeToString(const struct BodyPart *part, const char *src) {
+	struct Buffer dst = bufferAlloc(strlen(src) + 1);
+	const char *charset = NULL;
+	for (size_t i = 0; i + 1 < part->params.len; i += 2) {
+		const char *key = dataCheck(part->params.ptr[i], String).string;
+		if (strcasecmp(key, "charset")) continue;
+		charset = dataCheck(part->params.ptr[i + 1], String).string;
+	}
+	decode(&dst, part->encoding, charset, src);
+	return bufferString(&dst);
 }
-- 
cgit 1.4.1