about summary refs log tree commit diff
diff options
context:
space:
mode:
author: June McEnroe <june@causal.agency> 2021-06-10 13:06:32 -0400
committer: June McEnroe <june@causal.agency> 2021-06-10 13:06:32 -0400
commit: 4b23326e717201ca8a08fcbc3e4a1881b1abbbc9 (patch)
tree: 7fdfadf47772a3aa712ab109f867f84bbc5db3cb
parent: Limit number of entries in index Atom feed (diff)
download: bubger-4b23326e717201ca8a08fcbc3e4a1881b1abbbc9.tar.gz
          bubger-4b23326e717201ca8a08fcbc3e4a1881b1abbbc9.zip
Fix decoding adjacent encoded-words
RFC 2047 6.2. Display of 'encoded-word's:

> When displaying a particular header field that contains multiple
> 'encoded-word's, any 'linear-white-space' that separates a pair of
> adjacent 'encoded-word's is ignored.  (This is to allow the use of
> multiple 'encoded-word's to represent long strings of unencoded text,
> without having to separate 'encoded-word's where spaces occur in the
> unencoded text.)
-rw-r--r-- decode.c 14
1 files changed, 11 insertions, 3 deletions
diff --git a/decode.c b/decode.c
index d158c5d..0dbed27 100644
--- a/decode.c
+++ b/decode.c
@@ -28,6 +28,7 @@
 #include <err.h>
 #include <errno.h>
 #include <iconv.h>
+#include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -243,13 +244,20 @@ static void decodeWord(struct Buffer *dst, const char *src, size_t len) {
 
 char *decodeHeader(const char *header) {
 	struct Buffer buf = bufferAlloc(strlen(header) + 1);
+	bool pair = false;
 	while (*header) {
-		size_t len = strcspn(header, " ");
-		if (!strncmp(header, "=?", 2)) {
+		size_t len = strcspn(header, " \t");
+		if (!len) {
+			len = strspn(header, " \t");
+			if (!pair || strncmp(&header[len], "=?", 2)) {
+				bufferCopy(&buf, header, len);
+			}
+		} else if (!strncmp(header, "=?", 2)) {
 			decodeWord(&buf, header, len);
+			pair = true;
 		} else {
-			if (header[len]) len++;
 			bufferCopy(&buf, header, len);
+			pair = false;
 		}
 		header += len;
 	}