summary refs log blame commit diff
path: root/decode.c
blob: d158c5da3903eae839a62fb8544b3554972cd9a7 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12
13
14












                                                                         


                                                                 
                                                                     


                                                                       
                                                                   
                

                
                  
                   

                   
                    


                     
















                                                         
                                                                







































                                                                            





                                                                                  
                                                             
                                             
                 
 
                                                      
                                                     

                                 

                                             
                                                       






                                                                   

                                             
                              
























                                                                                   
                                 





                                                              
                        











                                                             
                              


                                                        


                 
                                                                           

                                                                            





                                                                                    
                
                                                       
         

                   
                                                                  




                                                   
                                                   
                
                                                                 














                                                                         
                                                        


















                                                            
 
                                                                    
                                                                             
                                  
 

                                                                            
                                                                             


                                                     
/* Copyright (C) 2020  C. McEnroe <june@causal.agency>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 * Additional permission under GNU GPL version 3 section 7:
 *
 * If you modify this Program, or any covered work, by linking or
 * combining it with OpenSSL (or a modified version of that library),
 * containing parts covered by the terms of the OpenSSL License and the
 * original SSLeay license, the licensors of this Program grant you
 * additional permission to convey the resulting work. Corresponding
 * Source for a non-source form of such a combination shall include the
 * source code for the parts of OpenSSL used as well as that of the
 * covered work.
 */

#include <err.h>
#include <errno.h>
#include <iconv.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sysexits.h>

#include "archive.h"

// Growable byte buffer used as the output of every decoder in this file.
struct Buffer {
	size_t cap; // number of bytes allocated at ptr
	size_t len; // number of bytes in use, counted from the start of ptr
	char *ptr; // heap storage; may move whenever bufferDest() reallocates
};

// Creates an empty buffer with room for cap bytes. A request for zero bytes
// is rounded up to one. Exits with EX_OSERR if the allocation fails. The
// caller owns the returned storage and releases it with free(buf.ptr).
static struct Buffer bufferAlloc(size_t cap) {
	// malloc(0) is allowed to return NULL, which would be mistaken for an
	// allocation failure below, and a capacity of zero can never be grown by
	// doubling it.
	if (!cap) cap = 1;
	struct Buffer buf = {
		.cap = cap,
		.len = 0,
		.ptr = malloc(cap),
	};
	if (!buf.ptr) err(EX_OSERR, "malloc");
	return buf;
}

// Reserves len more bytes at the end of buf, growing the allocation by
// doubling when needed, and counts them as used. Returns a pointer to the
// first reserved byte; the pointer is only valid until the buffer grows
// again. Exits with EX_OSERR if the buffer cannot be grown.
static char *bufferDest(struct Buffer *buf, size_t len) {
	// Written as a subtraction so that a huge len cannot wrap the sum.
	if (len > buf->cap - buf->len) {
		// Start from 1 so that a zero-capacity buffer can still grow.
		size_t cap = (buf->cap ? buf->cap : 1);
		while (len > cap - buf->len) {
			if (cap > SIZE_MAX / 2) {
				// Doubling would wrap around and loop forever.
				errno = ENOMEM;
				err(EX_OSERR, "realloc");
			}
			cap *= 2;
		}
		buf->ptr = realloc(buf->ptr, cap);
		if (!buf->ptr) err(EX_OSERR, "realloc");
		buf->cap = cap;
	}
	char *dest = &buf->ptr[buf->len];
	buf->len += len;
	return dest;
}

// Appends the len bytes at src to the end of buf, growing it as needed.
static void bufferCopy(struct Buffer *buf, const char *src, size_t len) {
	memcpy(bufferDest(buf, len), src, len);
}

// Appends a terminating NUL (which is counted in buf->len) and returns the
// start of the buffer so that it can be used as a C string.
static char *bufferString(struct Buffer *buf) {
	char *nul = bufferDest(buf, 1);
	*nul = '\0';
	return buf->ptr;
}

// Converts the len bytes at src from charset to UTF-8 and appends the result
// to dst. If iconv cannot open the conversion, a warning is printed and
// nothing is appended. If the conversion fails part-way, a warning is printed
// and only the bytes converted so far are kept.
static void convertCharset(
	struct Buffer *dst, const char *charset, const char *src, size_t len
) {
	iconv_t conv = iconv_open("utf-8", charset);
	if (conv == (iconv_t)-1) {
		warn("cannot convert from %s to utf-8", charset);
		return;
	}

	// Each pass reserves as many bytes as remain in the input plus one more
	// per pass, so a pass that runs out of room (E2BIG) is retried with a
	// larger reservation.
	for (size_t pad = 0; len; ++pad) {
		char *ptr = bufferDest(dst, len + pad);
		// Let iconv use everything up to the end of the allocation.
		size_t cap = dst->cap - (ptr - dst->ptr);
		size_t n = iconv(conv, (char **)&src, &len, &ptr, &cap);
		// bufferDest counted the whole reservation as used; trim the length
		// back to what iconv actually wrote. This must happen on the error
		// path too, or unwritten bytes would be left in the output.
		dst->len = dst->cap - cap;
		if (n == (size_t)-1 && errno != E2BIG) {
			warn("iconv");
			break;
		}
	}

	iconv_close(conv);
}

// Decodes the NUL-terminated base64 text at src and appends the bytes to dst.
// Characters outside the base64 alphabet (line breaks, spaces, ...) are
// ignored wherever they occur, as RFC 2045 section 6.8 requires. An '=' ends
// the current group of sextets: two sextets yield one byte, three yield two.
// A trailing group that is neither complete nor closed by '=' is discarded.
static void decodeBase64(struct Buffer *dst, const char *src) {
	static const uint8_t Base64[64] = {
		"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
	};
	enum { Invalid = 0xFF };
	// Reverse lookup, built on first use. table[0] doubles as the "built"
	// flag: NUL is not in the alphabet, so it becomes Invalid (non-zero).
	static uint8_t table[256];
	if (!table[0]) {
		memset(table, Invalid, sizeof(table));
		for (size_t i = 0; i < sizeof(Base64); ++i) {
			table[Base64[i]] = i;
		}
	}

	uint32_t bits = 0;
	int count = 0; // sextets collected in the current group
	for (; *src; ++src) {
		uint8_t sextet = table[(uint8_t)*src];
		if (sextet == Invalid) {
			if (*src != '=') continue;
			// Padding: flush whatever whole bytes the group holds.
			if (count == 2) {
				*bufferDest(dst, 1) = bits >> 4;
			} else if (count == 3) {
				*bufferDest(dst, 1) = bits >> 10;
				*bufferDest(dst, 1) = bits >> 2;
			}
			bits = 0;
			count = 0;
			continue;
		}

		bits = bits << 6 | sextet;
		if (++count < 4) continue;
		*bufferDest(dst, 1) = bits >> 16;
		*bufferDest(dst, 1) = bits >> 8;
		*bufferDest(dst, 1) = bits;
		bits = 0;
		count = 0;
	}
}

// Returns the value of the hexadecimal digit ch (0-9, A-F or a-f). The input
// is not validated; any other character yields a meaningless value.
static char unhex(char ch) {
	char base = '0';
	char offset = 0;
	if (ch > 'F') {
		base = 'a';
		offset = 0xA;
	} else if (ch > '9') {
		base = 'A';
		offset = 0xA;
	}
	return offset + ch - base;
}

// Decodes "Q"-encoded text (as used inside encoded-words) from src and
// appends it to dst: '_' becomes a space, "=XY" becomes the byte with hex
// value XY, and everything else is copied through. An '=' with fewer than two
// characters after it is dropped.
static void decodeQ(struct Buffer *dst, const char *src) {
	for (const char *ch = src; *ch;) {
		if (*ch == '_') {
			*bufferDest(dst, 1) = ' ';
			ch++;
			continue;
		}
		if (*ch != '=') {
			// Copy the whole run up to the next special character at once.
			size_t run = strcspn(ch, "=_");
			bufferCopy(dst, ch, run);
			ch += run;
			continue;
		}
		if (ch[1] && ch[2]) {
			char byte = 0x10 * unhex(ch[1]) + unhex(ch[2]);
			*bufferDest(dst, 1) = byte;
			ch += 3;
		} else {
			ch++;
		}
	}
}

// Decodes quoted-printable text from src and appends it to dst. "=\r\n" (a
// soft line break) is removed, "=XY" becomes the byte with hex value XY,
// "\r\n" becomes "\n", and a stray '=' or '\r' is dropped. Everything else is
// copied through.
static void decodeQuotedPrintable(struct Buffer *dst, const char *src) {
	while (*src) {
		switch (*src) {
		case '=':
			if (src[1] == '\r' && src[2] == '\n') {
				src += 3;
			} else if (src[1] && src[2]) {
				char byte = 0x10 * unhex(src[1]) + unhex(src[2]);
				*bufferDest(dst, 1) = byte;
				src += 3;
			} else {
				src++;
			}
			break;
		case '\r':
			if (src[1] == '\n') {
				*bufferDest(dst, 1) = '\n';
				src += 2;
			} else {
				src++;
			}
			break;
		default: {
			// Copy the whole run up to the next special character at once.
			size_t run = strcspn(src, "=\r");
			bufferCopy(dst, src, run);
			src += run;
			break;
		}
		}
	}
}

// Copies src to dst while normalising line endings: "\r\n" becomes "\n" and
// a '\r' that is not followed by '\n' is dropped.
static void decode8Bit(struct Buffer *dst, const char *src) {
	while (*src) {
		size_t run = strcspn(src, "\r");
		if (run) {
			bufferCopy(dst, src, run);
			src += run;
			continue;
		}
		// A zero-length run means src[0] is '\r'.
		if (src[1] == '\n') {
			*bufferDest(dst, 1) = '\n';
			src += 2;
		} else {
			src++;
		}
	}
}

// Undoes the transfer encoding named by encoding (compared without regard to
// case) on src and appends the result to dst. For an unrecognised encoding a
// warning is printed and nothing is appended.
static void
decodeEncoding(struct Buffer *dst, const char *encoding, const char *src) {
	if (strcasecmp(encoding, "base64") == 0 || strcasecmp(encoding, "B") == 0) {
		decodeBase64(dst, src);
		return;
	}
	if (strcasecmp(encoding, "Q") == 0) {
		decodeQ(dst, src);
		return;
	}
	if (strcasecmp(encoding, "quoted-printable") == 0) {
		decodeQuotedPrintable(dst, src);
		return;
	}
	if (strcasecmp(encoding, "7bit") == 0 || strcasecmp(encoding, "8bit") == 0) {
		decode8Bit(dst, src);
		return;
	}
	if (strcasecmp(encoding, "binary") == 0) {
		bufferCopy(dst, src, strlen(src));
		return;
	}
	warnx("unknown encoding %s", encoding);
}

static void decode(
	struct Buffer *dst,
	const char *encoding, const char *charset, const char *src
) {
	if (
		!charset ||
		!strcasecmp(charset, "us-ascii") ||
		!strcasecmp(charset, "utf-8")
	) {
		decodeEncoding(dst, encoding, src);
	} else {
		struct Buffer decoded = bufferAlloc(strlen(src));
		decodeEncoding(&decoded, encoding, src);
		convertCharset(dst, charset, decoded.ptr, decoded.len);
		free(decoded.ptr);
	}
}

static void decodeWord(struct Buffer *dst, const char *src, size_t len) {
	struct Buffer word = bufferAlloc(len + 1);
	bufferCopy(&word, src, len);

	char *ptr = bufferString(&word);
	strsep(&ptr, "?");
	char *charset = strsep(&ptr, "?");
	char *encoding = strsep(&ptr, "?");
	char *encoded = strsep(&ptr, "?");

	if (charset && encoding && encoded && ptr && *ptr == '=') {
		decode(dst, encoding, charset, encoded);
	} else {
		bufferCopy(dst, src, len);
	}

	free(word.ptr);
}

// Reports whether the len bytes at word are delimited like an encoded-word,
// i.e. they start with "=?" and end with "?=".
static int looksEncoded(const char *word, size_t len) {
	if (len < 4) return 0;
	return !strncmp(word, "=?", 2) && !strncmp(&word[len - 2], "?=", 2);
}

// Decodes a header value that may contain encoded-words. The header is split
// on spaces; each token starting with "=?" is passed to decodeWord() and every
// other token is copied along with the space that ends it. Spaces that
// separate two adjacent encoded-words are dropped, as RFC 2047 section 6.2
// requires. Returns a newly allocated NUL-terminated string which the caller
// must free().
char *decodeHeader(const char *header) {
	struct Buffer buf = bufferAlloc(strlen(header) + 1);
	while (*header) {
		size_t len = strcspn(header, " ");
		if (strncmp(header, "=?", 2)) {
			// Plain token (possibly empty, when header points at a space).
			if (header[len]) len++;
			bufferCopy(&buf, header, len);
			header += len;
			continue;
		}

		decodeWord(&buf, header, len);
		int encoded = looksEncoded(header, len);
		header += len;

		// Peek past the following spaces; if another encoded-word comes
		// next, the spaces are not part of the text.
		size_t gap = 0;
		while (header[gap] == ' ') gap++;
		if (!encoded || !gap) continue;
		const char *next = &header[gap];
		if (looksEncoded(next, strcspn(next, " "))) header = next;
	}
	return bufferString(&buf);
}

char *decodeToString(const struct BodyPart *part, const char *src) {
	struct Buffer dst = bufferAlloc(strlen(src) + 1);
	decode(&dst, part->encoding, paramGet(part->params, "charset"), src);
	return bufferString(&dst);
}

// Decodes the body text src using the transfer encoding of part and its
// "charset" parameter, and writes the result to file. Returns 0 on success,
// including when the decoded output is empty, and -1 if not all bytes could
// be written.
int decodeToFile(FILE *file, const struct BodyPart *part, const char *src) {
	// +1 so that an empty src never asks for a zero-sized allocation.
	struct Buffer dst = bufferAlloc(strlen(src) + 1);
	decode(&dst, part->encoding, paramGet(part->params, "charset"), src);
	// Write dst.len items of one byte each: writing one item of dst.len
	// bytes returns 0 when dst.len is 0, which would look like a failure.
	size_t n = fwrite(dst.ptr, 1, dst.len, file);
	int error = (n < dst.len ? -1 : 0);
	free(dst.ptr);
	return error;
}