summary refs log tree commit diff
path: root/bin/hilex.c
diff options
context:
space:
mode:
author June McEnroe <june@causal.agency> 2021-01-12 22:06:56 -0500
committer June McEnroe <june@causal.agency> 2021-01-12 22:06:56 -0500
commit d5727a5964c859359c0b6e4fcf5b4e91ec8f4569 (patch)
tree 8ae2001f305e343a1f73dd34cb2b9d545de571ec /bin/hilex.c
parent Consolidate hilex formatters into hilex.c (diff)
download src-d5727a5964c859359c0b6e4fcf5b4e91ec8f4569.tar.gz
src-d5727a5964c859359c0b6e4fcf5b4e91ec8f4569.zip
Move hilex out of hilex directory
Diffstat (limited to 'bin/hilex.c')
-rw-r--r-- bin/hilex.c 316
1 files changed, 316 insertions, 0 deletions
diff --git a/bin/hilex.c b/bin/hilex.c
new file mode 100644
index 00000000..ccbfa8ad
--- /dev/null
+++ b/bin/hilex.c
@@ -0,0 +1,316 @@
+/* Copyright (C) 2020  C. McEnroe <june@causal.agency>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <ctype.h>
+#include <err.h>
+#include <regex.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <unistd.h>
+
+#include "hilex.h"
+
+// Number of elements in a true array (not a pointer). The argument is
+// parenthesized in both uses so any array-valued expression expands safely.
+#define ARRAY_LEN(a) (sizeof(a) / sizeof((a)[0]))
+
+// Printable name of each token class, indexed by enum Class value. Each entry
+// is the stringified identifier generated from the ENUM_CLASS X-macro
+// (presumably declared in hilex.h; not visible in this file).
+static const char *Class[] = {
+#define X(class) [class] = #class,
+	ENUM_CLASS
+#undef X
+};
+
+// Table of available lexers. For each one: the lexer itself, the name that
+// parseLexer accepts, and the extended regular expression that matchLexer
+// tests against a file name. matchLexer returns the first matching entry,
+// so order matters if patterns ever overlap.
+static const struct {
+	const struct Lexer *lexer;
+	const char *name;
+	const char *pattern;
+} Lexers[] = {
+	{ &LexC, "c", "[.][chlmy]$" },
+	{ &LexMake, "make", "[.]mk$|^Makefile$" },
+	{ &LexMdoc, "mdoc", "[.][1-9]$" },
+	{ &LexText, "text", "[.]txt$" },
+};
+
+// Look up a lexer by its exact name. Does not return on failure: an unknown
+// name terminates the program with EX_USAGE.
+static const struct Lexer *parseLexer(const char *name) {
+	size_t index = 0;
+	while (index < ARRAY_LEN(Lexers)) {
+		if (strcmp(name, Lexers[index].name) == 0) {
+			return Lexers[index].lexer;
+		}
+		index++;
+	}
+	errx(EX_USAGE, "unknown lexer %s", name);
+}
+
+// Return the first lexer in Lexers whose pattern matches the file name, or
+// NULL when none matches. A pattern that fails to compile terminates the
+// program with EX_SOFTWARE, since the patterns are compiled-in constants.
+static const struct Lexer *matchLexer(const char *name) {
+	for (size_t i = 0; i < ARRAY_LEN(Lexers); ++i) {
+		regex_t regex;
+		int error = regcomp(
+			&regex, Lexers[i].pattern, REG_EXTENDED | REG_NOSUB
+		);
+		// Checked explicitly rather than with assert so that a bad pattern
+		// is never passed on to regexec/regfree when NDEBUG is defined.
+		if (error) {
+			char buf[256];
+			regerror(error, &regex, buf, sizeof(buf));
+			errx(EX_SOFTWARE, "%s: %s", Lexers[i].pattern, buf);
+		}
+		error = regexec(&regex, name, 0, NULL, 0);
+		regfree(&regex);
+		if (!error) return Lexers[i].lexer;
+	}
+	return NULL;
+}
+
+// X-macro list of formatter options: the enum constant for each option and
+// the key string under which it is accepted as a -o suboption.
+#define ENUM_OPTION \
+	X(Document, "document") \
+	X(Inline, "inline") \
+	X(Monospace, "monospace") \
+	X(Style, "style") \
+	X(Tab, "tab") \
+	X(Title, "title")
+
+// Indices into the opts array handed to headers and formatters. OptionCap is
+// the number of options and is used to size that array.
+enum Option {
+#define X(option, key) option,
+	ENUM_OPTION
+#undef X
+	OptionCap,
+};
+
+// Header: called once with the option values; used for both the header and
+// the footer slot of a Formatter.
+// Format: called once per lexed token with the option values, the token's
+// class and its text.
+typedef void Header(const char *opts[]);
+typedef void Format(const char *opts[], enum Class class, const char *text);
+
+// Parameter string placed between "\33[" and "m" by ansiFormat for each
+// class. Classes without an entry stay NULL and are printed without any
+// escape sequence.
+static const char *SGR[ClassCap] = {
+	[Keyword]       = "37",
+	[Macro]         = "32",
+	[Comment]       = "34",
+	[String]        = "36",
+	[StringFormat]  = "36;1;96",
+	[Interpolation] = "33",
+};
+
+// Print one token colored with the escape sequence for its class. Tokens
+// whose class has no SGR entry are written verbatim. opts is unused.
+static void ansiFormat(const char *opts[], enum Class class, const char *text) {
+	(void)opts;
+	const char *sgr = SGR[class];
+	if (sgr == NULL) {
+		printf("%s", text);
+		return;
+	}
+	// Color is set and reset on every line so that the output can be piped
+	// to less -R.
+	const char *eol = strchr(text, '\n');
+	while (eol != NULL) {
+		int len = (int)(eol - text);
+		printf("\33[%sm%.*s\33[m\n", sgr, len, text);
+		text = eol + 1;
+		eol = strchr(text, '\n');
+	}
+	// Whatever follows the last newline, if anything.
+	if (*text != '\0') {
+		printf("\33[%sm%s\33[m", sgr, text);
+	}
+}
+
+// Print one token for debugging: Normal text is written as is, anything else
+// is wrapped as ClassName(...) with the inside colored by ansiFormat.
+static void
+debugFormat(const char *opts[], enum Class class, const char *text) {
+	if (class == Normal) {
+		printf("%s", text);
+		return;
+	}
+	printf("%s(", Class[class]);
+	ansiFormat(opts, class, text);
+	printf(")");
+}
+
+// Color sequence emitted by ircFormat before text of each class. Each string
+// is the octal escape \003 (an octal escape takes at most three digits)
+// followed by the decimal color number as ordinary characters. Classes
+// without an entry stay NULL and get no color.
+static const char *IRC[ClassCap] = {
+	[Keyword]       = "\00315",
+	[Macro]         = "\0033",
+	[Comment]       = "\0032",
+	[String]        = "\00310",
+	[StringFormat]  = "\00311",
+	[Interpolation] = "\0037",
+};
+
+// Start the output with the \21 control character when the monospace option
+// is set; otherwise print nothing.
+static void ircHeader(const char *opts[]) {
+	if (opts[Monospace] == NULL) return;
+	printf("\21");
+}
+
+// Return the separator to print between a color sequence and text: a pair of
+// \2 characters when text starts with a comma or a digit (presumably so the
+// leading character is not read as part of the color number), or the empty
+// string otherwise, including for empty text.
+static const char *stop(const char *text) {
+	// <ctype.h> functions are only defined for values representable as
+	// unsigned char (or EOF), so a plain char must be converted first.
+	unsigned char first = (unsigned char)*text;
+	return (first == ',' || isdigit(first) ? "\2\2" : "");
+}
+
+// Print one token for IRC. Every newline-terminated line gets the class color
+// (if any) in front of it and, with the monospace option, a \21 after the
+// newline. The remainder after the last newline is colored and followed by
+// \17, after which \21 is printed again when monospace is set (presumably
+// because \17 cancels it).
+static void ircFormat(const char *opts[], enum Class class, const char *text) {
+	const char *color = IRC[class];
+	const char *eol;
+	while ((eol = strchr(text, '\n')) != NULL) {
+		if (color) printf("%s%s", color, stop(text));
+		printf("%.*s\n", (int)(eol - text), text);
+		if (opts[Monospace]) printf("\21");
+		text = eol + 1;
+	}
+	if (*text == '\0') return;
+	if (!color) {
+		printf("%s", text);
+		return;
+	}
+	printf("%s%s%s\17", color, stop(text), text);
+	if (opts[Monospace]) printf("\21");
+}
+
+// Write text to stdout with the characters ", & and < replaced by their HTML
+// entities. Runs of other characters are written unchanged.
+static void htmlEscape(const char *text) {
+	static const char Special[] = "\"&<";
+	for (;;) {
+		// Copy the longest prefix that needs no escaping.
+		size_t plain = strcspn(text, Special);
+		if (plain) fwrite(text, plain, 1, stdout);
+		text += plain;
+		if (*text == '\0') break;
+		// Here *text is one of the special characters.
+		switch (*text++) {
+		case '"':
+			printf("&quot;");
+			break;
+		case '&':
+			printf("&amp;");
+			break;
+		case '<':
+			printf("&lt;");
+			break;
+		}
+	}
+}
+
+// CSS declarations for each class. htmlHeader writes them as ".hilex.Name"
+// rules in the generated stylesheet and htmlFormat uses them as style
+// attributes when the inline option is set. Classes without an entry stay
+// NULL.
+static const char *Styles[ClassCap] = {
+	[Keyword]       = "color: dimgray;",
+	[Macro]         = "color: green;",
+	[Comment]       = "color: navy;",
+	[String]        = "color: teal;",
+	[StringFormat]  = "color: teal; font-weight: bold;",
+	[Interpolation] = "color: olive;",
+};
+
+// Print the CSS declarations "-moz-tab-size: TAB; tab-size: TAB;" with the
+// tab value passed through htmlEscape each time.
+static void styleTabSize(const char *tab) {
+	static const char *const Prefix[] = { "-moz-tab-size: ", "; tab-size: " };
+	for (size_t i = 0; i < ARRAY_LEN(Prefix); ++i) {
+		printf("%s", Prefix[i]);
+		htmlEscape(tab);
+	}
+	printf(";");
+}
+
+// Print everything that precedes the highlighted text in HTML output.
+// With the document option this is a doctype, a title and either a link to
+// the stylesheet named by the style option or, unless inline is set, a
+// generated <style> element. In every case the opening <pre> tag follows,
+// carrying a tab-size style attribute when both inline and tab are set.
+static void htmlHeader(const char *opts[]) {
+	if (opts[Document]) {
+		printf("<!DOCTYPE html>\n<title>");
+		if (opts[Title]) htmlEscape(opts[Title]);
+		printf("</title>\n");
+
+		if (opts[Style]) {
+			printf("<link rel=\"stylesheet\" href=\"");
+			htmlEscape(opts[Style]);
+			printf("\">\n");
+		} else if (!opts[Inline]) {
+			printf("<style>\n");
+			if (opts[Tab]) {
+				printf("pre.hilex { ");
+				styleTabSize(opts[Tab]);
+				printf(" }\n");
+			}
+			// One rule per class that has a style defined.
+			for (enum Class c = 0; c < ClassCap; ++c) {
+				if (Styles[c]) {
+					printf(".hilex.%s { %s }\n", Class[c], Styles[c]);
+				}
+			}
+			printf("</style>\n");
+		}
+	}
+
+	printf("<pre class=\"hilex\"");
+	if (opts[Inline] && opts[Tab]) {
+		printf(" style=\"");
+		styleTabSize(opts[Tab]);
+		printf("\"");
+	}
+	printf(">");
+}
+
+// Close the <pre> element, adding a trailing newline when the document
+// option is set.
+static void htmlFooter(const char *opts[]) {
+	printf("%s", opts[Document] ? "</pre>\n" : "</pre>");
+}
+
+// Print one token as HTML. Normal text is only escaped. Any other class is
+// wrapped in a <span>, which carries the class's CSS as a style attribute
+// with the inline option, or the class names "hilex Name" without it.
+static void htmlFormat(const char *opts[], enum Class class, const char *text) {
+	if (class == Normal) {
+		htmlEscape(text);
+		return;
+	}
+	if (opts[Inline]) {
+		const char *style = Styles[class];
+		if (!style) style = "";
+		printf("<span style=\"%s\">", style);
+	} else {
+		printf("<span class=\"hilex %s\">", Class[class]);
+	}
+	htmlEscape(text);
+	printf("</span>");
+}
+
+// Output formatters selectable by name with -f. header and footer may be
+// NULL, in which case main skips them; format is called for every token.
+// The first entry is the default formatter used by main.
+static const struct Formatter {
+	const char *name;
+	Header *header;
+	Format *format;
+	Header *footer;
+} Formatters[] = {
+	{ "ansi", NULL, ansiFormat, NULL },
+	{ "debug", NULL, debugFormat, NULL },
+	{ "html", htmlHeader, htmlFormat, htmlFooter },
+	{ "irc", ircHeader, ircFormat, NULL },
+};
+
+// Look up a formatter by its exact name. Does not return on failure: an
+// unknown name terminates the program with EX_USAGE.
+static const struct Formatter *parseFormatter(const char *name) {
+	const struct Formatter *end = &Formatters[ARRAY_LEN(Formatters)];
+	for (const struct Formatter *fmt = Formatters; fmt != end; ++fmt) {
+		if (strcmp(name, fmt->name) == 0) return fmt;
+	}
+	errx(EX_USAGE, "unknown formatter %s", name);
+}
+
+// Key strings passed to getsubopt, indexed by enum Option so that the index
+// it returns can be used directly on the opts array. The trailing NULL lands
+// at index OptionCap and terminates the list.
+static char *const OptionKeys[OptionCap + 1] = {
+#define X(option, key) [option] = key,
+	ENUM_OPTION
+#undef X
+	NULL,
+};
+
+// Entry point. Flags: -f formatter, -l lexer, -n name used for lexer
+// inference and as the default title, -o comma-separated formatter options,
+// -t fall back to the text lexer when none can be inferred. Input is the
+// first operand, or standard input when there is none.
+int main(int argc, char *argv[]) {
+	bool text = false;
+	const char *name = NULL;
+	const struct Lexer *lexer = NULL;
+	const struct Formatter *formatter = &Formatters[0];
+	const char *opts[OptionCap] = {0};
+
+	int opt;
+	while ((opt = getopt(argc, argv, "f:l:n:o:t")) > 0) {
+		switch (opt) {
+		case 'f':
+			formatter = parseFormatter(optarg);
+			break;
+		case 'l':
+			lexer = parseLexer(optarg);
+			break;
+		case 'n':
+			name = optarg;
+			break;
+		case 'o':
+			// An option given without a value is recorded as the empty
+			// string so that it still tests as set.
+			while (*optarg) {
+				char *val;
+				int key = getsubopt(&optarg, OptionKeys, &val);
+				if (key < 0) errx(EX_USAGE, "no such option %s", val);
+				opts[key] = (val ? val : "");
+			}
+			break;
+		case 't':
+			text = true;
+			break;
+		default:
+			return EX_USAGE;
+		}
+	}
+
+	const char *path = "(stdin)";
+	FILE *file = stdin;
+	if (optind < argc) {
+		path = argv[optind];
+		file = fopen(path, "r");
+		if (!file) err(EX_NOINPUT, "%s", path);
+	}
+
+	// Without -n, the name is the last component of the path.
+	if (!name) {
+		const char *slash = strrchr(path, '/');
+		name = (slash ? slash + 1 : path);
+	}
+	if (!opts[Title]) opts[Title] = name;
+
+	// Lexer precedence: explicit -l, then file name pattern, then -t.
+	if (!lexer) lexer = matchLexer(name);
+	if (!lexer && text) lexer = &LexText;
+	if (!lexer) errx(EX_USAGE, "cannot infer lexer for %s", name);
+
+	*lexer->in = file;
+	if (formatter->header) formatter->header(opts);
+	// Format tokens until the lexer reports None.
+	for (;;) {
+		enum Class class = lexer->lex();
+		if (class == None) break;
+		assert(class < ClassCap);
+		formatter->format(opts, class, *lexer->text);
+	}
+	if (formatter->footer) formatter->footer(opts);
+}