summary refs log tree commit diff
path: root/bin/hilex/hilex.c
diff options
context:
space:
mode:
author June McEnroe <june@causal.agency> 2020-12-28 20:06:44 -0500
committer June McEnroe <june@causal.agency> 2020-12-28 20:11:17 -0500
commit 593137c52dc77cf113c1d9561d1b27b98a550bb5 (patch)
tree f4f9d7c939bcd8fe527193c7d2fa3e8b3ed67ed8 /bin/hilex/hilex.c
parent Add TOUR.7 (diff)
download src-593137c52dc77cf113c1d9561d1b27b98a550bb5.tar.gz
src-593137c52dc77cf113c1d9561d1b27b98a550bb5.zip
Add initial version of hilex
hilex is meant to replace hi, based on lex rather than a mess of
overlapping regexps. I want to preserve hi's tagging abilities, but that
will require some amount of parsing/post-processing, which I'm not sure
how to approach yet.

Macro lexing for C still needs work, as I want to match strings and
comments inside macros.
Diffstat (limited to '')
-rw-r--r-- bin/hilex/hilex.c 129
1 files changed, 129 insertions, 0 deletions
diff --git a/bin/hilex/hilex.c b/bin/hilex/hilex.c
new file mode 100644
index 00000000..5b40f280
--- /dev/null
+++ b/bin/hilex/hilex.c
@@ -0,0 +1,129 @@
+/* Copyright (C) 2020  June McEnroe <june@causal.agency>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <err.h>
+#include <regex.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <unistd.h>
+
+#include "hilex.h"
+
+/*
+ * Table of known lexers. Each entry pairs a lexer with the name that
+ * parseLexer() compares against and the extended regular expression that
+ * matchLexer() tests against a file name.
+ */
+static const struct {
+	const struct Lexer *lexer;
+	const char *name;
+	const char *pattern;
+} Lexers[] = {
+	{ .lexer = &LexC, .name = "c", .pattern = "[.][chlmy]$" },
+	{ .lexer = &LexText, .name = "text", .pattern = "[.]txt$" },
+};
+
+/*
+ * Look up a lexer by exact name in the Lexers table.
+ * Does not return if the name is unknown: exits with EX_USAGE via errx().
+ */
+static const struct Lexer *parseLexer(const char *name) {
+	const size_t count = ARRAY_LEN(Lexers);
+	size_t index = 0;
+	/* Advance until the names compare equal or the table is exhausted. */
+	while (index < count && strcmp(name, Lexers[index].name)) {
+		index++;
+	}
+	if (index == count) errx(EX_USAGE, "unknown lexer %s", name);
+	return Lexers[index].lexer;
+}
+
+/*
+ * Pick a lexer for a file name by trying each entry's pattern in table
+ * order. Returns the lexer of the first pattern that matches, or NULL if
+ * none does.
+ */
+static const struct Lexer *matchLexer(const char *name) {
+	const struct Lexer *match = NULL;
+	for (size_t i = 0; !match && i < ARRAY_LEN(Lexers); ++i) {
+		regex_t pattern;
+		int status = regcomp(
+			&pattern, Lexers[i].pattern, REG_EXTENDED | REG_NOSUB
+		);
+		/* The patterns are compile-time constants, so failure is a bug. */
+		assert(!status);
+		/* regexec() returns zero on a match. */
+		status = regexec(&pattern, name, 0, NULL, 0);
+		regfree(&pattern);
+		if (!status) match = Lexers[i].lexer;
+	}
+	return match;
+}
+
+/*
+ * Table of known output formatters, each paired with the name that
+ * parseFormatter() compares against.
+ */
+static const struct {
+	const struct Formatter *formatter;
+	const char *name;
+} Formatters[] = {
+	{ .formatter = &FormatANSI, .name = "ansi" },
+	{ .formatter = &FormatDebug, .name = "debug" },
+};
+
+/*
+ * Look up a formatter by exact name in the Formatters table.
+ * Does not return if the name is unknown: exits with EX_USAGE via errx().
+ */
+static const struct Formatter *parseFormatter(const char *name) {
+	const struct Formatter *found = NULL;
+	for (size_t i = 0; !found && i < ARRAY_LEN(Formatters); ++i) {
+		if (0 == strcmp(name, Formatters[i].name)) {
+			found = Formatters[i].formatter;
+		}
+	}
+	if (!found) errx(EX_USAGE, "unknown formatter %s", name);
+	return found;
+}
+
+/*
+ * Printable name of each token class, indexed by enum Class value.
+ * Each X(class) entry in ENUM_CLASS expands to a designated initializer
+ * that stores the stringified identifier at index `class`.
+ * NOTE(review): ENUM_CLASS is presumably the X-macro list that also defines
+ * enum Class in hilex.h -- confirm.
+ */
+static const char *ClassName[] = {
+#define X(class) [class] = #class,
+	ENUM_CLASS
+#undef X
+};
+
+/*
+ * Debug formatter: prints the token's class name, then the token as
+ * rendered by the ANSI formatter, wrapped in parentheses.
+ * The "\33[3m" and "\33[m" escape sequences are written around the
+ * ANSI-formatted text.
+ */
+static void
+debugFormat(const char *opts[], enum Class class, const char *text) {
+	const char *label = ClassName[class];
+	printf("%s(\33[3m", label);
+	FormatANSI.format(opts, class, text);
+	fputs("\33[m)", stdout);
+}
+
+/*
+ * Formatter selected by the name "debug". Only the format callback is set;
+ * the remaining members are left zero-initialized.
+ */
+const struct Formatter FormatDebug = { .format = debugFormat };
+
+/*
+ * Usage: hilex [-t] [-f formatter] [-l lexer] [-n name] [file]
+ *
+ * Reads the named file, or standard input if none is given, splits it into
+ * classified tokens with the chosen lexer and writes each token through the
+ * chosen formatter.
+ *
+ *   -f formatter  output formatter by name (default: the ANSI formatter)
+ *   -l lexer      lexer by name, overriding inference from the file name
+ *   -n name       name to infer the lexer from instead of the file's own
+ *   -t            fall back to the text lexer when none can be inferred
+ *
+ * Exits with EX_USAGE on an invalid option, an unknown lexer or formatter
+ * name, or when no lexer can be determined, and with EX_NOINPUT when the
+ * file cannot be opened.
+ */
+int main(int argc, char *argv[]) {
+	bool text = false;
+	const char *name = NULL;
+	const struct Lexer *lexer = NULL;
+	const struct Formatter *formatter = &FormatANSI;
+
+	for (int opt; 0 < (opt = getopt(argc, argv, "f:l:n:t"));) {
+		switch (opt) {
+			break; case 'f': formatter = parseFormatter(optarg);
+			break; case 'l': lexer = parseLexer(optarg);
+			break; case 'n': name = optarg;
+			break; case 't': text = true;
+			/*
+			 * getopt() returns '?' for an unknown option or a missing
+			 * argument and has already printed a diagnostic; stop here
+			 * rather than silently carrying on.
+			 */
+			break; default: return EX_USAGE;
+		}
+	}
+
+	const char *path = "(stdin)";
+	FILE *file = stdin;
+	if (optind < argc) {
+		path = argv[optind];
+		file = fopen(path, "r");
+		if (!file) err(EX_NOINPUT, "%s", path);
+	}
+
+	/* Without -n, infer the lexer from the last component of the path. */
+	if (!name) {
+		if (NULL != (name = strrchr(path, '/'))) {
+			name++;
+		} else {
+			name = path;
+		}
+	}
+	/* Precedence: explicit -l, then file name pattern, then -t fallback. */
+	if (!lexer) lexer = matchLexer(name);
+	if (!lexer && text) lexer = &LexText;
+	if (!lexer) errx(EX_USAGE, "cannot infer lexer for %s", name);
+
+	/* Point the lexer at the input, then format tokens until it yields None. */
+	*lexer->in = file;
+	if (formatter->header) formatter->header(NULL);
+	for (enum Class class; None != (class = lexer->lex());) {
+		formatter->format(NULL, class, *lexer->text);
+	}
+	if (formatter->footer) formatter->footer(NULL);
+	return EX_OK;
+}