From d5727a5964c859359c0b6e4fcf5b4e91ec8f4569 Mon Sep 17 00:00:00 2001 From: "C. McEnroe" Date: Tue, 12 Jan 2021 22:06:56 -0500 Subject: Move hilex out of hilex directory --- bin/hilex.c | 316 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 316 insertions(+) create mode 100644 bin/hilex.c (limited to 'bin/hilex.c') diff --git a/bin/hilex.c b/bin/hilex.c new file mode 100644 index 00000000..ccbfa8ad --- /dev/null +++ b/bin/hilex.c @@ -0,0 +1,316 @@ +/* Copyright (C) 2020 C. McEnroe + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hilex.h" + +#define ARRAY_LEN(a) (sizeof(a) / sizeof(a[0])) + +static const char *Class[] = { +#define X(class) [class] = #class, + ENUM_CLASS +#undef X +}; + +static const struct { + const struct Lexer *lexer; + const char *name; + const char *pattern; +} Lexers[] = { + { &LexC, "c", "[.][chlmy]$" }, + { &LexMake, "make", "[.]mk$|^Makefile$" }, + { &LexMdoc, "mdoc", "[.][1-9]$" }, + { &LexText, "text", "[.]txt$" }, +}; + +static const struct Lexer *parseLexer(const char *name) { + for (size_t i = 0; i < ARRAY_LEN(Lexers); ++i) { + if (!strcmp(name, Lexers[i].name)) return Lexers[i].lexer; + } + errx(EX_USAGE, "unknown lexer %s", name); +} + +static const struct Lexer *matchLexer(const char *name) { + regex_t regex; + for (size_t i = 0; i < ARRAY_LEN(Lexers); ++i) { + int error = regcomp( + ®ex, Lexers[i].pattern, REG_EXTENDED | REG_NOSUB + ); + assert(!error); + error = regexec(®ex, name, 0, NULL, 0); + regfree(®ex); + if (!error) return Lexers[i].lexer; + } + return NULL; +} + +#define ENUM_OPTION \ + X(Document, "document") \ + X(Inline, "inline") \ + X(Monospace, "monospace") \ + X(Style, "style") \ + X(Tab, "tab") \ + X(Title, "title") + +enum Option { +#define X(option, key) option, + ENUM_OPTION +#undef X + OptionCap, +}; + +typedef void Header(const char *opts[]); +typedef void Format(const char *opts[], enum Class class, const char *text); + +static const char *SGR[ClassCap] = { + [Keyword] = "37", + [Macro] = "32", + [Comment] = "34", + [String] = "36", + [StringFormat] = "36;1;96", + [Interpolation] = "33", +}; + +static void ansiFormat(const char *opts[], enum Class class, const char *text) { + (void)opts; + if (!SGR[class]) { + printf("%s", text); + return; + } + // Set color on each line for piping to less -R: + for (const char *nl; (nl = strchr(text, '\n')); text = &nl[1]) { + printf("\33[%sm%.*s\33[m\n", SGR[class], (int)(nl - text), text); + } + if (*text) printf("\33[%sm%s\33[m", SGR[class], text); +} + +static void +debugFormat(const char *opts[], enum Class class, const char *text) { + if (class != Normal) { + printf("%s(", Class[class]); + ansiFormat(opts, class, text); + printf(")"); + } else { + printf("%s", text); + } +} + +static const char *IRC[ClassCap] = { + [Keyword] = "\00315", + [Macro] = "\0033", + [Comment] = "\0032", + [String] = "\00310", + [StringFormat] = "\00311", + [Interpolation] = "\0037", +}; + +static void ircHeader(const char *opts[]) { + if (opts[Monospace]) printf("\21"); +} + +static const char *stop(const char *text) { + return (*text == ',' || isdigit(*text) ? "\2\2" : ""); +} + +static void ircFormat(const char *opts[], enum Class class, const char *text) { + for (const char *nl; (nl = strchr(text, '\n')); text = &nl[1]) { + if (IRC[class]) printf("%s%s", IRC[class], stop(text)); + printf("%.*s\n", (int)(nl - text), text); + if (opts[Monospace]) printf("\21"); + } + if (*text) { + if (IRC[class]) { + printf("%s%s%s\17", IRC[class], stop(text), text); + if (opts[Monospace]) printf("\21"); + } else { + printf("%s", text); + } + } +} + +static void htmlEscape(const char *text) { + while (*text) { + switch (*text) { + break; case '"': text++; printf("""); + break; case '&': text++; printf("&"); + break; case '<': text++; printf("<"); + } + size_t len = strcspn(text, "\"&<"); + if (len) fwrite(text, len, 1, stdout); + text += len; + } +} + +static const char *Styles[ClassCap] = { + [Keyword] = "color: dimgray;", + [Macro] = "color: green;", + [Comment] = "color: navy;", + [String] = "color: teal;", + [StringFormat] = "color: teal; font-weight: bold;", + [Interpolation] = "color: olive;", +}; + +static void styleTabSize(const char *tab) { + printf("-moz-tab-size: "); + htmlEscape(tab); + printf("; tab-size: "); + htmlEscape(tab); + printf(";"); +} + +static void htmlHeader(const char *opts[]) { + if (!opts[Document]) goto body; + + printf("\n"); + if (opts[Title]) htmlEscape(opts[Title]); + printf("\n"); + + if (opts[Style]) { + printf("\n"); + } else if (!opts[Inline]) { + printf("\n"); + } + +body: + if (opts[Inline] && opts[Tab]) { + printf("
");
+	} else {
+		printf("
");
+	}
+}
+
+static void htmlFooter(const char *opts[]) {
+	printf("
"); + if (opts[Document]) printf("\n"); +} + +static void htmlFormat(const char *opts[], enum Class class, const char *text) { + if (class != Normal) { + if (opts[Inline]) { + printf("", Styles[class] ? Styles[class] : ""); + } else { + printf("", Class[class]); + } + htmlEscape(text); + printf(""); + } else { + htmlEscape(text); + } +} + +static const struct Formatter { + const char *name; + Header *header; + Format *format; + Header *footer; +} Formatters[] = { + { "ansi", NULL, ansiFormat, NULL }, + { "debug", NULL, debugFormat, NULL }, + { "html", htmlHeader, htmlFormat, htmlFooter }, + { "irc", ircHeader, ircFormat, NULL }, +}; + +static const struct Formatter *parseFormatter(const char *name) { + for (size_t i = 0; i < ARRAY_LEN(Formatters); ++i) { + if (!strcmp(name, Formatters[i].name)) return &Formatters[i]; + } + errx(EX_USAGE, "unknown formatter %s", name); +} + +static char *const OptionKeys[OptionCap + 1] = { +#define X(option, key) [option] = key, + ENUM_OPTION +#undef X + NULL, +}; + +int main(int argc, char *argv[]) { + bool text = false; + const char *name = NULL; + const struct Lexer *lexer = NULL; + const struct Formatter *formatter = &Formatters[0]; + const char *opts[OptionCap] = {0}; + + for (int opt; 0 < (opt = getopt(argc, argv, "f:l:n:o:t"));) { + switch (opt) { + break; case 'f': formatter = parseFormatter(optarg); + break; case 'l': lexer = parseLexer(optarg); + break; case 'n': name = optarg; + break; case 'o': { + while (*optarg) { + char *val; + int key = getsubopt(&optarg, OptionKeys, &val); + if (key < 0) errx(EX_USAGE, "no such option %s", val); + opts[key] = (val ? val : ""); + } + } + break; case 't': text = true; + break; default: return EX_USAGE; + } + } + + const char *path = "(stdin)"; + FILE *file = stdin; + if (optind < argc) { + path = argv[optind]; + file = fopen(path, "r"); + if (!file) err(EX_NOINPUT, "%s", path); + } + + if (!name) { + if (NULL != (name = strrchr(path, '/'))) { + name++; + } else { + name = path; + } + } + if (!opts[Title]) opts[Title] = name; + if (!lexer) lexer = matchLexer(name); + if (!lexer && text) lexer = &LexText; + if (!lexer) errx(EX_USAGE, "cannot infer lexer for %s", name); + + *lexer->in = file; + if (formatter->header) formatter->header(opts); + for (enum Class class; None != (class = lexer->lex());) { + assert(class < ClassCap); + formatter->format(opts, class, *lexer->text); + } + if (formatter->footer) formatter->footer(opts); +} -- cgit 1.4.1