From 593137c52dc77cf113c1d9561d1b27b98a550bb5 Mon Sep 17 00:00:00 2001 From: June McEnroe Date: Mon, 28 Dec 2020 20:06:44 -0500 Subject: Add initial version of hilex hilex is meant to replace hi, based on lex rather than a mess of overlapping regexps. I want to preserve hi's tagging abilities, but that will require some amount of parsing/post-processing, which I'm not sure how to approach yet. Macro lexing for C still needs work, as I want to match strings and comments inside macros. --- bin/hilex/hilex.c | 129 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 bin/hilex/hilex.c (limited to 'bin/hilex/hilex.c') diff --git a/bin/hilex/hilex.c b/bin/hilex/hilex.c new file mode 100644 index 00000000..5b40f280 --- /dev/null +++ b/bin/hilex/hilex.c @@ -0,0 +1,129 @@ +/* Copyright (C) 2020 June McEnroe + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <err.h>
+#include <regex.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <unistd.h>
+
+#include "hilex.h"
+
+// Available lexers. Each entry carries the name accepted by -l and an
+// extended regular expression that is matched against the input name to
+// infer the lexer when -l is not given.
+static const struct {
+	const struct Lexer *lexer;
+	const char *name;
+	const char *pattern;
+} Lexers[] = {
+	{ &LexC, "c", "[.][chlmy]$" },
+	{ &LexText, "text", "[.]txt$" },
+};
+
+// Look up a lexer by its exact name.
+// Does not return on failure: exits with EX_USAGE if no entry matches.
+static const struct Lexer *parseLexer(const char *name) {
+	for (size_t i = 0; i < ARRAY_LEN(Lexers); ++i) {
+		if (!strcmp(name, Lexers[i].name)) return Lexers[i].lexer;
+	}
+	errx(EX_USAGE, "unknown lexer %s", name);
+}
+
+// Infer a lexer by matching name against each entry's pattern, in table
+// order. Returns the first lexer whose pattern matches, or NULL if none do.
+static const struct Lexer *matchLexer(const char *name) {
+	regex_t regex;
+	for (size_t i = 0; i < ARRAY_LEN(Lexers); ++i) {
+		int error = regcomp(
+			&regex, Lexers[i].pattern, REG_EXTENDED | REG_NOSUB
+		);
+		// The patterns are constants in the table above, so a compile
+		// failure is a programming error rather than a runtime condition.
+		assert(!error);
+		error = regexec(&regex, name, 0, NULL, 0);
+		regfree(&regex);
+		if (!error) return Lexers[i].lexer;
+	}
+	return NULL;
+}
+
+// Available output formatters and the names accepted by -f.
+static const struct {
+	const struct Formatter *formatter;
+	const char *name;
+} Formatters[] = {
+	{ &FormatANSI, "ansi" },
+	{ &FormatDebug, "debug" },
+};
+
+// Look up a formatter by its exact name.
+// Does not return on failure: exits with EX_USAGE if no entry matches.
+static const struct Formatter *parseFormatter(const char *name) {
+	for (size_t i = 0; i < ARRAY_LEN(Formatters); ++i) {
+		if (!strcmp(name, Formatters[i].name)) return Formatters[i].formatter;
+	}
+	errx(EX_USAGE, "unknown formatter %s", name);
+}
+
+// Printable name of each class, indexed by its enum Class value.
+// NOTE(review): ENUM_CLASS is presumably an X-macro list from hilex.h that
+// expands X() once per class -- confirm against the header.
+static const char *ClassName[] = {
+#define X(class) [class] = #class,
+	ENUM_CLASS
+#undef X
+};
+
+// Debug formatter: prints "Class(" followed by the ANSI-formatted token
+// text in italics (SGR 3), then resets attributes and closes the paren.
+static void
+debugFormat(const char *opts[], enum Class class, const char *text) {
+	printf("%s(\33[3m", ClassName[class]);
+	FormatANSI.format(opts, class, text);
+	printf("\33[m)");
+}
+
+const struct Formatter FormatDebug = { .format = debugFormat };
+
+// Usage: hilex [-t] [-f formatter] [-l lexer] [-n name] [file]
+//
+//   -f  select the output formatter by name (default: ansi)
+//   -l  select the lexer by name instead of inferring it
+//   -n  name to use for lexer inference instead of the file's base name
+//   -t  fall back to the text lexer when no lexer can be inferred
+//
+// Reads from file, or standard input if no file is given, and writes the
+// formatted tokens to standard output.
+int main(int argc, char *argv[]) {
+	bool text = false;
+	const char *name = NULL;
+	const struct Lexer *lexer = NULL;
+	const struct Formatter *formatter = &FormatANSI;
+
+	for (int opt; 0 < (opt = getopt(argc, argv, "f:l:n:t"));) {
+		switch (opt) {
+			break; case 'f': formatter = parseFormatter(optarg);
+			break; case 'l': lexer = parseLexer(optarg);
+			break; case 'n': name = optarg;
+			break; case 't': text = true;
+			// getopt has already printed a diagnostic for an unknown
+			// option or a missing argument; stop instead of ignoring it.
+			break; default: return EX_USAGE;
+		}
+	}
+
+	const char *path = "(stdin)";
+	FILE *file = stdin;
+	if (optind < argc) {
+		path = argv[optind];
+		file = fopen(path, "r");
+		if (!file) err(EX_NOINPUT, "%s", path);
+	}
+
+	// Without -n, infer from the last component of the input path.
+	if (!name) {
+		if (NULL != (name = strrchr(path, '/'))) {
+			name++;
+		} else {
+			name = path;
+		}
+	}
+	if (!lexer) lexer = matchLexer(name);
+	if (!lexer && text) lexer = &LexText;
+	if (!lexer) errx(EX_USAGE, "cannot infer lexer for %s", name);
+
+	// Point the lexer at the input, then hand each token it produces to
+	// the formatter until the lexer reports None. The header and footer
+	// hooks are optional; no formatter options are passed (opts is NULL).
+	*lexer->in = file;
+	if (formatter->header) formatter->header(NULL);
+	for (enum Class class; None != (class = lexer->lex());) {
+		formatter->format(NULL, class, *lexer->text);
+	}
+	if (formatter->footer) formatter->footer(NULL);
+	return EX_OK;
+}
-- 
cgit 1.4.1