From ab54d9c9f162d07a62004bc0e9b958c345a03c07 Mon Sep 17 00:00:00 2001 From: "C. McEnroe" Date: Mon, 28 Dec 2020 20:06:44 -0500 Subject: Add initial version of hilex hilex is meant to replace hi, based on lex rather than a mess of overlapping regexps. I want to preserve hi's tagging abilities, but that will require some amount of parsing/post-processing, which I'm not sure how to approach yet. Macro lexing for C still needs work, as I want to match strings and comments inside macros. --- bin/hilex/.gitignore | 2 + bin/hilex/Makefile | 14 ++++++ bin/hilex/ansi.c | 44 +++++++++++++++++ bin/hilex/c.l | 131 +++++++++++++++++++++++++++++++++++++++++++++++++++ bin/hilex/hilex.c | 129 ++++++++++++++++++++++++++++++++++++++++++++++++++ bin/hilex/hilex.h | 60 +++++++++++++++++++++++ bin/hilex/text.l | 35 ++++++++++++++ 7 files changed, 415 insertions(+) create mode 100644 bin/hilex/.gitignore create mode 100644 bin/hilex/Makefile create mode 100644 bin/hilex/ansi.c create mode 100644 bin/hilex/c.l create mode 100644 bin/hilex/hilex.c create mode 100644 bin/hilex/hilex.h create mode 100644 bin/hilex/text.l (limited to 'bin') diff --git a/bin/hilex/.gitignore b/bin/hilex/.gitignore new file mode 100644 index 00000000..f4c89460 --- /dev/null +++ b/bin/hilex/.gitignore @@ -0,0 +1,2 @@ +*.o +hilex diff --git a/bin/hilex/Makefile b/bin/hilex/Makefile new file mode 100644 index 00000000..4a930757 --- /dev/null +++ b/bin/hilex/Makefile @@ -0,0 +1,14 @@ +CFLAGS += -std=c11 -Wall -Wextra -Wpedantic + +OBJS += ansi.o +OBJS += c.o +OBJS += hilex.o +OBJS += text.o + +hilex: ${OBJS} + ${CC} ${LDFLAGS} ${OBJS} ${LDLIBS} -o $@ + +${OBJS}: hilex.h + +clean: + rm -f hilex ${OBJS} diff --git a/bin/hilex/ansi.c b/bin/hilex/ansi.c new file mode 100644 index 00000000..5ecd1f2a --- /dev/null +++ b/bin/hilex/ansi.c @@ -0,0 +1,44 @@ +/* Copyright (C) 2020 C. 
McEnroe + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "hilex.h" + +static const char *Color[ClassCap] = { + [Keyword] = "37", + [Macro] = "32", + [Comment] = "34", + [String] = "36", + [StringFormat] = "36;1;96", +}; + +static void format(const char *opts[], enum Class class, const char *text) { + (void)opts; + if (!Color[class]) { + printf("%s", text); + return; + } + // Set color on each line for piping to less -R: + for (const char *nl; (nl = strchr(text, '\n')); text = &nl[1]) { + printf("\33[%sm%.*s\33[m\n", Color[class], (int)(nl - text), text); + } + if (*text) printf("\33[%sm%s\33[m", Color[class], text); +} + +const struct Formatter FormatANSI = { .format = format }; diff --git a/bin/hilex/c.l b/bin/hilex/c.l new file mode 100644 index 00000000..159980e2 --- /dev/null +++ b/bin/hilex/c.l @@ -0,0 +1,131 @@ +/* Copyright (C) 2020 C. McEnroe + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +%option prefix="c11" +%option noyywrap + +%{ +#include "hilex.h" +%} + +%x MacroLine MacroInclude +%x CharLiteral StringLiteral + +width "*"|[0-9]+ + +%% + +[[:space:]]+ { return Normal; } + +([-+*/%&|^=!<>]|"<<"|">>")"="? | +[=~.?:]|"++"|"--"|"&&"|"||"|"->" | +sizeof|(_A|alignof) { + return Operator; +} + +([1-9][0-9]*|"0"[0-7]*|"0x"[[:xdigit:]]+)([ulUL]{0,3}) | +[0-9]*("."[0-9]*)?([eE][+-]?[0-9]+)?[flFL]? | +"0x"[[:xdigit:]]*("."[[:xdigit:]]*)?([pP][+-]?[0-9]+)[flFL]? { + return Number; +} + +auto|break|case|const|continue|default|do|else|enum|extern|for|goto|if|inline | +register|restrict|return|static|struct|switch|typedef|union|volatile|while | +(_A|a)lignas|_Atomic|_Generic|(_N|n)oreturn|(_S|s)tatic_assert | +(_T|t)hread_local { + return Keyword; +} + +[_[:alpha:]][_[:alnum:]]* { return Identifier; } + +^"#" { + BEGIN(MacroLine); + return Macro; +} +^"#"[[:blank:]]*"include" { + BEGIN(MacroInclude); + return Macro; +} +<MacroLine,MacroInclude>{ + "\n" { + BEGIN(0); + return Macro; + } + "\\\n" { return Macro; } + [^\\\n<"]+|. { return Macro; } +} +<MacroInclude>{ + "<"[^>]+">" | + "\""[^"]+"\"" { + return String; + } +} + +"//"([^\n]|"\\\n")* | +"/*"([^*]|"*"[^/])*"*"+"/" { + return Comment; +} + +[LUu]?"'" { + BEGIN(CharLiteral); + return String; +} +([LU]|u8?)?"\"" { + BEGIN(StringLiteral); + return String; +} + +<CharLiteral,StringLiteral>{ + "\\"['""?\\abfnrtv] | + "\\"([0-7]{1,3}) | + "\\x"([[:xdigit:]]{2}) | + "\\u"([[:xdigit:]]{4}) | + "\\U"([[:xdigit:]]{8}) { + return StringEscape; + } +} +<StringLiteral>{ + "%%" | + "%"[ #+-0]*{width}?("."{width})?([Lhjltz]|hh|ll)?[AEFGXacdefginopsux] { + return StringFormat; + } +} + +<CharLiteral>{ + "'" { + BEGIN(0); + return String; + } + [^\\']+|. { return String; } +} +<StringLiteral>{ + "\"" { + BEGIN(0); + return String; + } + [^%\\"]+|. { return String; } +} + +. 
{ return Normal; } + +%{ + (void)yyunput; + (void)input; +%} + +%% + +const struct Lexer LexC = { yylex, &yyin, &yytext }; diff --git a/bin/hilex/hilex.c b/bin/hilex/hilex.c new file mode 100644 index 00000000..e973f0cd --- /dev/null +++ b/bin/hilex/hilex.c @@ -0,0 +1,129 @@ +/* Copyright (C) 2020 C. McEnroe + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <err.h> +#include <regex.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sysexits.h> +#include <unistd.h> + +#include "hilex.h" + +static const struct { + const struct Lexer *lexer; + const char *name; + const char *pattern; +} Lexers[] = { + { &LexC, "c", "[.][chlmy]$" }, + { &LexText, "text", "[.]txt$" }, +}; + +static const struct Lexer *parseLexer(const char *name) { + for (size_t i = 0; i < ARRAY_LEN(Lexers); ++i) { + if (!strcmp(name, Lexers[i].name)) return Lexers[i].lexer; + } + errx(EX_USAGE, "unknown lexer %s", name); +} + +static const struct Lexer *matchLexer(const char *name) { + regex_t regex; + for (size_t i = 0; i < ARRAY_LEN(Lexers); ++i) { + int error = regcomp( + &regex, Lexers[i].pattern, REG_EXTENDED | REG_NOSUB + ); + assert(!error); + error = regexec(&regex, name, 0, NULL, 0); + regfree(&regex); + if (!error) return Lexers[i].lexer; + } + return NULL; +} + +static const struct { + const struct Formatter *formatter; + const char *name; +} Formatters[] = { + { &FormatANSI, "ansi" }, + { &FormatDebug, "debug" }, +}; + 
+static const struct Formatter *parseFormatter(const char *name) { + for (size_t i = 0; i < ARRAY_LEN(Formatters); ++i) { + if (!strcmp(name, Formatters[i].name)) return Formatters[i].formatter; + } + errx(EX_USAGE, "unknown formatter %s", name); +} + +static const char *ClassName[] = { +#define X(class) [class] = #class, + ENUM_CLASS +#undef X +}; + +static void +debugFormat(const char *opts[], enum Class class, const char *text) { + printf("%s(\33[3m", ClassName[class]); + FormatANSI.format(opts, class, text); + printf("\33[m)"); +} + +const struct Formatter FormatDebug = { .format = debugFormat }; + +int main(int argc, char *argv[]) { + bool text = false; + const char *name = NULL; + const struct Lexer *lexer = NULL; + const struct Formatter *formatter = &FormatANSI; + + for (int opt; 0 < (opt = getopt(argc, argv, "f:l:n:t"));) { + switch (opt) { + break; case 'f': formatter = parseFormatter(optarg); + break; case 'l': lexer = parseLexer(optarg); + break; case 'n': name = optarg; + break; case 't': text = true; + } + } + + const char *path = "(stdin)"; + FILE *file = stdin; + if (optind < argc) { + path = argv[optind]; + file = fopen(path, "r"); + if (!file) err(EX_NOINPUT, "%s", path); + } + + if (!name) { + if (NULL != (name = strrchr(path, '/'))) { + name++; + } else { + name = path; + } + } + if (!lexer) lexer = matchLexer(name); + if (!lexer && text) lexer = &LexText; + if (!lexer) errx(EX_USAGE, "cannot infer lexer for %s", name); + + *lexer->in = file; + if (formatter->header) formatter->header(NULL); + for (enum Class class; None != (class = lexer->lex());) { + formatter->format(NULL, class, *lexer->text); + } + if (formatter->footer) formatter->footer(NULL); +} diff --git a/bin/hilex/hilex.h b/bin/hilex/hilex.h new file mode 100644 index 00000000..63e5a43b --- /dev/null +++ b/bin/hilex/hilex.h @@ -0,0 +1,60 @@ +/* Copyright (C) 2020 C. 
McEnroe + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <stdio.h> + +#define ARRAY_LEN(a) (sizeof(a) / sizeof(a[0])) + +#define ENUM_CLASS \ + X(None) \ + X(Normal) \ + X(Operator) \ + X(Number) \ + X(Keyword) \ + X(Identifier) \ + X(Macro) \ + X(Comment) \ + X(String) \ + X(StringEscape) \ + X(StringFormat) + +enum Class { +#define X(class) class, + ENUM_CLASS +#undef X + ClassCap, +}; + +typedef int Lex(void); +struct Lexer { + Lex *lex; + FILE **in; + char **text; +}; + +extern const struct Lexer LexC; +extern const struct Lexer LexText; + +typedef void Header(const char *opts[]); +typedef void Format(const char *opts[], enum Class class, const char *text); +struct Formatter { + Header *header; + Format *format; + Header *footer; +}; + +extern const struct Formatter FormatANSI; +extern const struct Formatter FormatDebug; diff --git a/bin/hilex/text.l b/bin/hilex/text.l new file mode 100644 index 00000000..e3d64814 --- /dev/null +++ b/bin/hilex/text.l @@ -0,0 +1,35 @@ +/* Copyright (C) 2020 C. McEnroe + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +%option prefix="text" +%option noyywrap + +%{ +#include "hilex.h" +%} + +%% + +.*"\n"? { return Normal; } + +%{ + (void)yyunput; + (void)input; +%} + +%% + +const struct Lexer LexText = { yylex, &yyin, &yytext }; -- cgit 1.4.1