From d5727a5964c859359c0b6e4fcf5b4e91ec8f4569 Mon Sep 17 00:00:00 2001 From: "C. McEnroe" Date: Tue, 12 Jan 2021 22:06:56 -0500 Subject: Move hilex out of hilex directory --- bin/.gitignore | 1 + bin/Makefile | 12 ++ bin/README.7 | 2 + bin/c11.l | 139 ++++++++++++++++++++++ bin/hilex.c | 316 +++++++++++++++++++++++++++++++++++++++++++++++++++ bin/hilex.h | 50 ++++++++ bin/hilex/.gitignore | 2 - bin/hilex/Makefile | 15 --- bin/hilex/c.l | 139 ---------------------- bin/hilex/hilex.1 | 177 ----------------------------- bin/hilex/hilex.c | 316 --------------------------------------------------- bin/hilex/hilex.h | 50 -------- bin/hilex/make.l | 105 ----------------- bin/hilex/mdoc.l | 65 ----------- bin/hilex/text.l | 35 ------ bin/make.l | 105 +++++++++++++++++ bin/man1/hilex.1 | 177 +++++++++++++++++++++++++++++ bin/mdoc.l | 65 +++++++++++ bin/text.l | 35 ++++++ 19 files changed, 902 insertions(+), 904 deletions(-) create mode 100644 bin/c11.l create mode 100644 bin/hilex.c create mode 100644 bin/hilex.h delete mode 100644 bin/hilex/.gitignore delete mode 100644 bin/hilex/Makefile delete mode 100644 bin/hilex/c.l delete mode 100644 bin/hilex/hilex.1 delete mode 100644 bin/hilex/hilex.c delete mode 100644 bin/hilex/hilex.h delete mode 100644 bin/hilex/make.l delete mode 100644 bin/hilex/mdoc.l delete mode 100644 bin/hilex/text.l create mode 100644 bin/make.l create mode 100644 bin/man1/hilex.1 create mode 100644 bin/mdoc.l create mode 100644 bin/text.l diff --git a/bin/.gitignore b/bin/.gitignore index 02f47efe..4fe5b9b2 100644 --- a/bin/.gitignore +++ b/bin/.gitignore @@ -12,6 +12,7 @@ fbatt fbclock glitch hi +hilex hnel htagml htmltags diff --git a/bin/Makefile b/bin/Makefile index 8e0cb324..9b2c06d4 100644 --- a/bin/Makefile +++ b/bin/Makefile @@ -26,6 +26,7 @@ BINS += c BINS += dtch BINS += glitch BINS += hi +BINS += hilex BINS += hnel BINS += htagml BINS += modem @@ -83,6 +84,17 @@ hi: hi.c ${CC} ${CFLAGS} ${LDFLAGS} hi.c ${LDLIBS.$@} -o $@ ./hi -c +OBJS.hilex 
+= c11.o +OBJS.hilex += hilex.o +OBJS.hilex += make.o +OBJS.hilex += mdoc.o +OBJS.hilex += text.o + +hilex: ${OBJS.hilex} + ${CC} ${LDFLAGS} ${OBJS.$@} ${LDLIBS.$@} -o $@ + +${OBJS.hilex}: hilex.h + open pbcopy pbpaste: pbd ln -f pbd $@ diff --git a/bin/README.7 b/bin/README.7 index 31acdde5..d4cf26e8 100644 --- a/bin/README.7 +++ b/bin/README.7 @@ -38,6 +38,8 @@ framebuffer clock PNG glitcher .It Xr hi 1 syntax highlighter +.It Xr hilex 1 +syntax highlighter .It Xr hnel 1 PTY input remapper .It Xr htagml 1 diff --git a/bin/c11.l b/bin/c11.l new file mode 100644 index 00000000..21e7d44b --- /dev/null +++ b/bin/c11.l @@ -0,0 +1,139 @@ +/* Copyright (C) 2020 C. McEnroe + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +%option prefix="c11" +%option noyywrap + +%{ +#include "hilex.h" +%} + +%s MacroLine MacroInclude +%x CharLiteral StringLiteral + +ident [_[:alpha:]][_[:alnum:]]* +width "*"|[0-9]+ + +%% + static int pop = INITIAL; + +[[:blank:]]+ { return Normal; } + +^"%"[%{}]? { + BEGIN(pop = MacroLine); + return Macro; +} + +([-+*/%&|^=!<>]|"<<"|">>")"="? | +[=~.?:]|"++"|"--"|"&&"|"||"|"->" | +sizeof|(_A|alignof) { + return Operator; +} + +([1-9][0-9]*|"0"[0-7]*|"0x"[[:xdigit:]]+)([ulUL]{0,3}) | +[0-9]*("."[0-9]*)?([eE][+-]?[0-9]+)?[flFL]? | +"0x"[[:xdigit:]]*("."[[:xdigit:]]*)?([pP][+-]?[0-9]+)[flFL]? 
{ + return Number; +} + +auto|break|case|const|continue|default|do|else|enum|extern|for|goto|if|inline | +register|restrict|return|static|struct|switch|typedef|union|volatile|while | +(_A|a)lignas|_Atomic|_Generic|(_N|n)oreturn|(_S|s)tatic_assert | +(_T|t)hread_local { + return Keyword; +} + +^"#"[[:blank:]]*(include|import) { + BEGIN(pop = MacroInclude); + return Macro; +} +^"#"[[:blank:]]*{ident} { + BEGIN(pop = MacroLine); + return Macro; +} +"<"[^>]+">" { + return String; +} +{ + "\n" { + BEGIN(pop = INITIAL); + return Normal; + } + "\\\n" { return Macro; } + {ident} { return Macro; } +} + +{ident} { return Identifier; } + +"//"([^\n]|"\\\n")* | +"/*"([^*]|"*"[^/])*"*"+"/" { + return Comment; +} + +[LUu]?"'" { + BEGIN(CharLiteral); + return String; +} +([LU]|u8?)?"\"" { + BEGIN(StringLiteral); + return String; +} + +{ + "\\"[''""?\\abfnrtv] | + "\\"([0-7]{1,3}) | + "\\x"([[:xdigit:]]{2}) | + "\\u"([[:xdigit:]]{4}) | + "\\U"([[:xdigit:]]{8}) { + return StringEscape; + } +} +{ + "%%" | + "%"[ #+-0]*{width}?("."{width})?([Lhjltz]|hh|ll)?[AEFGXacdefginopsux] { + return StringFormat; + } +} + +{ + "'" { + BEGIN(pop); + return String; + } + [^\\'']+|. { return String; } +} +{ + "\"" { + BEGIN(pop); + return String; + } + [^%\\""]+|. { return String; } +} + +. { + return Macro; +} + +.|\n { return Normal; } + +%{ + (void)yyunput; + (void)input; +%} + +%% + +const struct Lexer LexC = { yylex, &yyin, &yytext }; diff --git a/bin/hilex.c b/bin/hilex.c new file mode 100644 index 00000000..ccbfa8ad --- /dev/null +++ b/bin/hilex.c @@ -0,0 +1,316 @@ +/* Copyright (C) 2020 C. McEnroe + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hilex.h" + +#define ARRAY_LEN(a) (sizeof(a) / sizeof(a[0])) + +static const char *Class[] = { +#define X(class) [class] = #class, + ENUM_CLASS +#undef X +}; + +static const struct { + const struct Lexer *lexer; + const char *name; + const char *pattern; +} Lexers[] = { + { &LexC, "c", "[.][chlmy]$" }, + { &LexMake, "make", "[.]mk$|^Makefile$" }, + { &LexMdoc, "mdoc", "[.][1-9]$" }, + { &LexText, "text", "[.]txt$" }, +}; + +static const struct Lexer *parseLexer(const char *name) { + for (size_t i = 0; i < ARRAY_LEN(Lexers); ++i) { + if (!strcmp(name, Lexers[i].name)) return Lexers[i].lexer; + } + errx(EX_USAGE, "unknown lexer %s", name); +} + +static const struct Lexer *matchLexer(const char *name) { + regex_t regex; + for (size_t i = 0; i < ARRAY_LEN(Lexers); ++i) { + int error = regcomp( + ®ex, Lexers[i].pattern, REG_EXTENDED | REG_NOSUB + ); + assert(!error); + error = regexec(®ex, name, 0, NULL, 0); + regfree(®ex); + if (!error) return Lexers[i].lexer; + } + return NULL; +} + +#define ENUM_OPTION \ + X(Document, "document") \ + X(Inline, "inline") \ + X(Monospace, "monospace") \ + X(Style, "style") \ + X(Tab, "tab") \ + X(Title, "title") + +enum Option { +#define X(option, key) option, + ENUM_OPTION +#undef X + OptionCap, +}; + +typedef void Header(const char *opts[]); +typedef void Format(const char *opts[], enum Class class, const char *text); + +static const char *SGR[ClassCap] = { + [Keyword] = "37", + [Macro] = "32", + [Comment] = "34", + [String] = "36", + 
[StringFormat] = "36;1;96", + [Interpolation] = "33", +}; + +static void ansiFormat(const char *opts[], enum Class class, const char *text) { + (void)opts; + if (!SGR[class]) { + printf("%s", text); + return; + } + // Set color on each line for piping to less -R: + for (const char *nl; (nl = strchr(text, '\n')); text = &nl[1]) { + printf("\33[%sm%.*s\33[m\n", SGR[class], (int)(nl - text), text); + } + if (*text) printf("\33[%sm%s\33[m", SGR[class], text); +} + +static void +debugFormat(const char *opts[], enum Class class, const char *text) { + if (class != Normal) { + printf("%s(", Class[class]); + ansiFormat(opts, class, text); + printf(")"); + } else { + printf("%s", text); + } +} + +static const char *IRC[ClassCap] = { + [Keyword] = "\00315", + [Macro] = "\0033", + [Comment] = "\0032", + [String] = "\00310", + [StringFormat] = "\00311", + [Interpolation] = "\0037", +}; + +static void ircHeader(const char *opts[]) { + if (opts[Monospace]) printf("\21"); +} + +static const char *stop(const char *text) { + return (*text == ',' || isdigit(*text) ? 
"\2\2" : ""); +} + +static void ircFormat(const char *opts[], enum Class class, const char *text) { + for (const char *nl; (nl = strchr(text, '\n')); text = &nl[1]) { + if (IRC[class]) printf("%s%s", IRC[class], stop(text)); + printf("%.*s\n", (int)(nl - text), text); + if (opts[Monospace]) printf("\21"); + } + if (*text) { + if (IRC[class]) { + printf("%s%s%s\17", IRC[class], stop(text), text); + if (opts[Monospace]) printf("\21"); + } else { + printf("%s", text); + } + } +} + +static void htmlEscape(const char *text) { + while (*text) { + switch (*text) { + break; case '"': text++; printf("""); + break; case '&': text++; printf("&"); + break; case '<': text++; printf("<"); + } + size_t len = strcspn(text, "\"&<"); + if (len) fwrite(text, len, 1, stdout); + text += len; + } +} + +static const char *Styles[ClassCap] = { + [Keyword] = "color: dimgray;", + [Macro] = "color: green;", + [Comment] = "color: navy;", + [String] = "color: teal;", + [StringFormat] = "color: teal; font-weight: bold;", + [Interpolation] = "color: olive;", +}; + +static void styleTabSize(const char *tab) { + printf("-moz-tab-size: "); + htmlEscape(tab); + printf("; tab-size: "); + htmlEscape(tab); + printf(";"); +} + +static void htmlHeader(const char *opts[]) { + if (!opts[Document]) goto body; + + printf("\n"); + if (opts[Title]) htmlEscape(opts[Title]); + printf("\n"); + + if (opts[Style]) { + printf("\n"); + } else if (!opts[Inline]) { + printf("\n"); + } + +body: + if (opts[Inline] && opts[Tab]) { + printf("
");
+	} else {
+		printf("
");
+	}
+}
+
+static void htmlFooter(const char *opts[]) {
+	printf("
"); + if (opts[Document]) printf("\n"); +} + +static void htmlFormat(const char *opts[], enum Class class, const char *text) { + if (class != Normal) { + if (opts[Inline]) { + printf("", Styles[class] ? Styles[class] : ""); + } else { + printf("", Class[class]); + } + htmlEscape(text); + printf(""); + } else { + htmlEscape(text); + } +} + +static const struct Formatter { + const char *name; + Header *header; + Format *format; + Header *footer; +} Formatters[] = { + { "ansi", NULL, ansiFormat, NULL }, + { "debug", NULL, debugFormat, NULL }, + { "html", htmlHeader, htmlFormat, htmlFooter }, + { "irc", ircHeader, ircFormat, NULL }, +}; + +static const struct Formatter *parseFormatter(const char *name) { + for (size_t i = 0; i < ARRAY_LEN(Formatters); ++i) { + if (!strcmp(name, Formatters[i].name)) return &Formatters[i]; + } + errx(EX_USAGE, "unknown formatter %s", name); +} + +static char *const OptionKeys[OptionCap + 1] = { +#define X(option, key) [option] = key, + ENUM_OPTION +#undef X + NULL, +}; + +int main(int argc, char *argv[]) { + bool text = false; + const char *name = NULL; + const struct Lexer *lexer = NULL; + const struct Formatter *formatter = &Formatters[0]; + const char *opts[OptionCap] = {0}; + + for (int opt; 0 < (opt = getopt(argc, argv, "f:l:n:o:t"));) { + switch (opt) { + break; case 'f': formatter = parseFormatter(optarg); + break; case 'l': lexer = parseLexer(optarg); + break; case 'n': name = optarg; + break; case 'o': { + while (*optarg) { + char *val; + int key = getsubopt(&optarg, OptionKeys, &val); + if (key < 0) errx(EX_USAGE, "no such option %s", val); + opts[key] = (val ? 
val : ""); + } + } + break; case 't': text = true; + break; default: return EX_USAGE; + } + } + + const char *path = "(stdin)"; + FILE *file = stdin; + if (optind < argc) { + path = argv[optind]; + file = fopen(path, "r"); + if (!file) err(EX_NOINPUT, "%s", path); + } + + if (!name) { + if (NULL != (name = strrchr(path, '/'))) { + name++; + } else { + name = path; + } + } + if (!opts[Title]) opts[Title] = name; + if (!lexer) lexer = matchLexer(name); + if (!lexer && text) lexer = &LexText; + if (!lexer) errx(EX_USAGE, "cannot infer lexer for %s", name); + + *lexer->in = file; + if (formatter->header) formatter->header(opts); + for (enum Class class; None != (class = lexer->lex());) { + assert(class < ClassCap); + formatter->format(opts, class, *lexer->text); + } + if (formatter->footer) formatter->footer(opts); +} diff --git a/bin/hilex.h b/bin/hilex.h new file mode 100644 index 00000000..5998e6a4 --- /dev/null +++ b/bin/hilex.h @@ -0,0 +1,50 @@ +/* Copyright (C) 2020 C. McEnroe + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include + +#define ENUM_CLASS \ + X(None) \ + X(Normal) \ + X(Operator) \ + X(Number) \ + X(Keyword) \ + X(Identifier) \ + X(Macro) \ + X(Comment) \ + X(String) \ + X(StringEscape) \ + X(StringFormat) \ + X(Interpolation) + +enum Class { +#define X(class) class, + ENUM_CLASS +#undef X + ClassCap, +}; + +typedef int Lex(void); +struct Lexer { + Lex *lex; + FILE **in; + char **text; +}; + +extern const struct Lexer LexC; +extern const struct Lexer LexMake; +extern const struct Lexer LexMdoc; +extern const struct Lexer LexText; diff --git a/bin/hilex/.gitignore b/bin/hilex/.gitignore deleted file mode 100644 index f4c89460..00000000 --- a/bin/hilex/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -*.o -hilex diff --git a/bin/hilex/Makefile b/bin/hilex/Makefile deleted file mode 100644 index e7972425..00000000 --- a/bin/hilex/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -CFLAGS += -std=c11 -Wall -Wextra -Wpedantic - -OBJS += c.o -OBJS += hilex.o -OBJS += make.o -OBJS += mdoc.o -OBJS += text.o - -hilex: ${OBJS} - ${CC} ${LDFLAGS} ${OBJS} ${LDLIBS} -o $@ - -${OBJS}: hilex.h - -clean: - rm -f hilex ${OBJS} diff --git a/bin/hilex/c.l b/bin/hilex/c.l deleted file mode 100644 index 21e7d44b..00000000 --- a/bin/hilex/c.l +++ /dev/null @@ -1,139 +0,0 @@ -/* Copyright (C) 2020 C. McEnroe - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -%option prefix="c11" -%option noyywrap - -%{ -#include "hilex.h" -%} - -%s MacroLine MacroInclude -%x CharLiteral StringLiteral - -ident [_[:alpha:]][_[:alnum:]]* -width "*"|[0-9]+ - -%% - static int pop = INITIAL; - -[[:blank:]]+ { return Normal; } - -^"%"[%{}]? { - BEGIN(pop = MacroLine); - return Macro; -} - -([-+*/%&|^=!<>]|"<<"|">>")"="? | -[=~.?:]|"++"|"--"|"&&"|"||"|"->" | -sizeof|(_A|alignof) { - return Operator; -} - -([1-9][0-9]*|"0"[0-7]*|"0x"[[:xdigit:]]+)([ulUL]{0,3}) | -[0-9]*("."[0-9]*)?([eE][+-]?[0-9]+)?[flFL]? | -"0x"[[:xdigit:]]*("."[[:xdigit:]]*)?([pP][+-]?[0-9]+)[flFL]? { - return Number; -} - -auto|break|case|const|continue|default|do|else|enum|extern|for|goto|if|inline | -register|restrict|return|static|struct|switch|typedef|union|volatile|while | -(_A|a)lignas|_Atomic|_Generic|(_N|n)oreturn|(_S|s)tatic_assert | -(_T|t)hread_local { - return Keyword; -} - -^"#"[[:blank:]]*(include|import) { - BEGIN(pop = MacroInclude); - return Macro; -} -^"#"[[:blank:]]*{ident} { - BEGIN(pop = MacroLine); - return Macro; -} -"<"[^>]+">" { - return String; -} -{ - "\n" { - BEGIN(pop = INITIAL); - return Normal; - } - "\\\n" { return Macro; } - {ident} { return Macro; } -} - -{ident} { return Identifier; } - -"//"([^\n]|"\\\n")* | -"/*"([^*]|"*"[^/])*"*"+"/" { - return Comment; -} - -[LUu]?"'" { - BEGIN(CharLiteral); - return String; -} -([LU]|u8?)?"\"" { - BEGIN(StringLiteral); - return String; -} - -{ - "\\"[''""?\\abfnrtv] | - "\\"([0-7]{1,3}) | - "\\x"([[:xdigit:]]{2}) | - "\\u"([[:xdigit:]]{4}) | - "\\U"([[:xdigit:]]{8}) { - return StringEscape; - } -} -{ - "%%" | - "%"[ #+-0]*{width}?("."{width})?([Lhjltz]|hh|ll)?[AEFGXacdefginopsux] { - return StringFormat; - } -} - -{ - "'" { - BEGIN(pop); - return String; - } - [^\\'']+|. { return String; } -} -{ - "\"" { - BEGIN(pop); - return String; - } - [^%\\""]+|. { return String; } -} - -. 
{ - return Macro; -} - -.|\n { return Normal; } - -%{ - (void)yyunput; - (void)input; -%} - -%% - -const struct Lexer LexC = { yylex, &yyin, &yytext }; diff --git a/bin/hilex/hilex.1 b/bin/hilex/hilex.1 deleted file mode 100644 index 858c8565..00000000 --- a/bin/hilex/hilex.1 +++ /dev/null @@ -1,177 +0,0 @@ -.Dd January 12, 2021 -.Dt HILEX 1 -.Os -. -.Sh NAME -.Nm hilex -.Nd syntax highlighter -. -.Sh SYNOPSIS -.Nm -.Op Fl t -.Op Fl f Ar format -.Op Fl l Ar lexer -.Op Fl n Ar name -.Op Fl o Ar opts -.Op Ar file -. -.Sh DESCRIPTION -The -.Nm -utility -syntax highlights -the contents of -.Ar file -or standard input -and formats it on standard output. -. -.Pp -The arguments are as follows: -.Bl -tag -width "-f format" -.It Fl f Ar format -Set the output format. -See -.Sx Output Formats . -The default format is -.Cm ansi . -. -.It Fl l Ar lexer -Set the input lexer. -See -.Sx Input Lexers . -The default input lexer is inferred from -.Ar name . -. -.It Fl n Ar name -Set the name used to infer the input lexer. -The default is the final component of -.Ar file . -. -.It Fl o Ar opts -Set output format options. -.Ar opts -is a comma-separated list of options. -Options for each output format are documented in -.Sx Output Formats . -. -.It Fl t -Default to the -.Cm text -input lexer if one cannot be inferred. -.El -. -.Ss Output Formats -.Bl -tag -width Ds -.It Cm ansi -Output ANSI terminal control sequences. -. -.It Cm html -Output HTML -.Sy -elements -within a -.Sy
-element.
-Each element has the classes
-.Sy hilex
-and one of the following:
-.Sy Operator ,
-.Sy Number ,
-.Sy Keyword ,
-.Sy Identifier ,
-.Sy Macro ,
-.Sy Comment ,
-.Sy String ,
-.Sy StringEscape ,
-.Sy StringFormat ,
-.Sy Interpolation .
-.
-.Pp
-The options are as follows:
-.Bl -tag -width "title=..."
-.It Cm style Ns = Ns Ar url
-With
-.Cm document ,
-output a
-.Sy <link>
-element for the external stylesheet
-.Ar url .
-If unset,
-output default styles in a
-.Sy \n");
-	}
-
-body:
-	if (opts[Inline] && opts[Tab]) {
-		printf("
");
-	} else {
-		printf("
");
-	}
-}
-
-static void htmlFooter(const char *opts[]) {
-	printf("
"); - if (opts[Document]) printf("\n"); -} - -static void htmlFormat(const char *opts[], enum Class class, const char *text) { - if (class != Normal) { - if (opts[Inline]) { - printf("", Styles[class] ? Styles[class] : ""); - } else { - printf("", Class[class]); - } - htmlEscape(text); - printf(""); - } else { - htmlEscape(text); - } -} - -static const struct Formatter { - const char *name; - Header *header; - Format *format; - Header *footer; -} Formatters[] = { - { "ansi", NULL, ansiFormat, NULL }, - { "debug", NULL, debugFormat, NULL }, - { "html", htmlHeader, htmlFormat, htmlFooter }, - { "irc", ircHeader, ircFormat, NULL }, -}; - -static const struct Formatter *parseFormatter(const char *name) { - for (size_t i = 0; i < ARRAY_LEN(Formatters); ++i) { - if (!strcmp(name, Formatters[i].name)) return &Formatters[i]; - } - errx(EX_USAGE, "unknown formatter %s", name); -} - -static char *const OptionKeys[OptionCap + 1] = { -#define X(option, key) [option] = key, - ENUM_OPTION -#undef X - NULL, -}; - -int main(int argc, char *argv[]) { - bool text = false; - const char *name = NULL; - const struct Lexer *lexer = NULL; - const struct Formatter *formatter = &Formatters[0]; - const char *opts[OptionCap] = {0}; - - for (int opt; 0 < (opt = getopt(argc, argv, "f:l:n:o:t"));) { - switch (opt) { - break; case 'f': formatter = parseFormatter(optarg); - break; case 'l': lexer = parseLexer(optarg); - break; case 'n': name = optarg; - break; case 'o': { - while (*optarg) { - char *val; - int key = getsubopt(&optarg, OptionKeys, &val); - if (key < 0) errx(EX_USAGE, "no such option %s", val); - opts[key] = (val ? 
val : ""); - } - } - break; case 't': text = true; - break; default: return EX_USAGE; - } - } - - const char *path = "(stdin)"; - FILE *file = stdin; - if (optind < argc) { - path = argv[optind]; - file = fopen(path, "r"); - if (!file) err(EX_NOINPUT, "%s", path); - } - - if (!name) { - if (NULL != (name = strrchr(path, '/'))) { - name++; - } else { - name = path; - } - } - if (!opts[Title]) opts[Title] = name; - if (!lexer) lexer = matchLexer(name); - if (!lexer && text) lexer = &LexText; - if (!lexer) errx(EX_USAGE, "cannot infer lexer for %s", name); - - *lexer->in = file; - if (formatter->header) formatter->header(opts); - for (enum Class class; None != (class = lexer->lex());) { - assert(class < ClassCap); - formatter->format(opts, class, *lexer->text); - } - if (formatter->footer) formatter->footer(opts); -} diff --git a/bin/hilex/hilex.h b/bin/hilex/hilex.h deleted file mode 100644 index 5998e6a4..00000000 --- a/bin/hilex/hilex.h +++ /dev/null @@ -1,50 +0,0 @@ -/* Copyright (C) 2020 C. McEnroe - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#include - -#define ENUM_CLASS \ - X(None) \ - X(Normal) \ - X(Operator) \ - X(Number) \ - X(Keyword) \ - X(Identifier) \ - X(Macro) \ - X(Comment) \ - X(String) \ - X(StringEscape) \ - X(StringFormat) \ - X(Interpolation) - -enum Class { -#define X(class) class, - ENUM_CLASS -#undef X - ClassCap, -}; - -typedef int Lex(void); -struct Lexer { - Lex *lex; - FILE **in; - char **text; -}; - -extern const struct Lexer LexC; -extern const struct Lexer LexMake; -extern const struct Lexer LexMdoc; -extern const struct Lexer LexText; diff --git a/bin/hilex/make.l b/bin/hilex/make.l deleted file mode 100644 index e7f3def5..00000000 --- a/bin/hilex/make.l +++ /dev/null @@ -1,105 +0,0 @@ -/* Copyright (C) 2020 C. McEnroe - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -%option prefix="make" -%option noyywrap - -%{ -#include "hilex.h" -%} - -%s Assign -%x Interp Shell - -ident [._[:alnum:]]+ -assign [+?:!]?= -target [-._/?*\[\][:alnum:]]+ -operator [:!]|:: - -%% - static int pop = INITIAL; - static int depth = 0; - -^"\t"+ { - BEGIN(pop = Shell); - return Normal; -} -{ - "\n" { - BEGIN(pop = INITIAL); - return Normal; - } - "\\\n" { return Normal; } - [^\\\n$]+|. { return Normal; } -} - -[[:blank:]]+ { return Normal; } - -{operator} { return Operator; } - -"."(PHONY|PRECIOUS|SUFFIXES)/{operator}? 
{ - return Keyword; -} - -^{ident}/[[:blank:]]*{assign} { - return Identifier; -} - -{assign} { - BEGIN(pop = Assign); - return Operator; -} -{ - "\n" { - BEGIN(pop = INITIAL); - return Normal; - } - "\\\n" { return String; } - [^\\$[:space:]]+|. { return String; } -} - -^"-"?include { return Macro; } - -{target} { return Identifier; } - -"#"([^\\\n]|"\\"[^\n]|"\\\n")* { return Comment; } - -<*>{ - "$"("{"|"(") { - depth++; - BEGIN(Interp); - return Interpolation; - } - "$". { return Interpolation; } -} -{ - "}"|")" { - if (!--depth) BEGIN(pop); - return Interpolation; - } - [^${}()]+ { return Interpolation; } -} - -.|\n { return Normal; } - -%{ - (void)yyunput; - (void)input; -%} - -%% - -const struct Lexer LexMake = { yylex, &yyin, &yytext }; diff --git a/bin/hilex/mdoc.l b/bin/hilex/mdoc.l deleted file mode 100644 index a31b6a2e..00000000 --- a/bin/hilex/mdoc.l +++ /dev/null @@ -1,65 +0,0 @@ -/* Copyright (C) 2020 C. McEnroe - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -%option prefix="mdoc" -%option noyywrap - -%{ -#include "hilex.h" -%} - -%s MacroLine - -%% - -[[:blank:]]+ { return Normal; } - -^"." 
{ - BEGIN(MacroLine); - return Keyword; -} - -^".\\\"".* { return Comment; } - -{ - "\n" { - BEGIN(0); - return Normal; - } - - %[ABCDIJNOPQRTUV]|A[cdnopqrt]|B[cdfkloqtx]|Br[coq]|Bsx|C[dm]|D[1bcdloqtvx] | - E[cdfklmnorsvx]|F[acdlnortx]|Hf|I[cnt]|L[bikp]|M[st]|N[dmosx]|O[copstx] | - P[acfopq]|Q[cloq]|R[esv]|S[chmoqstxy]|T[an]|U[dx]|V[at]|X[cor] { - return Keyword; - } - - "\""([^""]|"\\\"")*"\"" { return String; } -} - -"\\"(.|"("..|"["[^]]*"]") { return String; } - -[^.\\""[:space:]]+ { return Normal; } - -.|\n { return Normal; } - -%{ - (void)yyunput; - (void)input; -%} - -%% - -const struct Lexer LexMdoc = { yylex, &yyin, &yytext }; diff --git a/bin/hilex/text.l b/bin/hilex/text.l deleted file mode 100644 index e3d64814..00000000 --- a/bin/hilex/text.l +++ /dev/null @@ -1,35 +0,0 @@ -/* Copyright (C) 2020 C. McEnroe - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -%option prefix="text" -%option noyywrap - -%{ -#include "hilex.h" -%} - -%% - -.*"\n"? { return Normal; } - -%{ - (void)yyunput; - (void)input; -%} - -%% - -const struct Lexer LexText = { yylex, &yyin, &yytext }; diff --git a/bin/make.l b/bin/make.l new file mode 100644 index 00000000..e7f3def5 --- /dev/null +++ b/bin/make.l @@ -0,0 +1,105 @@ +/* Copyright (C) 2020 C. 
McEnroe + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +%option prefix="make" +%option noyywrap + +%{ +#include "hilex.h" +%} + +%s Assign +%x Interp Shell + +ident [._[:alnum:]]+ +assign [+?:!]?= +target [-._/?*\[\][:alnum:]]+ +operator [:!]|:: + +%% + static int pop = INITIAL; + static int depth = 0; + +^"\t"+ { + BEGIN(pop = Shell); + return Normal; +} +{ + "\n" { + BEGIN(pop = INITIAL); + return Normal; + } + "\\\n" { return Normal; } + [^\\\n$]+|. { return Normal; } +} + +[[:blank:]]+ { return Normal; } + +{operator} { return Operator; } + +"."(PHONY|PRECIOUS|SUFFIXES)/{operator}? { + return Keyword; +} + +^{ident}/[[:blank:]]*{assign} { + return Identifier; +} + +{assign} { + BEGIN(pop = Assign); + return Operator; +} +{ + "\n" { + BEGIN(pop = INITIAL); + return Normal; + } + "\\\n" { return String; } + [^\\$[:space:]]+|. { return String; } +} + +^"-"?include { return Macro; } + +{target} { return Identifier; } + +"#"([^\\\n]|"\\"[^\n]|"\\\n")* { return Comment; } + +<*>{ + "$"("{"|"(") { + depth++; + BEGIN(Interp); + return Interpolation; + } + "$". 
{ return Interpolation; } +} +{ + "}"|")" { + if (!--depth) BEGIN(pop); + return Interpolation; + } + [^${}()]+ { return Interpolation; } +} + +.|\n { return Normal; } + +%{ + (void)yyunput; + (void)input; +%} + +%% + +const struct Lexer LexMake = { yylex, &yyin, &yytext }; diff --git a/bin/man1/hilex.1 b/bin/man1/hilex.1 new file mode 100644 index 00000000..858c8565 --- /dev/null +++ b/bin/man1/hilex.1 @@ -0,0 +1,177 @@ +.Dd January 12, 2021 +.Dt HILEX 1 +.Os +. +.Sh NAME +.Nm hilex +.Nd syntax highlighter +. +.Sh SYNOPSIS +.Nm +.Op Fl t +.Op Fl f Ar format +.Op Fl l Ar lexer +.Op Fl n Ar name +.Op Fl o Ar opts +.Op Ar file +. +.Sh DESCRIPTION +The +.Nm +utility +syntax highlights +the contents of +.Ar file +or standard input +and formats it on standard output. +. +.Pp +The arguments are as follows: +.Bl -tag -width "-f format" +.It Fl f Ar format +Set the output format. +See +.Sx Output Formats . +The default format is +.Cm ansi . +. +.It Fl l Ar lexer +Set the input lexer. +See +.Sx Input Lexers . +The default input lexer is inferred from +.Ar name . +. +.It Fl n Ar name +Set the name used to infer the input lexer. +The default is the final component of +.Ar file . +. +.It Fl o Ar opts +Set output format options. +.Ar opts +is a comma-separated list of options. +Options for each output format are documented in +.Sx Output Formats . +. +.It Fl t +Default to the +.Cm text +input lexer if one cannot be inferred. +.El +. +.Ss Output Formats +.Bl -tag -width Ds +.It Cm ansi +Output ANSI terminal control sequences. +. +.It Cm html +Output HTML +.Sy +elements +within a +.Sy
+element.
+Each element has the classes
+.Sy hilex
+and one of the following:
+.Sy Operator ,
+.Sy Number ,
+.Sy Keyword ,
+.Sy Identifier ,
+.Sy Macro ,
+.Sy Comment ,
+.Sy String ,
+.Sy StringEscape ,
+.Sy StringFormat ,
+.Sy Interpolation .
+.
+.Pp
+The options are as follows:
+.Bl -tag -width "title=..."
+.It Cm style Ns = Ns Ar url
+With
+.Cm document ,
+output a
+.Sy <link>
+element for the external stylesheet
+.Ar url .
+If unset,
+output default styles in a
+.Sy