diff options
Diffstat (limited to '')
-rw-r--r-- | bin/.gitignore | 1 | ||||
-rw-r--r-- | bin/Makefile | 5 | ||||
-rw-r--r-- | bin/README.7 | 4 | ||||
-rw-r--r-- | bin/hi.c | 766 | ||||
-rw-r--r-- | bin/man1/hi.1 | 173 |
5 files changed, 1 insertions, 948 deletions
diff --git a/bin/.gitignore b/bin/.gitignore index 4fe5b9b2..5c577eb9 100644 --- a/bin/.gitignore +++ b/bin/.gitignore @@ -11,7 +11,6 @@ ever fbatt fbclock glitch -hi hilex hnel htagml diff --git a/bin/Makefile b/bin/Makefile index e7fe8f8b..69eadc0a 100644 --- a/bin/Makefile +++ b/bin/Makefile @@ -25,7 +25,6 @@ BINS += bit BINS += c BINS += dtch BINS += glitch -BINS += hi BINS += hilex BINS += hnel BINS += htagml @@ -80,10 +79,6 @@ meta: .gitignore tags cp -f $< $@ chmod a+x $@ -hi: hi.c - ${CC} ${CFLAGS} ${LDFLAGS} hi.c ${LDLIBS.$@} -o $@ - ./hi -c - OBJS.hilex += c11.o OBJS.hilex += hilex.o OBJS.hilex += make.o diff --git a/bin/README.7 b/bin/README.7 index d4cf26e8..dc5a2a32 100644 --- a/bin/README.7 +++ b/bin/README.7 @@ -1,4 +1,4 @@ -.Dd January 12, 2021 +.Dd January 13, 2021 .Dt BIN 7 .Os "Causal Agency" . @@ -36,8 +36,6 @@ framebuffer battery indicator framebuffer clock .It Xr glitch 1 PNG glitcher -.It Xr hi 1 -syntax highlighter .It Xr hilex 1 syntax highlighter .It Xr hnel 1 diff --git a/bin/hi.c b/bin/hi.c deleted file mode 100644 index 23d69574..00000000 --- a/bin/hi.c +++ /dev/null @@ -1,766 +0,0 @@ -/* vim: set foldmethod=marker foldlevel=0: */ -/* Copyright (C) 2019 June McEnroe <june@causal.agency> - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#include <assert.h> -#include <err.h> -#include <locale.h> -#include <regex.h> -#include <stdbool.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/stat.h> -#include <sysexits.h> -#include <unistd.h> - -#define ARRAY_LEN(a) (sizeof(a) / sizeof(a[0])) - -typedef unsigned Set; -#define SET(x) ((Set)1 << (x)) - -#define ENUM_CLASS \ - X(Normal) \ - X(Keyword) \ - X(Macro) \ - X(Tag) \ - X(String) \ - X(Escape) \ - X(Format) \ - X(Interp) \ - X(Comment) \ - X(Todo) \ - X(DiffOld) \ - X(DiffNew) - -enum Class { -#define X(class) class, - ENUM_CLASS -#undef X - ClassLen, -}; - -static const char *ClassName[ClassLen] = { -#define X(class) [class] = #class, - ENUM_CLASS -#undef X -}; - -struct Syntax { - enum Class class; - Set parent; - bool newline; - size_t subexp; - const char *pattern; -}; - -#define WB "(^|[^_[:alnum:]]|\n)" -#define BL0 "[[:blank:]]*" -#define BL1 "[[:blank:]]+" -#define SP0 "[[:space:]]*" -#define SP1 "[[:space:]]+" -#define PATTERN_ID "[_[:alpha:]][_[:alnum:]]*" -#define PATTERN_SQ "'([^']|[\\]')*'" -#define PATTERN_DQ "\"([^\"]|[\\]\")*\"" -#define PATTERN_BC "/[*]" "([^*]|[*][^/])*" "[*]+/" -#define PATTERN_TODO "FIXME|TODO|XXX" - -// C syntax {{{ -static const struct Syntax CSyntax[] = { - { Keyword, .subexp = 2, .pattern = WB - "(" "auto|extern|register|static|(_T|t)hread_local|typedef" - "|" "_Atomic|const|restrict|volatile" - "|" "inline|(_N|n)oreturn" - "|" "(_A|a)lignas" - "|" "enum|struct|union" - "|" "do|else|for|if|switch|while" - "|" "break|case|continue|default|goto|return" - ")" WB }, - { Macro, - .pattern = "^" BL0 "#(.|[\\]\n)*" }, - { Tag, .parent = SET(Macro), .subexp = 1, - .pattern = "define" BL1 "(" PATTERN_ID ")" "[(]" }, - { Tag, .subexp = 2, - .pattern = "(enum|struct|union)" SP1 "(" PATTERN_ID ")" SP0 "[{]" }, - { Tag, .parent = ~SET(Keyword), .newline = true, .subexp = 1, - .pattern = "(" PATTERN_ID ")" SP0 "[(][^()]*[)]" SP0 "[{]" }, - { Tag, .newline = true, .subexp = 3, .pattern = 
- "(static|typedef)" SP1 - "(" "(" PATTERN_ID ")" SP0 - "(" "[*]" "|" "[[][^]]*[]]" "|" "[{][^}]*[}]" "|" SP0 ")*" ")+" }, - { String, .parent = SET(Macro), .subexp = 2, - .pattern = "(include|import)" BL0 "(<[^>]*>)" }, - { String, - .pattern = "[LUu]?" PATTERN_SQ }, - { String, .parent = ~SET(String), - .pattern = "([LU]|u8?)?" PATTERN_DQ }, - { Escape, .parent = SET(String), - .pattern = "[\\]([\"'?\\abfnrtv]|[0-7]{1,3}|x[0-9A-Fa-f]+)" }, - { Escape, .parent = SET(String), - .pattern = "[\\](U[0-9A-Fa-f]{8}|u[0-9A-Fa-f]{4})" }, - { Format, .parent = SET(String), .pattern = - "%%|%[ #+-0]*" // flags - "([*]|[0-9]+)?" // field width - "([.]([*]|[0-9]+))?" // precision - "([Lhjltz]|hh|ll)?" // length modifier - "[AEFGXacdefginopsux]" // format specifier - }, - { Comment, .parent = ~SET(String), - .pattern = "//(.|[\\]\n)*" }, - { Comment, .parent = ~SET(String), .newline = true, - .pattern = PATTERN_BC }, - { Todo, .parent = SET(Comment), - .pattern = PATTERN_TODO }, -}; -// }}} - -// diff syntax {{{ -static const struct Syntax DiffSyntax[] = { - { Keyword, .pattern = "^[^ ].*" }, - { Comment, .pattern = "^@@.*" }, - { DiffOld, .pattern = "^[-].*" }, - { DiffNew, .pattern = "^[+].*" }, -}; -// }}} - -// make syntax {{{ -#define MAKE_TARGET "[-./_[:alnum:]]+" -static const struct Syntax MakeSyntax[] = { - { Keyword, .subexp = 2, - .pattern = WB "([.](PHONY|PRECIOUS|SUFFIXES))" WB }, - { Macro, - .pattern = "^ *-?include" }, - { Tag, .parent = ~SET(Keyword), .subexp = 1, .pattern = - "(" MAKE_TARGET ")" "(" BL1 MAKE_TARGET ")*" BL0 ":([^=]|$)" }, - { String, .subexp = 1, - .pattern = "[._[:alnum:]]+" BL0 "[!+:?]?=" BL0 "(.*)" }, - { Normal, - .pattern = "^\t.*" }, - { String, - .pattern = PATTERN_SQ }, - { String, - .pattern = PATTERN_DQ }, - { Interp, - .pattern = "[$]." }, - // Support one level of nesting with the same delimiter. - { Interp, - .pattern = "[$][(](" "[^$)]" "|" "[$]." 
"|" "[$][(][^)]*[)]" ")*[)]" }, - { Interp, - .pattern = "[$][{](" "[^$}]" "|" "[$]." "|" "[$][{][^}]*[}]" ")*[}]" }, - { Escape, - .pattern = "[$][$]" }, - { Comment, .parent = ~SET(String), - .pattern = "#.*" }, - { Todo, .parent = SET(Comment), - .pattern = PATTERN_TODO }, -}; -// }}} - -// mdoc syntax {{{ -static const struct Syntax MdocSyntax[] = { - { Keyword, .subexp = 2, .pattern = WB - "(" "D[dt]|N[dm]|Os" - "|" "S[hsx]|[LP]p|Xr" - "|" "%[ABCDIJNOPQRTUV]|[BE][dl]|D[1l]|It|Ql|R[es]|Ta" - "|" "Ap|[BE]k|Ns|Pf|Sm" - "|" "Ar|Cm|Ev|Fl|O[cop]|Pa" - "|" "Dv|Er|F[acdnot]|In|Lb|V[at]" - "|" "A[dn]|Cd|Lk|M[st]" - "|" "[BE]f|Em|Li|No|Sy" - "|" "(Br|[ABDPQS])[coq]|E[co]" - "|" "At|(Bs|[BDEFNO])x|Rv|St" - ")" WB }, - { Tag, .subexp = 1, - .pattern = "^[.]S[hs]" BL1 "(.+)" }, - { String, - .pattern = PATTERN_DQ }, - { Normal, - .pattern = "^[^.].*" }, - { String, - .pattern = "[\\](" "." "|" "[(].{2}" "|" "[[][^]]*[]]" ")" }, - { Comment, - .pattern = "^[.][\\]\".*" }, - { Todo, .parent = SET(Comment), - .pattern = PATTERN_TODO }, -}; -// }}} - -// Rust syntax {{{ -static const struct Syntax RustSyntax[] = { - { Keyword, .subexp = 2, .pattern = WB - "(" "'?static|[Ss]elf|abstract|as|async|await|become|box|break|const" - "|" "continue|crate|do|dyn|else|enum|extern|false|final|fn|for|if" - "|" "impl|in|let|loop|macro|match|mod|move|mut|override|priv|pub|ref" - "|" "return|struct|super|trait|true|try|type(of)?|union|uns(afe|ized)" - "|" "use|virtual|where|while|yield" - ")" WB }, - { Tag, .subexp = 2, .pattern = - "(enum|fn|macro_rules!|mod|struct|type|union)" SP1 "(" PATTERN_ID ")" }, - { Macro, .newline = true, - .pattern = "#!?[[][^]]*[]]" }, - { Macro, - .pattern = PATTERN_ID "!" }, - { Interp, - .pattern = "[$]" PATTERN_ID }, - { String, - .pattern = "b?'([^']|[\\]')'" }, - { String, - .pattern = "b?" 
"\"([^\"]|[\\][\n\"])*\"" }, - { Escape, .parent = SET(String), - .pattern = "[\\]([\"'0\\nrt]|u[{][0-9A-Fa-f]{1,6}[}]|x[0-9A-Fa-f]{2})" }, - { Format, .parent = SET(String), - .pattern = "[{][{]|[{][^{}]*[}]|[}][}]" }, - { String, .parent = ~SET(String), .newline = true, - .pattern = "b?r\"[^\"]*\"" }, - { String, .parent = ~SET(String), .newline = true, - .pattern = "b?r#+\"" "([^\"]|\"[^#])*" "\"+#+" }, - { Comment, .parent = ~SET(String), - .pattern = "//.*" }, - { Comment, .parent = ~SET(String), .newline = true, - .pattern = PATTERN_BC }, - { Todo, .parent = SET(Comment), - .pattern = PATTERN_TODO }, -}; -// }}} - -// sh syntax {{{ -static const struct Syntax ShSyntax[] = { - { Keyword, .subexp = 2, .pattern = WB - "(" "!|case|do|done|elif|else|esac|fi|for|if|in|then|until|while" - "|" "alias|bg|cd|command|false|fc|fg|getopts|jobs|kill|newgrp|pwd|read" - "|" "true|type|ulimit|umask|unalias|wait" - "|" "[.:]|break|continue|eval|exec|exit|export|local|readonly|return" - "|" "set|shift|times|trap|unset" - ")" WB }, - { Tag, .subexp = 2, - .pattern = WB "(" PATTERN_ID ")" BL0 "[(]" BL0 "[)]" }, - { String, .newline = true, .subexp = 1, .pattern = - "<<-?" BL0 "EOF[^\n]*\n" - "(([^\n]|\n\t*[^E]|\n\t*E[^O]|\n\t*EO[^F]|\n\t*EOF[^\n])*)" - "\n\t*EOF\n" }, - { String, .parent = ~SET(String), .newline = true, - .pattern = PATTERN_DQ }, - { Escape, .parent = SET(String), - .pattern = "[\\][\"$\\`]" }, - { Interp, .parent = ~SET(Escape), - .pattern = "[$][(][^)]*[)]" "|" "`[^`]*`" }, - { Interp, .parent = ~SET(Escape), - .pattern = "[$][(][(]([^)]|[)][^)])*[)][)]" }, - { String, .parent = SET(Interp), - .pattern = PATTERN_DQ }, - { Interp, .parent = ~SET(Escape), - .pattern = "[$]([!#$*?@-]|[_[:alnum:]]+|[{][^}]*[}])" }, - { String, .parent = ~SET(Escape), - .pattern = "[\\]." }, - { String, .subexp = 1, .newline = true, .pattern = - "<<-?" 
BL0 "'EOF'[^\n]*\n" - "(([^\n]|\n\t*[^E]|\n\t*E[^O]|\n\t*EO[^F]|\n\t*EOF[^\n])*)" - "\n\t*EOF\n" }, - { String, .parent = ~SET(String), .newline = true, - .pattern = "'[^']*'" }, - { Comment, .parent = ~SET(String), .subexp = 2, - .pattern = "(^|[[:blank:]]+)(#.*)" }, - { Todo, .parent = SET(Comment), - .pattern = PATTERN_TODO }, -}; -// }}} - -static const struct Language { - const char *name; - const char *pattern; - const struct Syntax *syntax; - size_t len; -} Languages[] = { - { "c", "[.][chlmy]$", CSyntax, ARRAY_LEN(CSyntax) }, - { "diff", "[.](diff|patch)$", DiffSyntax, ARRAY_LEN(DiffSyntax) }, - { "make", "[.]mk$|^Makefile$", MakeSyntax, ARRAY_LEN(MakeSyntax) }, - { "mdoc", "[.][1-9]$", MdocSyntax, ARRAY_LEN(MdocSyntax) }, - { "rust", "[.]rs$", RustSyntax, ARRAY_LEN(RustSyntax) }, - { "sh", "[.](sh|in)$|^[.](profile|shrc)$", ShSyntax, ARRAY_LEN(ShSyntax) }, - { "text", "[.]txt$", NULL, 0 }, -}; - -static regex_t compile(const char *pattern, int flags) { - regex_t regex; - int error = regcomp(&regex, pattern, REG_EXTENDED | flags); - if (!error) return regex; - char buf[256]; - regerror(error, &regex, buf, sizeof(buf)); - errx(EX_SOFTWARE, "regcomp: %s: %s", buf, pattern); -} - -enum { SubsLen = 8 }; -static void highlight(struct Language lang, enum Class *hi, const char *str) { - for (size_t i = 0; i < lang.len; ++i) { - struct Syntax syn = lang.syntax[i]; - regex_t regex = compile(syn.pattern, syn.newline ? 0 : REG_NEWLINE); - assert(syn.subexp < SubsLen); - assert(syn.subexp <= regex.re_nsub); - regmatch_t subs[SubsLen] = {{0}}; - for (size_t offset = 0; str[offset]; offset += subs[syn.subexp].rm_eo) { - int error = regexec( - &regex, &str[offset], SubsLen, subs, offset ? 
REG_NOTBOL : 0 - ); - if (error == REG_NOMATCH) break; - if (error) errx(EX_SOFTWARE, "regexec: %d", error); - regmatch_t *sub = &subs[syn.subexp]; - if (syn.parent && !(syn.parent & SET(hi[offset + sub->rm_so]))) { - sub->rm_eo = sub->rm_so + 1; - continue; - } - for (regoff_t j = sub->rm_so; j < sub->rm_eo; ++j) { - hi[offset + j] = lang.syntax[i].class; - } - } - regfree(&regex); - } -} - -static void check(void) { - for (size_t i = 0; i < ARRAY_LEN(Languages); ++i) { - regex_t regex = compile(Languages[i].pattern, REG_NOSUB); - regfree(&regex); - for (size_t j = 0; j < Languages[i].len; ++j) { - struct Syntax syn = Languages[i].syntax[j]; - regex = compile(syn.pattern, 0); - if (syn.subexp >= SubsLen || syn.subexp > regex.re_nsub) { - errx( - EX_SOFTWARE, "subexpression %zu out of bounds: %s", - syn.subexp, syn.pattern - ); - } - regfree(&regex); - } - } -} - -#define ENUM_OPTION \ - X(Anchor, "anchor") \ - X(CSS, "css") \ - X(Document, "document") \ - X(Inline, "inline") \ - X(Monospace, "monospace") \ - X(Tab, "tab") \ - X(Title, "title") - -enum Option { -#define X(option, _) option, - ENUM_OPTION -#undef X - OptionLen, -}; - -static const char *OptionKey[OptionLen + 1] = { -#define X(option, key) [option] = key, - ENUM_OPTION -#undef X - NULL, -}; - -typedef void HeaderFn(const char *opts[]); -typedef void -OutputFn(const char *opts[], enum Class class, const char *str, size_t len); - -// ANSI format {{{ - -enum SGR { - SGRBoldOn = 1, - SGRUnderlineOn = 4, - SGRBoldOff = 22, - SGRUnderlineOff = 24, - SGRBlack = 30, - SGRRed, - SGRGreen, - SGRYellow, - SGRBlue, - SGRMagenta, - SGRCyan, - SGRWhite, - SGRDefault = 39, -}; - -static const enum SGR ANSIStyle[ClassLen][3] = { - [Normal] = { SGRDefault }, - [Keyword] = { SGRWhite }, - [Macro] = { SGRGreen }, - [Tag] = { SGRDefault, SGRUnderlineOn, SGRUnderlineOff }, - [String] = { SGRCyan }, - [Escape] = { SGRDefault }, - [Format] = { SGRCyan, SGRBoldOn, SGRBoldOff }, - [Interp] = { SGRYellow }, - [Comment] = { SGRBlue }, 
- [Todo] = { SGRBlue, SGRBoldOn, SGRBoldOff }, - [DiffOld] = { SGRRed }, - [DiffNew] = { SGRGreen }, -}; - -static void -ansiOutput(const char *opts[], enum Class class, const char *str, size_t len) { - (void)opts; - if (ANSIStyle[class][1]) { - printf( - "\x1B[%d;%dm%.*s\x1B[%dm", - ANSIStyle[class][0], ANSIStyle[class][1], - (int)len, str, - ANSIStyle[class][2] - ); - } else { - printf("\x1B[%dm%.*s", ANSIStyle[class][0], (int)len, str); - } -} - -// }}} - -// IRC format {{{ - -enum IRC { - IRCWhite, - IRCBlack, - IRCBlue, - IRCGreen, - IRCRed, - IRCBrown, - IRCMagenta, - IRCOrange, - IRCYellow, - IRCLightGreen, - IRCCyan, - IRCLightCyan, - IRCLightBlue, - IRCPink, - IRCGray, - IRCLightGray, - IRCBold = 0x02, - IRCColor = 0x03, - IRCMonospace = 0x11, - IRCUnderline = 0x1F, -}; - -static const enum IRC SGRIRC[] = { - [SGRBoldOn] = IRCBold, - [SGRBoldOff] = IRCBold, - [SGRUnderlineOn] = IRCUnderline, - [SGRUnderlineOff] = IRCUnderline, - [SGRBlack] = IRCBlack, - [SGRRed] = IRCRed, - [SGRGreen] = IRCGreen, - [SGRYellow] = IRCYellow, - [SGRBlue] = IRCBlue, - [SGRMagenta] = IRCMagenta, - [SGRCyan] = IRCCyan, - [SGRWhite] = IRCGray, - [SGRDefault] = 0, -}; - -static void ircHeader(const char *opts[]) { - if (opts[Monospace]) printf("%c", IRCMonospace); -} - -static void -ircOutput(const char *opts[], enum Class class, const char *str, size_t len) { - char cc[3] = ""; - if (ANSIStyle[class][0] != SGRDefault) { - snprintf(cc, sizeof(cc), "%d", SGRIRC[ANSIStyle[class][0]]); - } - // Prevent trailing formatting after newline ... - bool newline = (str[len - 1] == '\n'); - if (ANSIStyle[class][1]) { - printf( - "%c%s%c%.*s%c%s", - IRCColor, cc, SGRIRC[ANSIStyle[class][1]], - (int)(newline ? len - 1 : len), str, - SGRIRC[ANSIStyle[class][2]], - (newline ? "\n" : "") - ); - } else { - // Double-toggle bold to prevent str being interpreted as color. - printf("%c%s%c%c%.*s", IRCColor, cc, IRCBold, IRCBold, (int)len, str); - } - // ... 
except for monospace, at the beginning of each line. - if (newline && opts[Monospace]) printf("%c", IRCMonospace); -} - -// }}} - -// HTML format {{{ - -static void htmlEscape(const char *str, size_t len) { - while (len) { - size_t run = strcspn(str, "\"&<>"); - if (run > len) run = len; - switch (str[0]) { - break; case '"': run = 1; printf("&quot;"); - break; case '&': run = 1; printf("&amp;"); - break; case '<': run = 1; printf("&lt;"); - break; case '>': run = 1; printf("&gt;"); - break; default: printf("%.*s", (int)run, str); - } - str += run; - len -= run; - } -} - -static const char *HTMLStyle[ClassLen] = { - [Keyword] = "color: dimgray;", - [Macro] = "color: green;", - [Tag] = "color: inherit; text-decoration: underline;", - [String] = "color: teal;", - [Format] = "color: teal; font-weight: bold;", - [Interp] = "color: olive;", - [Comment] = "color: navy;", - [Todo] = "color: navy; font-weight: bold;", - [DiffOld] = "color: red;", - [DiffNew] = "color: green;", -}; - -static void htmlTabSize(const char *tab) { - printf("-moz-tab-size: "); - htmlEscape(tab, strlen(tab)); - printf("; tab-size: "); - htmlEscape(tab, strlen(tab)); - printf(";"); -} - -static void htmlHeader(const char *opts[]) { - if (!opts[Document]) goto body; - printf("<!DOCTYPE html>\n<title>"); - if (opts[Title]) htmlEscape(opts[Title], strlen(opts[Title])); - printf("</title>\n"); - if (opts[CSS]) { - printf("<link rel=\"stylesheet\" href=\""); - htmlEscape(opts[CSS], strlen(opts[CSS])); - printf("\">\n"); - } else if (!opts[Inline]) { - printf("<style>\n"); - if (opts[Tab]) { - printf("pre.hi { "); - htmlTabSize(opts[Tab]); - printf(" }\n"); - } - for (enum Class class = 0; class < ClassLen; ++class) { - if (!HTMLStyle[class]) continue; - printf(".hi.%s { %s }\n", ClassName[class], HTMLStyle[class]); - } - printf( - ".hi.%s:target { color: goldenrod; outline: none; }\n", - ClassName[Tag] - ); - printf("</style>\n"); - } -body: - if (opts[Inline] && opts[Tab]) { - printf("<pre class=\"hi\" style=\""); - 
htmlTabSize(opts[Tab]); - printf("\">"); - } else { - printf("<pre class=\"hi\">"); - } -} - -static void htmlFooter(const char *opts[]) { - (void)opts; - printf("</pre>\n"); -} - -static void htmlAnchor(const char *opts[], const char *str, size_t len) { - if (opts[Inline]) { - printf("<a style=\"%s\" id=\"", HTMLStyle[Tag] ? HTMLStyle[Tag] : ""); - } else { - printf("<a class=\"hi %s\" id=\"", ClassName[Tag]); - } - htmlEscape(str, len); - printf("\" href=\"#"); - htmlEscape(str, len); - printf("\">"); - htmlEscape(str, len); - printf("</a>"); -} - -static void -htmlOutput(const char *opts[], enum Class class, const char *str, size_t len) { - if (opts[Anchor] && class == Tag) { - htmlAnchor(opts, str, len); - return; - } - if (opts[Inline]) { - printf("<span style=\"%s\">", HTMLStyle[class] ? HTMLStyle[class] : ""); - } else { - printf("<span class=\"hi %s\">", ClassName[class]); - } - htmlEscape(str, len); - printf("</span>"); -} - -// }}} - -// Debug format {{{ -static void -debugOutput(const char *opts[], enum Class class, const char *str, size_t len) { - (void)opts; - printf("%s\t\"", ClassName[class]); - while (len) { - size_t run = strcspn(str, "\t\n\"\\"); - if (run > len) run = len; - switch (str[0]) { - break; case '\t': run = 1; printf("\\t"); - break; case '\n': run = 1; printf("\\n"); - break; case '"': run = 1; printf("\\\""); - break; case '\\': run = 1; printf("\\\\"); - break; default: printf("%.*s", (int)run, str); - } - str += run; - len -= run; - } - printf("\"\n"); -} -// }}} - -static const struct Format { - const char *name; - OutputFn *output; - HeaderFn *header; - HeaderFn *footer; -} Formats[] = { - { "ansi", ansiOutput, NULL, NULL }, - { "irc", ircOutput, ircHeader, NULL }, - { "html", htmlOutput, htmlHeader, htmlFooter }, - { "debug", debugOutput, NULL, NULL }, -}; - -static bool findLanguage(struct Language *lang, const char *name) { - for (size_t i = 0; i < ARRAY_LEN(Languages); ++i) { - if (strcmp(name, Languages[i].name)) continue; - 
*lang = Languages[i]; - return true; - } - return false; -} - -static bool matchLanguage(struct Language *lang, const char *name) { - for (size_t i = 0; i < ARRAY_LEN(Languages); ++i) { - regex_t regex = compile(Languages[i].pattern, REG_NOSUB); - int error = regexec(&regex, name, 0, NULL, 0); - regfree(&regex); - if (error == REG_NOMATCH) continue; - if (error) errx(EX_SOFTWARE, "regexec: %d", error); - *lang = Languages[i]; - return true; - } - return false; -} - -static bool findFormat(struct Format *format, const char *name) { - for (size_t i = 0; i < ARRAY_LEN(Formats); ++i) { - if (strcmp(name, Formats[i].name)) continue; - *format = Formats[i]; - return true; - } - return false; -} - -int main(int argc, char *argv[]) { - setlocale(LC_CTYPE, ""); - - bool text = false; - const char *name = NULL; - struct Language lang = {0}; - struct Format format = Formats[0]; - const char *opts[OptionLen] = {0}; - - int opt; - while (0 < (opt = getopt(argc, argv, "cf:l:n:o:t"))) { - switch (opt) { - break; case 'c': check(); return EX_OK; - break; case 'f': { - if (!findFormat(&format, optarg)) { - errx(EX_USAGE, "no such format %s", optarg); - } - } - break; case 'l': { - if (!findLanguage(&lang, optarg)) { - errx(EX_USAGE, "no such language %s", optarg); - } - } - break; case 'n': name = optarg; - break; case 'o': { - char *val; - enum Option key; - while (optarg[0]) { - key = getsubopt(&optarg, (char *const *)OptionKey, &val); - if (key >= OptionLen) { - errx(EX_USAGE, "no such option %s", val); - } - opts[key] = (val ? val : ""); - } - } - break; case 't': text = true; - break; default: return EX_USAGE; - } - } - - const char *path = "(stdin)"; - FILE *file = stdin; - if (optind < argc) { - path = argv[optind]; - file = fopen(path, "r"); - if (!file) err(EX_NOINPUT, "%s", path); - } - - if (!name) { - name = strrchr(path, '/'); - name = (name ? 
&name[1] : path); - } - if (!lang.name && !matchLanguage(&lang, name) && !text) { - errx(EX_USAGE, "cannot infer language for %s", name); - } - if (!opts[Title]) opts[Title] = name; - - struct stat stat; - int error = fstat(fileno(file), &stat); - if (error) err(EX_IOERR, "fstat"); - - size_t cap = (stat.st_mode & S_IFREG ? stat.st_size + 1 : 4096); - char *str = malloc(cap); - if (!str) err(EX_OSERR, "malloc"); - - size_t len = 0, read; - while (0 < (read = fread(&str[len], 1, cap - len - 1, file))) { - len += read; - if (len + 1 < cap) continue; - cap *= 2; - str = realloc(str, cap); - if (!str) err(EX_OSERR, "realloc"); - } - if (ferror(file)) err(EX_IOERR, "fread"); - if (memchr(str, 0, len)) errx(EX_DATAERR, "input is binary"); - str[len] = '\0'; - - enum Class *hi = calloc(len, sizeof(*hi)); - if (!hi) err(EX_OSERR, "calloc"); - - highlight(lang, hi, str); - - size_t run = 0; - if (format.header) format.header(opts); - for (size_t i = 0; i < len; i += run) { - for (run = 1; i + run < len; ++run) { - if (hi[i + run] != hi[i]) break; - if (str[i + run - 1] == '\n') break; - } - format.output(opts, hi[i], &str[i], run); - } - if (format.footer) format.footer(opts); -} diff --git a/bin/man1/hi.1 b/bin/man1/hi.1 deleted file mode 100644 index 517fbab9..00000000 --- a/bin/man1/hi.1 +++ /dev/null @@ -1,173 +0,0 @@ -.Dd December 15, 2019 -.Dt HI 1 -.Os -. -.Sh NAME -.Nm hi -.Nd syntax highlighter -. -.Sh SYNOPSIS -.Nm -.Op Fl t -.Op Fl f Ar format -.Op Fl l Ar lang -.Op Fl n Ar name -.Op Fl o Ar opts -.Op Ar file -.Nm -.Fl c -. -.Sh DESCRIPTION -.Nm -highlights the contents of a -.Ar file -or standard input -and formats it -on standard output. -. -.Pp -The arguments are as follows: -.Bl -tag -width "-f format" -.It Fl c -Compile all regular expressions and exit. -.It Fl f Ar format -Set the output format. -.It Fl l Ar lang -Set the input language. -.It Fl n Ar name -Override the name used -to infer the input language. -.It Fl o Ar opts -Set output format options. 
-.Ar opts -is a comma-separated list of options. -.It Fl t -Default to -.Cm text -if the input language cannot be inferred. -.El -. -.Ss Output Formats -The default output format is -.Cm ansi . -. -.Bl -tag -width Ds -.It Cm ansi -Output ANSI terminal escape codes. -. -.It Cm irc -Output IRC formatting codes. -.Pp -The options are as follows: -.Bl -tag -width "monospace" -.It Cm monospace -Use the monospace formatting code -introduced by IRCCloud. -.El -. -.It Cm html -Output HTML -.Sy <pre> -with -.Sy <span> -classes. -. -.Pp -The options are as follows: -.Bl -tag -width "title=..." -.It Cm anchor -Output tags -(top-level definition names) -as anchor links. -. -.It Cm css Ns = Ns Ar url -With -.Cm document , -output a -.Sy <link> -element for the external stylesheet -.Ar url . -If unset, -output default styles in a -.Sy <style> -element. -. -.It Cm document -Output an HTML document. -. -.It Cm inline -Output inline -.Sy style -attributes rather than classes. -. -.It Cm tab Ns = Ns Ar n -With -.Cm document -or -.Cm inline , -set the -.Sy tab-size -property to -.Ar n . -. -.It Cm title Ns = Ns Ar ... -With -.Cm document , -set the -.Sy <title> -element text. -The default title is the -.Ar file -name. -.El -.El -. -.Ss Input Languages -If no input language is set with -.Fl l , -it may be inferred from the name set by -.Fl m -or from the provided -.Ar file -name. -. -.Bl -tag -width Ds -.It Cm c -The C11 language. -. -.It Cm diff -The output of -.Xr diff 1 -with the -.Fl u -flag. -. -.It Cm make -The portable subset of -.Xr make 1 . -Variable substitution supports -one level of nesting with the same delimiter. -. -.It Cm mdoc -The -.Xr mdoc 7 -language. -. -.It Cm rust -The Rust 2018 language. -Nested raw strings and block comments -are not highlighted correctly. -. -.It Cm sh -The POSIX -.Xr sh 1 -language. -Here-documents are correctly highlighted -only with a delimiter of -.Ql EOF . 
-Arbitrarily nested strings and command substitutions -are not highlighted correctly. -. -.It Cm text -Plain text. -.El |