diff options
Diffstat (limited to '')
-rw-r--r-- | bin/hi.c | 766 |
1 files changed, 766 insertions, 0 deletions
diff --git a/bin/hi.c b/bin/hi.c new file mode 100644 index 00000000..23d69574 --- /dev/null +++ b/bin/hi.c @@ -0,0 +1,766 @@ +/* vim: set foldmethod=marker foldlevel=0: */ +/* Copyright (C) 2019 June McEnroe <june@causal.agency> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <err.h> +#include <locale.h> +#include <regex.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sysexits.h> +#include <unistd.h> + +#define ARRAY_LEN(a) (sizeof(a) / sizeof(a[0])) + +typedef unsigned Set; +#define SET(x) ((Set)1 << (x)) + +#define ENUM_CLASS \ + X(Normal) \ + X(Keyword) \ + X(Macro) \ + X(Tag) \ + X(String) \ + X(Escape) \ + X(Format) \ + X(Interp) \ + X(Comment) \ + X(Todo) \ + X(DiffOld) \ + X(DiffNew) + +enum Class { +#define X(class) class, + ENUM_CLASS +#undef X + ClassLen, +}; + +static const char *ClassName[ClassLen] = { +#define X(class) [class] = #class, + ENUM_CLASS +#undef X +}; + +struct Syntax { + enum Class class; + Set parent; + bool newline; + size_t subexp; + const char *pattern; +}; + +#define WB "(^|[^_[:alnum:]]|\n)" +#define BL0 "[[:blank:]]*" +#define BL1 "[[:blank:]]+" +#define SP0 "[[:space:]]*" +#define SP1 "[[:space:]]+" +#define PATTERN_ID "[_[:alpha:]][_[:alnum:]]*" +#define PATTERN_SQ "'([^']|[\\]')*'" +#define PATTERN_DQ "\"([^\"]|[\\]\")*\"" +#define PATTERN_BC "/[*]" "([^*]|[*][^/])*" "[*]+/" +#define PATTERN_TODO "FIXME|TODO|XXX" + +// C syntax {{{ +static const struct Syntax CSyntax[] = { + { Keyword, .subexp = 2, .pattern = WB + "(" "auto|extern|register|static|(_T|t)hread_local|typedef" + "|" "_Atomic|const|restrict|volatile" + "|" "inline|(_N|n)oreturn" + "|" "(_A|a)lignas" + "|" "enum|struct|union" + "|" "do|else|for|if|switch|while" + "|" "break|case|continue|default|goto|return" + ")" WB }, + { Macro, + .pattern = "^" BL0 "#(.|[\\]\n)*" }, + { Tag, .parent = SET(Macro), .subexp = 1, + .pattern = "define" BL1 "(" PATTERN_ID ")" "[(]" }, + { Tag, .subexp = 2, + .pattern = "(enum|struct|union)" SP1 "(" PATTERN_ID ")" SP0 "[{]" }, + { Tag, .parent = ~SET(Keyword), .newline = true, .subexp = 1, + .pattern = "(" PATTERN_ID ")" SP0 "[(][^()]*[)]" SP0 "[{]" }, + { Tag, .newline = true, .subexp = 3, .pattern = + "(static|typedef)" SP1 + "(" "(" PATTERN_ID ")" SP0 + "(" "[*]" "|" "[[][^]]*[]]" "|" "[{][^}]*[}]" "|" SP0 ")*" ")+" }, + { String, .parent = SET(Macro), .subexp = 2, + .pattern = "(include|import)" BL0 "(<[^>]*>)" }, + { String, + .pattern = "[LUu]?" PATTERN_SQ }, + { String, .parent = ~SET(String), + .pattern = "([LU]|u8?)?" PATTERN_DQ }, + { Escape, .parent = SET(String), + .pattern = "[\\]([\"'?\\abfnrtv]|[0-7]{1,3}|x[0-9A-Fa-f]+)" }, + { Escape, .parent = SET(String), + .pattern = "[\\](U[0-9A-Fa-f]{8}|u[0-9A-Fa-f]{4})" }, + { Format, .parent = SET(String), .pattern = + "%%|%[ #+-0]*" // flags + "([*]|[0-9]+)?" // field width + "([.]([*]|[0-9]+))?" // precision + "([Lhjltz]|hh|ll)?" // length modifier + "[AEFGXacdefginopsux]" // format specifier + }, + { Comment, .parent = ~SET(String), + .pattern = "//(.|[\\]\n)*" }, + { Comment, .parent = ~SET(String), .newline = true, + .pattern = PATTERN_BC }, + { Todo, .parent = SET(Comment), + .pattern = PATTERN_TODO }, +}; +// }}} + +// diff syntax {{{ +static const struct Syntax DiffSyntax[] = { + { Keyword, .pattern = "^[^ ].*" }, + { Comment, .pattern = "^@@.*" }, + { DiffOld, .pattern = "^[-].*" }, + { DiffNew, .pattern = "^[+].*" }, +}; +// }}} + +// make syntax {{{ +#define MAKE_TARGET "[-./_[:alnum:]]+" +static const struct Syntax MakeSyntax[] = { + { Keyword, .subexp = 2, + .pattern = WB "([.](PHONY|PRECIOUS|SUFFIXES))" WB }, + { Macro, + .pattern = "^ *-?include" }, + { Tag, .parent = ~SET(Keyword), .subexp = 1, .pattern = + "(" MAKE_TARGET ")" "(" BL1 MAKE_TARGET ")*" BL0 ":([^=]|$)" }, + { String, .subexp = 1, + .pattern = "[._[:alnum:]]+" BL0 "[!+:?]?=" BL0 "(.*)" }, + { Normal, + .pattern = "^\t.*" }, + { String, + .pattern = PATTERN_SQ }, + { String, + .pattern = PATTERN_DQ }, + { Interp, + .pattern = "[$]." }, + // Support one level of nesting with the same delimiter. + { Interp, + .pattern = "[$][(](" "[^$)]" "|" "[$]." "|" "[$][(][^)]*[)]" ")*[)]" }, + { Interp, + .pattern = "[$][{](" "[^$}]" "|" "[$]." "|" "[$][{][^}]*[}]" ")*[}]" }, + { Escape, + .pattern = "[$][$]" }, + { Comment, .parent = ~SET(String), + .pattern = "#.*" }, + { Todo, .parent = SET(Comment), + .pattern = PATTERN_TODO }, +}; +// }}} + +// mdoc syntax {{{ +static const struct Syntax MdocSyntax[] = { + { Keyword, .subexp = 2, .pattern = WB + "(" "D[dt]|N[dm]|Os" + "|" "S[hsx]|[LP]p|Xr" + "|" "%[ABCDIJNOPQRTUV]|[BE][dl]|D[1l]|It|Ql|R[es]|Ta" + "|" "Ap|[BE]k|Ns|Pf|Sm" + "|" "Ar|Cm|Ev|Fl|O[cop]|Pa" + "|" "Dv|Er|F[acdnot]|In|Lb|V[at]" + "|" "A[dn]|Cd|Lk|M[st]" + "|" "[BE]f|Em|Li|No|Sy" + "|" "(Br|[ABDPQS])[coq]|E[co]" + "|" "At|(Bs|[BDEFNO])x|Rv|St" + ")" WB }, + { Tag, .subexp = 1, + .pattern = "^[.]S[hs]" BL1 "(.+)" }, + { String, + .pattern = PATTERN_DQ }, + { Normal, + .pattern = "^[^.].*" }, + { String, + .pattern = "[\\](" "." "|" "[(].{2}" "|" "[[][^]]*[]]" ")" }, + { Comment, + .pattern = "^[.][\\]\".*" }, + { Todo, .parent = SET(Comment), + .pattern = PATTERN_TODO }, +}; +// }}} + +// Rust syntax {{{ +static const struct Syntax RustSyntax[] = { + { Keyword, .subexp = 2, .pattern = WB + "(" "'?static|[Ss]elf|abstract|as|async|await|become|box|break|const" + "|" "continue|crate|do|dyn|else|enum|extern|false|final|fn|for|if" + "|" "impl|in|let|loop|macro|match|mod|move|mut|override|priv|pub|ref" + "|" "return|struct|super|trait|true|try|type(of)?|union|uns(afe|ized)" + "|" "use|virtual|where|while|yield" + ")" WB }, + { Tag, .subexp = 2, .pattern = + "(enum|fn|macro_rules!|mod|struct|type|union)" SP1 "(" PATTERN_ID ")" }, + { Macro, .newline = true, + .pattern = "#!?[[][^]]*[]]" }, + { Macro, + .pattern = PATTERN_ID "!" }, + { Interp, + .pattern = "[$]" PATTERN_ID }, + { String, + .pattern = "b?'([^']|[\\]')'" }, + { String, + .pattern = "b?" "\"([^\"]|[\\][\n\"])*\"" }, + { Escape, .parent = SET(String), + .pattern = "[\\]([\"'0\\nrt]|u[{][0-9A-Fa-f]{1,6}[}]|x[0-9A-Fa-f]{2})" }, + { Format, .parent = SET(String), + .pattern = "[{][{]|[{][^{}]*[}]|[}][}]" }, + { String, .parent = ~SET(String), .newline = true, + .pattern = "b?r\"[^\"]*\"" }, + { String, .parent = ~SET(String), .newline = true, + .pattern = "b?r#+\"" "([^\"]|\"[^#])*" "\"+#+" }, + { Comment, .parent = ~SET(String), + .pattern = "//.*" }, + { Comment, .parent = ~SET(String), .newline = true, + .pattern = PATTERN_BC }, + { Todo, .parent = SET(Comment), + .pattern = PATTERN_TODO }, +}; +// }}} + +// sh syntax {{{ +static const struct Syntax ShSyntax[] = { + { Keyword, .subexp = 2, .pattern = WB + "(" "!|case|do|done|elif|else|esac|fi|for|if|in|then|until|while" + "|" "alias|bg|cd|command|false|fc|fg|getopts|jobs|kill|newgrp|pwd|read" + "|" "true|type|ulimit|umask|unalias|wait" + "|" "[.:]|break|continue|eval|exec|exit|export|local|readonly|return" + "|" "set|shift|times|trap|unset" + ")" WB }, + { Tag, .subexp = 2, + .pattern = WB "(" PATTERN_ID ")" BL0 "[(]" BL0 "[)]" }, + { String, .newline = true, .subexp = 1, .pattern = + "<<-?" BL0 "EOF[^\n]*\n" + "(([^\n]|\n\t*[^E]|\n\t*E[^O]|\n\t*EO[^F]|\n\t*EOF[^\n])*)" + "\n\t*EOF\n" }, + { String, .parent = ~SET(String), .newline = true, + .pattern = PATTERN_DQ }, + { Escape, .parent = SET(String), + .pattern = "[\\][\"$\\`]" }, + { Interp, .parent = ~SET(Escape), + .pattern = "[$][(][^)]*[)]" "|" "`[^`]*`" }, + { Interp, .parent = ~SET(Escape), + .pattern = "[$][(][(]([^)]|[)][^)])*[)][)]" }, + { String, .parent = SET(Interp), + .pattern = PATTERN_DQ }, + { Interp, .parent = ~SET(Escape), + .pattern = "[$]([!#$*?@-]|[_[:alnum:]]+|[{][^}]*[}])" }, + { String, .parent = ~SET(Escape), + .pattern = "[\\]." }, + { String, .subexp = 1, .newline = true, .pattern = + "<<-?" BL0 "'EOF'[^\n]*\n" + "(([^\n]|\n\t*[^E]|\n\t*E[^O]|\n\t*EO[^F]|\n\t*EOF[^\n])*)" + "\n\t*EOF\n" }, + { String, .parent = ~SET(String), .newline = true, + .pattern = "'[^']*'" }, + { Comment, .parent = ~SET(String), .subexp = 2, + .pattern = "(^|[[:blank:]]+)(#.*)" }, + { Todo, .parent = SET(Comment), + .pattern = PATTERN_TODO }, +}; +// }}} + +static const struct Language { + const char *name; + const char *pattern; + const struct Syntax *syntax; + size_t len; +} Languages[] = { + { "c", "[.][chlmy]$", CSyntax, ARRAY_LEN(CSyntax) }, + { "diff", "[.](diff|patch)$", DiffSyntax, ARRAY_LEN(DiffSyntax) }, + { "make", "[.]mk$|^Makefile$", MakeSyntax, ARRAY_LEN(MakeSyntax) }, + { "mdoc", "[.][1-9]$", MdocSyntax, ARRAY_LEN(MdocSyntax) }, + { "rust", "[.]rs$", RustSyntax, ARRAY_LEN(RustSyntax) }, + { "sh", "[.](sh|in)$|^[.](profile|shrc)$", ShSyntax, ARRAY_LEN(ShSyntax) }, + { "text", "[.]txt$", NULL, 0 }, +}; + +static regex_t compile(const char *pattern, int flags) { + regex_t regex; + int error = regcomp(®ex, pattern, REG_EXTENDED | flags); + if (!error) return regex; + char buf[256]; + regerror(error, ®ex, buf, sizeof(buf)); + errx(EX_SOFTWARE, "regcomp: %s: %s", buf, pattern); +} + +enum { SubsLen = 8 }; +static void highlight(struct Language lang, enum Class *hi, const char *str) { + for (size_t i = 0; i < lang.len; ++i) { + struct Syntax syn = lang.syntax[i]; + regex_t regex = compile(syn.pattern, syn.newline ? 0 : REG_NEWLINE); + assert(syn.subexp < SubsLen); + assert(syn.subexp <= regex.re_nsub); + regmatch_t subs[SubsLen] = {{0}}; + for (size_t offset = 0; str[offset]; offset += subs[syn.subexp].rm_eo) { + int error = regexec( + ®ex, &str[offset], SubsLen, subs, offset ? REG_NOTBOL : 0 + ); + if (error == REG_NOMATCH) break; + if (error) errx(EX_SOFTWARE, "regexec: %d", error); + regmatch_t *sub = &subs[syn.subexp]; + if (syn.parent && !(syn.parent & SET(hi[offset + sub->rm_so]))) { + sub->rm_eo = sub->rm_so + 1; + continue; + } + for (regoff_t j = sub->rm_so; j < sub->rm_eo; ++j) { + hi[offset + j] = lang.syntax[i].class; + } + } + regfree(®ex); + } +} + +static void check(void) { + for (size_t i = 0; i < ARRAY_LEN(Languages); ++i) { + regex_t regex = compile(Languages[i].pattern, REG_NOSUB); + regfree(®ex); + for (size_t j = 0; j < Languages[i].len; ++j) { + struct Syntax syn = Languages[i].syntax[j]; + regex = compile(syn.pattern, 0); + if (syn.subexp >= SubsLen || syn.subexp > regex.re_nsub) { + errx( + EX_SOFTWARE, "subexpression %zu out of bounds: %s", + syn.subexp, syn.pattern + ); + } + regfree(®ex); + } + } +} + +#define ENUM_OPTION \ + X(Anchor, "anchor") \ + X(CSS, "css") \ + X(Document, "document") \ + X(Inline, "inline") \ + X(Monospace, "monospace") \ + X(Tab, "tab") \ + X(Title, "title") + +enum Option { +#define X(option, _) option, + ENUM_OPTION +#undef X + OptionLen, +}; + +static const char *OptionKey[OptionLen + 1] = { +#define X(option, key) [option] = key, + ENUM_OPTION +#undef X + NULL, +}; + +typedef void HeaderFn(const char *opts[]); +typedef void +OutputFn(const char *opts[], enum Class class, const char *str, size_t len); + +// ANSI format {{{ + +enum SGR { + SGRBoldOn = 1, + SGRUnderlineOn = 4, + SGRBoldOff = 22, + SGRUnderlineOff = 24, + SGRBlack = 30, + SGRRed, + SGRGreen, + SGRYellow, + SGRBlue, + SGRMagenta, + SGRCyan, + SGRWhite, + SGRDefault = 39, +}; + +static const enum SGR ANSIStyle[ClassLen][3] = { + [Normal] = { SGRDefault }, + [Keyword] = { SGRWhite }, + [Macro] = { SGRGreen }, + [Tag] = { SGRDefault, SGRUnderlineOn, SGRUnderlineOff }, + [String] = { SGRCyan }, + [Escape] = { SGRDefault }, + [Format] = { SGRCyan, SGRBoldOn, SGRBoldOff }, + [Interp] = { SGRYellow }, + [Comment] = { SGRBlue }, + [Todo] = { SGRBlue, SGRBoldOn, SGRBoldOff }, + [DiffOld] = { SGRRed }, + [DiffNew] = { SGRGreen }, +}; + +static void +ansiOutput(const char *opts[], enum Class class, const char *str, size_t len) { + (void)opts; + if (ANSIStyle[class][1]) { + printf( + "\x1B[%d;%dm%.*s\x1B[%dm", + ANSIStyle[class][0], ANSIStyle[class][1], + (int)len, str, + ANSIStyle[class][2] + ); + } else { + printf("\x1B[%dm%.*s", ANSIStyle[class][0], (int)len, str); + } +} + +// }}} + +// IRC format {{{ + +enum IRC { + IRCWhite, + IRCBlack, + IRCBlue, + IRCGreen, + IRCRed, + IRCBrown, + IRCMagenta, + IRCOrange, + IRCYellow, + IRCLightGreen, + IRCCyan, + IRCLightCyan, + IRCLightBlue, + IRCPink, + IRCGray, + IRCLightGray, + IRCBold = 0x02, + IRCColor = 0x03, + IRCMonospace = 0x11, + IRCUnderline = 0x1F, +}; + +static const enum IRC SGRIRC[] = { + [SGRBoldOn] = IRCBold, + [SGRBoldOff] = IRCBold, + [SGRUnderlineOn] = IRCUnderline, + [SGRUnderlineOff] = IRCUnderline, + [SGRBlack] = IRCBlack, + [SGRRed] = IRCRed, + [SGRGreen] = IRCGreen, + [SGRYellow] = IRCYellow, + [SGRBlue] = IRCBlue, + [SGRMagenta] = IRCMagenta, + [SGRCyan] = IRCCyan, + [SGRWhite] = IRCGray, + [SGRDefault] = 0, +}; + +static void ircHeader(const char *opts[]) { + if (opts[Monospace]) printf("%c", IRCMonospace); +} + +static void +ircOutput(const char *opts[], enum Class class, const char *str, size_t len) { + char cc[3] = ""; + if (ANSIStyle[class][0] != SGRDefault) { + snprintf(cc, sizeof(cc), "%d", SGRIRC[ANSIStyle[class][0]]); + } + // Prevent trailing formatting after newline ... + bool newline = (str[len - 1] == '\n'); + if (ANSIStyle[class][1]) { + printf( + "%c%s%c%.*s%c%s", + IRCColor, cc, SGRIRC[ANSIStyle[class][1]], + (int)(newline ? len - 1 : len), str, + SGRIRC[ANSIStyle[class][2]], + (newline ? "\n" : "") + ); + } else { + // Double-toggle bold to prevent str being interpreted as color. + printf("%c%s%c%c%.*s", IRCColor, cc, IRCBold, IRCBold, (int)len, str); + } + // ... except for monospace, at the beginning of each line. + if (newline && opts[Monospace]) printf("%c", IRCMonospace); +} + +// }}} + +// HTML format {{{ + +static void htmlEscape(const char *str, size_t len) { + while (len) { + size_t run = strcspn(str, "\"&<>"); + if (run > len) run = len; + switch (str[0]) { + break; case '"': run = 1; printf("""); + break; case '&': run = 1; printf("&"); + break; case '<': run = 1; printf("<"); + break; case '>': run = 1; printf(">"); + break; default: printf("%.*s", (int)run, str); + } + str += run; + len -= run; + } +} + +static const char *HTMLStyle[ClassLen] = { + [Keyword] = "color: dimgray;", + [Macro] = "color: green;", + [Tag] = "color: inherit; text-decoration: underline;", + [String] = "color: teal;", + [Format] = "color: teal; font-weight: bold;", + [Interp] = "color: olive;", + [Comment] = "color: navy;", + [Todo] = "color: navy; font-weight: bold;", + [DiffOld] = "color: red;", + [DiffNew] = "color: green;", +}; + +static void htmlTabSize(const char *tab) { + printf("-moz-tab-size: "); + htmlEscape(tab, strlen(tab)); + printf("; tab-size: "); + htmlEscape(tab, strlen(tab)); + printf(";"); +} + +static void htmlHeader(const char *opts[]) { + if (!opts[Document]) goto body; + printf("<!DOCTYPE html>\n<title>"); + if (opts[Title]) htmlEscape(opts[Title], strlen(opts[Title])); + printf("</title>\n"); + if (opts[CSS]) { + printf("<link rel=\"stylesheet\" href=\""); + htmlEscape(opts[CSS], strlen(opts[CSS])); + printf("\">\n"); + } else if (!opts[Inline]) { + printf("<style>\n"); + if (opts[Tab]) { + printf("pre.hi { "); + htmlTabSize(opts[Tab]); + printf(" }\n"); + } + for (enum Class class = 0; class < ClassLen; ++class) { + if (!HTMLStyle[class]) continue; + printf(".hi.%s { %s }\n", ClassName[class], HTMLStyle[class]); + } + printf( + ".hi.%s:target { color: goldenrod; outline: none; }\n", + ClassName[Tag] + ); + printf("</style>\n"); + } +body: + if (opts[Inline] && opts[Tab]) { + printf("<pre class=\"hi\" style=\""); + htmlTabSize(opts[Tab]); + printf("\">"); + } else { + printf("<pre class=\"hi\">"); + } +} + +static void htmlFooter(const char *opts[]) { + (void)opts; + printf("</pre>\n"); +} + +static void htmlAnchor(const char *opts[], const char *str, size_t len) { + if (opts[Inline]) { + printf("<a style=\"%s\" id=\"", HTMLStyle[Tag] ? HTMLStyle[Tag] : ""); + } else { + printf("<a class=\"hi %s\" id=\"", ClassName[Tag]); + } + htmlEscape(str, len); + printf("\" href=\"#"); + htmlEscape(str, len); + printf("\">"); + htmlEscape(str, len); + printf("</a>"); +} + +static void +htmlOutput(const char *opts[], enum Class class, const char *str, size_t len) { + if (opts[Anchor] && class == Tag) { + htmlAnchor(opts, str, len); + return; + } + if (opts[Inline]) { + printf("<span style=\"%s\">", HTMLStyle[class] ? HTMLStyle[class] : ""); + } else { + printf("<span class=\"hi %s\">", ClassName[class]); + } + htmlEscape(str, len); + printf("</span>"); +} + +// }}} + +// Debug format {{{ +static void +debugOutput(const char *opts[], enum Class class, const char *str, size_t len) { + (void)opts; + printf("%s\t\"", ClassName[class]); + while (len) { + size_t run = strcspn(str, "\t\n\"\\"); + if (run > len) run = len; + switch (str[0]) { + break; case '\t': run = 1; printf("\\t"); + break; case '\n': run = 1; printf("\\n"); + break; case '"': run = 1; printf("\\\""); + break; case '\\': run = 1; printf("\\\\"); + break; default: printf("%.*s", (int)run, str); + } + str += run; + len -= run; + } + printf("\"\n"); +} +// }}} + +static const struct Format { + const char *name; + OutputFn *output; + HeaderFn *header; + HeaderFn *footer; +} Formats[] = { + { "ansi", ansiOutput, NULL, NULL }, + { "irc", ircOutput, ircHeader, NULL }, + { "html", htmlOutput, htmlHeader, htmlFooter }, + { "debug", debugOutput, NULL, NULL }, +}; + +static bool findLanguage(struct Language *lang, const char *name) { + for (size_t i = 0; i < ARRAY_LEN(Languages); ++i) { + if (strcmp(name, Languages[i].name)) continue; + *lang = Languages[i]; + return true; + } + return false; +} + +static bool matchLanguage(struct Language *lang, const char *name) { + for (size_t i = 0; i < ARRAY_LEN(Languages); ++i) { + regex_t regex = compile(Languages[i].pattern, REG_NOSUB); + int error = regexec(®ex, name, 0, NULL, 0); + regfree(®ex); + if (error == REG_NOMATCH) continue; + if (error) errx(EX_SOFTWARE, "regexec: %d", error); + *lang = Languages[i]; + return true; + } + return false; +} + +static bool findFormat(struct Format *format, const char *name) { + for (size_t i = 0; i < ARRAY_LEN(Formats); ++i) { + if (strcmp(name, Formats[i].name)) continue; + *format = Formats[i]; + return true; + } + return false; +} + +int main(int argc, char *argv[]) { + setlocale(LC_CTYPE, ""); + + bool text = false; + const char *name = NULL; + struct Language lang = {0}; + struct Format format = Formats[0]; + const char *opts[OptionLen] = {0}; + + int opt; + while (0 < (opt = getopt(argc, argv, "cf:l:n:o:t"))) { + switch (opt) { + break; case 'c': check(); return EX_OK; + break; case 'f': { + if (!findFormat(&format, optarg)) { + errx(EX_USAGE, "no such format %s", optarg); + } + } + break; case 'l': { + if (!findLanguage(&lang, optarg)) { + errx(EX_USAGE, "no such language %s", optarg); + } + } + break; case 'n': name = optarg; + break; case 'o': { + char *val; + enum Option key; + while (optarg[0]) { + key = getsubopt(&optarg, (char *const *)OptionKey, &val); + if (key >= OptionLen) { + errx(EX_USAGE, "no such option %s", val); + } + opts[key] = (val ? val : ""); + } + } + break; case 't': text = true; + break; default: return EX_USAGE; + } + } + + const char *path = "(stdin)"; + FILE *file = stdin; + if (optind < argc) { + path = argv[optind]; + file = fopen(path, "r"); + if (!file) err(EX_NOINPUT, "%s", path); + } + + if (!name) { + name = strrchr(path, '/'); + name = (name ? &name[1] : path); + } + if (!lang.name && !matchLanguage(&lang, name) && !text) { + errx(EX_USAGE, "cannot infer language for %s", name); + } + if (!opts[Title]) opts[Title] = name; + + struct stat stat; + int error = fstat(fileno(file), &stat); + if (error) err(EX_IOERR, "fstat"); + + size_t cap = (stat.st_mode & S_IFREG ? stat.st_size + 1 : 4096); + char *str = malloc(cap); + if (!str) err(EX_OSERR, "malloc"); + + size_t len = 0, read; + while (0 < (read = fread(&str[len], 1, cap - len - 1, file))) { + len += read; + if (len + 1 < cap) continue; + cap *= 2; + str = realloc(str, cap); + if (!str) err(EX_OSERR, "realloc"); + } + if (ferror(file)) err(EX_IOERR, "fread"); + if (memchr(str, 0, len)) errx(EX_DATAERR, "input is binary"); + str[len] = '\0'; + + enum Class *hi = calloc(len, sizeof(*hi)); + if (!hi) err(EX_OSERR, "calloc"); + + highlight(lang, hi, str); + + size_t run = 0; + if (format.header) format.header(opts); + for (size_t i = 0; i < len; i += run) { + for (run = 1; i + run < len; ++run) { + if (hi[i + run] != hi[i]) break; + if (str[i + run - 1] == '\n') break; + } + format.output(opts, hi[i], &str[i], run); + } + if (format.footer) format.footer(opts); +} |