From 6c4a03e8fb0d6df2e244177500f4820efc6f7876 Mon Sep 17 00:00:00 2001 From: June McEnroe Date: Mon, 18 Jan 2021 20:24:30 -0500 Subject: Allow matching lexers using first input line Using ungetc(3) rather than rewind(3) to support piped input. --- bin/hilex.c | 42 ++++++++++++++++++++++++++++++++++-------- bin/man1/hilex.1 | 9 ++++++--- 2 files changed, 40 insertions(+), 11 deletions(-) (limited to 'bin') diff --git a/bin/hilex.c b/bin/hilex.c index 5bf7e9a6..6574b1b2 100644 --- a/bin/hilex.c +++ b/bin/hilex.c @@ -47,12 +47,13 @@ static const struct Lexer LexText = { yylex, &yyin, &yytext }; static const struct { const struct Lexer *lexer; const char *name; - const char *pattern; + const char *namePatt; + const char *linePatt; } Lexers[] = { - { &LexC, "c", "[.][chlmy]$" }, - { &LexMake, "make", "[.]mk$|^Makefile$" }, - { &LexMdoc, "mdoc", "[.][1-9]$" }, - { &LexText, "text", "[.]txt$" }, + { &LexC, "c", "[.][chlmy]$", NULL }, + { &LexMake, "make", "[.]mk$|^Makefile$", NULL }, + { &LexMdoc, "mdoc", "[.][1-9]$", "^[.]Dd" }, + { &LexText, "text", "[.]txt$", NULL }, }; static const struct Lexer *parseLexer(const char *name) { @@ -62,17 +63,42 @@ static const struct Lexer *parseLexer(const char *name) { errx(EX_USAGE, "unknown lexer %s", name); } -static const struct Lexer *matchLexer(const char *name) { +static void ungets(const char *str, FILE *file) { + size_t len = strlen(str); + for (size_t i = len-1; i < len; --i) { + int ch = ungetc(str[i], file); + if (ch == EOF) errx(EX_IOERR, "cannot push back string"); + } +} + +static const struct Lexer *matchLexer(const char *name, FILE *file) { + char buf[256]; regex_t regex; for (size_t i = 0; i < ARRAY_LEN(Lexers); ++i) { int error = regcomp( - &regex, Lexers[i].pattern, REG_EXTENDED | REG_NOSUB + &regex, Lexers[i].namePatt, REG_EXTENDED | REG_NOSUB ); assert(!error); error = regexec(&regex, name, 0, NULL, 0); regfree(&regex); if (!error) return Lexers[i].lexer; } + char *line = fgets(buf, sizeof(buf), file); + if (!line) return 
NULL; + for (size_t i = 0; i < ARRAY_LEN(Lexers); ++i) { + if (!Lexers[i].linePatt) continue; + int error = regcomp( + &regex, Lexers[i].linePatt, REG_EXTENDED | REG_NOSUB + ); + assert(!error); + error = regexec(&regex, line, 0, NULL, 0); + regfree(&regex); + if (!error) { + ungets(line, file); + return Lexers[i].lexer; + } + } + ungets(line, file); return NULL; } @@ -349,7 +375,7 @@ int main(int argc, char *argv[]) { } } if (!opts[Title]) opts[Title] = name; - if (!lexer) lexer = matchLexer(name); + if (!lexer) lexer = matchLexer(name, file); if (!lexer && text) lexer = &LexText; if (!lexer) errx(EX_USAGE, "cannot infer lexer for %s", name); diff --git a/bin/man1/hilex.1 b/bin/man1/hilex.1 index ace0b8cf..a151476a 100644 --- a/bin/man1/hilex.1 +++ b/bin/man1/hilex.1 @@ -1,4 +1,4 @@ -.Dd January 15, 2021 +.Dd January 18, 2021 .Dt HILEX 1 .Os . @@ -40,7 +40,8 @@ Set the input lexer. See .Sx Input Lexers . The default input lexer is inferred from -.Ar name . +.Ar name +or the first line of input. . .It Fl n Ar name Set the name used to infer the input lexer. @@ -179,7 +180,9 @@ The language. Inferred for .Pa *.[1-9] -files. +files +and files starting with +.Dq .Dd . . .It Cm text Plain text. -- cgit 1.4.1 Oof. 2020-02-13Clean up manual lintsJune McEnroe Wish I could use mandoc -T lint as part of the dev target but it complains about missing referenced pages without a way to turn that off that I can find. 2020-02-13Add -g for generating certificatesJune McEnroe Copied from pounce. 2020-02-13Explicitly close the TLS connectionJune McEnroe Just to be nice. 
2020-02-12Add sandman wrapperJune McEnroe 2020-02-12Document scripts in READMEJune McEnroe 2020-02-12Add -N flag for notificationsJune McEnroe 2020-02-12Mark and unmark on uiHide, uiShowJune McEnroe 2020-02-12Hide debug prompt if buffer starts with /June McEnroe 2020-02-12Only exit on errorneous nick during registrationJune McEnroe 2020-02-12Add 378 to list of WHOIS responsesJune McEnroe It's the one (from freenode anyway) that tells you where you're connecting from. 2020-02-13Implement source address selectionmultiplexd This commit adds a '-S' command line option and a "bind" configuration file option to specify the source address to bind to when connecting to the IRC server. 2020-02-12Add /listJune McEnroe 2020-02-12Simplify transpose swapJune McEnroe 2020-02-12Add C-t transposeJune McEnroe Also in emacs, weechat. 2020-02-12Add C-v and M-vJune McEnroe I figure there should be some way to scroll without keypad, and apparently this is what emacs offers... 2020-02-12Allow for arguments to open/copy utilitiesJune McEnroe 2020-02-12Handle RPL_AWAYJune McEnroe 2020-02-11Support monochromatic terminalsJune McEnroe Oops, division by zero! 2020-02-11Add .gz to chroot-man scriptJune McEnroe 2020-02-11Add -R restricted flagJune McEnroe 2020-02-11Add chroot targetJune McEnroe 2020-02-11Exit focus and paste modes on err exitJune McEnroe 2020-02-11Add startup GPLv3 note and URLJune McEnroe I am a degenerate. 2020-02-11Make sure -D_GNU_SOURCE ends up in CFLAGS on LinuxJune McEnroe 2020-02-11Add note about setting PKG_CONFIG_PATHJune McEnroe 2020-02-11Rename query ID on nick changeJune McEnroe 2020-02-11Call completeClear when closing a windowJune McEnroe 2020-02-11Don't insert color codes for non-mentionsJune McEnroe 2020-02-11Take first two words in colorMentionsJune McEnroe This lets phrases like "hi june" get colored, but still doesn't get carried away. 
2020-02-11Use time_t for save signatureJune McEnroe It's actually more likely to be 64-bit than size_t anyway, and it eliminates some helper functions. Also don't error when reading an empty save file. 2020-02-11Set self.nick to * initiallyJune McEnroe Allows removing a bunch of checks that self.nick is set, and it's what the server usually calls you before registration. Never highlight notices as mentions. 2020-02-11Define ColorCap instead of hardcoding 100June McEnroe 2020-02-11Move hash to top of chat.hJune McEnroe 2020-02-11Move base64 out of chat.hJune McEnroe 2020-02-11Move XDG_SUBDIR out of chat.hJune McEnroe 2020-02-11Fix whois idle unit calculationJune McEnroe Rookie mistake. 2020-02-11Cast towupper to wchar_tJune McEnroe For some reason it takes and returns wint_t... 2020-02-11Cast set but unused variables to voidJune McEnroe 2020-02-11Declare strlcatJune McEnroe 2020-02-11Check if VDSUSP existsJune McEnroe 2020-02-11Fix completeReplace iterationJune McEnroe 2020-02-11Use pkg(8) to configure on FreeBSDJune McEnroe 2020-02-11Remove legacy codeJune McEnroe 2020-02-11Add INSTALLING section to READMEJune McEnroe