diff options
author | June McEnroe <june@causal.agency> | 2021-01-18 20:24:30 -0500 |
---|---|---|
committer | June McEnroe <june@causal.agency> | 2021-01-18 21:33:21 -0500 |
commit | f1ffecec87e5734c50ac4a9cb9ddd93611ea8c47 (patch) | |
tree | abded7c8db904485e44023280ea68244fb6eb1cd /bin | |
parent | Add mdate script (diff) | |
download | src-f1ffecec87e5734c50ac4a9cb9ddd93611ea8c47.tar.gz src-f1ffecec87e5734c50ac4a9cb9ddd93611ea8c47.zip |
Allow matching lexers using first input line
Using ungetc(3) rather than rewind(3) to support piped input.
Diffstat (limited to '')
-rw-r--r-- | bin/hilex.c | 42 | ||||
-rw-r--r-- | bin/man1/hilex.1 | 9 |
2 files changed, 40 insertions, 11 deletions
diff --git a/bin/hilex.c b/bin/hilex.c index 133336bb..1f9b98a5 100644 --- a/bin/hilex.c +++ b/bin/hilex.c @@ -47,12 +47,13 @@ static const struct Lexer LexText = { yylex, &yyin, &yytext }; static const struct { const struct Lexer *lexer; const char *name; - const char *pattern; + const char *namePatt; + const char *linePatt; } Lexers[] = { - { &LexC, "c", "[.][chlmy]$" }, - { &LexMake, "make", "[.]mk$|^Makefile$" }, - { &LexMdoc, "mdoc", "[.][1-9]$" }, - { &LexText, "text", "[.]txt$" }, + { &LexC, "c", "[.][chlmy]$", NULL }, + { &LexMake, "make", "[.]mk$|^Makefile$", NULL }, + { &LexMdoc, "mdoc", "[.][1-9]$", "^[.]Dd" }, + { &LexText, "text", "[.]txt$", NULL }, }; static const struct Lexer *parseLexer(const char *name) { @@ -62,17 +63,42 @@ static const struct Lexer *parseLexer(const char *name) { errx(EX_USAGE, "unknown lexer %s", name); } -static const struct Lexer *matchLexer(const char *name) { +static void ungets(const char *str, FILE *file) { + size_t len = strlen(str); + for (size_t i = len-1; i < len; --i) { + int ch = ungetc(str[i], file); + if (ch == EOF) errx(EX_IOERR, "cannot push back string"); + } +} + +static const struct Lexer *matchLexer(const char *name, FILE *file) { + char buf[256]; regex_t regex; for (size_t i = 0; i < ARRAY_LEN(Lexers); ++i) { int error = regcomp( - &regex, Lexers[i].pattern, REG_EXTENDED | REG_NOSUB + &regex, Lexers[i].namePatt, REG_EXTENDED | REG_NOSUB ); assert(!error); error = regexec(&regex, name, 0, NULL, 0); regfree(&regex); if (!error) return Lexers[i].lexer; } + char *line = fgets(buf, sizeof(buf), file); + if (!line) return NULL; + for (size_t i = 0; i < ARRAY_LEN(Lexers); ++i) { + if (!Lexers[i].linePatt) continue; + int error = regcomp( + &regex, Lexers[i].linePatt, REG_EXTENDED | REG_NOSUB + ); + assert(!error); + error = regexec(&regex, line, 0, NULL, 0); + regfree(&regex); + if (!error) { + ungets(line, file); + return Lexers[i].lexer; + } + } + ungets(line, file); return NULL; } @@ -349,7 +375,7 @@ int main(int argc, char *argv[]) 
{ } } if (!opts[Title]) opts[Title] = name; - if (!lexer) lexer = matchLexer(name); + if (!lexer) lexer = matchLexer(name, file); if (!lexer && text) lexer = &LexText; if (!lexer) errx(EX_USAGE, "cannot infer lexer for %s", name); diff --git a/bin/man1/hilex.1 b/bin/man1/hilex.1 index ace0b8cf..a151476a 100644 --- a/bin/man1/hilex.1 +++ b/bin/man1/hilex.1 @@ -1,4 +1,4 @@ -.Dd January 15, 2021 +.Dd January 18, 2021 .Dt HILEX 1 .Os . @@ -40,7 +40,8 @@ Set the input lexer. See .Sx Input Lexers . The default input lexer is inferred from -.Ar name . +.Ar name +or the first line of input. . .It Fl n Ar name Set the name used to infer the input lexer. @@ -179,7 +180,9 @@ The language. Inferred for .Pa *.[1-9] -files. +files +and files starting with +.Dq .Dd . . .It Cm text Plain text. |