diff options
author | June McEnroe <june@causal.agency> | 2021-01-18 20:24:30 -0500 |
---|---|---|
committer | June McEnroe <june@causal.agency> | 2021-01-18 21:33:21 -0500 |
commit | 6c4a03e8fb0d6df2e244177500f4820efc6f7876 (patch) | |
tree | e64925d2a5e45f1078b3251b32752138ec4a3476 /bin/hilex.c | |
parent | Add mdate script (diff) | |
download | src-6c4a03e8fb0d6df2e244177500f4820efc6f7876.tar.gz src-6c4a03e8fb0d6df2e244177500f4820efc6f7876.zip |
Allow matching lexers using first input line
Using ungetc(3) rather than rewind(3) to support piped input.
Diffstat (limited to '')
-rw-r--r-- | bin/hilex.c | 42 |
1 files changed, 34 insertions, 8 deletions
diff --git a/bin/hilex.c b/bin/hilex.c index 5bf7e9a6..6574b1b2 100644 --- a/bin/hilex.c +++ b/bin/hilex.c @@ -47,12 +47,13 @@ static const struct Lexer LexText = { yylex, &yyin, &yytext }; static const struct { const struct Lexer *lexer; const char *name; - const char *pattern; + const char *namePatt; + const char *linePatt; } Lexers[] = { - { &LexC, "c", "[.][chlmy]$" }, - { &LexMake, "make", "[.]mk$|^Makefile$" }, - { &LexMdoc, "mdoc", "[.][1-9]$" }, - { &LexText, "text", "[.]txt$" }, + { &LexC, "c", "[.][chlmy]$", NULL }, + { &LexMake, "make", "[.]mk$|^Makefile$", NULL }, + { &LexMdoc, "mdoc", "[.][1-9]$", "^[.]Dd" }, + { &LexText, "text", "[.]txt$", NULL }, }; static const struct Lexer *parseLexer(const char *name) { @@ -62,17 +63,42 @@ static const struct Lexer *parseLexer(const char *name) { errx(EX_USAGE, "unknown lexer %s", name); } -static const struct Lexer *matchLexer(const char *name) { +static void ungets(const char *str, FILE *file) { + size_t len = strlen(str); + for (size_t i = len-1; i < len; --i) { + int ch = ungetc(str[i], file); + if (ch == EOF) errx(EX_IOERR, "cannot push back string"); + } +} + +static const struct Lexer *matchLexer(const char *name, FILE *file) { + char buf[256]; regex_t regex; for (size_t i = 0; i < ARRAY_LEN(Lexers); ++i) { int error = regcomp( - &regex, Lexers[i].pattern, REG_EXTENDED | REG_NOSUB + &regex, Lexers[i].namePatt, REG_EXTENDED | REG_NOSUB ); assert(!error); error = regexec(&regex, name, 0, NULL, 0); regfree(&regex); if (!error) return Lexers[i].lexer; } + char *line = fgets(buf, sizeof(buf), file); + if (!line) return NULL; + for (size_t i = 0; i < ARRAY_LEN(Lexers); ++i) { + if (!Lexers[i].linePatt) continue; + int error = regcomp( + &regex, Lexers[i].linePatt, REG_EXTENDED | REG_NOSUB + ); + assert(!error); + error = regexec(&regex, line, 0, NULL, 0); + regfree(&regex); + if (!error) { + ungets(line, file); + return Lexers[i].lexer; + } + } + ungets(line, file); return NULL; } @@ -349,7 +375,7 @@ int main(int argc, char *argv[]) 
{ } } if (!opts[Title]) opts[Title] = name; - if (!lexer) lexer = matchLexer(name); + if (!lexer) lexer = matchLexer(name, file); if (!lexer && text) lexer = &LexText; if (!lexer) errx(EX_USAGE, "cannot infer lexer for %s", name); |