summary refs log tree commit diff
diff options
context:
space:
mode:
author June McEnroe <june@causal.agency> 2021-01-18 20:24:30 -0500
committer June McEnroe <june@causal.agency> 2021-01-18 21:33:21 -0500
commit f1ffecec87e5734c50ac4a9cb9ddd93611ea8c47 (patch)
tree abded7c8db904485e44023280ea68244fb6eb1cd
parent Add mdate script (diff)
download src-f1ffecec87e5734c50ac4a9cb9ddd93611ea8c47.tar.gz
src-f1ffecec87e5734c50ac4a9cb9ddd93611ea8c47.zip
Allow matching lexers using first input line
Using ungetc(3) rather than rewind(3) to support piped input.
Diffstat (limited to '')
-rw-r--r-- bin/hilex.c 42
-rw-r--r-- bin/man1/hilex.1 9
2 files changed, 40 insertions, 11 deletions
diff --git a/bin/hilex.c b/bin/hilex.c
index 133336bb..1f9b98a5 100644
--- a/bin/hilex.c
+++ b/bin/hilex.c
@@ -47,12 +47,13 @@ static const struct Lexer LexText = { yylex, &yyin, &yytext };
 static const struct {
 	const struct Lexer *lexer;
 	const char *name;
-	const char *pattern;
+	const char *namePatt;
+	const char *linePatt;
 } Lexers[] = {
-	{ &LexC, "c", "[.][chlmy]$" },
-	{ &LexMake, "make", "[.]mk$|^Makefile$" },
-	{ &LexMdoc, "mdoc", "[.][1-9]$" },
-	{ &LexText, "text", "[.]txt$" },
+	{ &LexC, "c", "[.][chlmy]$", NULL },
+	{ &LexMake, "make", "[.]mk$|^Makefile$", NULL },
+	{ &LexMdoc, "mdoc", "[.][1-9]$", "^[.]Dd" },
+	{ &LexText, "text", "[.]txt$", NULL },
 };
 
 static const struct Lexer *parseLexer(const char *name) {
@@ -62,17 +63,42 @@ static const struct Lexer *parseLexer(const char *name) {
 	errx(EX_USAGE, "unknown lexer %s", name);
 }
 
-static const struct Lexer *matchLexer(const char *name) {
+static void ungets(const char *str, FILE *file) {
+	size_t len = strlen(str);
+	for (size_t i = len-1; i < len; --i) {
+		int ch = ungetc(str[i], file);
+		if (ch == EOF) errx(EX_IOERR, "cannot push back string");
+	}
+}
+
+static const struct Lexer *matchLexer(const char *name, FILE *file) {
+	char buf[256];
 	regex_t regex;
 	for (size_t i = 0; i < ARRAY_LEN(Lexers); ++i) {
 		int error = regcomp(
-			&regex, Lexers[i].pattern, REG_EXTENDED | REG_NOSUB
+			&regex, Lexers[i].namePatt, REG_EXTENDED | REG_NOSUB
 		);
 		assert(!error);
 		error = regexec(&regex, name, 0, NULL, 0);
 		regfree(&regex);
 		if (!error) return Lexers[i].lexer;
 	}
+	char *line = fgets(buf, sizeof(buf), file);
+	if (!line) return NULL;
+	for (size_t i = 0; i < ARRAY_LEN(Lexers); ++i) {
+		if (!Lexers[i].linePatt) continue;
+		int error = regcomp(
+			&regex, Lexers[i].linePatt, REG_EXTENDED | REG_NOSUB
+		);
+		assert(!error);
+		error = regexec(&regex, line, 0, NULL, 0);
+		regfree(&regex);
+		if (!error) {
+			ungets(line, file);
+			return Lexers[i].lexer;
+		}
+	}
+	ungets(line, file);
 	return NULL;
 }
 
@@ -349,7 +375,7 @@ int main(int argc, char *argv[]) {
 		}
 	}
 	if (!opts[Title]) opts[Title] = name;
-	if (!lexer) lexer = matchLexer(name);
+	if (!lexer) lexer = matchLexer(name, file);
 	if (!lexer && text) lexer = &LexText;
 	if (!lexer) errx(EX_USAGE, "cannot infer lexer for %s", name);
 
diff --git a/bin/man1/hilex.1 b/bin/man1/hilex.1
index ace0b8cf..a151476a 100644
--- a/bin/man1/hilex.1
+++ b/bin/man1/hilex.1
@@ -1,4 +1,4 @@
-.Dd January 15, 2021
+.Dd January 18, 2021
 .Dt HILEX 1
 .Os
 .
@@ -40,7 +40,8 @@ Set the input lexer.
 See
 .Sx Input Lexers .
 The default input lexer is inferred from
-.Ar name .
+.Ar name
+or the first line of input.
 .
 .It Fl n Ar name
 Set the name used to infer the input lexer.
@@ -179,7 +180,9 @@ The
 language.
 Inferred for
 .Pa *.[1-9]
-files.
+files
+and files starting with
+.Dq .Dd .
 .
 .It Cm text
 Plain text.