summary refs log tree commit diff
diff options
context:
space:
mode:
authorJune McEnroe <june@causal.agency>2021-01-20 18:01:04 -0500
committerJune McEnroe <june@causal.agency>2021-01-20 18:04:04 -0500
commit02a9d8a8b07530f50d27b6158329dd8d218d298b (patch)
tree0cf2b2c323758df778fcef13c9fa93f7a1c7c4d7
parentAdd all target to git.causal.agency Makefile (diff)
downloadsrc-02a9d8a8b07530f50d27b6158329dd8d218d298b.tar.gz
src-02a9d8a8b07530f50d27b6158329dd8d218d298b.zip
Add messy sh lexer
Surprisingly seems to work for everything I looked at in my repos.
-rw-r--r--bin/Makefile1
-rw-r--r--bin/hilex.c1
-rw-r--r--bin/hilex.h1
-rw-r--r--bin/man1/hilex.122
-rw-r--r--bin/sh.l175
5 files changed, 196 insertions, 4 deletions
diff --git a/bin/Makefile b/bin/Makefile
index b133232f..7e6b0b7d 100644
--- a/bin/Makefile
+++ b/bin/Makefile
@@ -83,6 +83,7 @@ OBJS.hilex += c11.o
 OBJS.hilex += hilex.o
 OBJS.hilex += make.o
 OBJS.hilex += mdoc.o
+OBJS.hilex += sh.o
 
 hilex: ${OBJS.hilex}
 	${CC} ${LDFLAGS} ${OBJS.$@} ${LDLIBS.$@} -o $@
diff --git a/bin/hilex.c b/bin/hilex.c
index 4952c7ad..8a03eb80 100644
--- a/bin/hilex.c
+++ b/bin/hilex.c
@@ -53,6 +53,7 @@ static const struct {
 	{ &LexC, "c", "[.][chlmy]$", NULL },
 	{ &LexMake, "make", "[.](mk|am)$|^Makefile$", NULL },
 	{ &LexMdoc, "mdoc", "[.][1-9]$", "^[.]Dd" },
+	{ &LexSh, "sh", "[.]sh$|^[.](profile|shrc)$", "^#!/bin/sh" },
 	{ &LexText, "text", "[.]txt$", NULL },
 };
 
diff --git a/bin/hilex.h b/bin/hilex.h
index 2c080e20..882b5f95 100644
--- a/bin/hilex.h
+++ b/bin/hilex.h
@@ -47,3 +47,4 @@ struct Lexer {
 extern const struct Lexer LexC;
 extern const struct Lexer LexMake;
 extern const struct Lexer LexMdoc;
+extern const struct Lexer LexSh;
diff --git a/bin/man1/hilex.1 b/bin/man1/hilex.1
index ffa20bcf..80b3155b 100644
--- a/bin/man1/hilex.1
+++ b/bin/man1/hilex.1
@@ -1,4 +1,4 @@
-.Dd January 18, 2021
+.Dd January 20, 2021
 .Dt HILEX 1
 .Os
 .
@@ -167,9 +167,8 @@ Inferred for
 files.
 .
 .It Cm make
-The BSD
-.Xr make 1
-language.
+BSD
+.Xr make 1 .
 Inferred for
 .Pa Makefile ,
 .Pa *.mk
@@ -187,6 +186,21 @@ files
 and files starting with
 .Dq .Dd .
 .
+.It Cm sh
+POSIX
+.Xr sh 1 .
+Since lexical analysis of
+the shell command language
+is effectively impossible,
+this is best-effort only.
+Inferred for
+.Pa *.sh ,
+.Pa .profile ,
+.Pa .shrc
+files
+and files starting with
+.Dq #!/bin/sh .
+.
 .It Cm text
 Plain text.
 Inferred for
diff --git a/bin/sh.l b/bin/sh.l
new file mode 100644
index 00000000..77dd2518
--- /dev/null
+++ b/bin/sh.l
@@ -0,0 +1,175 @@
+/* Copyright (C) 2021  C. McEnroe <june@causal.agency>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+%option prefix="sh"
+%option noyywrap
+
+%{
+#include <assert.h>
+#include <string.h>
+#include "hilex.h"
+
+enum { Cap = 64 }; // maximum number of start conditions the stack can hold
+static int len = 1; // entries in use; stack[0] is the zero-initialized bottom entry and is never removed
+static int stack[Cap];
+static int push(int val) { // records val on top of the stack and returns it, so it can be written as BEGIN(push(X))
+	if (len < Cap) stack[len++] = val; // when the stack is full val is still returned, but not recorded
+	return val;
+}
+static int pop(void) { // discards the top entry (but never the bottom one) and returns the entry now on top
+	if (len > 1) len--;
+	return stack[len-1];
+}
+%}
+
+%s First
+%s Param Command Arith Backtick
+%x DQuote HereDocDel HereDoc HereDocLit
+
+word [[:alnum:]_.-]+
+param [^:=?+%#{}-]+
+reserved [!{}]|else|do|elif|for|done|fi|then|until|while|if|case|esac
+
+%%
+	static char *delimiter;
+
+[[:blank:]]+ { return Normal; }
+
+"\\". { return Escape; }
+
+<INITIAL,First,DQuote,HereDoc,Param,Command,Arith>{
+	"$"[*@#?$!0-9-] |
+	"$"[_[:alpha:]][_[:alnum:]]* |
+	"${"[#]?{param}"}" {
+		return Subst; /* complete in one token; a name needs a letter or underscore first, so a lone dollar sign or a digit followed by letters is not taken as a name */
+	}
+	"${"{param} {
+		BEGIN(push(Param)); /* expansion with a modifier: stay in Param until the closing brace */
+		return Subst;
+	}
+	"$(" {
+		BEGIN(push(Command)); /* command substitution, left again at the closing parenthesis */
+		return Subst;
+	}
+	"$((" {
+		BEGIN(push(Arith)); /* arithmetic expansion; preferred over the rule above because it matches more text */
+		return Subst;
+	}
+	"`" {
+		BEGIN(push(Backtick)); /* backquoted command substitution, left again at the next backquote */
+		return Subst;
+	}
+}
+<Param>"}" |
+<Command>")" |
+<Arith>"))" |
+<Backtick>"`" {
+	BEGIN(pop()); /* the substitution is closed: drop its state and resume the one below it on the stack */
+	return Subst;
+}
+
+[&();|]|"&&"|";;"|"||" {
+	BEGIN(push(First)); /* lex what follows in First, the state that recognizes reserved words */
+	return Operator;
+}
+[0-9]?([<>]"&"?|">|"|">>"|"<>") {
+	return Operator;
+}
+
+^{reserved} { return Keyword; }
+<First>{
+	[[:blank:]]+ { return Normal; }
+	{reserved} {
+		BEGIN(pop()); /* First covers a single word; on an equal-length match this rule is chosen over the word rule because it is listed first */
+		return Keyword;
+	}
+	{word} {
+		BEGIN(pop()); /* any other word is plain text and ends First as well */
+		return Normal;
+	}
+}
+
+{word}/[[:blank:]]*"()" { return Ident; }
+
+[0-9]?("<<"|"<<-") {
+	BEGIN(push(HereDocDel)); /* the delimiter word is read next, in HereDocDel */
+	return Operator;
+}
+<HereDocDel>{
+	[[:blank:]]+ { return Normal; }
+	{word} {
+		delimiter = strdup(yytext); /* heap copy, released by the terminator rule when the matching line is found */
+		assert(delimiter);
+		BEGIN(pop(), push(HereDoc)); /* replace HereDocDel with HereDoc, where substitutions are still recognized */
+		return Ident;
+	}
+	"'"{word}"'" {
+		delimiter = strndup(&yytext[1], strlen(yytext)-2); /* copy the word without its surrounding quotes */
+		assert(delimiter);
+		BEGIN(pop(), push(HereDocLit)); /* replace HereDocDel with HereDocLit, where the body is all String */
+		return Ident;
+	}
+}
+<HereDoc,HereDocLit>{
+	^"\t"*{word} {
+		if (strcmp(&yytext[strspn(yytext, "\t")], delimiter)) REJECT; /* skip leading tabs, then compare; a word that is not the delimiter goes to the next best rule */
+		free(delimiter); /* the copy made when the here-document was opened */
+		BEGIN(pop()); /* back to the state that was active before the here-document */
+		return Ident;
+	}
+}
+<HereDoc>{
+	[^$`\n]+ { return String; }
+	.|\n { /* single characters the rule above excludes and no earlier rule claimed */ return String; }
+}
+<HereDocLit>{
+	.+|\n { /* text and newline are separate tokens: a rule that also consumed the newline would always match more than the terminator rule, and the here-document would never end */ return String; }
+}
+
+"'"[^'']*"'" { return String; }
+
+"\""/[^$`\\] {
+	BEGIN(push(DQuote));
+	yymore();
+}
+"\"" {
+	BEGIN(push(DQuote));
+	return String;
+}
+
+<DQuote>{
+	[^\\$`""]*"\"" {
+		BEGIN(pop());
+		return String;
+	}
+	"\\"[$`""\\\n] { return Escape; }
+	[^\\$`""]+|. { return String; }
+}
+
+<INITIAL,First,Command,Backtick,Arith>"#".* { return Comment; }
+
+{word} { return Normal; }
+
+.|\n { return Normal; }
+
+%{
+	(void)yyunput; // NOTE(review): presumably present only to count as a use of the generated helper and avoid an unused-function warning
+	(void)input; // same for the generated input helper
+%}
+
+%%
+
+const struct Lexer LexSh = { yylex, &yyin, &yytext }; // the lexer object declared extern in hilex.h, built from this scanner's function, input stream and token text