diff options
author | June McEnroe <june@causal.agency> | 2021-01-20 18:01:04 -0500 |
---|---|---|
committer | June McEnroe <june@causal.agency> | 2021-01-20 18:04:04 -0500 |
commit | fa48f70dfcee8388fcb4e0d449d6d5bc4e5cfc68 (patch) | |
tree | fe33da042f097a918eb979c5591e1d4ea555fee0 | |
parent | Add all target to git.causal.agency Makefile (diff) | |
download | src-fa48f70dfcee8388fcb4e0d449d6d5bc4e5cfc68.tar.gz src-fa48f70dfcee8388fcb4e0d449d6d5bc4e5cfc68.zip |
Add messy sh lexer
Surprisingly seems to work for everything I looked at in my repos.
Diffstat (limited to '')
-rw-r--r-- | bin/Makefile | 1 | ||||
-rw-r--r-- | bin/hilex.c | 1 | ||||
-rw-r--r-- | bin/hilex.h | 1 | ||||
-rw-r--r-- | bin/man1/hilex.1 | 22 | ||||
-rw-r--r-- | bin/sh.l | 175 |
5 files changed, 196 insertions, 4 deletions
diff --git a/bin/Makefile b/bin/Makefile index b133232f..7e6b0b7d 100644 --- a/bin/Makefile +++ b/bin/Makefile @@ -83,6 +83,7 @@ OBJS.hilex += c11.o OBJS.hilex += hilex.o OBJS.hilex += make.o OBJS.hilex += mdoc.o +OBJS.hilex += sh.o hilex: ${OBJS.hilex} ${CC} ${LDFLAGS} ${OBJS.$@} ${LDLIBS.$@} -o $@ diff --git a/bin/hilex.c b/bin/hilex.c index 59c1f12d..79497e0b 100644 --- a/bin/hilex.c +++ b/bin/hilex.c @@ -53,6 +53,7 @@ static const struct { { &LexC, "c", "[.][chlmy]$", NULL }, { &LexMake, "make", "[.](mk|am)$|^Makefile$", NULL }, { &LexMdoc, "mdoc", "[.][1-9]$", "^[.]Dd" }, + { &LexSh, "sh", "[.]sh$|^[.](profile|shrc)$", "^#!/bin/sh" }, { &LexText, "text", "[.]txt$", NULL }, }; diff --git a/bin/hilex.h b/bin/hilex.h index b6c3c724..b57fc8cc 100644 --- a/bin/hilex.h +++ b/bin/hilex.h @@ -47,3 +47,4 @@ struct Lexer { extern const struct Lexer LexC; extern const struct Lexer LexMake; extern const struct Lexer LexMdoc; +extern const struct Lexer LexSh; diff --git a/bin/man1/hilex.1 b/bin/man1/hilex.1 index ffa20bcf..80b3155b 100644 --- a/bin/man1/hilex.1 +++ b/bin/man1/hilex.1 @@ -1,4 +1,4 @@ -.Dd January 18, 2021 +.Dd January 20, 2021 .Dt HILEX 1 .Os . @@ -167,9 +167,8 @@ Inferred for files. . .It Cm make -The BSD -.Xr make 1 -language. +BSD +.Xr make 1 . Inferred for .Pa Makefile , .Pa *.mk @@ -187,6 +186,21 @@ files and files starting with .Dq .Dd . . +.It Cm sh +POSIX +.Xr sh 1 . +Since lexical analysis of +the shell command language +is effectively impossible, +this is best-effort only. +Inferred for +.Pa *.sh , +.Pa .profile , +.Pa .shrc +files +and files starting with +.Dq #!/bin/sh . +. .It Cm text Plain text. 
Inferred for diff --git a/bin/sh.l b/bin/sh.l new file mode 100644 index 00000000..6b9d7223 --- /dev/null +++ b/bin/sh.l @@ -0,0 +1,175 @@
/* Copyright (C) 2021  June McEnroe <june@causal.agency>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

/* Best-effort highlighting scanner for POSIX sh. Nested constructs
 * (substitutions, quotes, here-documents) are tracked with a small stack
 * of flex start conditions. */

%option prefix="sh"
%option noyywrap

%{
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "hilex.h"

/* Stack of start conditions. Slot 0 is never written and stays 0, which is
 * flex's INITIAL, so popping can always unwind to the top level. */
enum { Cap = 64 };
static int len = 1;
static int stack[Cap];

/* Record val as the current start condition and return it for BEGIN().
 * When the stack is full the value is returned without being recorded, so
 * excessive nesting degrades the highlighting instead of overrunning the
 * array. */
static int push(int val) {
	if (len < Cap) stack[len++] = val;
	return val;
}

/* Drop the current start condition (never the bottom slot) and return the
 * one that is now on top. */
static int pop(void) {
	if (len > 1) len--;
	return stack[len-1];
}
%}

%s First
%s Param Command Arith Backtick
%x DQuote HereDocDel HereDoc HereDocLit

word [[:alnum:]_.-]+
param [^:=?+%#{}-]+
reserved [!{}]|else|do|elif|for|done|fi|then|until|while|if|case|esac

%%
	/* Heap copy of the pending here-document delimiter, or NULL. */
	static char *delimiter = NULL;

[[:blank:]]+ { return Normal; }

"\\". { return Escape; }

<INITIAL,First,DQuote,HereDoc,Param,Command,Arith>{
	"$"[*@#?$!0-9-] |
	"$"[_[:alpha:]][_[:alnum:]]* |
	"${"[#]?{param}"}" {
		/* Special parameter, plain name, or a fully braced name. A name
		 * must begin with an underscore or a letter. */
		return Subst;
	}
	"${"{param} {
		BEGIN(push(Param));
		return Subst;
	}
	"$(" {
		BEGIN(push(Command));
		return Subst;
	}
	"$((" {
		BEGIN(push(Arith));
		return Subst;
	}
	"`" {
		BEGIN(push(Backtick));
		return Subst;
	}
}
<Param>"}" |
<Command>")" |
<Arith>"))" |
<Backtick>"`" {
	BEGIN(pop());
	return Subst;
}

[&();|]|"&&"|";;"|"||" {
	/* The next word is in command position, where reserved words apply. */
	BEGIN(push(First));
	return Operator;
}
[0-9]?([<>]"&"?|">|"|">>"|"<>") {
	return Operator;
}

^{reserved} { return Keyword; }
<First>{
	[[:blank:]]+ { return Normal; }
	{reserved} {
		BEGIN(pop());
		return Keyword;
	}
	{word} {
		BEGIN(pop());
		return Normal;
	}
}

{word}/[[:blank:]]*"()" { return Ident; }

[0-9]?("<<"|"<<-") {
	BEGIN(push(HereDocDel));
	return Operator;
}
<HereDocDel>{
	[[:blank:]]+ { return Normal; }
	{word} {
		/* Release a delimiter left over from an unterminated or
		 * overlapping here-document before replacing it. */
		free(delimiter);
		delimiter = strdup(yytext);
		assert(delimiter);
		BEGIN(pop(), push(HereDoc));
		return Ident;
	}
	"'"{word}"'" {
		/* Quoted delimiter: keep the word without its quotes and scan
		 * the body literally. */
		free(delimiter);
		delimiter = strndup(&yytext[1], strlen(yytext)-2);
		assert(delimiter);
		BEGIN(pop(), push(HereDocLit));
		return Ident;
	}
}
<HereDoc,HereDocLit>{
	^"\t"*{word} {
		/* Leading tabs are skipped so that <<- bodies terminate too.
		 * Anything other than the delimiter is handed to the body
		 * rules below. */
		if (strcmp(&yytext[strspn(yytext, "\t")], delimiter)) REJECT;
		free(delimiter);
		delimiter = NULL;
		BEGIN(pop());
		return Ident;
	}
}
<HereDoc>{
	[^$`\n]+ { return String; }
	.|\n { return String; }
}
<HereDocLit>{
	.+ {
		/* The newline is deliberately not part of this match: a line
		 * holding only the delimiter then ties in length with the
		 * delimiter rule above, which wins by being listed first. */
		return String;
	}
	\n { return String; }
}

"'"[^'']*"'" { return String; }

"\""/[^$`\\] {
	/* Glue the opening quote onto the first piece of string text. */
	BEGIN(push(DQuote));
	yymore();
}
"\"" {
	BEGIN(push(DQuote));
	return String;
}

<DQuote>{
	[^\\$`""]*"\"" {
		BEGIN(pop());
		return String;
	}
	"\\"[$`""\\\n] { return Escape; }
	[^\\$`""]+|. { return String; }
}

<INITIAL,First,Command,Backtick,Arith>"#".* { return Comment; }

{word} { return Normal; }

.|\n { return Normal; }

%{
	/* Reference the unused generated helpers to silence warnings. */
	(void)yyunput;
	(void)input;
%}

%%

const struct Lexer LexSh = { yylex, &yyin, &yytext };