From 02a9d8a8b07530f50d27b6158329dd8d218d298b Mon Sep 17 00:00:00 2001 From: "C. McEnroe" Date: Wed, 20 Jan 2021 18:01:04 -0500 Subject: Add messy sh lexer Surprisingly seems to work for everything I looked at in my repos. --- bin/Makefile | 1 + bin/hilex.c | 1 + bin/hilex.h | 1 + bin/man1/hilex.1 | 22 +++++-- bin/sh.l | 175 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 196 insertions(+), 4 deletions(-) create mode 100644 bin/sh.l (limited to 'bin') diff --git a/bin/Makefile b/bin/Makefile index b133232f..7e6b0b7d 100644 --- a/bin/Makefile +++ b/bin/Makefile @@ -83,6 +83,7 @@ OBJS.hilex += c11.o OBJS.hilex += hilex.o OBJS.hilex += make.o OBJS.hilex += mdoc.o +OBJS.hilex += sh.o hilex: ${OBJS.hilex} ${CC} ${LDFLAGS} ${OBJS.$@} ${LDLIBS.$@} -o $@ diff --git a/bin/hilex.c b/bin/hilex.c index 4952c7ad..8a03eb80 100644 --- a/bin/hilex.c +++ b/bin/hilex.c @@ -53,6 +53,7 @@ static const struct { { &LexC, "c", "[.][chlmy]$", NULL }, { &LexMake, "make", "[.](mk|am)$|^Makefile$", NULL }, { &LexMdoc, "mdoc", "[.][1-9]$", "^[.]Dd" }, + { &LexSh, "sh", "[.]sh$|^[.](profile|shrc)$", "^#!/bin/sh" }, { &LexText, "text", "[.]txt$", NULL }, }; diff --git a/bin/hilex.h b/bin/hilex.h index 2c080e20..882b5f95 100644 --- a/bin/hilex.h +++ b/bin/hilex.h @@ -47,3 +47,4 @@ struct Lexer { extern const struct Lexer LexC; extern const struct Lexer LexMake; extern const struct Lexer LexMdoc; +extern const struct Lexer LexSh; diff --git a/bin/man1/hilex.1 b/bin/man1/hilex.1 index ffa20bcf..80b3155b 100644 --- a/bin/man1/hilex.1 +++ b/bin/man1/hilex.1 @@ -1,4 +1,4 @@ -.Dd January 18, 2021 +.Dd January 20, 2021 .Dt HILEX 1 .Os . @@ -167,9 +167,8 @@ Inferred for files. . .It Cm make -The BSD -.Xr make 1 -language. +BSD +.Xr make 1 . Inferred for .Pa Makefile , .Pa *.mk @@ -187,6 +186,21 @@ files and files starting with .Dq .Dd . . +.It Cm sh +POSIX +.Xr sh 1 . +Since lexical analysis of +the shell command language +is effectively impossible, +this is best-effort only. 
+Inferred for +.Pa *.sh , +.Pa .profile , +.Pa .shrc +files +and files starting with +.Dq #!/bin/sh . +. .It Cm text Plain text. Inferred for diff --git a/bin/sh.l b/bin/sh.l new file mode 100644 index 00000000..77dd2518 --- /dev/null +++ b/bin/sh.l @@ -0,0 +1,175 @@ +/* Copyright (C) 2021 C. McEnroe + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +%option prefix="sh" +%option noyywrap + +%{ +#include +#include +#include "hilex.h" + +enum { Cap = 64 }; +static int len = 1; +static int stack[Cap]; +static int push(int val) { + if (len < Cap) stack[len++] = val; + return val; +} +static int pop(void) { + if (len > 1) len--; + return stack[len-1]; +} +%} + +%s First +%s Param Command Arith Backtick +%x DQuote HereDocDel HereDoc HereDocLit + +word [[:alnum:]_.-]+ +param [^:=?+%#{}-]+ +reserved [!{}]|else|do|elif|for|done|fi|then|until|while|if|case|esac + +%% + static char *delimiter; + +[[:blank:]]+ { return Normal; } + +"\\". 
{ return Escape; } + +{ + "$"[*@#?$!0-9-] | + "$"[_[:alpha:][_[:alnum:]]* | + "${"[#]?{param}"}" { + return Subst; + } + "${"{param} { + BEGIN(push(Param)); + return Subst; + } + "$(" { + BEGIN(push(Command)); + return Subst; + } + "$((" { + BEGIN(push(Arith)); + return Subst; + } + "`" { + BEGIN(push(Backtick)); + return Subst; + } +} +"}" | +")" | +"))" | +"`" { + BEGIN(pop()); + return Subst; +} + +[&();|]|"&&"|";;"|"||" { + BEGIN(push(First)); + return Operator; +} +[0-9]?([<>]"&"?|">|"|">>"|"<>") { + return Operator; +} + +^{reserved} { return Keyword; } +{ + [[:blank:]]+ { return Normal; } + {reserved} { + BEGIN(pop()); + return Keyword; + } + {word} { + BEGIN(pop()); + return Normal; + } +} + +{word}/[[:blank:]]*"()" { return Ident; } + +[0-9]?("<<"|"<<-") { + BEGIN(push(HereDocDel)); + return Operator; +} +{ + [[:blank:]]+ { return Normal; } + {word} { + delimiter = strdup(yytext); + assert(delimiter); + BEGIN(pop(), push(HereDoc)); + return Ident; + } + "'"{word}"'" { + delimiter = strndup(&yytext[1], strlen(yytext)-2); + assert(delimiter); + BEGIN(pop(), push(HereDocLit)); + return Ident; + } +} +{ + ^"\t"*{word} { + if (strcmp(&yytext[strspn(yytext, "\t")], delimiter)) REJECT; + free(delimiter); + BEGIN(pop()); + return Ident; + } +} +{ + [^$`\n]+ { return String; } + .|\n { return String; } +} +{ + .*\n { return String; } +} + +"'"[^'']*"'" { return String; } + +"\""/[^$`\\] { + BEGIN(push(DQuote)); + yymore(); +} +"\"" { + BEGIN(push(DQuote)); + return String; +} + +{ + [^\\$`""]*"\"" { + BEGIN(pop()); + return String; + } + "\\"[$`""\\\n] { return Escape; } + [^\\$`""]+|. { return String; } +} + +"#".* { return Comment; } + +{word} { return Normal; } + +.|\n { return Normal; } + +%{ + (void)yyunput; + (void)input; +%} + +%% + +const struct Lexer LexSh = { yylex, &yyin, &yytext }; -- cgit 1.4.1 an>Allow FocusEvents in xtermJune McEnroe Oops! This whole time I thought OpenBSD xterm for some reason didn't support focus events. 
It turns out allowMouseOps: false disables them by default. Replace the disallowedMouseOps list with everything but.
2021-09-23 Use NI_NUMERICSERV (June McEnroe)
2021-09-23 Make up build away from FreeBSD (June McEnroe)
2021-09-23 Add quick (June McEnroe)
2021-09-23 Add The Hobbit (June McEnroe) -- Surprisingly good for something written by a man like a hundred years ago!
2021-09-22 Remove PSF fonts (June McEnroe) -- They were fun to make but I never actually used them.
2021-09-22 Remove Linux-specific utilities (June McEnroe) -- I haven't used these in ages.
2021-09-22 Call sandbox in CGI mode (June McEnroe) -- Otherwise upload won't actually work.
2021-09-22 Support HTTP PUT in up (June McEnroe) -- For use by Palaver[1]. Unfortunately, at least in the current App Store version of Palaver, this doesn't work correctly with basic auth. [1]: https://palaverapp.com/guides/commands/set.html#ui-image_service
2021-09-22 Remove default favicon (June McEnroe) -- I hate these things and also this one sucks.
2021-09-21 Use Z_FILTERED strategy (June McEnroe)
2021-09-21 Recalculate various lengths only as needed (June McEnroe) -- This actually speeds things up quite a bit, saving roughly a second on a big PNG screenshot. Almost all the remaining time is spent in deflate.
2021-09-21 Rewrite pngo, add explicit options (June McEnroe) -- Interesting to see how my code habits have changed.
2021-09-16 Fix /* **/ comment matching (June McEnroe)
2021-09-15 Remove typer, add downgrade to README (June McEnroe)
2021-09-15 Set bot mode on downgrade (June McEnroe)
2021-09-15 Enter capsicum in downgrade (June McEnroe)
2021-09-15 Factor out common parts of downgrade messages (June McEnroe) -- Also bump the message cap to 1024 because that is ostensibly useful for replying to older messages.
2021-09-14 Add downgrade IRC bot (June McEnroe)
2021-09-14 Sort by title if authors match (June McEnroe) -- There are probably better things to sort by but title definitely always exists.
2021-09-13 Swap-remove tags as they're found (June McEnroe) -- This makes it even faster.
From ~1s on a sqlite3.c amalgamation to ~0.85s.
2021-09-12 Replace htagml regex with strncmp (June McEnroe) -- Since ctags only ever produces regular expressions of the form /^re$/ or /^re/ with no other special characters, instead unescape the pattern and simply use strncmp. Running on a sqlite3.c amalgamation, the regex version takes ~37s while the strncmp version takes ~1s, producing identical output. Big win!
2021-09-11 Also defer printing comment for lone close-parens (June McEnroe)
2021-09-10 Publish "git-comment" (June McEnroe)
2021-09-10 Add git comment --pretty option (June McEnroe)
2021-09-08 Defer printing comment if line is blank or closing brace (June McEnroe) -- This fixes badly indented comments.
2021-09-08 Up default min-repeat to 30 lines (June McEnroe)
2021-09-08 Handle dirty lines in git-comment (June McEnroe)
2021-09-08 Document and install git-comment (June McEnroe)
2021-09-08 Add repeat and all options to git-comment (June McEnroe)
2021-09-08 Add group threshold to git-comment (June McEnroe)