From 02a9d8a8b07530f50d27b6158329dd8d218d298b Mon Sep 17 00:00:00 2001 From: "C. McEnroe" Date: Wed, 20 Jan 2021 18:01:04 -0500 Subject: Add messy sh lexer Surprisingly seems to work for everything I looked at in my repos. --- bin/sh.l | 175 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 bin/sh.l (limited to 'bin/sh.l') diff --git a/bin/sh.l b/bin/sh.l new file mode 100644 index 00000000..77dd2518 --- /dev/null +++ b/bin/sh.l @@ -0,0 +1,175 @@ +/* Copyright (C) 2021 C. McEnroe + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +%option prefix="sh" +%option noyywrap + +%{ +#include +#include +#include "hilex.h" + +enum { Cap = 64 }; +static int len = 1; +static int stack[Cap]; +static int push(int val) { + if (len < Cap) stack[len++] = val; + return val; +} +static int pop(void) { + if (len > 1) len--; + return stack[len-1]; +} +%} + +%s First +%s Param Command Arith Backtick +%x DQuote HereDocDel HereDoc HereDocLit + +word [[:alnum:]_.-]+ +param [^:=?+%#{}-]+ +reserved [!{}]|else|do|elif|for|done|fi|then|until|while|if|case|esac + +%% + static char *delimiter; + +[[:blank:]]+ { return Normal; } + +"\\". { return Escape; } + +{ + "$"[*@#?$!0-9-] | + "$"[_[:alpha:][_[:alnum:]]* | + "${"[#]?{param}"}" { + return Subst; + } + "${"{param} { + BEGIN(push(Param)); + return Subst; + } + "$(" { + BEGIN(push(Command)); + return Subst; + } + "$((" { + BEGIN(push(Arith)); + return Subst; + } + "`" { + BEGIN(push(Backtick)); + return Subst; + } +} +"}" | +")" | +"))" | +"`" { + BEGIN(pop()); + return Subst; +} + +[&();|]|"&&"|";;"|"||" { + BEGIN(push(First)); + return Operator; +} +[0-9]?([<>]"&"?|">|"|">>"|"<>") { + return Operator; +} + +^{reserved} { return Keyword; } +{ + [[:blank:]]+ { return Normal; } + {reserved} { + BEGIN(pop()); + return Keyword; + } + {word} { + BEGIN(pop()); + return Normal; + } +} + +{word}/[[:blank:]]*"()" { return Ident; } + +[0-9]?("<<"|"<<-") { + BEGIN(push(HereDocDel)); + return Operator; +} +{ + [[:blank:]]+ { return Normal; } + {word} { + delimiter = strdup(yytext); + assert(delimiter); + BEGIN(pop(), push(HereDoc)); + return Ident; + } + "'"{word}"'" { + delimiter = strndup(&yytext[1], strlen(yytext)-2); + assert(delimiter); + BEGIN(pop(), push(HereDocLit)); + return Ident; + } +} +{ + ^"\t"*{word} { + if (strcmp(&yytext[strspn(yytext, "\t")], delimiter)) REJECT; + free(delimiter); + BEGIN(pop()); + return Ident; + } +} +{ + [^$`\n]+ { return String; } + .|\n { return String; } +} +{ + .*\n { return String; } +} + +"'"[^'']*"'" { return String; } + +"\""/[^$`\\] { + BEGIN(push(DQuote)); + yymore(); +} +"\"" { + BEGIN(push(DQuote)); + return String; +} + +{ + [^\\$`""]*"\"" { + BEGIN(pop()); + return String; + } + "\\"[$`""\\\n] { return Escape; } + [^\\$`""]+|. { return String; } +} + +"#".* { return Comment; } + +{word} { return Normal; } + +.|\n { return Normal; } + +%{ + (void)yyunput; + (void)input; +%} + +%% + +const struct Lexer LexSh = { yylex, &yyin, &yytext }; -- cgit 1.4.1