From ab54d9c9f162d07a62004bc0e9b958c345a03c07 Mon Sep 17 00:00:00 2001
From: "C. McEnroe"
Date: Mon, 28 Dec 2020 20:06:44 -0500
Subject: Add initial version of hilex

hilex is meant to replace hi, based on lex rather than a mess of
overlapping regexps. I want to preserve hi's tagging abilities, but
that will require some amount of parsing/post-processing, which I'm not
sure how to approach yet.

Macro lexing for C still needs work, as I want to match strings and
comments inside macros.
---
 bin/hilex/c.l | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 151 insertions(+)
 create mode 100644 bin/hilex/c.l

(limited to 'bin/hilex/c.l')

diff --git a/bin/hilex/c.l b/bin/hilex/c.l
new file mode 100644
index 00000000..159980e2
--- /dev/null
+++ b/bin/hilex/c.l
@@ -0,0 +1,151 @@
+/* Copyright (C) 2020 C. McEnroe
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/* lex scanner that splits C source into classified tokens for hilex.
+ * Each call returns the class of the next token (Normal, Operator, Number,
+ * Keyword, Identifier, Macro, Comment, String, StringEscape or
+ * StringFormat; presumably declared in hilex.h) and leaves the matched
+ * text in yytext. The scanner is exported as LexC at the end of the file.
+ */
+
+%option prefix="c11"
+%option noyywrap
+
+%{
+#include "hilex.h"
+%}
+
+/* Exclusive states: inside a preprocessor line, inside an #include line,
+ * inside a character literal, and inside a string literal. */
+%x MacroLine MacroInclude
+%x CharLiteral StringLiteral
+
+/* Field width or precision of a printf-style conversion. */
+width "*"|[0-9]+
+
+%%
+
+[[:space:]]+ { return Normal; }
+
+([-+*/%&|^=!<>]|"<<"|">>")"="? |
+[=~.?:]|"++"|"--"|"&&"|"||"|"->" |
+sizeof|(_A|a)lignof {
+	/* sizeof/alignof tie with Identifier on length; listed first, this wins. */
+	return Operator;
+}
+
+([1-9][0-9]*|"0"[0-7]*|"0x"[[:xdigit:]]+)([ulUL]{0,3}) |
+([0-9]+("."[0-9]*)?|"."[0-9]+)([eE][+-]?[0-9]+)?[flFL]? |
+"0x"[[:xdigit:]]*("."[[:xdigit:]]*)?([pP][+-]?[0-9]+)[flFL]? {
+	/* The decimal form requires a digit so that lone suffix or exponent
+	 * letters ("f", "e1") are left to the Identifier rule. */
+	return Number;
+}
+
+auto|break|case|const|continue|default|do|else|enum|extern|for|goto|if|inline |
+register|restrict|return|static|struct|switch|typedef|union|volatile|while |
+(_A|a)lignas|_Atomic|_Generic|(_N|n)oreturn|(_S|s)tatic_assert |
+(_T|t)hread_local {
+	return Keyword;
+}
+
+[_[:alpha:]][_[:alnum:]]* { return Identifier; }
+
+^"#" {
+	BEGIN(MacroLine);
+	return Macro;
+}
+^"#"[[:blank:]]*"include" {
+	BEGIN(MacroInclude);
+	return Macro;
+}
+<MacroLine,MacroInclude>{
+	"\n" {
+		/* An unescaped newline ends the directive. */
+		BEGIN(0);
+		return Macro;
+	}
+	"\\\n" { return Macro; }
+	[^\\\n<"]+|. { return Macro; }
+}
+<MacroInclude>{
+	"<"[^>]+">" |
+	"\""[^"]+"\"" {
+		return String;
+	}
+}
+
+"//"([^\n]|"\\\n")* |
+"/*"([^*]|"*"+[^*/])*"*"+"/" {
+	/* A run of stars inside the comment must be followed by something
+	 * other than '/', so the match stops at the first closing delimiter. */
+	return Comment;
+}
+
+[LUu]?"'" {
+	BEGIN(CharLiteral);
+	return String;
+}
+([LU]|u8?)?"\"" {
+	BEGIN(StringLiteral);
+	return String;
+}
+
+<CharLiteral,StringLiteral>{
+	"\\"['""?\\abfnrtv] |
+	"\\"([0-7]{1,3}) |
+	"\\x"([[:xdigit:]]{2}) |
+	"\\u"([[:xdigit:]]{4}) |
+	"\\U"([[:xdigit:]]{8}) {
+		return StringEscape;
+	}
+}
+<StringLiteral>{
+	"%%" |
+	"%"[ #+0-]*{width}?("."{width})?([Lhjltz]|hh|ll)?[AEFGXacdefginopsux] {
+		/* '-' is last in the flag class so it is a literal, not a range. */
+		return StringFormat;
+	}
+}
+
+<CharLiteral>{
+	"'" {
+		BEGIN(0);
+		return String;
+	}
+	[^\\']+|. { return String; }
+}
+<StringLiteral>{
+	"\"" {
+		BEGIN(0);
+		return String;
+	}
+	[^%\\"]+|. { return String; }
+}
+
+. { return Normal; }
+
+%{
+	/* Reference the generated helpers; presumably this silences
+	 * unused-function warnings. */
+	(void)yyunput;
+	(void)input;
+%}
+
+%%
+
+/* Scanner entry point together with its input stream and token text. */
+const struct Lexer LexC = { yylex, &yyin, &yytext };
-- 
cgit 1.4.1