summary refs log tree commit diff
path: root/bin/hilex/c.l
diff options
context:
space:
mode:
author June McEnroe <june@causal.agency> 2020-12-28 20:06:44 -0500
committer June McEnroe <june@causal.agency> 2020-12-28 20:11:17 -0500
commit 593137c52dc77cf113c1d9561d1b27b98a550bb5 (patch)
tree f4f9d7c939bcd8fe527193c7d2fa3e8b3ed67ed8 /bin/hilex/c.l
parent Add TOUR.7 (diff)
download src-593137c52dc77cf113c1d9561d1b27b98a550bb5.tar.gz
src-593137c52dc77cf113c1d9561d1b27b98a550bb5.zip
Add initial version of hilex
hilex is meant to replace hi, based on lex rather than a mess of
overlapping regexps. I want to preserve hi's tagging abilities, but that
will require some amount of parsing/post-processing, which I'm not sure
how to approach yet.

Macro lexing for C still needs work, as I want to match strings and
comments inside macros.
Diffstat (limited to 'bin/hilex/c.l')
-rw-r--r-- bin/hilex/c.l 131
1 files changed, 131 insertions, 0 deletions
diff --git a/bin/hilex/c.l b/bin/hilex/c.l
new file mode 100644
index 00000000..95cb499f
--- /dev/null
+++ b/bin/hilex/c.l
@@ -0,0 +1,131 @@
+/* Copyright (C) 2020  June McEnroe <june@causal.agency>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+%option prefix="c11"
+%option noyywrap
+
+%{
+#include "hilex.h"
+%}
+
+%x MacroLine MacroInclude
+%x CharLiteral StringLiteral
+
+width "*"|[0-9]+
+
+%%
+
+[[:space:]]+ { return Normal; /* a run of whitespace, newlines included, is one Normal token */ }
+
+([-+*/%&|^=!<>]|"<<"|">>")"="? |
+[=~.?:]|"++"|"--"|"&&"|"||"|"->" |
+sizeof|(_A|a)lignof {
+	return Operator; /* punctuators, optional compound assignment, plus sizeof and both spellings of alignof */
+}
+
+([1-9][0-9]*|"0"[0-7]*|"0"[xX][[:xdigit:]]+)([ulUL]{0,3}) |
+([0-9]+"."?[0-9]*|"."[0-9]+)([eE][+-]?[0-9]+)?[flFL]? |
+"0"[xX][[:xdigit:]]*("."[[:xdigit:]]*)?([pP][+-]?[0-9]+)[flFL]? {
+	return Number; /* integer, decimal float, hex float; a decimal float needs at least one digit so bare suffix or exponent letters stay identifiers */
+}
+
+auto|break|case|const|continue|default|do|else|enum|extern|for|goto|if|inline |
+register|restrict|return|static|struct|switch|typedef|union|volatile|while |
+(_A|a)lignas|_Atomic|_Generic|(_N|n)oreturn|(_S|s)tatic_assert |
+(_T|t)hread_local {
+	return Keyword; /* listed before the identifier rule so an equal-length match resolves to Keyword; names such as int are not listed here */
+}
+
+[_[:alpha:]][_[:alnum:]]* { return Identifier; /* any word not claimed by an earlier rule of equal length */ }
+
+^"#" {
+	BEGIN(MacroLine); /* hash at the start of a line: the rest of the directive is lexed in MacroLine */
+	return Macro;
+}
+^"#"[[:blank:]]*"include" {
+	BEGIN(MacroInclude); /* longer match than the bare hash rule; MacroInclude also recognizes header names */
+	return Macro;
+}
+<MacroLine,MacroInclude>{
+	"\n" {
+		BEGIN(0); /* an unescaped newline ends the directive and returns to the initial state */
+		return Macro;
+	}
+	"\\\n" { return Macro; /* backslash-newline keeps the directive going */ }
+	[^\\\n<"]+|. { return Macro; /* everything else on the line, stopping before characters that may open a header name */ }
+}
+<MacroInclude>{
+	"<"[^>\n]+">" |
+	"\""[^"\n]+"\"" {
+		return String; /* header name in angle brackets or quotes; it must close on the same line */
+	}
+}
+
+"//"([^\n]|"\\\n")* |
+"/*"([^*]|"*"+[^*/])*"*"+"/" {
+	return Comment; /* line comment with backslash continuations, or a block comment that ends at its first terminator */
+}
+
+[LUu]?"'" {
+	BEGIN(CharLiteral); /* opening single quote with an optional L, U or u prefix */
+	return String;
+}
+([LU]|u8?)?"\"" {
+	BEGIN(StringLiteral); /* opening double quote with an optional L, U, u or u8 prefix */
+	return String;
+}
+
+<CharLiteral,StringLiteral>{
+	"\\"['""?\\abfnrtv] |
+	"\\"([0-7]{1,3}) |
+	"\\x"([[:xdigit:]]+) |
+	"\\u"([[:xdigit:]]{4}) |
+	"\\U"([[:xdigit:]]{8}) {
+		return StringEscape; /* simple, octal (1 to 3 digits), hex (one or more digits) and universal character escapes */
+	}
+}
+<StringLiteral>{
+	"%%" |
+	"%"[-+ #0]*{width}?("."{width})?([Lhjltz]|hh|ll)?[AEFGXacdefginopsux] {
+		return StringFormat; /* printf-style conversion: flags, width, precision, length modifier, specifier; the dash leads the flag class so it is not read as a range */
+	}
+}
+
+<CharLiteral>{
+	"'" {
+		BEGIN(0); /* closing quote returns to the initial state */
+		return String;
+	}
+	[^\\']+|. { return String; /* literal text; the single-character fallback takes a backslash that starts no recognized escape */ }
+}
+<StringLiteral>{
+	"\"" {
+		BEGIN(0); /* closing quote returns to the initial state */
+		return String;
+	}
+	[^%\\"]+|. { return String; /* literal text; the single-character fallback takes a percent or backslash that starts no recognized sequence */ }
+}
+
+. { return Normal; /* any single character no other rule matched */ }
+
+%{
+	(void)yyunput; /* references the generated helpers without calling them; presumably to silence unused-function warnings */
+	(void)input;
+%}
+
+%%
+
+const struct Lexer LexC = { yylex, &yyin, &yytext }; /* exports this scanner: its yylex function and pointers to its yyin and yytext */