From 9c7d3eec98ee3704ba81547cc48dc8e681964ada Mon Sep 17 00:00:00 2001
From: June McEnroe
Date: Mon, 17 Oct 2022 17:22:39 -0400
Subject: Fix 3- and 4-byte UTF-8 sequence matching

lex(1) has weird regex precedence rules!
---
 bin/shotty.l | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bin/shotty.l b/bin/shotty.l
index 7496e75d..dcac43ec 100644
--- a/bin/shotty.l
+++ b/bin/shotty.l
@@ -181,13 +181,13 @@ ESC \x1B
 		| (wchar_t)(yytext[1] & 0x3F);
 	return Data;
 }
-[\xE0-\xEF][\x80-\xBF]{2} {
+[\xE0-\xEF]([\x80-\xBF]{2}) {
 	ch = (wchar_t)(yytext[0] & 0x0F) << 12
 		| (wchar_t)(yytext[1] & 0x3F) << 6
 		| (wchar_t)(yytext[2] & 0x3F);
 	return Data;
 }
-[\xF0-\xF7][\x80-\xBF]{3} {
+[\xF0-\xF7]([\x80-\xBF]{3}) {
 	ch = (wchar_t)(yytext[0] & 0x07) << 18
 		| (wchar_t)(yytext[1] & 0x3F) << 12
 		| (wchar_t)(yytext[2] & 0x3F) << 6
-- 
cgit 1.4.1