diff options
author | June McEnroe <june@causal.agency> | 2022-10-17 17:22:39 -0400 |
---|---|---|
committer | June McEnroe <june@causal.agency> | 2022-10-17 17:22:39 -0400 |
commit | 9c7d3eec98ee3704ba81547cc48dc8e681964ada (patch) | |
tree | a61f895b9faa046af15cb66f8e43edf616f6809c | |
parent | Fix same month, different day diffs (diff) | |
download | src-9c7d3eec98ee3704ba81547cc48dc8e681964ada.tar.gz src-9c7d3eec98ee3704ba81547cc48dc8e681964ada.zip |
Fix 3- and 4-byte UTF-8 sequence matching
lex(1) has weird regex precedence rules!
-rw-r--r-- | bin/shotty.l | 4 |
1 file changed, 2 insertions, 2 deletions
```diff
diff --git a/bin/shotty.l b/bin/shotty.l
index 7496e75d..dcac43ec 100644
--- a/bin/shotty.l
+++ b/bin/shotty.l
@@ -181,13 +181,13 @@ ESC \x1B
 		| (wchar_t)(yytext[1] & 0x3F);
 	return Data;
 }
-[\xE0-\xEF][\x80-\xBF]{2} {
+[\xE0-\xEF]([\x80-\xBF]{2}) {
 	ch = (wchar_t)(yytext[0] & 0x0F) << 12
 		| (wchar_t)(yytext[1] & 0x3F) << 6
 		| (wchar_t)(yytext[2] & 0x3F);
 	return Data;
 }
-[\xF0-\xF7][\x80-\xBF]{3} {
+[\xF0-\xF7]([\x80-\xBF]{3}) {
 	ch = (wchar_t)(yytext[0] & 0x07) << 18
 		| (wchar_t)(yytext[1] & 0x3F) << 12
 		| (wchar_t)(yytext[2] & 0x3F) << 6
```