From ef8fe557841c440bf9e3e13ee0801bc127091b7e Mon Sep 17 00:00:00 2001
From: Kaz Kylheku
Date: Thu, 21 Apr 2016 06:57:42 -0700
Subject: Handle non-UTF-8 byte in regex scanned from string.

The current behavior is that there is no lex rule for this,
so such a byte gets echoed.

parser.l (grammar): Add fallback rule to match one byte in
SREGEX state and turn it into 0xDCxx character.
---
 parser.l | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/parser.l b/parser.l
index d87e03eb..9ac79228 100644
--- a/parser.l
+++ b/parser.l
@@ -872,6 +872,12 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
   return REGCHAR;
 }
 
+<SREGEX>. {
+  /* Allow non-UTF-8 byte for regexes scanned from string: the raw byte 0xNN is passed on as the character 0xDCNN */
+  yylval->chr = (unsigned char) yytext[0] + 0xDC00;
+  return REGCHAR;
+}
+
 . {
   yyerrprepf(yyg, lit("non-UTF-8 byte in regex: '\\x~02x'"),
              num(convert(unsigned char, yytext[0])), nao);
-- 
cgit v1.2.3