From 0c364bffbc87487ea32ec49d000cb84164fe6135 Mon Sep 17 00:00:00 2001
From: Kaz Kylheku
Date: Tue, 5 Feb 2019 08:16:34 -0800
Subject: parser: security: UTF-8 and NUL handling in literals.

A null byte in regex and string literals is being processed
as a #\nul instead of correctly turning into #\pnul.
Bad UTF-8 is not being rejected.

* parser.l (REGCHAR, LITCHAR): Use utf8_from_buf to properly
convert yytext using its true length, rather than utf8_from
which assumes a null-terminated string. Thus null bytes
(including the case of a yytext being single NUL)
are handled properly. Check that the result is exactly
one character (null-terminated buffer, two characters wide).

* utf8.c (utf8_from): Unused function removed.

* utf8.h (utf8_from): Declaration removed.
---
 parser.l | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

(limited to 'parser.l')

diff --git a/parser.l b/parser.l
index da2f8116..7a9d8d8b 100644
--- a/parser.l
+++ b/parser.l
@@ -903,9 +903,13 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
 }
 
 {UANYN} {
-  wchar_t buf[8];
-  utf8_from(buf, yytext);
-  yylval->chr = buf[0];
+  wchar_t wchr[8];
+  if (utf8_from_buf(wchr, coerce(unsigned char *, yytext), yyleng) != 2) {
+    yyerrprepf(yyg, lit("non-UTF-8 byte in regex: '\\x~02x'"),
+               num(convert(unsigned char, yytext[0])), nao);
+    return ERRTOK;
+  }
+  yylval->chr = wchr[0];
   return REGCHAR;
 }
 
@@ -1057,9 +1061,13 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
 }
 
 {UANYN} {
-  wchar_t buf[8];
-  utf8_from(buf, yytext);
-  yylval->chr = buf[0];
+  wchar_t wchr[8];
+  if (utf8_from_buf(wchr, coerce(unsigned char *, yytext), yyleng) != 2) {
+    yyerrprepf(yyg, lit("non-UTF-8 byte in literal: '\\x~02x'"),
+               num(convert(unsigned char, yytext[0])), nao);
+    return ERRTOK;
+  }
+  yylval->chr = wchr[0];
   return LITCHAR;
 }
 
-- 
cgit v1.2.3