From 6742e3e96b3387bbea484c7278305cab1bd5397e Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Sat, 15 Aug 2015 08:41:30 -0700 Subject: Allow slashes in regex passed to regex-parse. * parser.l (SREGEX): New start state, for stand-alone regex parsing. (grammar): All REGEX state rules are active in the SREGEX state also. The rule for the / character returns a REGCHAR if in the SREGEX state, so it is treated as an ordinary character. * txr.1: Updated regex-parse documentation about the treatment of the slash. Also added notes about double escaping when a string literal is passed to regex-parse. --- parser.l | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'parser.l') diff --git a/parser.l b/parser.l index 66a51cfc..af838a63 100644 --- a/parser.l +++ b/parser.l @@ -218,7 +218,7 @@ UANY {ASC}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} UANYN {ASCN}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} -%x SPECIAL BRACED NESTED REGEX STRLIT CHRLIT QSILIT QSPECIAL WLIT QWLIT +%x SPECIAL BRACED NESTED REGEX SREGEX STRLIT CHRLIT QSILIT QSPECIAL WLIT QWLIT %% @@ -765,48 +765,47 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return ERRTOK; } -<REGEX>[/] { +<REGEX,SREGEX>[/] { yylval->chr = '/'; - return '/'; + return (YYSTATE == SREGEX) ? REGCHAR : '/'; } - -<REGEX>[\\][abtnvfre\\ ] { +<REGEX,SREGEX>[\\][abtnvfre\\ ] { yylval->chr = char_esc(yytext[1]); return REGCHAR; } -<REGEX>[\\](x{HEX}+|{OCT}+);? { +<REGEX,SREGEX>[\\](x{HEX}+|{OCT}+);? { yylval->chr = num_esc(yyg, yytext + 1); return REGCHAR; } -<REGEX>[\\][sSdDwW] { +<REGEX,SREGEX>[\\][sSdDwW] { yylval->chr = yytext[1]; return REGTOKEN; } -<REGEX>{WS}[\\]\n{WS} { +<REGEX,SREGEX>{WS}[\\]\n{WS} { yyextra->lineno++; } -<REGEX>\n { +<REGEX,SREGEX>\n { yyextra->lineno++; yyerrprepf(yyg, lit("newline in regex"), nao); return ERRTOK; } -<REGEX>{REGOP} { +<REGEX,SREGEX>{REGOP} { yylval->chr = yytext[0]; return yytext[0]; } -<REGEX>[\\]{REGOP} { +<REGEX,SREGEX>[\\]{REGOP} { yylval->chr = yytext[1]; return REGCHAR; } -<REGEX>[\\]. { +<REGEX,SREGEX>[\\]. 
{ if (opt_compat && opt_compat <= 105) { yylval->chr = yytext[1]; return REGCHAR; @@ -816,12 +815,12 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return ERRTOK; } -<REGEX>[\\] { +<REGEX,SREGEX>[\\] { yyerrprepf(yyg, lit("dangling backslash in regex"), nao); return ERRTOK; } -<REGEX>{UANYN} { +<REGEX,SREGEX>{UANYN} { wchar_t buf[8]; utf8_from(buf, yytext); yylval->chr = buf[0]; @@ -986,7 +985,7 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} void end_of_regex(scanner_t *yyg) { - if (YYSTATE != REGEX) + if (YYSTATE != REGEX && YYSTATE != SREGEX) internal_error("end_of_regex called in wrong scanner state"); yy_pop_state(yyg); @@ -1050,7 +1049,7 @@ void prime_scanner(scanner_t *yyg, enum prime_parser prim) yy_push_state(NESTED, yyg); break; case prime_regex: - yy_push_state(REGEX, yyg); + yy_push_state(SREGEX, yyg); break; } } -- cgit v1.2.3