From 9aaeb2fc604feee26a72a672fd846b4e70c6c1aa Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Thu, 2 Jul 2015 19:56:58 -0700 Subject: Support trailing semicolon after hex/octal characters. * parser.l (%option): Remove nounput option since we need yyunput. (grammar): Rule for matching hex and octal escape in SPECIAL state recognizes optional semicolon. In 109 compatibility, this is pushed back into the stream, otherwise consumed. * txr.1: Updated documentation, including compat notes. * genvim.txr (txr_char): Include optional semicolon in match. Corrected some errors where 8 and 9 were being included as matches for octal digits. (txr_error): Default match for \x or \o not followed by digits. --- ChangeLog | 18 ++++++++++++++++++ genvim.txr | 7 ++++--- parser.l | 11 +++++++++-- txr.1 | 17 ++++++++++++++++- 4 files changed, 47 insertions(+), 6 deletions(-) diff --git a/ChangeLog b/ChangeLog index fbb802ff..998d3dab 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,21 @@ +2015-07-02 Kaz Kylheku + + Support trailing semicolon after hex/octal characters. + + * parser.l (%option): Remove nounput option since we need + yyunput. + (grammar): Rule for matching hex and octal escape in SPECIAL + state recognizes optional semicolon. In 109 compatibility, + this is pushed back into the stream, otherwise consumed. + + * txr.1: Updated documentation, including compat notes. + + * genvim.txr (txr_char): Include optional semicolon in + match. Corrected some errors where 8 and 9 were being + included as matches for octal digits. + (txr_error): Default match for \x or \o not followed + by digits. + 2015-07-02 Kaz Kylheku Hash-bang support for .tl files. diff --git a/genvim.txr b/genvim.txr index e74ed9c2..5aab351f 100644 --- a/genvim.txr +++ b/genvim.txr @@ -78,8 +78,9 @@ syn match txr_atat "@@[ \t]*@@" syn match txr_comment "@@[ \t]*[#;].*" syn match txr_contin "@@[ \t]*\\$" syn match txr_char "@@[ \t]*\\." -syn match txr_char "@@[ \t]*\\x[0-9A-Fa-f]\+" -syn match txr_char "@@[ \t]*\\[0-9]\+" +syn match txr_error "@@[ \t]*\\[xo]" +syn match txr_char "@@[ \t]*\\x[0-9A-Fa-f]\+;\?" +syn match txr_char "@@[ \t]*\\[0-7]\+;\?" syn match txr_variable "@@[ \t]*[*]\?[ \t]*[A-Za-z_][A-Za-z0-9_]*" syn match txr_splicevar "@@[ \t,*]*[A-Za-z_][A-Za-z0-9_]*" syn match txr_regdir "@@[ \t]*/\(\\/\|[^/]\|\\\n\)*/" @@ -97,7 +98,7 @@ syn match txr_regesc "\\[abtnvfre\\ \n/sSdDwW()\|.*?+~&%\[\]\-]" contained syn match txr_nested_error "[^\t `]\+" contained syn match txr_chr "#\\x[A-Fa-f0-9]\+"@(if txr-p " contained") -syn match txr_chr "#\\o[0-9]\+"@(if txr-p " contained") +syn match txr_chr "#\\o[0-7]\+"@(if txr-p " contained") syn match txr_chr "#\\[^ \t\nA-Za-z0-9_]"@(if txr-p " contained") syn match txr_chr "#\\[A-Za-z0-9_]\+"@(if txr-p " contained") syn match txr_ncomment ";.*"@(if txr-p " contained") diff --git a/parser.l b/parser.l index 37bbdc70..30cb3034 100644 --- a/parser.l +++ b/parser.l @@ -166,7 +166,7 @@ static wchar_t num_esc(scanner_t *scn, char *num) %} -%option stack nounput noinput reentrant bison-bridge extra-type="parser_t *" +%option stack noinput reentrant bison-bridge extra-type="parser_t *" SYM [a-zA-Z0-9_]+ SGN [+\-] @@ -685,11 +685,18 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return TEXT; } -[\\](x{HEX}+|{OCT}+) { +[\\](x{HEX}+|{OCT}+);? { wchar_t lexeme[2]; lexeme[0] = num_esc(yyg, yytext + 1); lexeme[1] = 0; yylval->lexeme = chk_strdup(lexeme); + + { + char lastchar = yytext[yyleng-1]; + if (lastchar == ';' && opt_compat && opt_compat <= 109) + unput(lastchar); + } + yy_pop_state(yyscanner); return TEXT; } diff --git a/txr.1 b/txr.1 index 3bbdabef..894143db 100644 --- a/txr.1 +++ b/txr.1 @@ -1155,7 +1155,9 @@ A immediately followed by a sequence of hex digits is interpreted as a hexadecimal numeric character code. For instance .code @\ex41 -is the ASCII character A. +is the ASCII character A. If a semicolon character immediately follows the +hex digits, it is consumed, and characters which follow are not considered +part of the hex escape even if they are hex digits. .meIP @\e < octal-digits A @@ -1165,6 +1167,9 @@ as an octal character code. For instance .code @\e010 is character 8, same as .codn @\eb . +If a semicolon character immediately follows the octal digits, it is consumed, +and subsequent characters are not treated as part of the octal escape, +even if they are octal digits. .PP Note that if a newline is embedded into a query line with @@ -30636,6 +30641,16 @@ is given an argument which is equal or lower. For instance .code -C 103 selects the behaviors described below for version 105, but not those for 102. +.IP 109 +The optional trailing semicolon on hex and octal codes in the \*(TX +pattern language was introduced in 110. The feature is disabled +with 109 or lower compatibility, so that +.code @\ex21;a +encodes +.code !;a +rather than the current behavior of encoding +.codn !a . + .IP 107 Up through \*(TX 107, by accident, there was a function called .code flip -- cgit v1.2.3