From 292d6a1f11174da58fad1106b4aa5f8baea585f2 Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Thu, 23 Jul 2015 07:47:11 -0700 Subject: * parser.l (grammar): Do not allow unescaped newline in word list literals and word list quasiliterals, except in <= 109 compatibility mode. An escaped newline in these literals, together with surrounding whitespace, now produces a single space, except in <= 109 compatibility mode. * txr.1: Documented new rules for WLL's and QLL's, and added compatibility notes. --- ChangeLog | 12 ++++++++++++ parser.l | 16 +++++++++++++++- txr.1 | 27 +++++++++++++++++++++------ 3 files changed, 48 insertions(+), 7 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6df4ab0d..5ae1f8b0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2015-07-23 Kaz Kylheku + + * parser.l (grammar): Do not allow unescaped newline in + word list literals and word list quasiliterals, except + in <= 109 compatibility mode. An escaped newline in + these literals, together with surrounding whitespace, + now produces a single space, except in <= 109 + compatibility mode. + + * txr.1: Documented new rules for WLL's and QLL's, + and added compatibility notes. + 2015-07-23 Kaz Kylheku * genvim.txr: Scan cadr.c, cadr.tl, with-resources.tl, txr-case.tl. diff --git a/parser.l b/parser.l index 29a45ab1..b1fb63fe 100644 --- a/parser.l +++ b/parser.l @@ -854,10 +854,18 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return LITCHAR; } +{WS}[\\]\n{WS} { + yyextra->lineno++; +} + {WS}[\\]\n{WS} { yyextra->lineno++; + + if (!opt_compat || opt_compat > 109) + return ' '; } + [\\](x{HEX}+|{OCT}+);? 
{ yylval->chr = num_esc(yyg, yytext+1); return LITCHAR; @@ -905,7 +913,13 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} \n { yyextra->lineno++; - return ' '; + + if (opt_compat && opt_compat <= 109) + return ' '; + + yyerrprepf(yyg, lit("newline in word list literal"), nao); + yylval->chr = yytext[0]; + return ERRTOK; } @/([[({'^,@]|{TOK}) { diff --git a/txr.1 b/txr.1 index c137359b..79c8f2f7 100644 --- a/txr.1 +++ b/txr.1 @@ -2436,20 +2436,27 @@ as in order to include it as a character. All the escaping conventions used in string literals can be used in word literals. -Unlike in string literals, whitespace (tabs, spaces and newlines) is not +Unlike in string literals, whitespace (tabs and spaces) is not significant in word literals: it separates words. Whitespace may be escaped with a backslash in order to include it as a literal character. +Just like in string literals, an unescaped newline character is not allowed. +A newline preceded by a backslash is permitted. Such an escaped newline, +together with any leading and trailing unescaped whitespace, is removed +and replaced with a single space. Example: .cblk - #"abc def ghi" --> notates ("abc" "def" "ghi") + #"abc def ghi" --> notates ("abc" "def" "ghi") - #"abc def - ghi" --> notates ("abc" "def" "ghi") + #"abc def \e + ghi" --> notates ("abc" "def" "ghi") #"abc\e def ghi" --> notates ("abc def" "ghi") + + #"abc\e def\e \e + \e ghi" --> notates ("abc def " " ghi") .cble A splicing word literal differs from a word literal in that it does not @@ -2526,10 +2533,13 @@ as \ in order to include it as a character. All the escaping conventions used in quasiliterals can be used in QLL. -Unlike in quasiliterals, whitespace (tabs, spaces and newlines) is not +Unlike in quasiliterals, whitespace (tabs and spaces) is not significant in QLL: it separates words. Whitespace may be escaped with a backslash in order to include it as a literal character. +A newline is not permitted unless escaped. 
An escaped newline works exactly the +same way as it does in word list literals (WLL-s). + Note that the delimiting into words is done before the variable substitution. If the variable a contains spaces, then .code #`@a` @@ -2542,7 +2552,7 @@ Examples: .cblk #`abc @a ghi` --> notates (`abc` `@a` `ghi`) - #`abc @d@e@f + #`abc @d@e@f \e ghi` --> notates (`abc` `@d@e@f` `ghi`) #`@a\e @b @c` --> notates (`@a @b` `@c`) @@ -31180,6 +31190,11 @@ encodes .code !;a rather than the current behavior of encoding .codn !a . +Also, in 109 and earlier, newlines were allowed in word list literals and +word list quasiliterals. They were treated as a word-separating space. +A backslash-escaped newline, and all whitespace around it, was deleted +just like in ordinary literals, and did not separate words. The old +behavior is emulated. .IP 107 Up through \*(TX 107, by accident, there was a function called -- cgit v1.2.3