From 292d6a1f11174da58fad1106b4aa5f8baea585f2 Mon Sep 17 00:00:00 2001
From: Kaz Kylheku <kaz@kylheku.com>
Date: Thu, 23 Jul 2015 07:47:11 -0700
Subject: * parser.l (grammar): Do not allow unescaped newline in word list
 literals and word list quasiliterals, except in <= 109 compatibility mode. An
 escaped newline in these literals, together with surrounding whitespace, now
 produces a single space, except in <= 109 compatibility mode.

* txr.1: Documented new rules for WLL's and QLL's,
and added compatibility notes.
---
 ChangeLog | 12 ++++++++++++
 parser.l  | 16 +++++++++++++++-
 txr.1     | 27 +++++++++++++++++++++------
 3 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 6df4ab0d..5ae1f8b0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2015-07-23  Kaz Kylheku  <kaz@kylheku.com>
+
+	* parser.l (grammar): Do not allow unescaped newline in
+	word list literals and word list quasiliterals, except
+	in <= 109 compatibility mode. An escaped newline in
+	these literals, together with surrounding whitespace,
+	now produces a single space, except in <= 109
+	compatibility mode.
+
+	* txr.1: Documented new rules for WLL's and QLL's,
+	and added compatibility notes.
+
 2015-07-23  Kaz Kylheku  <kaz@kylheku.com>
 
 	* genvim.txr: Scan cadr.c, cadr.tl, with-resources.tl, txr-case.tl.
diff --git a/parser.l b/parser.l
index 29a45ab1..b1fb63fe 100644
--- a/parser.l
+++ b/parser.l
@@ -854,10 +854,18 @@ UONLY   {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
   return LITCHAR;
 }
 
+<STRLIT,QSILIT>{WS}[\\]\n{WS} {
+  yyextra->lineno++;
+}
+
 <STRLIT,QSILIT,WLIT,QWLIT>{WS}[\\]\n{WS} {
   yyextra->lineno++;
+
+  if (!opt_compat || opt_compat > 109)
+    return ' ';
 }
 
+
 <STRLIT,QSILIT,WLIT,QWLIT>[\\](x{HEX}+|{OCT}+);?  {
   yylval->chr = num_esc(yyg, yytext+1);
   return LITCHAR;
@@ -905,7 +913,13 @@ UONLY   {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U}
 
 <WLIT,QWLIT>\n {
   yyextra->lineno++;
-  return ' ';
+
+  if (opt_compat && opt_compat <= 109)
+    return ' ';
+
+  yyerrprepf(yyg, lit("newline in word list literal"), nao);
+  yylval->chr = yytext[0];
+  return ERRTOK;
 }
 
 <QSILIT,QWLIT>@/([[({'^,@]|{TOK}) {
diff --git a/txr.1 b/txr.1
index c137359b..79c8f2f7 100644
--- a/txr.1
+++ b/txr.1
@@ -2436,20 +2436,27 @@ as
 in order to include it as a character. All the escaping conventions
 used in string literals can be used in word literals.
 
-Unlike in string literals, whitespace (tabs, spaces and newlines) is not
+Unlike in string literals, whitespace (tabs and spaces) is not
 significant in word literals: it separates words.  Whitespace may be
 escaped with a backslash in order to include it as a literal character.
 
+Just like in string literals, an unescaped newline character is not allowed.
+A newline preceded by a backslash is permitted. Such an escaped newline,
+together with any leading and trailing unescaped whitespace, is removed
+and replaced with a single space.
 
 Example:
 
 .cblk
-  #"abc def ghi"  --> notates ("abc" "def" "ghi")
+  #"abc def ghi"   --> notates ("abc" "def" "ghi")
 
-  #"abc   def
-  ghi"            --> notates ("abc" "def" "ghi")
+  #"abc   def \e
+      ghi"         --> notates ("abc" "def" "ghi")
 
   #"abc\e def ghi" --> notates ("abc def" "ghi")
+
+  #"abc\e def\e \e
+   \e ghi"         --> notates ("abc def " " ghi")
 .cble
 
 A splicing word literal differs from a word literal in that it does not
@@ -2526,10 +2533,13 @@ as
 \ in order to include it as a character. All the escaping conventions
 used in quasiliterals can be used in QLL.
 
-Unlike in quasiliterals, whitespace (tabs, spaces and newlines) is not
+Unlike in quasiliterals, whitespace (tabs and spaces) is not
 significant in QLL: it separates words.  Whitespace may be
 escaped with a backslash in order to include it as a literal character.
 
+A newline is not permitted unless escaped. An escaped newline works exactly the
+same way as it does in word list literals (WLL-s).
+
 Note that the delimiting into words is done before the variable
 substitution. If the variable a contains spaces, then
 .code #`@a`
@@ -2542,7 +2552,7 @@ Examples:
 .cblk
   #`abc @a ghi`  --> notates (`abc` `@a` `ghi`)
 
-  #`abc   @d@e@f
+  #`abc   @d@e@f \e
   ghi`            --> notates (`abc` `@d@e@f` `ghi`)
 
   #`@a\e @b @c` --> notates (`@a @b` `@c`)
@@ -31180,6 +31190,11 @@ encodes
 .code !;a
 rather than the current behavior of encoding
 .codn !a .
+Also, in 109 and earlier, newlines were allowed in word list literals and
+word list quasiliterals. They were treated as a word-separating space.
+A backslash-escaped newline, and all whitespace around it, was deleted
+just like in ordinary literals, and did not separate words. The old
+behavior is emulated.
 
 .IP 107
 Up through \*(TX 107, by accident, there was a function called
-- 
cgit v1.2.3