From 7c6391bb10adc88d156ec88148184bc3eb8681ce Mon Sep 17 00:00:00 2001
From: Kaz Kylheku <kaz@kylheku.com>
Date: Tue, 19 Jan 2010 15:16:28 -0800
Subject: More regex grammar work.

---
 ChangeLog | 11 +++++++++++
 parser.h  |  1 +
 parser.y  |  3 ++-
 txr.1     |  7 +++----
 4 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 93a450de..a8247e8b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2010-01-19  Kaz Kylheku  <kkylheku@gmail.com>
+
+	* parser.y (regex): Getting rid of empty '/' '/' production
+	again.
+	(regexpr): Re-introducing empty production; this time using
+	%prec LOW trick to give this interpretation the lowest
+	possible precedence. Thus expressions like /&/ work again.
+	(regbranch): New production to allow R1~R2 to be valid.
+
+	* txr.1: Documented.
+
 2010-01-19  Kaz Kylheku  <kkylheku@gmail.com>
 
 	* parser.l (grammar): The ^ character is no longer considered
diff --git a/parser.h b/parser.h
index e4f712b9..3a7fb720 100644
--- a/parser.h
+++ b/parser.h
@@ -36,3 +36,4 @@ void yyerror(const char *s);
 void yyerrorf(val s, ...);
 void yybadtoken(int tok, val context);
 void end_of_regex(void);
+int yylex(void);
diff --git a/parser.y b/parser.y
index cb9d320f..b2745c2f 100644
--- a/parser.y
+++ b/parser.y
@@ -451,7 +451,6 @@ expr : IDENT                    { $$ = intern(string_own($1), nil); }
      ;
 
 regex : '/' regexpr '/'         { $$ = $2; end_of_regex(); }
-      | '/' '/'                 { $$ = nil; end_of_regex(); }
       | '/' error               { $$ = nil;
                                   yybadtoken(yychar, lit("regex"));
                                   end_of_regex(); }
@@ -463,10 +462,12 @@ regexpr : regbranch                     { $$ = if3(cdr($1),
         | regexpr '|' regexpr           { $$ = list(or_s, $1, $3, nao); }
         | regexpr '&' regexpr           { $$ = list(and_s, $1, $3, nao); }
         | '~' regexpr                   { $$ = list(compl_s, $2, nao); }
+        | /* empty */ %prec LOW         { $$ = nil; }
         ;
 
 regbranch : regterm %prec LOW   { $$ = cons($1, nil); }
           | regterm regbranch   { $$ = cons($1, $2); }
+          | regterm '~' regexpr { $$ = list($1, list(compl_s, $3, nao), nao); }
           ;
 
 regterm : regterm '*'           { $$ = list(zeroplus_s, $1, nao); }
diff --git a/txr.1 b/txr.1
index 64403966..729107be 100644
--- a/txr.1
+++ b/txr.1
@@ -695,7 +695,7 @@ string, then R1%R2 is equivalent to R1*.
 .IP ~R
 match the complement of the following expression R; i.e. match
 those texts that R does not match. This operator is called complement,
-or logical not.
+or logical not. The form R1~R2 is permitted and means R1(~R2).
 .IP R1R2
 Two consecutive regular expressions denote catenation:
 the left expression must match, and then the right.
@@ -735,9 +735,8 @@ means ab((c*)%(d*ef)). The left argument of % is c*, but the right is the
 entire expression d*ef.
 
 The unary complement operator has the next lower precedence, so
-that ~A* means the ~(A*): "match the all text that is not matched by zero
-or more repetitions of A", not "match zero or more times the text
-not matched by A".
+that ~AB means ~(AB), not (~A)B. AB~CD means (AB)~(CD), where
+(CD) is complemented and then catenated to (AB).
 
 Catenation is on the next lower precedence rung, so that AB? means A(B?), or
 "match A, and then optionally B", not "match A and B, as one optional
-- 
cgit v1.2.3