From 6513ac64f6c33223da753d7a3665cedcc225faaf Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Thu, 13 Oct 2011 09:15:52 -0700 Subject: * parser.y (elem): Amending previous change. A single space should only denote multiple spaces, not mixtures of spaces and tabs. WE have to be careful with tabs because they can be semantically different from spaces (e.g. file with tab delimited fields which can be blank, empty or have leading or trailing spaces.) * txr.1: Updated. --- ChangeLog | 10 ++++++++++ parser.y | 4 +--- txr.1 | 4 ++-- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index 173f64e0..6740700d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +2011-10-13 Kaz Kylheku + + * parser.y (elem): Amending previous change. A single space + should only denote multiple spaces, not mixtures of spaces and + tabs. WE have to be careful with tabs because they can be + semantically different from spaces (e.g. file with tab delimited + fields which can be blank, empty or have leading or trailing spaces.) + + * txr.1: Updated. + 2011-10-13 Kaz Kylheku * Makefile (%.ok: %.txr): Use unified diff for showing diff --git a/parser.y b/parser.y index b814c1d6..18ff2d35 100644 --- a/parser.y +++ b/parser.y @@ -226,9 +226,7 @@ elems : elem { $$ = cons($1, nil); } elem : TEXT { $$ = string_own($1); } | SPACE { if ($1[0] == ' ' && $1[1] == 0) { val spaces = list(oneplus_s, - list(set_s, chr(' '), - chr('\t'), nao), - nao); + chr(' '), nao); $$ = cons(regex_compile(spaces), spaces); free($1); } else diff --git a/txr.1 b/txr.1 index e88dd7bf..5ca5731d 100644 --- a/txr.1 +++ b/txr.1 @@ -320,8 +320,8 @@ an undivided mixture of tabs and spaces is a whitespace token. Whitespace tokens match a precisely identical piece of whitespace in the input, with one exception: a whitespace token consisting of precisely one space has a -special meaning. It is equivalent to the regular expression @/[ \t]+/: match -one or more tabs or spaces. +special meaning. 
It is equivalent to the regular expression @/[ ]+/: match +an extent of one or more spaces (but not tabs!). Thus, the query line "a b" (one space) matches texts like "a b", "a b", et cetera (arbitrary number of spaces between a and b). However "a b" -- cgit v1.2.3