From 1057218015c2388a90bfbe9baa8cfb90820a6d8c Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Thu, 29 Sep 2011 22:31:27 -0700 Subject: * parser.l: Implemented backslash continuations in SPECIAL state, regexes and string literals. * txr.1: Documented. --- ChangeLog | 7 +++++++ parser.l | 40 +++++++++++++++++++++++++++------------- txr.1 | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 13 deletions(-) diff --git a/ChangeLog b/ChangeLog index 166adc68..723a3f2d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2011-09-29 Kaz Kylheku + + * parser.l: Implemented backslash continuations in SPECIAL + state, regexes and string literals. + + * txr.1: Documented. + 2011-09-29 Kaz Kylheku * match.c (match_line): Implemented horizontal all, some, diff --git a/parser.l b/parser.l index 991df970..a57c954b 100644 --- a/parser.l +++ b/parser.l @@ -136,6 +136,7 @@ void yybadtoken(int tok, val context) static wchar_t char_esc(int letter) { switch (letter) { + case ' ': return L' '; case 'a': return L'\a'; case 'b': return L'\b'; case 't': return L'\t'; @@ -357,7 +358,7 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return yytext[0]; } -[\t ]+ { /* Eat whitespace in directive */ } +{WS} { /* Eat whitespace in directive */ } \" { yy_push_state(STRLIT); @@ -394,14 +395,19 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return '.'; } -[\\][abtnvfre] { - wchar_t lexeme[2]; - lexeme[0] = char_esc(yytext[1]); - lexeme[1] = 0; - yylval.lexeme = chk_strdup(lexeme); - yy_pop_state(); - return TEXT; - } +[\\]\n{WS} { + yy_pop_state(); + lineno++; + } + +[\\][abtnvfre ] { + wchar_t lexeme[2]; + lexeme[0] = char_esc(yytext[1]); + lexeme[1] = 0; + yylval.lexeme = chk_strdup(lexeme); + yy_pop_state(); + return TEXT; + } [\\](x{HEX}+|{OCT}+) { wchar_t lexeme[2]; @@ -429,7 +435,7 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} } -[\\][abtnvfre\\] { +[\\][abtnvfre\\ ] { yylval.chr = char_esc(yytext[1]); return REGCHAR; } @@ -439,6 +445,10 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} 
return REGCHAR; } +{WS}[\\]\n{WS} { + lineno++; + } + \n { lineno++; yyerror("newline in regex"); @@ -521,10 +531,14 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} } [\\][abtnvfre"`'\\] { - yylval.chr = char_esc(yytext[1]); - return LITCHAR; - } + yylval.chr = char_esc(yytext[1]); + return LITCHAR; + } +{WS}[\\]\n{WS} { + lineno++; + } + [\\](x{HEX}+|{OCT}+) { yylval.chr = num_esc(yytext + 1); return LITCHAR; diff --git a/txr.1 b/txr.1 index 0a5bcad1..3d2f46cb 100644 --- a/txr.1 +++ b/txr.1 @@ -368,6 +368,27 @@ Control characters may be embedded directly in a query (with the exception of newline characters). An alternative to embedding is to use escape syntax. The following escapes are supported: +.IP @\e +A backslash immediately followed by a newline introduces a physical line +break without breaking up the logical line. Material following this sequence +continues to be interpreted as a continuation of the previous line, so +that indentation can be introduced to show the continuation without appearing +in the data. +.IP @\e +A backslash followed by a space encodes a space. This is useful in line +continuations when it is necessary for leading spaces to be preserved. +For instance the two line sequence + + abcd@\ + @\ efg + +is equivalent to the line + + abcd efg + +The two spaces before the @\ in the second line are consumed. The +spaces after are preserved. + .IP @\ea Alert character (ASCII 7, BEL). .IP @\eb @@ -445,6 +466,17 @@ directive may be used, which has the following syntax: where the RE part enclosed in slashes represents regular expression syntax (described in the section Regular Expressions below). +Long regular expressions can be broken into multiple lines using a +backslash-newline sequence. Whitespace before the sequence or after the +sequence is not significant, so the following two are equivalent: + + @/reg \e + ular/ + + @/regular/ + +There may not be whitespace between the backslash and newline. 
+ Whereas literal text simply represents itself, a regular expression denotes a (potentially infinite) set of texts. The regular expression directive matches the longest piece of text (possibly empty) which belongs to the set -- cgit v1.2.3