From d1ecfd527d7717921e013d35be3070e7f95265e5 Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Tue, 7 Jan 2014 19:01:33 -0800 Subject: The lisp-parse function can now be called multiple times on the same stream to extract multiple objects; the requirement that the stream must hold exactly one complete Lisp object with no following material is now lifted. * parser.l (YY_INPUT): Modified the macro so that it reads no more than one character. Though this probably makes the lexer less efficient, it gives us the important property that the lexer does not scan ahead into the input stream, hogging data into its buffer which is then destroyed. This is essential if the lisp-parse function is to support multiple calls to pull objects one by one out of a stream. * parser.y (spec): Use YYACCEPT in the SECRET_ESCAPE_E clause for pulling a single expression out of the token stream. YYACCEPT is a trick for not invoking the $accept : spec . $end production which is implicitly built into the grammar, and which causes a token of lookahead to occur. This allows us to read a full expression without stealing any further token: but only if the grammar is structured right. (exprs): This phrase structure now handles the DOTDOT syntax. There is no such thing as an expr DOTDOT expr expression any more; it is in the list syntax (and not supported in the dot position). (expr): Remove DOTDOT syntax. * txr.1: Updated description of .. syntax, and relaxed the description of lisp-parse since it now allows multiple calls to extract multiple objects. 
--- ChangeLog | 31 +++++++++++++++++++++++++++++++ parser.l | 13 ++++--------- parser.y | 7 +++++-- txr.1 | 20 +++++++++++++++++++- 4 files changed, 59 insertions(+), 12 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9108376e..ecfc54cd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,34 @@ +2014-01-07 Kaz Kylheku + + The lisp-parse function can now be called multiple times + on the same stream to extract multiple objects; the requirement + that the stream must hold exactly one complete Lisp object + with no following material is now lifted. + + * parser.l (YY_INPUT): Modified the macro so that it reads no more + than one character. Though this probably makes the lexer less + efficient, it gives us the important property that the lexer does + not scan ahead into the input stream, hogging data into its buffer + which is then destroyed. This is essential if the lisp-parse function + is to support multiple calls to pull objects one by one out of + a stream. + + * parser.y (spec): Use YYACCEPT in the SECRET_ESCAPE_E clause for + pulling a single expression out of the token stream. YYACCEPT + is a trick for not invoking the $accept : spec . $end production + which is implicitly built into the grammar, and which causes + a token of lookahead to occur. This allows us to read a full + expression without stealing any further token: but only if the + grammar is structured right. + (exprs): This phrase structure now handles the DOTDOT syntax. + There is no such thing as an expr DOTDOT expr expression any more; + it is in the list syntax (and not supported in the dot position). + (expr): Remove DOTDOT syntax. + + * txr.1: Updated description of .. syntax, and relaxed the description + of lisp-parse since it now allows multiple calls to extract + multiple objects. 
+ 2014-01-07 Kaz Kylheku * match.c (v_load): Call yylex_destroy after yyparse, so we don't diff --git a/parser.l b/parser.l index 2ab713ab..bdac7a6a 100644 --- a/parser.l +++ b/parser.l @@ -52,15 +52,10 @@ #define YY_INPUT(buf, result, max_size) \ do { \ - val c = nil; \ - size_t n; \ - int ch = '*'; \ - for (n = 0; n < max_size && \ - (c = get_byte(yyin_stream)) && \ - (ch = c_num(c)) != '\n'; ++n) \ - buf[n] = (char) ch; \ - if (ch == '\n') \ - buf[n++] = (char) ch; \ + val c = get_byte(yyin_stream); \ + int n = 0; \ + if (c) \ + buf[n++] = (char) c_num(c); \ result = n; \ } while (0) diff --git a/parser.y b/parser.y index 8e7577ee..40c231c1 100644 --- a/parser.y +++ b/parser.y @@ -118,7 +118,7 @@ static val parsed_spec; spec : clauses { parsed_spec = $1; } | /* empty */ { parsed_spec = nil; } | SECRET_ESCAPE_R regexpr { parsed_spec = $2; end_of_regex(); } - | SECRET_ESCAPE_E expr { parsed_spec = $2; } + | SECRET_ESCAPE_E expr { parsed_spec = $2; YYACCEPT; } | error '\n' { parsed_spec = nil; if (errors >= 8) YYABORT; @@ -720,9 +720,13 @@ meta_expr : METAPAR exprs ')' { $$ = rlcp(cons(expr_s, expand($2)), $2); } | METAPAR error { $$ = nil; yybadtoken(yychar, lit("meta expression")); } ; + exprs : expr { $$ = rlcp(cons($1, nil), $1); } | expr exprs { $$ = rlcp(cons($1, $2), $1); } | expr '.' expr { $$ = rlcp(cons($1, $3), $1); } + | expr DOTDOT exprs { $$ = rlcp(cons(list(cons_s, $1, + car($3), nao), + cdr($3)), $1); } ; exprs_opt : exprs { $$ = $1; } @@ -743,7 +747,6 @@ expr : SYMTOK { $$ = rl(sym_helper($1, t), num(lineno)); } | chrlit { $$ = rl($1, num(lineno)); } | strlit { $$ = $1; } | quasilit { $$ = $1; } - | expr DOTDOT expr { $$ = list(cons_s, $1, $3, nao); } ; regex : '/' regexpr '/' { $$ = cons(regex_s, $2); end_of_regex(); diff --git a/txr.1 b/txr.1 index 941c2255..94462c80 100644 --- a/txr.1 +++ b/txr.1 @@ -4963,6 +4963,20 @@ to represent a pair of numbers or other objects. For instance, if L is a list, then [L 1 .. 
3] computes a sublist of L consisting of elements 1 through 2 (counting from zero). +.TP +Restrictions: + +The notation must be enclosed in a list. For instance a..b is not an +expression, but (a..b) is. This is important if Lisp data is being parsed from +a string or stream using the lisp-parse function. If the data "a..b" is +parsed, the symbol "a" will be extracted, leaving "..b", which, if parsed, +produces a syntax error since it consists of a "dotdot" token followed by +a symbol, which is not valid syntax, akin to something like ")a" or ".a". + +The notation cannot occur in the dot position; that is, the syntax (a . b .. c) +is invalid. The dotdot operator can only be used between the non-dot-position +elements of a list. + .SS The DWIM Brackets TXR Lisp has a square bracket notation. The syntax [...] is a shorthand @@ -10525,7 +10539,11 @@ Description: The lisp-parse function converts text denoting TXR Lisp structure, into the corresponding data structure. The argument may be either a character string, or a stream. The source must provide the syntax of one complete Lisp -object, without any stray tokens after that object. +object. + +Multiple calls to lisp-parse on the same stream will extract successive objects +from the stream. To parse successive objects from a string, it is necessary +to convert it to a string stream. The optional argument can be used to specify a stream to which parse errors diagnostics are sent. If absent, the diagnostics are suppressed. -- cgit v1.2.3