From f4a6c56c8e8841c1991c1bb44546681ccbdb8f3a Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Tue, 25 Mar 2014 22:10:31 -0700 Subject: * eval.c (me_quasilist): New static function. (eval_init): Register me_quasilist as quasilist macro expander. * lib.c (quasilist_s): New global variable. (obj_init): quasilist_s initialized. * lib.h (quasilist_s): Declared. * match.c (do_txreval): Handle quasilist syntax. * parser.l (QWLIT): New exclusive state. Extend lexical grammar to transition to QWLIT state upon the #` or #*` sequence which kicks off a word literal, and in that state, piecewise lexically analyze the QLL, mostly by borrowing rules from quasiliterals. * parser.y (QWORDS, QWSPLICE): New tokens. (n_exprs): Integrate splicing form of QLL syntax. (n_expr): Integrate non-splicing form of QLL syntax. (litchars): Propagate line number info. (quasilit): Fix "string literal" wording in error message. * txr.1: Introduced WLL abbreviation for word list literals, cleaned up the text a little, and documented QLL's. --- ChangeLog | 27 ++++++++++++++++++++ eval.c | 6 +++++ lib.c | 4 ++- lib.h | 3 ++- match.c | 18 ++++++++++--- parser.l | 88 +++++++++++++++++++++++++++++++++++++++++---------------------- parser.y | 27 ++++++++++++++++---- txr.1 | 58 +++++++++++++++++++++++++++++++++-------- 8 files changed, 179 insertions(+), 52 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2ba9aa05..76c2ee25 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,30 @@ +2014-03-25 Kaz Kylheku + + * eval.c (me_quasilist): New static function. + (eval_init): Register me_quasilist as quasilist macro expander. + + * lib.c (quasilist_s): New global variable. + (obj_init): quasilist_s initialized. + + * lib.h (quasilist_s): Declared. + + * match.c (do_txreval): Handle quasilist syntax. + + * parser.l (QWLIT): New exclusive state. + Extend lexical grammar to transition to QWLIT state upon + the #` or #*` sequence which kicks off a word literal, + and in that state, piecewise lexically analyze the QLL, + mostly by borrowing rules from quasiliterals. + + * parser.y (QWORDS, QWSPLICE): New tokens. + (n_exprs): Integrate splicing form of QLL syntax. + (n_expr): Integrate non-splicing form of QLL syntax. + (litchars): Propagate line number info. + (quasilit): Fix "string literal" wording in error message. + + * txr.1: Introduced WLL abbreviation for word list literals, + cleaned up the text a little, and documented QLL's. + 2014-03-25 Kaz Kylheku * eval.c (expand_quasi): Bugfix: incorrect logic, failing diff --git a/eval.c b/eval.c index 5c783371..e97432cc 100644 --- a/eval.c +++ b/eval.c @@ -2048,6 +2048,11 @@ static val me_until(val form, val menv) rest(rest(form)), nao)); } +static val me_quasilist(val form, val menv) +{ + return cons(list_s, cdr(form)); +} + val expand_forms(val form, val menv) { if (atom(form)) { @@ -3214,6 +3219,7 @@ void eval_init(void) reg_mac(intern(lit("unless"), user_package), me_unless); reg_mac(intern(lit("while"), user_package), me_while); reg_mac(intern(lit("until"), user_package), me_until); + reg_mac(quasilist_s, me_quasilist); reg_fun(cons_s, func_n2(cons)); reg_fun(intern(lit("make-lazy-cons"), user_package), func_n1(make_lazy_cons)); diff --git a/lib.c b/lib.c index def54e2f..edeb8737 100644 --- a/lib.c +++ b/lib.c @@ -76,7 +76,8 @@ val var_s, expr_s, regex_s, chset_s, set_s, cset_s, wild_s, oneplus_s; val nongreedy_s, compiled_regex_s; val quote_s, qquote_s, unquote_s, splice_s; val sys_qquote_s, sys_unquote_s, sys_splice_s; -val zeroplus_s, optional_s, compl_s, compound_s, or_s, and_s, quasi_s; +val zeroplus_s, optional_s, compl_s, compound_s; +val or_s, and_s, quasi_s, quasilist_s; val skip_s, trailer_s, block_s, next_s, freeform_s, fail_s, accept_s; val all_s, some_s, none_s, maybe_s, cases_s, collect_s, until_s, coll_s; val define_s, output_s, single_s, first_s, last_s, empty_s; @@ -5218,6 +5219,7 @@ static void obj_init(void) or_s = intern(lit("or"), user_package); and_s = intern(lit("and"), user_package); quasi_s = intern(lit("quasi"), system_package); + quasilist_s = intern(lit("quasilist"), system_package); skip_s = intern(lit("skip"), user_package); trailer_s = intern(lit("trailer"), user_package); block_s = intern(lit("block"), user_package); diff --git a/lib.h b/lib.h index 702ef585..cf7f3413 100644 --- a/lib.h +++ b/lib.h @@ -330,7 +330,8 @@ extern val var_s, expr_s, regex_s, chset_s, set_s, cset_s, wild_s, oneplus_s; extern val nongreedy_s, compiled_regex_s; extern val quote_s, qquote_s, unquote_s, splice_s; extern val sys_qquote_s, sys_unquote_s, sys_splice_s; -extern val zeroplus_s, optional_s, compl_s, compound_s, or_s, and_s, quasi_s; +extern val zeroplus_s, optional_s, compl_s, compound_s; +extern val or_s, and_s, quasi_s, quasilist_s; extern val skip_s, trailer_s, block_s, next_s, freeform_s, fail_s, accept_s; extern val all_s, some_s, none_s, maybe_s, cases_s, collect_s, until_s, coll_s; extern val define_s, output_s, single_s, first_s, last_s, empty_s; diff --git a/match.c b/match.c index 9b2a6900..ae37a56d 100644 --- a/match.c +++ b/match.c @@ -1449,19 +1449,29 @@ static val do_txeval(val spec, val form, val bindings, val allow_unbound) ret = cdr(binding); } } else if (consp(form)) { - if (first(form) == quasi_s) { + val sym = first(form); + if (sym == quasi_s) { uw_env_begin; uw_set_match_context(cons(spec, bindings)); ret = cat_str(subst_vars(rest(form), bindings, nil), nil); uw_env_end; - } else if (regexp(car(form))) { + } else if (sym == quasilist_s) { + uw_env_begin; + val iter; + list_collect_decl (out, tail); + uw_set_match_context(cons(spec, bindings)); + for (iter = rest(form); iter != nil; iter = cdr(iter)) + list_collect(tail, subst_vars(cdr(car(iter)), bindings, nil)); + ret = out; + uw_env_end; + } else if (regexp(sym)) { ret = form; - } else if (first(form) == var_s) { + } else if (sym == var_s) { uw_env_begin; uw_set_match_context(cons(spec, bindings)); ret = eval(second(form), make_env(bindings, nil, nil), form); uw_env_end; - } else if (first(form) == expr_s) { + } else if (sym == expr_s) { uw_env_begin; uw_set_match_context(cons(spec, bindings)); ret = eval(rest(form), make_env(bindings, nil, nil), form); diff --git a/parser.l b/parser.l index 3bd3436a..15e7958b 100644 --- a/parser.l +++ b/parser.l @@ -190,7 +190,7 @@ UANY {ASC}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} UANYN {ASCN}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} -%x SPECIAL BRACED NESTED REGEX STRLIT CHRLIT QSILIT QSPECIAL WLIT +%x SPECIAL BRACED NESTED REGEX STRLIT CHRLIT QSILIT QSPECIAL WLIT QWLIT %% @@ -198,7 +198,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} val str = string_own(utf8_dup_from(yytext)); if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); yylval.val = int_str(str, num(10)); @@ -209,7 +210,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} val str = string_own(utf8_dup_from(yytext + 2)); if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); yylval.val = int_str(str, num(16)); @@ -220,7 +222,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} val str = string_own(utf8_dup_from(yytext + 2)); if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); yylval.val = int_str(str, num(8)); @@ -231,7 +234,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} val str = string_own(utf8_dup_from(yytext + 2)); if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); yylval.val = int_str(str, num(2)); @@ -242,7 +246,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} val str = string_own(utf8_dup_from(yytext)); if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); yylval.val = flo_str(str); @@ -253,7 +258,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} val str = string_own(utf8_dup_from(yytext)); if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); yylval.val = flo_str(str); @@ -268,48 +274,53 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} yyerrorf(lit("trailing junk in floating-point literal: ~a"), str, nao); if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); yylval.val = flo_str(str); return NUMBER; } -@{NUM} { +@{NUM} { val str = string_own(utf8_dup_from(yytext + 1)); if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); yylval.val = int_str(str, num(10)); return METANUM; } -@{XNUM} { +@{XNUM} { val str = string_own(utf8_dup_from(yytext + 3)); if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); yylval.val = int_str(str, num(16)); return METANUM; } -@{ONUM} { +@{ONUM} { val str = string_own(utf8_dup_from(yytext + 3)); if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); yylval.val = int_str(str, num(8)); return METANUM; } -@{BNUM} { +@{BNUM} { val str = string_own(utf8_dup_from(yytext + 3)); if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); yylval.val = int_str(str, num(2)); return METANUM; @@ -319,7 +330,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} {BTOK} | {NTOK} { if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); yylval.lexeme = utf8_dup_from(yytext); @@ -537,7 +549,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} [}] { yy_pop_state(); if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); return yytext[0]; } @@ -545,7 +558,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} [)\]] { yy_pop_state(); if (yy_top_state() == INITIAL - || yy_top_state() == QSILIT) + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); return yytext[0]; } @@ -584,6 +598,16 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return WSPLICE; } +#\` { + yy_push_state(QWLIT); + return QWORDS; +} + +#\*\` { + yy_push_state(QWLIT); + return QWSPLICE; +} + # { return '#'; } @@ -776,26 +800,26 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return yytext[0]; } -` { +\` { yy_pop_state(); return yytext[0]; } -[\\][abtnvfre "`'\\ ] { +[\\][abtnvfre "`'\\ ] { yylval.chr = char_esc(yytext[1]); return LITCHAR; } -{WS}[\\]\n{WS} { +{WS}[\\]\n{WS} { lineno++; } -[\\](x{HEX}+|{OCT}+);? { +[\\](x{HEX}+|{OCT}+);? { yylval.chr = num_esc(yytext+1); return LITCHAR; } -[\\]. { +[\\]. { yyerrorf(lit("unrecognized escape: \\~a"), chr(yytext[1]), nao); } @@ -835,27 +859,27 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return ERRTOK; } -\n { +\n { lineno++; - return '\n'; + return ' '; } -@ { +@ { yy_push_state(QSPECIAL); } -{WS} { +{WS} { return ' '; } -{UANYN} { +{UANYN} { wchar_t buf[8]; utf8_from(buf, yytext); yylval.chr = buf[0]; return LITCHAR; } -. { +. { yyerrprepf(lit("non-UTF-8 byte in literal: '\\x~02x'"), num((unsigned char) yytext[0]), nao); return ERRTOK; @@ -871,7 +895,9 @@ void end_of_regex(void) yy_pop_state(); if (YYSTATE != INITIAL) { - if (yy_top_state() == INITIAL || yy_top_state() == QSILIT) + if (yy_top_state() == INITIAL + || yy_top_state() == QSILIT + || yy_top_state() == QWLIT) yy_pop_state(); } } diff --git a/parser.y b/parser.y index f9931541..ff3ff944 100644 --- a/parser.y +++ b/parser.y @@ -79,7 +79,7 @@ static val parsed_spec; %token MOD MODLAST DEFINE TRY CATCH FINALLY %token ERRTOK /* deliberately not used in grammar */ %token HASH_BACKSLASH HASH_SLASH DOTDOT HASH_H -%token WORDS WSPLICE +%token WORDS WSPLICE QWORDS QWSPLICE %token SECRET_ESCAPE_R SECRET_ESCAPE_E %token NUMBER METANUM @@ -102,7 +102,7 @@ static val parsed_spec; %type regex lisp_regex regexpr regbranch %type regterm regtoken regclass regclassterm regrange %type strlit chrlit quasilit quasi_items quasi_item litchars wordslit -%type not_a_clause +%type wordsqlit not_a_clause %type regchar %type '(' '[' '@' @@ -757,6 +757,9 @@ n_exprs : n_expr { $$ = rlcp(cons($1, nil), $1); } | WSPLICE wordslit { $$ = rl($2, num($1)); } | WSPLICE wordslit n_exprs { $$ = nappend2(rl($2, num($1)), $3); } + | QWSPLICE wordsqlit { $$ = rl($2, num($1)); } + | QWSPLICE wordsqlit + n_exprs { $$ = nappend2(rl($2, num($1)), $3); } ; n_expr : SYMTOK { $$ = sym_helper($1, t); } @@ -773,6 +776,7 @@ n_expr : SYMTOK { $$ = sym_helper($1, t); } | strlit { $$ = $1; } | quasilit { $$ = $1; } | WORDS wordslit { $$ = rl($2, num($1)); } + | QWORDS wordsqlit { $$ = rl(cons(quasilist_s, $2), num($1)); } | '\'' n_expr { $$ = rlcp(list(quote_s, $2, nao), $2); } | '^' n_expr { $$ = rlcp(list(sys_qquote_s, $2, nao), $2); } | ',' n_expr { $$ = rlcp(list(sys_unquote_s, $2, nao), $2); } @@ -911,7 +915,7 @@ quasilit : '`' '`' { $$ = null_string; } rlcp($$, $2); rl($$, num(lineno)); } | '`' error { $$ = nil; - yybadtoken(yychar, lit("string literal")); } + yybadtoken(yychar, lit("quasistring")); } ; quasi_items : quasi_item { $$ = cons($1, nil); @@ -934,13 +938,24 @@ litchars : LITCHAR { $$ = rl(cons(chr($1), nil), num(lineno)); } wordslit : '"' { $$ = nil; } | ' ' wordslit { $$ = $2; } - | '\n' wordslit { $$ = $2; } | litchars wordslit { val word = lit_char_helper($1); $$ = rlcp(cons(word, $2), $1); } | error { $$ = nil; - yybadtoken(yychar, lit("word literal")); } + yybadtoken(yychar, lit("word list")); } ; +wordsqlit : '`' { $$ = nil; } + | ' ' wordsqlit { $$ = $2; } + | quasi_items '`' { val qword = cons(quasi_s, + o_elems_transform($1)); + $$ = rlcp(cons(qword, nil), $1); } + | quasi_items ' ' + wordsqlit + { val qword = cons(quasi_s, + o_elems_transform($1)); + $$ = rlcp(cons(qword, $3), $1); } + ; + not_a_clause : ALL { $$ = make_expr(all_s, nil, num(lineno)); } | SOME { $$ = make_expr(some_s, nil, num(lineno)); } | NONE { $$ = make_expr(none_s, nil, num(lineno)); } @@ -1345,6 +1360,8 @@ void yybadtoken(int tok, val context) case HASH_H: problem = lit("#H"); break; case WORDS: problem = lit("#\""); break; case WSPLICE: problem = lit("#*\""); break; + case QWORDS: problem = lit("#`"); break; + case QWSPLICE: problem = lit("#*`"); break; } if (problem != 0) diff --git a/txr.1 b/txr.1 index 5f281f2d..c7e9fac8 100644 --- a/txr.1 +++ b/txr.1 @@ -1131,16 +1131,16 @@ The first string literal is the string "foobar". The second two are "foo bar". .SS Word List Literals -A word list literal provides a convenient way to write a list of strings +A word list literal (WLL) provides a convenient way to write a list of strings when such a list can be given as whitespace-delimited words. -There are two flavors of the word list literal: the regular word list -literal which begins with #" (hash, double-quote) and the splicing -list literal which begins with #*" (hash, star, double-quote). +There are two flavors of the WLL: the regular WLL which begins with #" (hash, +double-quote) and the splicing list literal which begins with #*" (hash, star, +double-quote). -Both literals are terminated by a double quote, which may be escaped +Both types are terminated by a double quote, which may be escaped as \e" in order to include it as a character. All the escaping conventions -used in string literals can be used in words literals. +used in string literals can be used in word literals. Unlike in string literals, whitespace (tabs, spaces and newlines) is not significant in word literals: it separates words. Whitespace may be @@ -1156,9 +1156,9 @@ Example: #"abc\ def ghi" --> notates ("abc def" "ghi") -A splicing word literal differs from a word literal in that it deos not +A splicing word literal differs from a word literal in that it does not produce a list of string literals, but rather it produces a sequence of string -literal tokens that is merged into the surrounding syntax. +literals that is merged into the surrounding syntax. Example: @@ -1166,8 +1166,8 @@ Example: --> (1 2 3 "abc" "def" 4 5 ("abc" "def")) -The regular word list literal produced a single list object, but the splicing -word list literal expanded into multiple string literal objects. +The regular WLL produced a single list object, but the splicing +WLL expanded into multiple string literal objects. .SS String Quasiliterals @@ -1186,6 +1186,44 @@ the TXR pattern language when the quasiliteral occurs in the pattern language. Quasliterals can be split into multiple lines in the same way as ordinary string literals. +.SS Quasiword Lists Literals + +The quasiword list literals (QLL-s) are to quasiliterals what WLL-s are to +ordinary literals. (See the above section Word List Literals.) + +A QLL combines the convenience of the WLL +with the power of quasistrings. + +Just as in the case of WLL-s, there are two flavors of the +QLL: the regular QLL which begins with #` +(hash, backquote) and the splicing list literal which begins with #*` (hash, +star, backquote). + +Both types are terminated by a backquote, which may be escaped +as \e` in order to include it as a character. All the escaping conventions +used in quasiliterals can be used in QLL. + +Unlike in quasiliterals, whitespace (tabs, spaces and newlines) is not +significant in QLL: it separates words. Whitespace may be +escaped with a backslash in order to include it as a literal character. + +Note that the delimiting into words is done before the variable +substitution. If the variable a contains spaces, then #`@a` nevertheless +expands into a list of one item: the string derived from a. + +Example: + + #`abc @a ghi` --> notates (`abc` `@a` `ghi`) + + #`abc @d@e@f + ghi` --> notates (`abc` `@d@e@f` `ghi`) + + #`@a\ @b @c` --> notates (`@a @b` `@c`) + +A splicing QLL differs from an ordinary QLL in that it does not produce a list +of quasiliterals, but rather it produces a sequence of quasiliterals that is +merged into the surrounding syntax. + .SS Numbers TXR supports integers and floating-point numbers. -- cgit v1.2.3