From 5ab2b46a0f5d6c73b6e81a0efc47c29d29928966 Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Thu, 6 Oct 2011 22:20:16 -0700 Subject: Extending syntax to allow for @VAR and @(...) forms inside nested lists. This is in anticipation of future features. * lib.c (expr_s): New symbol variable. (obj_init): expr_s initialized. * lib.h (expr_s): Declared. * match.c (dest_bind): Now takes linenum. Tests for the meta-syntax denoted by the system symbols var_s and expr_s, and throws an error. (eval_form): Similar error checks added. Also, hack: do not add file and line number to an exception which begins with a '(' character; just re-throw it. This suppresses duplicate line number addition when this throw occurs across some nestings. (match_files): Updated calls to dest_bind. * parser.l (yybadtoken): Handle new token kind, METAVAR and METAPAR. (grammar): Refactoring among patterns: TOK broken into SYM and NUM, NTOK introduced, unused NUM_END removed. Rule for @( producing METAPAR in nested state. * parser.y (METAVAR, METAPAR): New tokens. (meta_expr): New nonterminal. (expr): meta_expr and META_VAR productions handled. --- ChangeLog | 28 ++++++++++++++++++++++++++++ lib.c | 3 ++- lib.h | 2 +- match.c | 41 ++++++++++++++++++++++++++++++++++------- parser.l | 26 +++++++++++++++++++++----- parser.y | 13 +++++++++++-- 6 files changed, 97 insertions(+), 16 deletions(-) diff --git a/ChangeLog b/ChangeLog index a7165fbb..7b3fe9c9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,31 @@ +2011-10-06 Kaz Kylheku + + Extending syntax to allow for @VAR and @(...) forms inside + nested lists. This is in anticipation of future features. + + * lib.c (expr_s): New symbol variable. + (obj_init): expr_s initialized. + + * lib.h (expr_s): Declared. + + * match.c (dest_bind): Now takes linenum. Tests for the meta-syntax + denoted by the system symbols var_s and expr_s, and throws an + error. + (eval_form): Similar error checks added. Also, hack: do not add + file and line number to an exception which begins with a '(' + character; just re-throw it. This suppresses duplicate line + number addition when this throw occurs across some nestings. + (match_files): Updated calls to dest_bind. + + * parser.l (yybadtoken): Handle new token kind, METAVAR and METAPAR. + (grammar): Refactoring among patterns: TOK broken into + SYM and NUM, NTOK introduced, unused NUM_END removed. + Rule for @( producing METAPAR in nested state. + + * parser.y (METAVAR, METAPAR): New tokens. + (meta_expr): New nonterminal. + (expr): meta_expr and META_VAR productions handled. + 2011-10-06 Kaz Kylheku Renaming the currying combinators according to new scheme. diff --git a/lib.c b/lib.c index b5562c54..ed1e737a 100644 --- a/lib.c +++ b/lib.c @@ -52,7 +52,7 @@ val system_package, keyword_package, user_package; val null, t, cons_s, str_s, chr_s, num_s, sym_s, pkg_s, fun_s, vec_s; val stream_s, hash_s, hash_iter_s, lcons_s, lstr_s, cobj_s; -val var_s, regex_s, chset_s, set_s, cset_s, wild_s, oneplus_s; +val var_s, expr_s, regex_s, chset_s, set_s, cset_s, wild_s, oneplus_s; val nongreedy_s, compiled_regex_s; val zeroplus_s, optional_s, compl_s, compound_s, or_s, and_s, quasi_s; val skip_s, trailer_s, block_s, next_s, freeform_s, fail_s, accept_s; @@ -2165,6 +2165,7 @@ static void obj_init(void) lstr_s = intern(lit("lstr"), user_package); cobj_s = intern(lit("cobj"), user_package); var_s = intern(lit("var"), system_package); + expr_s = intern(lit("expr"), system_package); regex_s = intern(lit("regex"), system_package); nongreedy_s = intern(lit("nongreedy"), system_package); compiled_regex_s = intern(lit("compiled-regex"), system_package); diff --git a/lib.h b/lib.h index 403cf2ae..38717fe9 100644 --- a/lib.h +++ b/lib.h @@ -219,7 +219,7 @@ INLINE val chr(wchar_t ch) extern val keyword_package, system_package, user_package; extern val null, t, cons_s, str_s, chr_s, num_s, sym_s, pkg_s, fun_s, vec_s; extern val stream_s, hash_s, hash_iter_s, lcons_s, lstr_s, cobj_s; -extern val var_s, regex_s, chset_s, set_s, cset_s, wild_s, oneplus_s; +extern val var_s, expr_s, regex_s, chset_s, set_s, cset_s, wild_s, oneplus_s; extern val nongreedy_s, compiled_regex_s; extern val zeroplus_s, optional_s, compl_s, compound_s, or_s, and_s, quasi_s; extern val skip_s, trailer_s, block_s, next_s, freeform_s, fail_s, accept_s; diff --git a/match.c b/match.c index a3c5b7ed..19b527b5 100644 --- a/match.c +++ b/match.c @@ -226,7 +226,7 @@ static val bindable(val obj) return (obj && symbolp(obj) && obj != t && !keywordp(obj)) ? t : nil; } -static val dest_bind(val bindings, val pattern, val value) +static val dest_bind(val linenum, val bindings, val pattern, val value) { if (symbolp(pattern)) { if (bindable(pattern)) { @@ -246,9 +246,22 @@ static val dest_bind(val bindings, val pattern, val value) } else if (consp(pattern)) { val piter = pattern, viter = value; + if (first(pattern) == var_s) { + uw_throwf(query_error_s, + lit("metavariable @~a syntax cannot be used here"), + second(pattern), nao); + } + + if (first(pattern) == expr_s) { + uw_throwf(query_error_s, + lit("the @~s syntax cannot be used here"), + rest(pattern), nao); + } + + while (consp(piter) && consp(viter)) { - bindings = dest_bind(bindings, car(piter), car(viter)); + bindings = dest_bind(linenum, bindings, car(piter), car(viter)); if (bindings == t) return t; piter = cdr(piter); @@ -256,7 +269,7 @@ static val dest_bind(val bindings, val pattern, val value) } if (bindable(piter)) { - bindings = dest_bind(bindings, piter, viter); + bindings = dest_bind(linenum, bindings, piter, viter); if (bindings == t) return t; } else { @@ -823,10 +836,16 @@ static val eval_form(val lineno, val form, val bindings) } else if (bindable(form)) { ret = assoc(bindings, form); } else if (consp(form)) { - if (car(form) == quasi_s) { + if (first(form) == quasi_s) { ret = cons(t, cat_str(subst_vars(rest(form), bindings, nil), nil)); } else if (regexp(car(form))) { ret = cons(t, form); + } else if (first(form) == var_s) { + sem_error(lineno, lit("metavariable @~a syntax cannot be used here"), + second(form), nao); + } else if (first(form) == expr_s) { + sem_error(lineno, lit("the @~s syntax cannot be used here"), + rest(form), nao); } else { val subforms = mapcar(curry_123_2(func_n3(eval_form), lineno, bindings), form); @@ -841,6 +860,12 @@ static val eval_form(val lineno, val form, val bindings) } uw_catch (exc_sym, exc) { + if (stringp(exc) && !equal(exc, lit("")) && + chr_str(exc, zero) == chr('(')) + { + uw_throw (exc_sym, exc); + } + sem_error(lineno, lit("~a"), exc, nao); } } @@ -1718,7 +1743,7 @@ repeat_spec_same_data: val form = second(args); val val = eval_form(spec_linenum, form, bindings); - bindings = dest_bind(bindings, pattern, cdr(val)); + bindings = dest_bind(spec_linenum, bindings, pattern, cdr(val)); if (bindings == t) return nil; @@ -1871,7 +1896,8 @@ repeat_spec_same_data: val value = car(viter); if (value) { - bindings = dest_bind(bindings, param, cdr(value)); + bindings = dest_bind(spec_linenum, bindings, + param, cdr(value)); if (bindings == t) { all_bind = nil; @@ -2077,7 +2103,8 @@ repeat_spec_same_data: if (symbolp(arg)) { val newbind = assoc(new_bindings, param); if (newbind) { - bindings = dest_bind(bindings, arg, cdr(newbind)); + bindings = dest_bind(spec_linenum, bindings, + arg, cdr(newbind)); if (bindings == t) { debuglf(spec_linenum, lit("binding mismatch on ~a " diff --git a/parser.l b/parser.l index 3b80d38c..6441eceb 100644 --- a/parser.l +++ b/parser.l @@ -94,6 +94,7 @@ void yybadtoken(int tok, val context) case TEXT: problem = lit("text"); break; case IDENT: problem = lit("identifier"); break; case KEYWORD: problem = lit("keyword"); break; + case METAVAR: problem = lit("metavar"); break; case ALL: problem = lit("\"all\""); break; case SOME: problem = lit("\"some\""); break; case NONE: problem = lit("\"none\""); break; @@ -120,6 +121,7 @@ void yybadtoken(int tok, val context) case NUMBER: problem = lit("\"number\""); break; case REGCHAR: problem = lit("regular expression character"); break; case LITCHAR: problem = lit("string literal character"); break; + case METAPAR: problem = lit("@("); break; } if (problem != 0) @@ -175,9 +177,11 @@ static wchar_t num_esc(char *num) %option nounput %option noinput -TOK :?[a-zA-Z_][a-zA-Z0-9_]*|[+-]?[0-9]+ +SYM [a-zA-Z_][a-zA-Z0-9_]* +NUM [+-]?[0-9]+ +TOK :?{SYM}|{NUM} +NTOK [:@]?{SYM}|{NUM} ID_END [^a-zA-Z0-9_] -NUM_END [^0-9] WS [\t ]* HEX [0-9A-Fa-f] OCT [0-7] @@ -190,14 +194,15 @@ U3 [\xe0-\xef] U4 [\xf0-\xf4] UANY {ASC}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} -UANYN {ASCN}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} +UANYN {ASCN}|{U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} %x SPECIAL NESTED REGEX STRLIT CHRLIT QSILIT %% -{TOK} { +{TOK} | +{NTOK} { cnum val; char *errp; @@ -206,9 +211,15 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} || yy_top_state() == QSILIT) yy_pop_state(); - if (yytext[0] == ':') { + switch (yytext[0]) { + case ':': yylval.lexeme = utf8_dup_from(yytext + 1); return KEYWORD; + case '@': + yylval.lexeme = utf8_dup_from(yytext + 1); + return METAVAR; + default: + break; } errno = 0; @@ -349,11 +360,16 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return FINALLY; } +@\( | \{|\( { yy_push_state(NESTED); if (yy_top_state() == INITIAL || yy_top_state() == QSILIT) yy_pop_state(); + if (yytext[0] == '@') { + yylval.chr = '('; + return METAPAR; + } return yytext[0]; } diff --git a/parser.y b/parser.y index 3da23429..96fec8f6 100644 --- a/parser.y +++ b/parser.y @@ -58,12 +58,13 @@ static val parsed_spec; cnum num; } -%token TEXT IDENT KEYWORD ALL SOME NONE MAYBE CASES CHOOSE +%token TEXT IDENT KEYWORD METAVAR ALL SOME NONE MAYBE CASES CHOOSE %token AND OR END COLLECT %token UNTIL COLL OUTPUT REPEAT REP SINGLE FIRST LAST EMPTY DEFINE %token TRY CATCH FINALLY %token NUMBER %token REGCHAR LITCHAR +%token METAPAR %type spec clauses clauses_opt clause %type all_clause some_clause none_clause maybe_clause @@ -71,7 +72,7 @@ static val parsed_spec; %type clause_parts additional_parts %type output_clause define_clause try_clause catch_clauses_opt %type line elems_opt elems clause_parts_h additional_parts_h -%type elem var var_op +%type elem var var_op meta_expr %type list exprs exprs_opt expr out_clauses out_clauses_opt out_clause %type repeat_clause repeat_parts_opt o_line %type o_elems_opt o_elems_opt2 o_elems o_elem rep_elem rep_parts_opt @@ -491,6 +492,11 @@ list : '(' exprs ')' { $$ = $2; } yybadtoken(yychar, lit("list expression")); } ; +meta_expr : METAPAR exprs ')' { $$ = cons(expr_s, $2); } + | METAPAR ')' { $$ = cons(expr_s, nil); } + | METAPAR error { $$ = nil; + yybadtoken(yychar, lit("meta expression")); } + ; exprs : expr { $$ = cons($1, nil); } | expr exprs { $$ = cons($1, $2); } | expr '.' expr { $$ = cons($1, $3); } @@ -503,8 +509,11 @@ exprs_opt : exprs { $$ = $1; } expr : IDENT { $$ = intern(string_own($1), nil); } | KEYWORD { $$ = intern(string_own($1), keyword_package); } + | METAVAR { $$ = list(var_s, + intern(string_own($1), nil), nao); } | NUMBER { $$ = num($1); } | list { $$ = $1; } + | meta_expr { $$ = $1; } | regex { $$ = cons(regex_compile(rest($1)), rest($1)); } | chrlit { $$ = $1; } -- cgit v1.2.3