From b177dea74e5280ccaeeba4bd90fdf6d7967035f1 Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Thu, 29 Sep 2011 12:12:32 -0700 Subject: * match.c (chars_k): New variable. (match_line): Keyword arguments in coll implemented. (match_init): chars_k variable initialized. * parser.l (COLL): Lexical syntax changed to allow for argument material. * parser.y (elem): Coll syntax rewritten for arguments. * txr.1: Updated. --- ChangeLog | 13 +++++++++ match.c | 98 ++++++++++++++++++++++++++++++++++++++++++--------------------- parser.l | 4 +-- parser.y | 6 ++-- txr.1 | 9 ++++++ 5 files changed, 92 insertions(+), 38 deletions(-) diff --git a/ChangeLog b/ChangeLog index adb6e2d1..3d0554d2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +2011-09-29 Kaz Kylheku + + * match.c (chars_k): New variable. + (match_line): Keyword arguments in coll implemented. + (match_init): chars_k variable initialized. + + * parser.l (COLL): Lexical syntax changed to allow for + argument material. + + * parser.y (elem): Coll syntax rewritten for arguments. + + * txr.1: Updated. + 2011-09-28 Kaz Kylheku * match.c (mingap_k, maxgap_k, gap_k, times_k, lines_k): New diff --git a/match.c b/match.c index 53177865..68f05600 100644 --- a/match.c +++ b/match.c @@ -47,7 +47,7 @@ int output_produced; -val mingap_k, maxgap_k, gap_k, times_k, lines_k; +val mingap_k, maxgap_k, gap_k, times_k, lines_k, chars_k; static void debugf(val fmt, ...) { @@ -475,53 +475,84 @@ static val match_line(val bindings, val specline, val dataline, } else if (directive == coll_s) { val coll_specline = second(elem); val until_specline = third(elem); + val args = fourth(elem); val bindings_coll = nil; + val max = getplist(args, maxgap_k); + val min = getplist(args, mingap_k); + val gap = getplist(args, gap_k); + val times = getplist(args, times_k); + val chars = getplist(args, chars_k); + cnum cmax = nump(gap) ? c_num(gap) : (nump(max) ? c_num(max) : 0); + cnum cmin = nump(gap) ? c_num(gap) : (nump(min) ? c_num(min) : 0); + cnum mincounter = cmin, maxcounter = 0; + cnum timescounter = 0, charscounter = 0; + cnum ctimes = nump(times) ? c_num(times) : 0; + cnum cchars = nump(chars) ? c_num(chars) : 0; val iter; + if ((times && ctimes == 0) || (chars && cchars == 0)) + break; + for (;;) { - cons_bind (new_bindings, new_pos, - match_line(bindings, coll_specline, dataline, pos, - spec_lineno, data_lineno, file)); + if ((gap || min) && mincounter < cmin) + goto next_coll; + + if (chars && charscounter++ >= cchars) + break; - if (until_specline) { - cons_bind (until_bindings, until_pos, - match_line(bindings, until_specline, dataline, pos, + { + cons_bind (new_bindings, new_pos, + match_line(bindings, coll_specline, dataline, pos, spec_lineno, data_lineno, file)); - if (until_pos) { - (void) until_bindings; - LOG_MATCH("until", until_pos); - break; - } else { - LOG_MISMATCH("until"); + if (until_specline) { + cons_bind (until_bindings, until_pos, + match_line(bindings, until_specline, dataline, pos, + spec_lineno, data_lineno, file)); + + if (until_pos) { + (void) until_bindings; + LOG_MATCH("until", until_pos); + break; + } else { + LOG_MISMATCH("until"); + } } - } - if (new_pos) { - LOG_MATCH("coll", new_pos); + if (new_pos) { + LOG_MATCH("coll", new_pos); - for (iter = new_bindings; iter && iter != bindings; - iter = cdr(iter)) - { - val binding = car(iter); - val existing = assoc(bindings_coll, car(binding)); + for (iter = new_bindings; iter && iter != bindings; + iter = cdr(iter)) + { + val binding = car(iter); + val existing = assoc(bindings_coll, car(binding)); - bindings_coll = acons_new(bindings_coll, car(binding), - cons(cdr(binding), cdr(existing))); + bindings_coll = acons_new(bindings_coll, car(binding), + cons(cdr(binding), cdr(existing))); + } } - } - if (new_pos && !equal(new_pos, pos)) { - pos = new_pos; - bug_unless (length_str_ge(dataline, pos)); - } else { - pos = plus(pos, one); - } + if (new_pos && !equal(new_pos, pos)) { + pos = new_pos; + bug_unless (length_str_ge(dataline, pos)); - if (length_str_le(dataline, pos)) - break; - } + if (times && ++timescounter >= ctimes) + break; + mincounter = 0; + maxcounter = 0; + } else { +next_coll: + mincounter++; + if ((gap || max) && ++maxcounter > cmax) + break; + pos = plus(pos, one); + } + if (length_str_le(dataline, pos)) + break; + } + } if (!bindings_coll) debuglf(spec_lineno, lit("nothing was collected"), nao); @@ -1960,4 +1991,5 @@ void match_init(void) gap_k = intern(lit("gap"), keyword_package); times_k = intern(lit("times"), keyword_package); lines_k = intern(lit("lines"), keyword_package); + chars_k = intern(lit("chars"), keyword_package); } diff --git a/parser.l b/parser.l index 9f40dec8..991df970 100644 --- a/parser.l +++ b/parser.l @@ -275,8 +275,8 @@ UONLY {U2}{U}|{U3}{U}{U}|{U4}{U}{U}{U} return COLLECT; } -\({WS}coll{WS}\) { - yy_pop_state(); +\({WS}coll/{ID_END} { + yy_push_state(NESTED); return COLL; } diff --git a/parser.y b/parser.y index 0972c100..17ad883a 100644 --- a/parser.y +++ b/parser.y @@ -201,9 +201,9 @@ elem : TEXT { $$ = string_own($1); } | list { $$ = $1; } | regex { $$ = cons(regex_compile(rest($1)), rest($1)); } - | COLL elems END { $$ = list(coll_s, $2, nao); } - | COLL elems - UNTIL elems END { $$ = list(coll_s, $2, $4, nao); } + | COLL exprs_opt ')' elems END { $$ = list(coll_s, $4, nil, $2, nao); } + | COLL exprs_opt ')' elems + UNTIL elems END { $$ = list(coll_s, $4, $6, $2, nao); } | COLL error { $$ = nil; yybadtoken(yychar, lit("coll clause")); } ; diff --git a/txr.1 b/txr.1 index 1caadf71..0a5bcad1 100644 --- a/txr.1 +++ b/txr.1 @@ -1643,6 +1643,15 @@ Note that the @(end) is followed by a semicolon. That's because when the @(until) clause meets a match, the matching material is not consumed. +.SS Coll Keyword Parameters + +The @(coll) directive takes most of the same parameters as @(collect). +See the section Collect Keyword Parameters above. +So for instance @(coll :gap 0) means that the collects must be +consecutive, and @(coll :times 2) means that (at most) two matches +will be collected. The :lines keyword does not exist, but there is +an analogous :chars keyword. + .SS The Flatten Directive. The flatten directive can be used to convert variables to one dimensional -- cgit v1.2.3