From 56cfdc348955495dbd11fc70fb7ac04a8bba1f71 Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Wed, 13 Jan 2010 15:25:11 -0800 Subject: Dynamically determine which regex implementation to use: NFA or derivatives. The default behavior is NFA, with derivatives used if the regular expression contains uses of complement or intersection. The --dv-regex option forces derivatives always. --- ChangeLog | 17 +++++++++++++++++ regex.c | 32 ++++++++++++++++++++++++++++++-- txr.c | 4 ++++ 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8f34644a..1a134673 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,20 @@ +2010-01-13 Kaz Kylheku + + Dynamically determine which regex implementation to use: + NFA or derivatives. The default behavior is NFA, with + derivatives used if the regular expression contains + uses of complement or intersection. The --dv-regex + option forces derivatives always. + + * regex.c (opt_derivative_regex): Default value is 0 now. + (regex_requires_dv): New function. + (regex_compile): If regex_requires_dv function reports + true, or if the opt_derivative_regex flag is true, + treat the regex with the derivative-based implementation. + + * txr.c (txr_main): Implemented --dv-regex option + to set the opt_derivative_regex flag. + 2010-01-13 Kaz Kylheku * lib.h (c_num): Remove redundant declaration. diff --git a/regex.c b/regex.c index 147f03cb..a45339d5 100644 --- a/regex.c +++ b/regex.c @@ -183,7 +183,7 @@ union regex_machine { struct dv_machine d; }; -int opt_derivative_regex = 1; +int opt_derivative_regex = 0; static int L0_full(cset_L0_t *L0) { @@ -1336,9 +1336,37 @@ static cnum dv_run(val regex, const wchar_t *str) return last_accept_pos ? 
last_accept_pos - str : -1; } +static val regex_requires_dv(val exp) /* Reports whether the regex expression tree exp needs the derivative-based implementation: returns t when a complement (compl_s) or intersection (and_s) operator is found in the parts of the tree that are searched, nil otherwise. regex_compile consults this result together with opt_derivative_regex. */ +{ + if (atom(exp)) { + return nil; /* an atom carries no operator, so it never forces derivatives */ + } else { + val sym = first(exp); + val args = rest(exp); + + if (sym == set_s || sym == cset_s) { + return nil; /* set and cset forms are not searched any further */ + } else if (sym == compound_s) { + return some_satisfy(args, func_n1(regex_requires_dv), nil); /* applies this same check to the elements of the compound; presumably true if any element satisfies it (verify some_satisfy semantics) */ + } else if (sym == zeroplus_s || sym == oneplus_s || + sym == optional_s) { + return regex_requires_dv(first(args)); /* only the first argument of these operators is examined */ + } else if (sym == compl_s) { + return t; /* complement operator: derivatives required */ + } else if (sym == or_s) { + return if2(regex_requires_dv(first(args)) || + regex_requires_dv(second(args)), t); /* required if either of the two operands requires it */ + } else if (sym == and_s) { + return t; /* intersection operator: derivatives required */ + } else { + internal_error("bad operator in regex"); /* NOTE(review): no return statement follows; assumes internal_error does not return - confirm */ + } + } +} + val regex_compile(val regex_sexp) { - if (opt_derivative_regex) { + if (opt_derivative_regex || regex_requires_dv(regex_sexp)) { return cons(compiled_regex_s, cons(dv_compile_regex(regex_sexp), nil)); } else { nfa_t *pnfa = (nfa_t *) chk_malloc(sizeof *pnfa); diff --git a/txr.c b/txr.c index 16faa19a..ff1397f7 100644 --- a/txr.c +++ b/txr.c @@ -281,6 +281,10 @@ int txr_main(int argc, char **argv) prog_string, string_utf8(*argv), nao); return EXIT_FAILURE; #endif + } else if (!strcmp(*argv, "--dv-regex")) { + opt_derivative_regex = 1; + argv++, argc--; + continue; } { -- cgit v1.2.3