diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2020-04-26 16:59:01 +0300 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2020-04-26 16:59:01 +0300 |
commit | 461ed6d987ad5ab4e4fce811abd639f1d055a311 (patch) | |
tree | 085ceb46069298040e8f113661860e7cfe4675c2 /support | |
parent | 7de3b57a8a7e36acd718ab1313c25276607e9b41 (diff) | |
download | egawk-461ed6d987ad5ab4e4fce811abd639f1d055a311.tar.gz egawk-461ed6d987ad5ab4e4fce811abd639f1d055a311.tar.bz2 egawk-461ed6d987ad5ab4e4fce811abd639f1d055a311.zip |
Speedup to dfa.c.
Diffstat (limited to 'support')
-rw-r--r-- | support/ChangeLog | 5 | ||||
-rw-r--r-- | support/dfa.c | 65 |
2 files changed, 56 insertions, 14 deletions
diff --git a/support/ChangeLog b/support/ChangeLog index 2e1dadf4..6098e40a 100644 --- a/support/ChangeLog +++ b/support/ChangeLog @@ -1,3 +1,8 @@ +2020-04-26 Arnold D. Robbins <arnold@skeeve.com> + + * dfa.c: Speed improvements from Norihiro Tanaka <noritnk@kcn.ne.jp>, + copied in from the bug-grep list. + 2020-04-14 Arnold D. Robbins <arnold@skeeve.com> * 5.1.0: Release tar ball made. diff --git a/support/dfa.c b/support/dfa.c index c088a92b..7e671127 100644 --- a/support/dfa.c +++ b/support/dfa.c @@ -485,6 +485,7 @@ struct dfa bool fast; /* The DFA is fast. */ token utf8_anychar_classes[9]; /* To lower ANYCHAR in UTF-8 locales. */ mbstate_t mbs; /* Multibyte conversion state. */ + bool epsilon; /* The following are valid only if MB_CUR_MAX > 1. */ @@ -1612,13 +1613,21 @@ addtok_mb (struct dfa *dfa, token t, char mbprop) dfa->parse.depth--; break; - case BACKREF: - dfa->fast = false; + case BEGLINE: + case ENDLINE: + case BEGWORD: + case ENDWORD: + case LIMWORD: + case NOTLIMWORD: + case EMPTY: + dfa->epsilon = true; FALLTHROUGH; + default: - dfa->nleaves++; - FALLTHROUGH; - case EMPTY: + if (t == BACKREF) + dfa->fast = false; + if (t != EMPTY) + dfa->nleaves++; dfa->parse.depth++; break; } @@ -2294,14 +2303,15 @@ state_index (struct dfa *d, position_set const *s, int context) constraint. Repeat exhaustively until no funny positions are left. S->elems must be large enough to hold the result. */ static void -epsclosure (struct dfa const *d) +epsclosure (struct dfa const *d, position_set *backword) { position_set tmp; alloc_position_set (&tmp, d->nleaves); for (idx_t i = 0; i < d->tindex; i++) if (d->follows[i].nelem > 0 && d->tokens[i] >= NOTCHAR && d->tokens[i] != BACKREF && d->tokens[i] != ANYCHAR - && d->tokens[i] != MBCSET && d->tokens[i] < CSET) + && d->tokens[i] != MBCSET && d->tokens[i] < CSET + && d->tokens[i] != BEG) { unsigned int constraint; switch (d->tokens[i]) @@ -2324,16 +2334,21 @@ epsclosure (struct dfa const *d) case NOTLIMWORD: constraint = NOTLIMWORD_CONSTRAINT; break; - default: + case EMPTY: constraint = NO_CONSTRAINT; break; + default: + abort (); } delete (i, &d->follows[i]); - for (idx_t j = 0; j < d->tindex; j++) - if (i != j && d->follows[j].nelem > 0) - replace (&d->follows[j], i, &d->follows[i], constraint, &tmp); + for (idx_t j = 0; j < backword[i].nelem; j++) + replace (&d->follows[backword[i].elems[j].index], i, &d->follows[i], + constraint, &tmp); + for (idx_t j = 0; j < d->follows[i].nelem; j++) + replace (&backword[d->follows[i].elems[j].index], i, &backword[i], + NO_CONSTRAINT, &tmp); } free (tmp.elems); } @@ -2640,6 +2655,7 @@ dfaanalyze (struct dfa *d, bool searchflag) /* Firstpos and lastpos elements. */ position *firstpos = posalloc; position *lastpos = firstpos + d->nleaves; + position_set *backword = NULL; position pos; position_set tmp; @@ -2672,6 +2688,9 @@ dfaanalyze (struct dfa *d, bool searchflag) alloc_position_set (&merged, d->nleaves); d->follows = xcalloc (d->tindex, sizeof *d->follows); + if (d->epsilon) + backword = xcalloc (d->tindex, sizeof *backword); + for (idx_t i = 0; i < d->tindex; i++) { switch (d->tokens[i]) @@ -2709,6 +2728,17 @@ dfaanalyze (struct dfa *d, bool searchflag) case CAT: /* Every element in the firstpos of the second argument is in the follow of every element in the lastpos of the first argument. */ + if (d->epsilon) + { + tmp.nelem = stk[-2].nlastpos; + tmp.elems = lastpos - stk[-1].nlastpos - stk[-2].nlastpos; + position *p = firstpos - stk[-1].nfirstpos; + for (idx_t j = 0; j < stk[-1].nfirstpos; j++) + { + merge (&tmp, &backword[p[j].index], &merged); + copy (&merged, &backword[p[j].index]); + } + } { tmp.nelem = stk[-1].nfirstpos; tmp.elems = firstpos - stk[-1].nfirstpos; @@ -2798,9 +2828,15 @@ dfaanalyze (struct dfa *d, bool searchflag) #endif } - /* For each follow set that is the follow set of a real position, replace - it with its epsilon closure. */ - epsclosure (d); + if (d->epsilon) + { + /* For each follow set that is the follow set of a real position, + replace it with its epsilon closure. */ + epsclosure (d, backword); + + for (idx_t i = 0; i < d->tindex; i++) + free (backword[i].elems); + } dfaoptimize (d); @@ -2862,6 +2898,7 @@ dfaanalyze (struct dfa *d, bool searchflag) d->min_trcount++; d->trcount = 0; + free (backword); free (posalloc); free (stkalloc); free (merged.elems); |