aboutsummaryrefslogtreecommitdiffstats
path: root/support
diff options
context:
space:
mode:
authorArnold D. Robbins <arnold@skeeve.com>2020-04-26 16:59:01 +0300
committerArnold D. Robbins <arnold@skeeve.com>2020-04-26 16:59:01 +0300
commit461ed6d987ad5ab4e4fce811abd639f1d055a311 (patch)
tree085ceb46069298040e8f113661860e7cfe4675c2 /support
parent7de3b57a8a7e36acd718ab1313c25276607e9b41 (diff)
downloadegawk-461ed6d987ad5ab4e4fce811abd639f1d055a311.tar.gz
egawk-461ed6d987ad5ab4e4fce811abd639f1d055a311.tar.bz2
egawk-461ed6d987ad5ab4e4fce811abd639f1d055a311.zip
Speedup to dfa.c.
Diffstat (limited to 'support')
-rw-r--r--support/ChangeLog5
-rw-r--r--support/dfa.c65
2 files changed, 56 insertions, 14 deletions
diff --git a/support/ChangeLog b/support/ChangeLog
index 2e1dadf4..6098e40a 100644
--- a/support/ChangeLog
+++ b/support/ChangeLog
@@ -1,3 +1,8 @@
+2020-04-26 Arnold D. Robbins <arnold@skeeve.com>
+
+ * dfa.c: Speed improvements from Norihiro Tanaka <noritnk@kcn.ne.jp>,
+ copied in from the bug-grep list.
+
2020-04-14 Arnold D. Robbins <arnold@skeeve.com>
* 5.1.0: Release tar ball made.
diff --git a/support/dfa.c b/support/dfa.c
index c088a92b..7e671127 100644
--- a/support/dfa.c
+++ b/support/dfa.c
@@ -485,6 +485,7 @@ struct dfa
bool fast; /* The DFA is fast. */
token utf8_anychar_classes[9]; /* To lower ANYCHAR in UTF-8 locales. */
mbstate_t mbs; /* Multibyte conversion state. */
+ bool epsilon;
/* The following are valid only if MB_CUR_MAX > 1. */
@@ -1612,13 +1613,21 @@ addtok_mb (struct dfa *dfa, token t, char mbprop)
dfa->parse.depth--;
break;
- case BACKREF:
- dfa->fast = false;
+ case BEGLINE:
+ case ENDLINE:
+ case BEGWORD:
+ case ENDWORD:
+ case LIMWORD:
+ case NOTLIMWORD:
+ case EMPTY:
+ dfa->epsilon = true;
FALLTHROUGH;
+
default:
- dfa->nleaves++;
- FALLTHROUGH;
- case EMPTY:
+ if (t == BACKREF)
+ dfa->fast = false;
+ if (t != EMPTY)
+ dfa->nleaves++;
dfa->parse.depth++;
break;
}
@@ -2294,14 +2303,15 @@ state_index (struct dfa *d, position_set const *s, int context)
constraint. Repeat exhaustively until no funny positions are left.
S->elems must be large enough to hold the result. */
static void
-epsclosure (struct dfa const *d)
+epsclosure (struct dfa const *d, position_set *backword)
{
position_set tmp;
alloc_position_set (&tmp, d->nleaves);
for (idx_t i = 0; i < d->tindex; i++)
if (d->follows[i].nelem > 0 && d->tokens[i] >= NOTCHAR
&& d->tokens[i] != BACKREF && d->tokens[i] != ANYCHAR
- && d->tokens[i] != MBCSET && d->tokens[i] < CSET)
+ && d->tokens[i] != MBCSET && d->tokens[i] < CSET
+ && d->tokens[i] != BEG)
{
unsigned int constraint;
switch (d->tokens[i])
@@ -2324,16 +2334,21 @@ epsclosure (struct dfa const *d)
case NOTLIMWORD:
constraint = NOTLIMWORD_CONSTRAINT;
break;
- default:
+ case EMPTY:
constraint = NO_CONSTRAINT;
break;
+ default:
+ abort ();
}
delete (i, &d->follows[i]);
- for (idx_t j = 0; j < d->tindex; j++)
- if (i != j && d->follows[j].nelem > 0)
- replace (&d->follows[j], i, &d->follows[i], constraint, &tmp);
+ for (idx_t j = 0; j < backword[i].nelem; j++)
+ replace (&d->follows[backword[i].elems[j].index], i, &d->follows[i],
+ constraint, &tmp);
+ for (idx_t j = 0; j < d->follows[i].nelem; j++)
+ replace (&backword[d->follows[i].elems[j].index], i, &backword[i],
+ NO_CONSTRAINT, &tmp);
}
free (tmp.elems);
}
@@ -2640,6 +2655,7 @@ dfaanalyze (struct dfa *d, bool searchflag)
/* Firstpos and lastpos elements. */
position *firstpos = posalloc;
position *lastpos = firstpos + d->nleaves;
+ position_set *backword = NULL;
position pos;
position_set tmp;
@@ -2672,6 +2688,9 @@ dfaanalyze (struct dfa *d, bool searchflag)
alloc_position_set (&merged, d->nleaves);
d->follows = xcalloc (d->tindex, sizeof *d->follows);
+ if (d->epsilon)
+ backword = xcalloc (d->tindex, sizeof *backword);
+
for (idx_t i = 0; i < d->tindex; i++)
{
switch (d->tokens[i])
@@ -2709,6 +2728,17 @@ dfaanalyze (struct dfa *d, bool searchflag)
case CAT:
/* Every element in the firstpos of the second argument is in the
follow of every element in the lastpos of the first argument. */
+ if (d->epsilon)
+ {
+ tmp.nelem = stk[-2].nlastpos;
+ tmp.elems = lastpos - stk[-1].nlastpos - stk[-2].nlastpos;
+ position *p = firstpos - stk[-1].nfirstpos;
+ for (idx_t j = 0; j < stk[-1].nfirstpos; j++)
+ {
+ merge (&tmp, &backword[p[j].index], &merged);
+ copy (&merged, &backword[p[j].index]);
+ }
+ }
{
tmp.nelem = stk[-1].nfirstpos;
tmp.elems = firstpos - stk[-1].nfirstpos;
@@ -2798,9 +2828,15 @@ dfaanalyze (struct dfa *d, bool searchflag)
#endif
}
- /* For each follow set that is the follow set of a real position, replace
- it with its epsilon closure. */
- epsclosure (d);
+ if (d->epsilon)
+ {
+ /* For each follow set that is the follow set of a real position,
+ replace it with its epsilon closure. */
+ epsclosure (d, backword);
+
+ for (idx_t i = 0; i < d->tindex; i++)
+ free (backword[i].elems);
+ }
dfaoptimize (d);
@@ -2862,6 +2898,7 @@ dfaanalyze (struct dfa *d, bool searchflag)
d->min_trcount++;
d->trcount = 0;
+ free (backword);
free (posalloc);
free (stkalloc);
free (merged.elems);