From bba32e4b7a4c936d45e43d7e4959d2a5bb92fba6 Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Tue, 12 Sep 2017 20:15:42 -0700 Subject: regex: accept-folding optimization. In this optimization, we identify places in the NFA graph where empty states transition to accept states. We eliminate these transitions and turn the empty states into accept states. Accept states which thereby become unreachable from the start state are pruned away. * regex.c (nfa_fold_accept): New static function. (nfa_optimize): Add a pass which applies nfa_fold_accept to all states. --- regex.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/regex.c b/regex.c index 009bf7e0..aa57a326 100644 --- a/regex.c +++ b/regex.c @@ -1176,6 +1176,26 @@ static void nfa_thread_epsilons(nfa_state_t **ps, unsigned visited) } } +static void nfa_fold_accept(nfa_state_t *s, mem_t *ctx) +{ + (void) ctx; + + if (s->a.kind == nfa_empty) { + nfa_state_t *e0 = s->e.trans0; + nfa_state_t *e1 = s->e.trans1; + + if (e0 && nfa_accept_state_p(e0)) { + s->e.accept = 1; + s->e.trans0 = 0; + } + + if (e1 && nfa_accept_state_p(e1)) { + s->e.accept = 1; + s->e.trans1 = 0; + } + } +} + static void nfa_noop(nfa_state_t *s, mem_t *ctx) { (void) s; @@ -1196,6 +1216,9 @@ static nfa_t nfa_optimize(nfa_t nfa) /* Eliminate useless epsilons from graph. */ nfa_thread_epsilons(&nfa.start, nfa.start->a.visited + 1); + /* Fold accept states into empty transitions which reference them. */ + nfa_map_states(nfa.start, 0, nfa_fold_accept, nfa.start->a.visited + 1); + /* Visit all reachable states. */ nfa_map_states(nfa.start, 0, nfa_noop, nfa.start->a.visited + 1); -- cgit v1.2.3