From e7c55ed9b8ff89310dc08b98eb03fc35ef280e41 Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Wed, 13 Sep 2017 06:34:31 -0700 Subject: regex: re-introduce nfa_accept states. The nfa_accept state label is re-introduced. This state type has the same representation as nfa_empty; essentially, this replaces the flag. This makes the state type smaller, and we don't have to access the flag to tell if a state is an acceptance state. * regex.c (nfa_kind_t): New enum label, nfa_accept. (struct nfa_state_empty): Member accept removed. (nfa_accept_state_p): Macro tests only for nfa_accept type. (nfa_empty_state_p): New macro. (nfa_state_accept): Set type of new state to nfa_accept; do not set accept flag. (nfa_state_empty): Do not set accept flag. (nfa_state_empty_convert): Do not clear accept flag. (nfa_map_states): Handle nfa_accept in switch, in the same case as nfa_empty. (nfa_thread_epsilons): Don't test for accept state in nfa_empty case; it would be always false now. Add nfa_accept case to switch which only arranges for a traversal of the two transitions. (Though these are expected to be null at the stage of the graph when this function is applied). (nfa_fold_accept): Switch type to nfa_accept rather than setting accept flag. (nfa_closure, nfa_move_closure): Use new macro for testing whether a state is empty. --- regex.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/regex.c b/regex.c index 61f4b5e9..38e8c1ac 100644 --- a/regex.c +++ b/regex.c @@ -190,7 +190,7 @@ typedef union char_set { } char_set_t; typedef enum { - nfa_empty, nfa_wild, nfa_single, nfa_set + nfa_empty, nfa_accept, nfa_wild, nfa_single, nfa_set } nfa_kind_t; struct nfa_state_any { @@ -203,7 +203,6 @@ struct nfa_state_empty { unsigned visited; nfa_state_t *trans0; nfa_state_t *trans1; - int accept; }; struct nfa_state_single { @@ -227,7 +226,9 @@ union nfa_state { struct nfa_state_set s; }; -#define nfa_accept_state_p(s) ((s)->a.kind == nfa_empty && (s)->e.accept) +#define nfa_accept_state_p(s) ((s)->a.kind == nfa_accept) +#define nfa_empty_state_p(s) ((s)->a.kind == nfa_accept || \ + (s)->a.kind == nfa_empty) struct nfa_machine { int is_nfa; /* common member */ @@ -812,10 +813,9 @@ static struct cobj_ops char_set_obj_ops = cobj_ops_init(eq, static nfa_state_t *nfa_state_accept(void) { nfa_state_t *st = coerce(nfa_state_t *, chk_malloc(sizeof *st)); - st->e.kind = nfa_empty; + st->e.kind = nfa_accept; st->e.visited = 0; st->e.trans0 = st->e.trans1 = 0; - st->e.accept = 1; return st; } @@ -826,7 +826,6 @@ static nfa_state_t *nfa_state_empty(nfa_state_t *t0, nfa_state_t *t1) st->e.visited = 0; st->e.trans0 = t0; st->e.trans1 = t1; - st->e.accept = 0; return st; } @@ -887,7 +886,6 @@ static void nfa_state_empty_convert(nfa_state_t *acc, nfa_state_t *t0, acc->e.kind = nfa_empty; acc->e.trans0 = t0; acc->e.trans1 = t1; - acc->e.accept = 0; } /* @@ -1071,6 +1069,7 @@ static void nfa_map_states(nfa_state_t *s, switch (s->a.kind) { case nfa_empty: + case nfa_accept: nfa_map_states(s->e.trans0, ctx, fun, visited); nfa_map_states(s->e.trans1, ctx, fun, visited); break; @@ -1144,8 +1143,6 @@ static void nfa_thread_epsilons(nfa_state_t **ps, unsigned visited) switch (s->a.kind) { case nfa_empty: - if (nfa_accept_state_p(s)) - break; if (s->e.trans0 && s->e.trans1) { ps0 = &s->e.trans0; ps1 = &s->e.trans1; @@ -1159,6 +1156,10 @@ static void nfa_thread_epsilons(nfa_state_t **ps, unsigned visited) *ps = 0; } break; + case nfa_accept: + ps0 = &s->e.trans0; + ps1 = &s->e.trans1; + break; case nfa_single: case nfa_wild: case nfa_set: @@ -1185,12 +1186,12 @@ static void nfa_fold_accept(nfa_state_t *s, mem_t *ctx) nfa_state_t *e1 = s->e.trans1; if (e0 && nfa_accept_state_p(e0)) { - s->e.accept = 1; + s->a.kind = nfa_accept; s->e.trans0 = 0; } if (e1 && nfa_accept_state_p(e1)) { - s->e.accept = 1; + s->a.kind = nfa_accept; s->e.trans1 = 0; } } @@ -1274,7 +1275,7 @@ static int nfa_closure(nfa_state_t **stack, nfa_state_t **set, int nin, /* Only states of type nfa_empty are interesting. Each such state at most two epsilon transitions. */ - if (top->a.kind == nfa_empty) { + if (nfa_empty_state_p(top)) { nfa_state_t *e0 = top->e.trans0; nfa_state_t *e1 = top->e.trans1; @@ -1362,7 +1363,7 @@ static int nfa_move_closure(nfa_state_t **stack, nfa_state_t **set, int nin, /* Only states of type nfa_empty are interesting. Each such state at most two epsilon transitions. */ - if (top->a.kind == nfa_empty) { + if (nfa_empty_state_p(top)) { nfa_state_t *e0 = top->e.trans0; nfa_state_t *e1 = top->e.trans1; -- cgit v1.2.3