diff options
-rw-r--r-- | ChangeLog | 19 | ||||
-rw-r--r-- | NEWS | 2 | ||||
-rw-r--r-- | awk.h | 9 | ||||
-rw-r--r-- | awkgram.c | 10 | ||||
-rw-r--r-- | awkgram.y | 10 | ||||
-rw-r--r-- | io.c | 17 | ||||
-rw-r--r-- | main.c | 2 | ||||
-rw-r--r-- | re.c | 60 | ||||
-rw-r--r-- | symbol.c | 6 |
9 files changed, 83 insertions, 52 deletions
@@ -1,3 +1,22 @@ +2017-01-04 Arnold Robbins <arnold@skeeve.com> + + Trade space for time for programs that toggle IGNORECASE a lot. + Brings 25% to 39% speedup. NODE does not actually grow in size. + + * awk.h (NODE::preg): Now an array of size two. + [CASE]: Flag no longer needed, so removed. + (IGNORECASE): Change type from int to bool. + * awkgram.y (make_regnode): Build two copies of the compiled regexp, + one without ignorecase, and one with. + * io.c (RS_re): Array replacing RS_re_yes_case and RS_re_no_case. + (set_RS): Use RS_re[IGNORECASE] as appropriate. Free and recompute + as needed. + * main.c (IGNORECASE): Change type from int to bool. + * re.c (re_update): Simplify the code. No need to check CASE flag + any longer. Recompute only if text of regexp changed. + * symbol.c (free_bc_internal): Adjust to free both elements of + m_re_reg. + 2017-01-18 Andrew J. Schorr <aschorr@telemetry-investments.com> * interpret.h (r_interpret): Increase robustness of the optimization @@ -98,6 +98,8 @@ Changes from 4.1.x to 4.2.0 recommend that you do so. Fortunately, the changes are fairly minor and straightforward. +24. Programs that toggle IGNORECASE a lot should now be noticeably faster. + Changes from 4.1.3 to 4.1.4 --------------------------- @@ -343,7 +343,7 @@ typedef struct exp_node { } l; union { struct exp_node *rptr; - Regexp *preg; + Regexp *preg[2]; struct exp_node **av; BUCKET **bv; void *aq; @@ -361,9 +361,8 @@ typedef struct exp_node { struct exp_node *rn; unsigned long cnt; unsigned long reflags; -# define CASE 1 -# define CONSTANT 2 -# define FS_DFLT 4 +# define CONSTANT 1 +# define FS_DFLT 2 } nodep; struct { @@ -1083,7 +1082,7 @@ extern long NF; extern long NR; extern long FNR; extern int BINMODE; -extern int IGNORECASE; +extern bool IGNORECASE; extern bool RS_is_null; extern char *OFS; extern int OFSlen; @@ -7422,8 +7422,14 @@ make_regnode(int type, NODE *exp) n->re_cnt = 1; if (type == Node_regex) { - n->re_reg = make_regexp(exp->stptr, exp->stlen, false, true, false); - if (n->re_reg == NULL) { + n->re_reg[0] = make_regexp(exp->stptr, exp->stlen, false, true, false); + if (n->re_reg[0] == NULL) { + freenode(n); + return NULL; + } + n->re_reg[1] = make_regexp(exp->stptr, exp->stlen, true, true, false); + if (n->re_reg[1] == NULL) { + refree(n->re_reg[0]); freenode(n); return NULL; } @@ -5002,8 +5002,14 @@ make_regnode(int type, NODE *exp) n->re_cnt = 1; if (type == Node_regex) { - n->re_reg = make_regexp(exp->stptr, exp->stlen, false, true, false); - if (n->re_reg == NULL) { + n->re_reg[0] = make_regexp(exp->stptr, exp->stlen, false, true, false); + if (n->re_reg[0] == NULL) { + freenode(n); + return NULL; + } + n->re_reg[1] = make_regexp(exp->stptr, exp->stlen, true, true, false); + if (n->re_reg[1] == NULL) { + refree(n->re_reg[0]); freenode(n); return NULL; } @@ -318,8 +318,7 @@ static long read_default_timeout; static struct redirect *red_head = NULL; static NODE *RS = NULL; -static Regexp *RS_re_yes_case; /* regexp for RS when ignoring case */ -static Regexp *RS_re_no_case; /* regexp for RS when not ignoring case */ +static Regexp *RS_re[2]; /* index 0 - don't ignore case, index 1, do */ static Regexp *RS_regexp; static const char nonfatal[] = "NONFATAL"; @@ -3870,7 +3869,7 @@ set_RS() * set_IGNORECASE() relies on this routine to call * set_FS(). */ - RS_regexp = (IGNORECASE ? RS_re_no_case : RS_re_yes_case); + RS_regexp = RS_re[IGNORECASE]; goto set_FS; } unref(save_rs); @@ -3882,9 +3881,9 @@ set_RS() * Please do not remerge the if condition; hinders memory deallocation * in case of fatal error in make_regexp. */ - refree(RS_re_yes_case); /* NULL argument is ok */ - refree(RS_re_no_case); - RS_re_yes_case = RS_re_no_case = RS_regexp = NULL; + refree(RS_re[0]); /* NULL argument is ok */ + refree(RS_re[1]); + RS_re[0] = RS_re[1] = RS_regexp = NULL; if (RS->stlen == 0) { RS_is_null = true; @@ -3892,9 +3891,9 @@ set_RS() } else if (RS->stlen > 1 && ! do_traditional) { static bool warned = false; - RS_re_yes_case = make_regexp(RS->stptr, RS->stlen, false, true, true); - RS_re_no_case = make_regexp(RS->stptr, RS->stlen, true, true, true); - RS_regexp = (IGNORECASE ? RS_re_no_case : RS_re_yes_case); + RS_re[0] = make_regexp(RS->stptr, RS->stlen, false, true, true); + RS_re[1] = make_regexp(RS->stptr, RS->stlen, true, true, true); + RS_regexp = RS_re[IGNORECASE]; matchrec = rsrescan; @@ -85,7 +85,7 @@ long NF; long NR; long FNR; int BINMODE; -int IGNORECASE; +bool IGNORECASE; char *OFS; char *ORS; char *OFMT; @@ -349,50 +349,48 @@ re_update(NODE *t) NODE *t1; if (t->type == Node_val && (t->flags & REGEX) != 0) - return t->typed_re->re_reg; - - if ((t->re_flags & CASE) == IGNORECASE) { - /* regex was compiled with settings matching IGNORECASE */ - if ((t->re_flags & CONSTANT) != 0) { - /* it's a constant, so just return it as is */ - assert(t->type == Node_regex); - return t->re_reg; - } - t1 = t->re_exp; - if (t->re_text != NULL) { - /* if contents haven't changed, just return it */ - if (cmp_nodes(t->re_text, t1, true) == 0) - return t->re_reg; - /* things changed, fall through to recompile */ - unref(t->re_text); - } - /* get fresh copy of the text of the regexp */ - t->re_text = dupnode(t1); + return t->typed_re->re_reg[IGNORECASE]; + + if ((t->re_flags & CONSTANT) != 0) { + /* it's a constant, so just return it as is */ + assert(t->type == Node_regex); + return t->re_reg[IGNORECASE]; } - /* was compiled with different IGNORECASE or text changed */ + t1 = t->re_exp; + if (t->re_text != NULL) { + /* if contents haven't changed, just return it */ + if (cmp_nodes(t->re_text, t1, true) == 0) + return t->re_reg[IGNORECASE]; + /* things changed, fall through to recompile */ + unref(t->re_text); + } + /* get fresh copy of the text of the regexp */ + t->re_text = dupnode(t1); + + /* text changed */ /* free old */ - if (t->re_reg != NULL) - refree(t->re_reg); + if (t->re_reg[0] != NULL) + refree(t->re_reg[0]); + if (t->re_reg[1] != NULL) + refree(t->re_reg[1]); if (t->re_cnt > 0) t->re_cnt++; if (t->re_cnt > 10) t->re_cnt = 0; - if (t->re_text == NULL || (t->re_flags & CASE) != IGNORECASE) { + if (t->re_text == NULL) { /* reset regexp text if needed */ t1 = t->re_exp; unref(t->re_text); t->re_text = dupnode(t1); } /* compile it */ - t->re_reg = make_regexp(t->re_text->stptr, t->re_text->stlen, - IGNORECASE, t->re_cnt, true); - - /* clear case flag */ - t->re_flags &= ~CASE; - /* set current value of case flag */ - t->re_flags |= IGNORECASE; - return t->re_reg; + t->re_reg[0] = make_regexp(t->re_text->stptr, t->re_text->stlen, + false, t->re_cnt, true); + t->re_reg[1] = make_regexp(t->re_text->stptr, t->re_text->stlen, + true, t->re_cnt, true); + + return t->re_reg[IGNORECASE]; } /* resetup --- choose what kind of regexps we match */ @@ -881,8 +881,10 @@ free_bc_internal(INSTRUCTION *cp) case Op_match: case Op_nomatch: m = cp->memory; - if (m->re_reg != NULL) - refree(m->re_reg); + if (m->re_reg[0] != NULL) + refree(m->re_reg[0]); + if (m->re_reg[1] != NULL) + refree(m->re_reg[1]); if (m->re_exp != NULL) unref(m->re_exp); if (m->re_text != NULL) |