diff options
author | Corinna Vinschen <corinna@vinschen.de> | 2010-02-04 12:35:49 +0000 |
---|---|---|
committer | Corinna Vinschen <corinna@vinschen.de> | 2010-02-04 12:35:49 +0000 |
commit | e1e595a649d55a304416c1e8e23f99e0df58a452 (patch) | |
tree | cb83b739abde54551467f2b187416820bd25d54f /winsup/cygwin/regex/engine.c | |
parent | c8f7d3cb48efbcdeed082c16a6d5b90ae3c75791 (diff) | |
download | cygnal-e1e595a649d55a304416c1e8e23f99e0df58a452.tar.gz cygnal-e1e595a649d55a304416c1e8e23f99e0df58a452.tar.bz2 cygnal-e1e595a649d55a304416c1e8e23f99e0df58a452.zip |
Replace regex files with multibyte-aware version from FreeBSD.
* Makefile.in (install-headers): Remove extra command to install
regex.h.
(uninstall-headers): Remove extra command to uninstall regex.h.
* nlsfuncs.cc (collate_lcid): Make externally available to allow
access to collation internals from regex functions.
(collate_charset): Ditto.
* wchar.h: Add __cplusplus guards to make C-clean.
* include/regex.h: New file, replacing regex/regex.h. Remove UCB
advertising clause.
* regex/COPYRIGHT: Accommodate BSD license. Remove UCB advertising
clause.
* regex/cclass.h: Remove.
* regex/cname.h: New file from FreeBSD.
* regex/engine.c: Ditto.
(NONCHAR): Tweak for Cygwin.
* regex/engine.ih: Remove.
* regex/mkh: Remove.
* regex/regcomp.c: New file from FreeBSD. Tweak slightly for Cygwin.
Import required collate internals from nlsfuncs.cc.
(p_ere_exp): Add GNU-specific \< and \> handling for word boundaries.
(p_simp_re): Ditto.
(__collate_range_cmp): Define.
(p_b_term): Use Cygwin-specific collate internals.
(findmust): Ditto.
* regex/regcomp.ih: Remove.
* regex/regerror.c: New file from FreeBSD. Fix a few compiler warnings.
* regex/regerror.ih: Remove.
* regex/regex.7: New file from FreeBSD. Remove UCB advertising clause.
* regex/regex.h: Remove. Replaced by include/regex.h.
* regex/regexec.c: New file from FreeBSD. Fix a few compiler warnings.
* regex/regfree.c: New file from FreeBSD.
* regex/tests: Remove.
* regex/utils.h: New file from FreeBSD.
Diffstat (limited to 'winsup/cygwin/regex/engine.c')
-rw-r--r-- | winsup/cygwin/regex/engine.c | 552 |
1 files changed, 365 insertions, 187 deletions
diff --git a/winsup/cygwin/regex/engine.c b/winsup/cygwin/regex/engine.c index 42c91dc0f..ca24cc50c 100644 --- a/winsup/cygwin/regex/engine.c +++ b/winsup/cygwin/regex/engine.c @@ -1,3 +1,41 @@ +/*- + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)engine.c 8.5 (Berkeley) 3/20/94 + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD: src/lib/libc/regex/engine.c,v 1.23 2009/09/16 06:32:23 dds Exp $"); + /* * The matching engine and friends. This file is #included by regexec.c * after suitable #defines of a variety of macros used herein, so that @@ -27,25 +65,77 @@ #define at lat #define match lmat #endif +#ifdef MNAMES +#define matcher mmatcher +#define fast mfast +#define slow mslow +#define dissect mdissect +#define backref mbackref +#define step mstep +#define print mprint +#define at mat +#define match mmat +#endif /* another structure passed up and down to avoid zillions of parameters */ struct match { struct re_guts *g; int eflags; regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ - char *offp; /* offsets work from here */ - char *beginp; /* start of string -- virtual NUL precedes */ - char *endp; /* end of string -- virtual NUL here */ - char *coldp; /* can be no match starting before here */ - char **lastpos; /* [nplus+1] */ + const char *offp; /* offsets work from here */ + const char *beginp; /* start of string -- virtual NUL precedes */ + const char *endp; /* end of string -- virtual NUL here */ + const char *coldp; /* can be no match starting before here */ + const char **lastpos; /* [nplus+1] */ STATEVARS; states st; /* current states */ states fresh; /* states for a fresh start */ states tmp; /* temporary */ states empty; /* empty set of states */ + mbstate_t mbs; /* multibyte conversion state */ }; -#include "engine.ih" +/* ========= begin header generated by ./mkh ========= */ +#ifdef __cplusplus +extern "C" { +#endif + +/* === engine.c === */ +static int matcher(struct re_guts *g, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags); +static const char *dissect(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst); +static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev, 
int); +static const char *fast(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst); +static const char *slow(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst); +static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft); +#define MAX_RECURSION 100 +#define BOL (OUT-1) +#define EOL (BOL-1) +#define BOLEOL (BOL-2) +#define NOTHING (BOL-3) +#define BOW (BOL-4) +#define EOW (BOL-5) +#define BADCHAR (BOL-6) +#ifdef __CYGWIN__ +/* In contrast to BSD, wint_t on Cygwin is unsigned. This breaks this test, + unless the compared values are casted to signed. */ +#define NONCHAR(c) ((int)(c) <= (int)OUT) +#else +#define NONCHAR(c) ((c) <= OUT) +#endif +#ifdef REDEBUG +static void print(struct match *m, const char *caption, states st, int ch, FILE *d); +#endif +#ifdef REDEBUG +static void at(struct match *m, const char *title, const char *start, const char *stop, sopno startst, sopno stopst); +#endif +#ifdef REDEBUG +static const char *pchar(int ch); +#endif + +#ifdef __cplusplus +} +#endif +/* ========= end header generated by ./mkh ========= */ #ifdef REDEBUG #define SP(t, s, c) print(m, t, s, c, stdout) @@ -59,26 +149,32 @@ struct match { /* - matcher - the actual matching engine - == static int matcher(register struct re_guts *g, char *string, \ + == static int matcher(struct re_guts *g, const char *string, \ == size_t nmatch, regmatch_t pmatch[], int eflags); */ static int /* 0 success, REG_NOMATCH failure */ -matcher(g, string, nmatch, pmatch, eflags) -register struct re_guts *g; -char *string; -size_t nmatch; -regmatch_t pmatch[]; -int eflags; +matcher(struct re_guts *g, + const char *string, + size_t nmatch, + regmatch_t pmatch[], + int eflags) { - register char *endp; - register size_t i; + const char *endp; + int i; struct match mv; - register struct match *m = &mv; - register char *dp; - const register sopno gf = g->firststate+1; /* +1 for OEND */ - const register 
sopno gl = g->laststate; - char *start; - char *stop; + struct match *m = &mv; + const char *dp; + const sopno gf = g->firststate+1; /* +1 for OEND */ + const sopno gl = g->laststate; + const char *start; + const char *stop; + /* Boyer-Moore algorithms variables */ + const char *pp; + int cj, mj; + const char *mustfirst; + const char *mustlast; + int *matchjump; + int *charjump; /* simplify the situation where possible */ if (g->cflags&REG_NOSUB) @@ -95,12 +191,46 @@ int eflags; /* prescreening; this does wonders for this rather slow code */ if (g->must != NULL) { - for (dp = start; dp < stop; dp++) - if (*dp == g->must[0] && stop - dp >= g->mlen && - memcmp(dp, g->must, (size_t)g->mlen) == 0) - break; - if (dp == stop) /* we didn't find g->must */ - return(REG_NOMATCH); + if (g->charjump != NULL && g->matchjump != NULL) { + mustfirst = g->must; + mustlast = g->must + g->mlen - 1; + charjump = g->charjump; + matchjump = g->matchjump; + pp = mustlast; + for (dp = start+g->mlen-1; dp < stop;) { + /* Fast skip non-matches */ + while (dp < stop && charjump[(int)*dp]) + dp += charjump[(int)*dp]; + + if (dp >= stop) + break; + + /* Greedy matcher */ + /* We depend on not being used for + * for strings of length 1 + */ + while (*--dp == *--pp && pp != mustfirst); + + if (*dp == *pp) + break; + + /* Jump to next possible match */ + mj = matchjump[pp - mustfirst]; + cj = charjump[(int)*dp]; + dp += (cj < mj ? mj : cj); + pp = mustlast; + } + if (pp != mustfirst) + return(REG_NOMATCH); + } else { + for (dp = start; dp < stop; dp++) + if (*dp == g->must[0] && + stop - dp >= g->mlen && + memcmp(dp, g->must, (size_t)g->mlen) == 0) + break; + if (dp == stop) /* we didn't find g->must */ + return(REG_NOMATCH); + } } /* match struct setup */ @@ -117,11 +247,22 @@ int eflags; SETUP(m->tmp); SETUP(m->empty); CLEAR(m->empty); + ZAPSTATE(&m->mbs); + + /* Adjust start according to moffset, to speed things up */ + if (g->moffset > -1) + start = ((dp - g->moffset) < start) ? 
start : dp - g->moffset; + + SP("mloop", m->st, *start); /* this loop does only one repetition except for backrefs */ for (;;) { endp = fast(m, start, stop, gf, gl); if (endp == NULL) { /* a miss */ + if (m->pmatch != NULL) + free((char *)m->pmatch); + if (m->lastpos != NULL) + free((char *)m->lastpos); STATETEARDOWN(m); return(REG_NOMATCH); } @@ -136,7 +277,8 @@ int eflags; if (endp != NULL) break; assert(m->coldp < m->endp); - m->coldp++; + m->coldp += XMBRTOWC(NULL, m->coldp, + m->endp - m->coldp, &m->mbs, 0); } if (nmatch == 1 && !g->backrefs) break; /* no further info needed */ @@ -156,15 +298,15 @@ int eflags; dp = dissect(m, m->coldp, endp, gf, gl); } else { if (g->nplus > 0 && m->lastpos == NULL) - m->lastpos = (char **)malloc((g->nplus+1) * - sizeof(char *)); + m->lastpos = malloc((g->nplus+1) * + sizeof(const char *)); if (g->nplus > 0 && m->lastpos == NULL) { free(m->pmatch); STATETEARDOWN(m); return(REG_ESPACE); } NOTE("backref dissect"); - dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); + dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); } if (dp != NULL) break; @@ -187,7 +329,7 @@ int eflags; } #endif NOTE("backoff dissect"); - dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); + dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); } assert(dp == NULL || dp == endp); if (dp != NULL) /* found a shorter one */ @@ -195,7 +337,9 @@ int eflags; /* despite initial appearances, there is no match here */ NOTE("false alarm"); - start = m->coldp + 1; /* recycle starting later */ + /* recycle starting later */ + start = m->coldp + XMBRTOWC(NULL, m->coldp, + stop - m->coldp, &m->mbs, 0); assert(start <= stop); } @@ -225,30 +369,29 @@ int eflags; /* - dissect - figure out what matched what, no back references - == static char *dissect(register struct match *m, char *start, \ - == char *stop, sopno startst, sopno stopst); + == static const char *dissect(struct match *m, const char *start, \ + == const char *stop, sopno startst, sopno stopst); */ -static 
char * /* == stop (success) always */ -dissect(m, start, stop, startst, stopst) -register struct match *m; -char *start; -char *stop; -sopno startst; -sopno stopst; +static const char * /* == stop (success) always */ +dissect(struct match *m, + const char *start, + const char *stop, + sopno startst, + sopno stopst) { - register int i; - register sopno ss; /* start sop of current subRE */ - register sopno es; /* end sop of current subRE */ - register char *sp; /* start of string matched by it */ - register char *stp; /* string matched by it cannot pass here */ - register char *rest; /* start of rest of string */ - register char *tail; /* string unmatched by rest of RE */ - register sopno ssub; /* start sop of subsubRE */ - register sopno esub; /* end sop of subsubRE */ - register char *ssp; /* start of string matched by subsubRE */ - register char *sep; /* end of string matched by subsubRE */ - register char *oldssp; /* previous ssp */ - register char *dp; + int i; + sopno ss; /* start sop of current subRE */ + sopno es; /* end sop of current subRE */ + const char *sp; /* start of string matched by it */ + const char *stp; /* string matched by it cannot pass here */ + const char *rest; /* start of rest of string */ + const char *tail; /* string unmatched by rest of RE */ + sopno ssub; /* start sop of subsubRE */ + sopno esub; /* end sop of subsubRE */ + const char *ssp; /* start of string matched by subsubRE */ + const char *sep; /* end of string matched by subsubRE */ + const char *oldssp; /* previous ssp */ + const char *dp; AT("diss", start, stop, startst, stopst); sp = start; @@ -273,7 +416,7 @@ sopno stopst; assert(nope); break; case OCHAR: - sp++; + sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0); break; case OBOL: case OEOL: @@ -282,7 +425,7 @@ sopno stopst; break; case OANY: case OANYOF: - sp++; + sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0); break; case OBACK_: case O_BACK: @@ -413,30 +556,31 @@ sopno stopst; /* - backref - figure out what matched 
what, figuring in back references - == static char *backref(register struct match *m, char *start, \ - == char *stop, sopno startst, sopno stopst, sopno lev); + == static const char *backref(struct match *m, const char *start, \ + == const char *stop, sopno startst, sopno stopst, sopno lev); */ -static char * /* == stop (success) or NULL (failure) */ -backref(m, start, stop, startst, stopst, lev) -register struct match *m; -char *start; -char *stop; -sopno startst; -sopno stopst; -sopno lev; /* PLUS nesting level */ +static const char * /* == stop (success) or NULL (failure) */ +backref(struct match *m, + const char *start, + const char *stop, + sopno startst, + sopno stopst, + sopno lev, /* PLUS nesting level */ + int rec) { - register int i; - register sopno ss; /* start sop of current subRE */ - register char *sp; /* start of string matched by it */ - register sopno ssub; /* start sop of subsubRE */ - register sopno esub; /* end sop of subsubRE */ - register char *ssp; /* start of string matched by subsubRE */ - register char *dp; - register size_t len; - register int hard; - register sop s; - register regoff_t offsave; - register cset *cs; + int i; + sopno ss; /* start sop of current subRE */ + const char *sp; /* start of string matched by it */ + sopno ssub; /* start sop of subsubRE */ + sopno esub; /* end sop of subsubRE */ + const char *ssp; /* start of string matched by subsubRE */ + const char *dp; + size_t len; + int hard; + sop s; + regoff_t offsave; + cset *cs; + wint_t wc; AT("back", start, stop, startst, stopst); sp = start; @@ -446,17 +590,25 @@ sopno lev; /* PLUS nesting level */ for (ss = startst; !hard && ss < stopst; ss++) switch (OP(s = m->g->strip[ss])) { case OCHAR: - if (sp == stop || *sp++ != (char)OPND(s)) + if (sp == stop) + return(NULL); + sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR); + if (wc != OPND(s)) return(NULL); break; case OANY: if (sp == stop) return(NULL); - sp++; + sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR); + 
if (wc == BADCHAR) + return (NULL); break; case OANYOF: + if (sp == stop) + return (NULL); cs = &m->g->sets[OPND(s)]; - if (sp == stop || !CHIN(cs, *sp++)) + sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR); + if (wc == BADCHAR || !CHIN(cs, wc)) return(NULL); break; case OBOL: @@ -529,6 +681,8 @@ sopno lev; /* PLUS nesting level */ return(NULL); assert(m->pmatch[i].rm_so != -1); len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; + if (len == 0 && rec++ > MAX_RECURSION) + return(NULL); assert(stop - m->beginp >= len); if (sp > stop - len) return(NULL); /* not enough left to match */ @@ -537,28 +691,28 @@ sopno lev; /* PLUS nesting level */ return(NULL); while (m->g->strip[ss] != SOP(O_BACK, i)) ss++; - return(backref(m, sp+len, stop, ss+1, stopst, lev)); + return(backref(m, sp+len, stop, ss+1, stopst, lev, rec)); break; case OQUEST_: /* to null or not */ - dp = backref(m, sp, stop, ss+1, stopst, lev); + dp = backref(m, sp, stop, ss+1, stopst, lev, rec); if (dp != NULL) return(dp); /* not */ - return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); + return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec)); break; case OPLUS_: assert(m->lastpos != NULL); assert(lev+1 <= m->g->nplus); m->lastpos[lev+1] = sp; - return(backref(m, sp, stop, ss+1, stopst, lev+1)); + return(backref(m, sp, stop, ss+1, stopst, lev+1, rec)); break; case O_PLUS: if (sp == m->lastpos[lev]) /* last pass matched null */ - return(backref(m, sp, stop, ss+1, stopst, lev-1)); + return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); /* try another pass */ m->lastpos[lev] = sp; - dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); + dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec); if (dp == NULL) - return(backref(m, sp, stop, ss+1, stopst, lev-1)); + return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); else return(dp); break; @@ -567,7 +721,7 @@ sopno lev; /* PLUS nesting level */ esub = ss + OPND(s) - 1; assert(OP(m->g->strip[esub]) == OOR1); for (;;) { /* find first matching 
branch */ - dp = backref(m, sp, stop, ssub, esub, lev); + dp = backref(m, sp, stop, ssub, esub, lev, rec); if (dp != NULL) return(dp); /* that one missed, try next one */ @@ -588,7 +742,7 @@ sopno lev; /* PLUS nesting level */ assert(0 < i && i <= m->g->nsub); offsave = m->pmatch[i].rm_so; m->pmatch[i].rm_so = sp - m->offp; - dp = backref(m, sp, stop, ss+1, stopst, lev); + dp = backref(m, sp, stop, ss+1, stopst, lev, rec); if (dp != NULL) return(dp); m->pmatch[i].rm_so = offsave; @@ -599,7 +753,7 @@ sopno lev; /* PLUS nesting level */ assert(0 < i && i <= m->g->nsub); offsave = m->pmatch[i].rm_eo; m->pmatch[i].rm_eo = sp - m->offp; - dp = backref(m, sp, stop, ss+1, stopst, lev); + dp = backref(m, sp, stop, ss+1, stopst, lev, rec); if (dp != NULL) return(dp); m->pmatch[i].rm_eo = offsave; @@ -613,42 +767,57 @@ sopno lev; /* PLUS nesting level */ /* "can't happen" */ assert(nope); /* NOTREACHED */ - return((char *)NULL); /* dummy */ + return "shut up gcc"; } /* - fast - step through the string at top speed - == static char *fast(register struct match *m, char *start, \ - == char *stop, sopno startst, sopno stopst); + == static const char *fast(struct match *m, const char *start, \ + == const char *stop, sopno startst, sopno stopst); */ -static char * /* where tentative match ended, or NULL */ -fast(m, start, stop, startst, stopst) -register struct match *m; -char *start; -char *stop; -sopno startst; -sopno stopst; +static const char * /* where tentative match ended, or NULL */ +fast( struct match *m, + const char *start, + const char *stop, + sopno startst, + sopno stopst) { - register states st = m->st; - register states fresh = m->fresh; - register states tmp = m->tmp; - register char *p = start; - register int c = (start == m->beginp) ? 
OUT : *(start-1); - register int lastc; /* previous c */ - register int flagch; - register int i; - register char *coldp; /* last p after which no match was underway */ + states st = m->st; + states fresh = m->fresh; + states tmp = m->tmp; + const char *p = start; + wint_t c; + wint_t lastc; /* previous c */ + wint_t flagch; + int i; + const char *coldp; /* last p after which no match was underway */ + size_t clen; CLEAR(st); SET1(st, startst); + SP("fast", st, *p); st = step(m->g, startst, stopst, st, NOTHING, st); ASSIGN(fresh, st); SP("start", st, *p); coldp = NULL; + if (start == m->beginp) + c = OUT; + else { + /* + * XXX Wrong if the previous character was multi-byte. + * Newline never is (in encodings supported by FreeBSD), + * so this only breaks the ISWORD tests below. + */ + c = (uch)*(start - 1); + } for (;;) { /* next character */ lastc = c; - c = (p == m->endp) ? OUT : *p; + if (p == m->endp) { + clen = 0; + c = OUT; + } else + clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR); if (EQ(st, fresh)) coldp = p; @@ -686,7 +855,7 @@ sopno stopst; } /* are we done? 
*/ - if (ISSET(st, stopst) || p == stop) + if (ISSET(st, stopst) || p == stop || clen > stop - p) break; /* NOTE BREAK OUT */ /* no, we must deal with this character */ @@ -696,39 +865,39 @@ sopno stopst; st = step(m->g, startst, stopst, tmp, c, st); SP("aft", st, c); assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); - p++; + p += clen; } assert(coldp != NULL); m->coldp = coldp; if (ISSET(st, stopst)) - return(p+1); + return(p+XMBRTOWC(NULL, p, stop - p, &m->mbs, 0)); else return(NULL); } /* - slow - step through the string more deliberately - == static char *slow(register struct match *m, char *start, \ - == char *stop, sopno startst, sopno stopst); + == static const char *slow(struct match *m, const char *start, \ + == const char *stop, sopno startst, sopno stopst); */ -static char * /* where it ended */ -slow(m, start, stop, startst, stopst) -register struct match *m; -char *start; -char *stop; -sopno startst; -sopno stopst; +static const char * /* where it ended */ +slow( struct match *m, + const char *start, + const char *stop, + sopno startst, + sopno stopst) { - register states st = m->st; - register states empty = m->empty; - register states tmp = m->tmp; - register char *p = start; - register int c = (start == m->beginp) ? OUT : *(start-1); - register int lastc; /* previous c */ - register int flagch; - register int i; - register char *matchp; /* last p at which a match ended */ + states st = m->st; + states empty = m->empty; + states tmp = m->tmp; + const char *p = start; + wint_t c; + wint_t lastc; /* previous c */ + wint_t flagch; + int i; + const char *matchp; /* last p at which a match ended */ + size_t clen; AT("slow", start, stop, startst, stopst); CLEAR(st); @@ -736,10 +905,24 @@ sopno stopst; SP("sstart", st, *p); st = step(m->g, startst, stopst, st, NOTHING, st); matchp = NULL; + if (start == m->beginp) + c = OUT; + else { + /* + * XXX Wrong if the previous character was multi-byte. 
+ * Newline never is (in encodings supported by FreeBSD), + * so this only breaks the ISWORD tests below. + */ + c = (uch)*(start - 1); + } for (;;) { /* next character */ lastc = c; - c = (p == m->endp) ? OUT : *p; + if (p == m->endp) { + c = OUT; + clen = 0; + } else + clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR); /* is there an EOL and/or BOL between lastc and c? */ flagch = '\0'; @@ -777,7 +960,7 @@ sopno stopst; /* are we done? */ if (ISSET(st, stopst)) matchp = p; - if (EQ(st, empty) || p == stop) + if (EQ(st, empty) || p == stop || clen > stop - p) break; /* NOTE BREAK OUT */ /* no, we must deal with this character */ @@ -787,7 +970,7 @@ sopno stopst; st = step(m->g, startst, stopst, tmp, c, st); SP("saft", st, c); assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); - p++; + p += clen; } return(matchp); @@ -796,33 +979,31 @@ sopno stopst; /* - step - map set of states reachable before char to set reachable after - == static states step(register struct re_guts *g, sopno start, sopno stop, \ - == register states bef, int ch, register states aft); - == #define BOL (OUT+1) - == #define EOL (BOL+1) - == #define BOLEOL (BOL+2) - == #define NOTHING (BOL+3) - == #define BOW (BOL+4) - == #define EOW (BOL+5) - == #define CODEMAX (BOL+5) // highest code used - == #define NONCHAR(c) ((c) > CHAR_MAX) - == #define NNONCHAR (CODEMAX-CHAR_MAX) + == static states step(struct re_guts *g, sopno start, sopno stop, \ + == states bef, int ch, states aft); + == #define BOL (OUT-1) + == #define EOL (BOL-1) + == #define BOLEOL (BOL-2) + == #define NOTHING (BOL-3) + == #define BOW (BOL-4) + == #define EOW (BOL-5) + == #define BADCHAR (BOL-6) + == #define NONCHAR(c) ((c) <= OUT) */ static states -step(g, start, stop, bef, ch, aft) -register struct re_guts *g; -sopno start; /* start state within strip */ -sopno stop; /* state after stop state within strip */ -register states bef; /* states reachable before */ -int ch; /* character or NONCHAR code */ -register states 
aft; /* states already known reachable after */ +step(struct re_guts *g, + sopno start, /* start state within strip */ + sopno stop, /* state after stop state within strip */ + states bef, /* states reachable before */ + wint_t ch, /* character or NONCHAR code */ + states aft) /* states already known reachable after */ { - register cset *cs; - register sop s; - register sopno pc; - register onestate here; /* note, macros know this name */ - register sopno look; - register long i; + cset *cs; + sop s; + sopno pc; + onestate here; /* note, macros know this name */ + sopno look; + int i; for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { s = g->strip[pc]; @@ -832,8 +1013,8 @@ register states aft; /* states already known reachable after */ break; case OCHAR: /* only characters can match */ - assert(!NONCHAR(ch) || ch != (char)OPND(s)); - if (ch == (char)OPND(s)) + assert(!NONCHAR(ch) || ch != OPND(s)); + if (ch == OPND(s)) FWD(aft, bef, 1); break; case OBOL: @@ -900,7 +1081,7 @@ register states aft; /* states already known reachable after */ OP(s = g->strip[pc+look]) != O_CH; look += OPND(s)) assert(OP(s) == OOR2); - FWD(aft, aft, look); + FWD(aft, aft, look + 1); } break; case OOR2: /* propagate OCH_'s marking */ @@ -926,21 +1107,20 @@ register states aft; /* states already known reachable after */ /* - print - print a set of states == #ifdef REDEBUG - == static void print(struct match *m, char *caption, states st, \ + == static void print(struct match *m, const char *caption, states st, \ == int ch, FILE *d); == #endif */ static void -print(m, caption, st, ch, d) -struct match *m; -char *caption; -states st; -int ch; -FILE *d; +print(struct match *m, + const char *caption, + states st, + int ch, + FILE *d) { - register struct re_guts *g = m->g; - register int i; - register int first = 1; + struct re_guts *g = m->g; + int i; + int first = 1; if (!(m->eflags&REG_TRACE)) return; @@ -959,18 +1139,17 @@ FILE *d; /* - at - print current situation == #ifdef 
REDEBUG - == static void at(struct match *m, char *title, char *start, char *stop, \ - == sopno startst, sopno stopst); + == static void at(struct match *m, const char *title, const char *start, \ + == const char *stop, sopno startst, sopno stopst); == #endif */ static void -at(m, title, start, stop, startst, stopst) -struct match *m; -char *title; -char *start; -char *stop; -sopno startst; -sopno stopst; +at( struct match *m, + const char *title, + const char *start, + const char *stop, + sopno startst, + sopno stopst) { if (!(m->eflags&REG_TRACE)) return; @@ -985,7 +1164,7 @@ sopno stopst; /* - pchar - make a character printable == #ifdef REDEBUG - == static char *pchar(int ch); + == static const char *pchar(int ch); == #endif * * Is this identical to regchar() over in debug.c? Well, yes. But a @@ -993,13 +1172,12 @@ sopno stopst; * a matching debug.o, and this is convenient. It all disappears in * the non-debug compilation anyway, so it doesn't matter much. */ -static char * /* -> representation */ -pchar(ch) -int ch; +static const char * /* -> representation */ +pchar(int ch) { static char pbuf[10]; - if (isprint(ch) || ch == ' ') + if (isprint((uch)ch) || ch == ' ') sprintf(pbuf, "%c", ch); else sprintf(pbuf, "\\%o", ch); |