X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/c0a7edde10fbc1d280ea1864e34db865e09d4e7d..4c20f3d26bf2ae6e3af074f0f9fcdaefbd115028:/src/regex/regexec.c?ds=sidebyside diff --git a/src/regex/regexec.c b/src/regex/regexec.c index c3a29093f5..756cba131e 100644 --- a/src/regex/regexec.c +++ b/src/regex/regexec.c @@ -1,21 +1,21 @@ /* * re_*exec and friends - match REs * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * + * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. - * + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -27,8 +27,6 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Header: /projects/cvsroot/pgsql-server/src/backend/regex/regexec.c,v 1.23 2003/08/08 21:41:56 momjian Exp $ - * */ #include "regguts.h" @@ -36,174 +34,151 @@ /* lazy-DFA representation */ -struct arcp -{ /* "pointer" to an outarc */ +struct arcp { /* "pointer" to an outarc */ struct sset *ss; - color co; + color co; }; -struct sset -{ /* state set */ - unsigned *states; /* pointer to bitvector */ - unsigned hash; /* hash of bitvector */ -#define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw)) -#define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \ +struct sset { /* state set */ + unsigned *states; /* pointer to bitvector */ + unsigned hash; /* hash of bitvector */ +# define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw)) +# define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \ memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0)) - int flags; -#define STARTER 01 /* the initial state set */ -#define POSTSTATE 02 /* includes the goal state */ -#define LOCKED 04 /* locked in cache */ -#define NOPROGRESS 010 /* zero-progress state set */ - struct arcp ins; /* chain of inarcs pointing here */ - chr *lastseen; /* last entered on arrival here */ - struct sset **outs; /* outarc vector indexed by color */ - struct arcp *inchain; /* chain-pointer vector for outarcs */ + int flags; +# define STARTER 01 /* the initial state set */ +# define POSTSTATE 02 /* includes the goal state */ +# define LOCKED 04 /* locked in cache */ +# define NOPROGRESS 010 /* zero-progress state set */ + struct arcp ins; /* chain of inarcs pointing here */ + chr *lastseen; /* last entered on arrival here */ + struct sset **outs; /* outarc vector indexed by color */ + struct arcp *inchain; /* chain-pointer vector for outarcs */ }; -struct dfa -{ - int nssets; /* size of cache */ - int nssused; /* how many entries occupied yet */ - int nstates; /* number of states */ - int ncolors; /* length of outarc and inchain vectors */ - int wordsper; /* length of state-set bitvectors */ - struct sset *ssets; /* state-set cache */ - unsigned *statesarea; /* bitvector storage */ - unsigned *work; /* pointer to work area within statesarea */ - struct sset **outsarea; /* outarc-vector storage */ - struct arcp *incarea; /* inchain storage */ +struct dfa { + int nssets; /* size of cache */ + int nssused; /* how many entries occupied yet */ + int nstates; /* number of states */ + int ncolors; /* length of outarc and inchain vectors */ + int wordsper; /* length of state-set bitvectors */ + struct sset *ssets; /* state-set cache */ + unsigned *statesarea; /* bitvector storage */ + unsigned *work; /* pointer to work area within statesarea */ + struct sset **outsarea; /* outarc-vector storage */ + struct arcp *incarea; /* inchain storage */ struct cnfa *cnfa; struct colormap *cm; - chr *lastpost; /* location of last cache-flushed success */ - chr *lastnopr; /* location of last cache-flushed - * NOPROGRESS */ - struct sset *search; /* replacement-search-pointer memory */ - int cptsmalloced; /* were the areas individually malloced? */ - char *mallocarea; /* self, or master malloced area, or NULL */ + chr *lastpost; /* location of last cache-flushed success */ + chr *lastnopr; /* location of last cache-flushed NOPROGRESS */ + struct sset *search; /* replacement-search-pointer memory */ + int cptsmalloced; /* were the areas individually malloced? */ + char *mallocarea; /* self, or master malloced area, or NULL */ }; -#define WORK 1 /* number of work bitvectors needed */ +#define WORK 1 /* number of work bitvectors needed */ /* setup for non-malloc allocation for small cases */ -#define FEWSTATES 20 /* must be less than UBITS */ -#define FEWCOLORS 15 -struct smalldfa -{ - struct dfa dfa; - struct sset ssets[FEWSTATES * 2]; - unsigned statesarea[FEWSTATES * 2 + WORK]; - struct sset *outsarea[FEWSTATES * 2 * FEWCOLORS]; - struct arcp incarea[FEWSTATES * 2 * FEWCOLORS]; +#define FEWSTATES 20 /* must be less than UBITS */ +#define FEWCOLORS 15 +struct smalldfa { + struct dfa dfa; + struct sset ssets[FEWSTATES*2]; + unsigned statesarea[FEWSTATES*2 + WORK]; + struct sset *outsarea[FEWSTATES*2 * FEWCOLORS]; + struct arcp incarea[FEWSTATES*2 * FEWCOLORS]; }; - -#define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */ +#define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */ /* internal variables, bundled for easy passing around */ -struct vars -{ - regex_t *re; +struct vars { + regex_t *re; struct guts *g; - int eflags; /* copies of arguments */ - size_t nmatch; + int eflags; /* copies of arguments */ + size_t nmatch; regmatch_t *pmatch; rm_detail_t *details; - chr *start; /* start of string */ - chr *stop; /* just past end of string */ - int err; /* error code if any (0 none) */ - regoff_t *mem; /* memory vector for backtracking */ + chr *start; /* start of string */ + chr *stop; /* just past end of string */ + int err; /* error code if any (0 none) */ + regoff_t *mem; /* memory vector for backtracking */ struct smalldfa dfa1; struct smalldfa dfa2; }; - -#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */ -#define ISERR() VISERR(v) -#define VERR(vv,e) (((vv)->err) ? (vv)->err : ((vv)->err = (e))) -#define ERR(e) VERR(v, e) /* record an error */ -#define NOERR() {if (ISERR()) return v->err;} /* if error seen, return - * it */ -#define OFF(p) ((p) - v->start) -#define LOFF(p) ((long)OFF(p)) +#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */ +#define ISERR() VISERR(v) +#define VERR(vv,e) (((vv)->err) ? (vv)->err : ((vv)->err = (e))) +#define ERR(e) (void)VERR(v, e) /* record an error */ +#define NOERR() {if (ISERR()) return v->err;} /* if error seen, return it */ +#define OFF(p) ((p) - v->start) +#define LOFF(p) ((long)OFF(p)) /* * forward declarations */ +/* =====^!^===== begin forwards =====^!^===== */ +/* automatically gathered by fwd; do not hand-edit */ /* === regexec.c === */ -static int find(struct vars *, struct cnfa *, struct colormap *); -static int cfind(struct vars *, struct cnfa *, struct colormap *); -static int cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **); -static void zapsubs(regmatch_t *, size_t); -static void zapmem(struct vars *, struct subre *); -static void subset(struct vars *, struct subre *, chr *, chr *); -static int dissect(struct vars *, struct subre *, chr *, chr *); -static int condissect(struct vars *, struct subre *, chr *, chr *); -static int altdissect(struct vars *, struct subre *, chr *, chr *); -static int cdissect(struct vars *, struct subre *, chr *, chr *); -static int ccondissect(struct vars *, struct subre *, chr *, chr *); -static int crevdissect(struct vars *, struct subre *, chr *, chr *); -static int cbrdissect(struct vars *, struct subre *, chr *, chr *); -static int caltdissect(struct vars *, struct subre *, chr *, chr *); - +int exec _ANSI_ARGS_((regex_t *, CONST chr *, size_t, rm_detail_t *, size_t, regmatch_t [], int)); +static int find _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *)); +static int cfind _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *)); +static int cfindloop _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **)); +static VOID zapsubs _ANSI_ARGS_((regmatch_t *, size_t)); +static VOID zapmem _ANSI_ARGS_((struct vars *, struct subre *)); +static VOID subset _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); +static int dissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); +static int condissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); +static int altdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); +static int cdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); +static int ccondissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); +static int crevdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); +static int cbrdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); +static int caltdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); /* === rege_dfa.c === */ -static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *); -static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *); -static chr *lastcold(struct vars *, struct dfa *); -static struct dfa *newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *); -static void freedfa(struct dfa *); -static unsigned hash(unsigned *, int); -static struct sset *initialize(struct vars *, struct dfa *, chr *); -static struct sset *miss(struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *); -static int lacon(struct vars *, struct cnfa *, chr *, pcolor); -static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *); -static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *); +static chr *longest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, int *)); +static chr *shortest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)); +static chr *lastcold _ANSI_ARGS_((struct vars *, struct dfa *)); +static struct dfa *newdfa _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)); +static VOID freedfa _ANSI_ARGS_((struct dfa *)); +static unsigned hash _ANSI_ARGS_((unsigned *, int)); +static struct sset *initialize _ANSI_ARGS_((struct vars *, struct dfa *, chr *)); +static struct sset *miss _ANSI_ARGS_((struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *)); +static int lacon _ANSI_ARGS_((struct vars *, struct cnfa *, chr *, pcolor)); +static struct sset *getvacant _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *)); +static struct sset *pickss _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *)); +/* automatically gathered by fwd; do not hand-edit */ +/* =====^!^===== end forwards =====^!^===== */ + /* - * regexec - match regular expression + - exec - match regular expression + ^ int exec(regex_t *, CONST chr *, size_t, rm_detail_t *, + ^ size_t, regmatch_t [], int); */ int -regexec(regex_t *re, - const chr *string, - size_t nmatch, - regmatch_t pmatch[], - int flags) -{ - rm_detail_t det; - size_t nLen = 0; - chr* s2 = (chr*) string; - - if (string && *string) - { - while(*++s2); - } - - nLen = ((s2 - string) / sizeof(chr)); - - return wx_regexec(re, string, nLen, &det, nmatch, pmatch, flags); -} -int -wx_regexec(regex_t *re, - const chr *string, - size_t len, - rm_detail_t *details, - size_t nmatch, - regmatch_t pmatch[], - int flags) +exec(re, string, len, details, nmatch, pmatch, flags) +regex_t *re; +CONST chr *string; +size_t len; +rm_detail_t *details; +size_t nmatch; +regmatch_t pmatch[]; +int flags; { struct vars var; register struct vars *v = &var; - int st; - size_t n; - int backref; - -#define LOCALMAT 20 - regmatch_t mat[LOCALMAT]; - -#define LOCALMEM 40 - regoff_t mem[LOCALMEM]; + int st; + size_t n; + int backref; +# define LOCALMAT 20 + regmatch_t mat[LOCALMAT]; +# define LOCALMEM 40 + regoff_t mem[LOCALMEM]; /* sanity checks */ if (re == NULL || string == NULL || re->re_magic != REMAGIC) @@ -213,51 +188,46 @@ wx_regexec(regex_t *re, /* setup */ v->re = re; - v->g = (struct guts *) re->re_guts; - if ((v->g->cflags & REG_EXPECT) && details == NULL) + v->g = (struct guts *)re->re_guts; + if ((v->g->cflags®_EXPECT) && details == NULL) return REG_INVARG; - if (v->g->info & REG_UIMPOSSIBLE) + if (v->g->info®_UIMPOSSIBLE) return REG_NOMATCH; - backref = (v->g->info & REG_UBACKREF) ? 1 : 0; + backref = (v->g->info®_UBACKREF) ? 1 : 0; v->eflags = flags; - if (v->g->cflags & REG_NOSUB) - nmatch = 0; /* override client */ + if (v->g->cflags®_NOSUB) + nmatch = 0; /* override client */ v->nmatch = nmatch; - if (backref) - { + if (backref) { /* need work area */ if (v->g->nsub + 1 <= LOCALMAT) v->pmatch = mat; else - v->pmatch = (regmatch_t *) MALLOC((v->g->nsub + 1) * - sizeof(regmatch_t)); + v->pmatch = (regmatch_t *)MALLOC((v->g->nsub + 1) * + sizeof(regmatch_t)); if (v->pmatch == NULL) return REG_ESPACE; v->nmatch = v->g->nsub + 1; - } - else + } else v->pmatch = pmatch; v->details = details; - v->start = (chr *) string; - v->stop = (chr *) string + len; + v->start = (chr *)string; + v->stop = (chr *)string + len; v->err = 0; - if (backref) - { + if (backref) { /* need retry memory */ assert(v->g->ntree >= 0); - n = (size_t) v->g->ntree; + n = (size_t)v->g->ntree; if (n <= LOCALMEM) v->mem = mem; else - v->mem = (regoff_t *) MALLOC(n * sizeof(regoff_t)); - if (v->mem == NULL) - { + v->mem = (regoff_t *)MALLOC(n*sizeof(regoff_t)); + if (v->mem == NULL) { if (v->pmatch != pmatch && v->pmatch != mat) FREE(v->pmatch); return REG_ESPACE; } - } - else + } else v->mem = NULL; /* do it */ @@ -268,11 +238,10 @@ wx_regexec(regex_t *re, st = find(v, &v->g->tree->cnfa, &v->g->cmap); /* copy (portion of) match vector over if necessary */ - if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) - { + if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) { zapsubs(pmatch, nmatch); n = (nmatch < v->nmatch) ? nmatch : v->nmatch; - memcpy(VS(pmatch), VS(v->pmatch), n * sizeof(regmatch_t)); + memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t)); } /* clean up */ @@ -284,23 +253,24 @@ wx_regexec(regex_t *re, } /* - * find - find a match for the main NFA (no-complications case) + - find - find a match for the main NFA (no-complications case) + ^ static int find(struct vars *, struct cnfa *, struct colormap *); */ static int -find(struct vars * v, - struct cnfa * cnfa, - struct colormap * cm) +find(v, cnfa, cm) +struct vars *v; +struct cnfa *cnfa; +struct colormap *cm; { struct dfa *s; struct dfa *d; - chr *begin; - chr *end = NULL; - chr *cold; - chr *open; /* open and close of range of possible - * starts */ - chr *close; - int hitend; - int shorter = (v->g->tree->flags & SHORTER) ? 1 : 0; + chr *begin; + chr *end = NULL; + chr *cold; + chr *open; /* open and close of range of possible starts */ + chr *close; + int hitend; + int shorter = (v->g->tree->flags&SHORTER) ? 1 : 0; /* first, a shot with the search RE */ s = newdfa(v, &v->g->search, cm, &v->dfa1); @@ -308,21 +278,20 @@ find(struct vars * v, NOERR(); MDEBUG(("\nsearch at %ld\n", LOFF(v->start))); cold = NULL; - close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *) NULL); + close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *)NULL); freedfa(s); NOERR(); - if (v->g->cflags & REG_EXPECT) - { + if (v->g->cflags®_EXPECT) { assert(v->details != NULL); if (cold != NULL) v->details->rm_extend.rm_so = OFF(cold); else v->details->rm_extend.rm_so = OFF(v->stop); - v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ + v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ } - if (close == NULL) /* not found */ + if (close == NULL) /* not found */ return REG_NOMATCH; - if (v->nmatch == 0) /* found, don't need exact location */ + if (v->nmatch == 0) /* found, don't need exact location */ return REG_OKAY; /* find starting point and match */ @@ -333,19 +302,18 @@ find(struct vars * v, d = newdfa(v, cnfa, cm, &v->dfa1); assert(!(ISERR() && d != NULL)); NOERR(); - for (begin = open; begin <= close; begin++) - { + for (begin = open; begin <= close; begin++) { MDEBUG(("\nfind trying at %ld\n", LOFF(begin))); if (shorter) end = shortest(v, d, begin, begin, v->stop, - (chr **) NULL, &hitend); + (chr **)NULL, &hitend); else end = longest(v, d, begin, v->stop, &hitend); NOERR(); if (hitend && cold == NULL) cold = begin; if (end != NULL) - break; /* NOTE BREAK OUT */ + break; /* NOTE BREAK OUT */ } assert(end != NULL); /* search RE succeeded so loop should */ freedfa(d); @@ -354,15 +322,14 @@ find(struct vars * v, assert(v->nmatch > 0); v->pmatch[0].rm_so = OFF(begin); v->pmatch[0].rm_eo = OFF(end); - if (v->g->cflags & REG_EXPECT) - { + if (v->g->cflags®_EXPECT) { if (cold != NULL) v->details->rm_extend.rm_so = OFF(cold); else v->details->rm_extend.rm_so = OFF(v->stop); - v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ + v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ } - if (v->nmatch == 1) /* no need for submatches */ + if (v->nmatch == 1) /* no need for submatches */ return REG_OKAY; /* submatches */ @@ -371,94 +338,93 @@ find(struct vars * v, } /* - * cfind - find a match for the main NFA (with complications) + - cfind - find a match for the main NFA (with complications) + ^ static int cfind(struct vars *, struct cnfa *, struct colormap *); */ static int -cfind(struct vars * v, - struct cnfa * cnfa, - struct colormap * cm) +cfind(v, cnfa, cm) +struct vars *v; +struct cnfa *cnfa; +struct colormap *cm; { struct dfa *s; struct dfa *d; - chr *cold; - int ret; + chr *cold; + int ret; s = newdfa(v, &v->g->search, cm, &v->dfa1); NOERR(); d = newdfa(v, cnfa, cm, &v->dfa2); - if (ISERR()) - { + if (ISERR()) { assert(d == NULL); freedfa(s); return v->err; } - + cold = NULL;// WX: fix gcc warnings about cold being possibly uninitialized ret = cfindloop(v, cnfa, cm, d, s, &cold); freedfa(d); freedfa(s); NOERR(); - if (v->g->cflags & REG_EXPECT) - { + if (v->g->cflags®_EXPECT) { assert(v->details != NULL); if (cold != NULL) v->details->rm_extend.rm_so = OFF(cold); else v->details->rm_extend.rm_so = OFF(v->stop); - v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ + v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ } return ret; } /* - * cfindloop - the heart of cfind + - cfindloop - the heart of cfind + ^ static int cfindloop(struct vars *, struct cnfa *, struct colormap *, + ^ struct dfa *, struct dfa *, chr **); */ static int -cfindloop(struct vars * v, - struct cnfa * cnfa, - struct colormap * cm, - struct dfa * d, - struct dfa * s, - chr **coldp) /* where to put coldstart pointer */ +cfindloop(v, cnfa, cm, d, s, coldp) +struct vars *v; +struct cnfa *cnfa; +struct colormap *cm; +struct dfa *d; +struct dfa *s; +chr **coldp; /* where to put coldstart pointer */ { - chr *begin; - chr *end; - chr *cold; - chr *open; /* open and close of range of possible - * starts */ - chr *close; - chr *estart; - chr *estop; - int er; - int shorter = v->g->tree->flags & SHORTER; - int hitend; + chr *begin; + chr *end; + chr *cold; + chr *open; /* open and close of range of possible starts */ + chr *close; + chr *estart; + chr *estop; + int er; + int shorter = v->g->tree->flags&SHORTER; + int hitend; assert(d != NULL && s != NULL); cold = NULL; close = v->start; - do - { + do { MDEBUG(("\ncsearch at %ld\n", LOFF(close))); - close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL); + close = shortest(v, s, close, close, v->stop, &cold, (int *)NULL); if (close == NULL) break; /* NOTE BREAK */ assert(cold != NULL); open = cold; cold = NULL; MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close))); - for (begin = open; begin <= close; begin++) - { + for (begin = open; begin <= close; begin++) { MDEBUG(("\ncfind trying at %ld\n", LOFF(begin))); estart = begin; estop = v->stop; - for (;;) - { + for (;;) { if (shorter) end = shortest(v, d, begin, estart, - estop, (chr **) NULL, &hitend); + estop, (chr **)NULL, &hitend); else end = longest(v, d, begin, estop, - &hitend); + &hitend); if (hitend && cold == NULL) cold = begin; if (end == NULL) @@ -467,23 +433,19 @@ cfindloop(struct vars * v, zapsubs(v->pmatch, v->nmatch); zapmem(v, v->g->tree); er = cdissect(v, v->g->tree, begin, end); - if (er == REG_OKAY) - { - if (v->nmatch > 0) - { + if (er == REG_OKAY) { + if (v->nmatch > 0) { v->pmatch[0].rm_so = OFF(begin); v->pmatch[0].rm_eo = OFF(end); } *coldp = cold; return REG_OKAY; } - if (er != REG_NOMATCH) - { + if (er != REG_NOMATCH) { ERR(er); return er; } - if ((shorter) ? end == estop : end == begin) - { + if ((shorter) ? end == estop : end == begin) { /* no point in trying again */ *coldp = cold; return REG_NOMATCH; @@ -502,35 +464,37 @@ cfindloop(struct vars * v, } /* - * zapsubs - initialize the subexpression matches to "no match" + - zapsubs - initialize the subexpression matches to "no match" + ^ static VOID zapsubs(regmatch_t *, size_t); */ -static void -zapsubs(regmatch_t *p, - size_t n) +static VOID +zapsubs(p, n) +regmatch_t *p; +size_t n; { - size_t i; + size_t i; - for (i = n - 1; i > 0; i--) - { + for (i = n-1; i > 0; i--) { p[i].rm_so = -1; p[i].rm_eo = -1; } } /* - * zapmem - initialize the retry memory of a subtree to zeros + - zapmem - initialize the retry memory of a subtree to zeros + ^ static VOID zapmem(struct vars *, struct subre *); */ -static void -zapmem(struct vars * v, - struct subre * t) +static VOID +zapmem(v, t) +struct vars *v; +struct subre *t; { if (t == NULL) return; assert(v->mem != NULL); v->mem[t->retry] = 0; - if (t->op == '(') - { + if (t->op == '(') { assert(t->subno > 0); v->pmatch[t->subno].rm_so = -1; v->pmatch[t->subno].rm_eo = -1; @@ -543,18 +507,20 @@ zapmem(struct vars * v, } /* - * subset - set any subexpression relevant to a successful subre + - subset - set any subexpression relevant to a successful subre + ^ static VOID subset(struct vars *, struct subre *, chr *, chr *); */ -static void -subset(struct vars * v, - struct subre * sub, - chr *begin, - chr *end) +static VOID +subset(v, sub, begin, end) +struct vars *v; +struct subre *sub; +chr *begin; +chr *end; { - int n = sub->subno; + int n = sub->subno; assert(n > 0); - if ((size_t) n >= v->nmatch) + if ((size_t)n >= v->nmatch) return; MDEBUG(("setting %d\n", n)); @@ -563,61 +529,64 @@ subset(struct vars * v, } /* - * dissect - determine subexpression matches (uncomplicated case) + - dissect - determine subexpression matches (uncomplicated case) + ^ static int dissect(struct vars *, struct subre *, chr *, chr *); */ -static int /* regexec return code */ -dissect(struct vars * v, - struct subre * t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ +static int /* regexec return code */ +dissect(v, t, begin, end) +struct vars *v; +struct subre *t; +chr *begin; /* beginning of relevant substring */ +chr *end; /* end of same */ { assert(t != NULL); MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end))); - switch (t->op) - { - case '=': /* terminal node */ - assert(t->left == NULL && t->right == NULL); - return REG_OKAY; /* no action, parent did the work */ - break; - case '|': /* alternation */ - assert(t->left != NULL); - return altdissect(v, t, begin, end); - break; - case 'b': /* back ref -- shouldn't be calling us! */ - return REG_ASSERT; - break; - case '.': /* concatenation */ - assert(t->left != NULL && t->right != NULL); - return condissect(v, t, begin, end); - break; - case '(': /* capturing */ - assert(t->left != NULL && t->right == NULL); - assert(t->subno > 0); - subset(v, t, begin, end); - return dissect(v, t->left, begin, end); - break; - default: - return REG_ASSERT; - break; + switch (t->op) { + case '=': /* terminal node */ + assert(t->left == NULL && t->right == NULL); + return REG_OKAY; /* no action, parent did the work */ + break; + case '|': /* alternation */ + assert(t->left != NULL); + return altdissect(v, t, begin, end); + break; + case 'b': /* back ref -- shouldn't be calling us! */ + return REG_ASSERT; + break; + case '.': /* concatenation */ + assert(t->left != NULL && t->right != NULL); + return condissect(v, t, begin, end); + break; + case '(': /* capturing */ + assert(t->left != NULL && t->right == NULL); + assert(t->subno > 0); + subset(v, t, begin, end); + return dissect(v, t->left, begin, end); + break; + default: + return REG_ASSERT; + break; } } /* - * condissect - determine concatenation subexpression matches (uncomplicated) + - condissect - determine concatenation subexpression matches (uncomplicated) + ^ static int condissect(struct vars *, struct subre *, chr *, chr *); */ -static int /* regexec return code */ -condissect(struct vars * v, - struct subre * t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ +static int /* regexec return code */ +condissect(v, t, begin, end) +struct vars *v; +struct subre *t; +chr *begin; /* beginning of relevant substring */ +chr *end; /* end of same */ { struct dfa *d; struct dfa *d2; - chr *mid; - int i; - int shorter = (t->left->flags & SHORTER) ? 1 : 0; - chr *stop = (shorter) ? end : begin; + chr *mid; + int i; + int shorter = (t->left->flags&SHORTER) ? 1 : 0; + chr *stop = (shorter) ? end : begin; assert(t->op == '.'); assert(t->left != NULL && t->left->cnfa.nstates > 0); @@ -626,8 +595,7 @@ condissect(struct vars * v, d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); NOERR(); d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2); - if (ISERR()) - { + if (ISERR()) { assert(d2 == NULL); freedfa(d); return v->err; @@ -635,12 +603,11 @@ condissect(struct vars * v, /* pick a tentative midpoint */ if (shorter) - mid = shortest(v, d, begin, begin, end, (chr **) NULL, - (int *) NULL); + mid = shortest(v, d, begin, begin, end, (chr **)NULL, + (int *)NULL); else - mid = longest(v, d, begin, end, (int *) NULL); - if (mid == NULL) - { + mid = longest(v, d, begin, end, (int *)NULL); + if (mid == NULL) { freedfa(d); freedfa(d2); return REG_ASSERT; @@ -648,11 +615,9 @@ condissect(struct vars * v, MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); /* iterate until satisfaction or failure */ - while (longest(v, d2, mid, end, (int *) NULL) != end) - { + while (longest(v, d2, mid, end, (int *)NULL) != end) { /* that midpoint didn't work, find a new one */ - if (mid == stop) - { + if (mid == stop) { /* all possibilities exhausted! */ MDEBUG(("no midpoint!\n")); freedfa(d); @@ -660,12 +625,11 @@ condissect(struct vars * v, return REG_ASSERT; } if (shorter) - mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, - (int *) NULL); + mid = shortest(v, d, begin, mid+1, end, (chr **)NULL, + (int *)NULL); else - mid = longest(v, d, begin, mid - 1, (int *) NULL); - if (mid == NULL) - { + mid = longest(v, d, begin, mid-1, (int *)NULL); + if (mid == NULL) { /* failed to find a new one! */ MDEBUG(("failed midpoint!\n")); freedfa(d); @@ -686,168 +650,162 @@ condissect(struct vars * v, } /* - * altdissect - determine alternative subexpression matches (uncomplicated) + - altdissect - determine alternative subexpression matches (uncomplicated) + ^ static int altdissect(struct vars *, struct subre *, chr *, chr *); */ -static int /* regexec return code */ -altdissect(struct vars * v, - struct subre * t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ +static int /* regexec return code */ +altdissect(v, t, begin, end) +struct vars *v; +struct subre *t; +chr *begin; /* beginning of relevant substring */ +chr *end; /* end of same */ { struct dfa *d; - int i; + int i; assert(t != NULL); assert(t->op == '|'); - for (i = 0; t != NULL; t = t->right, i++) - { + for (i = 0; t != NULL; t = t->right, i++) { MDEBUG(("trying %dth\n", i)); assert(t->left != NULL && t->left->cnfa.nstates > 0); d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); if (ISERR()) return v->err; - if (longest(v, d, begin, end, (int *) NULL) == end) - { + if (longest(v, d, begin, end, (int *)NULL) == end) { MDEBUG(("success\n")); freedfa(d); return dissect(v, t->left, begin, end); } freedfa(d); } - return REG_ASSERT; /* none of them matched?!? */ + return REG_ASSERT; /* none of them matched?!? */ } /* - * cdissect - determine subexpression matches (with complications) - * The retry memory stores the offset of the trial midpoint from begin, + - cdissect - determine subexpression matches (with complications) + * The retry memory stores the offset of the trial midpoint from begin, * plus 1 so that 0 uniquely means "clean slate". + ^ static int cdissect(struct vars *, struct subre *, chr *, chr *); */ -static int /* regexec return code */ -cdissect(struct vars * v, - struct subre * t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ +static int /* regexec return code */ +cdissect(v, t, begin, end) +struct vars *v; +struct subre *t; +chr *begin; /* beginning of relevant substring */ +chr *end; /* end of same */ { - int er; + int er; assert(t != NULL); MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op)); - switch (t->op) - { - case '=': /* terminal node */ - assert(t->left == NULL && t->right == NULL); - return REG_OKAY; /* no action, parent did the work */ - break; - case '|': /* alternation */ - assert(t->left != NULL); - return caltdissect(v, t, begin, end); - break; - case 'b': /* back ref -- shouldn't be calling us! */ - assert(t->left == NULL && t->right == NULL); - return cbrdissect(v, t, begin, end); - break; - case '.': /* concatenation */ - assert(t->left != NULL && t->right != NULL); - return ccondissect(v, t, begin, end); - break; - case '(': /* capturing */ - assert(t->left != NULL && t->right == NULL); - assert(t->subno > 0); - er = cdissect(v, t->left, begin, end); - if (er == REG_OKAY) - subset(v, t, begin, end); - return er; - break; - default: - return REG_ASSERT; - break; + switch (t->op) { + case '=': /* terminal node */ + assert(t->left == NULL && t->right == NULL); + return REG_OKAY; /* no action, parent did the work */ + break; + case '|': /* alternation */ + assert(t->left != NULL); + return caltdissect(v, t, begin, end); + break; + case 'b': /* back ref -- shouldn't be calling us! */ + assert(t->left == NULL && t->right == NULL); + return cbrdissect(v, t, begin, end); + break; + case '.': /* concatenation */ + assert(t->left != NULL && t->right != NULL); + return ccondissect(v, t, begin, end); + break; + case '(': /* capturing */ + assert(t->left != NULL && t->right == NULL); + assert(t->subno > 0); + er = cdissect(v, t->left, begin, end); + if (er == REG_OKAY) + subset(v, t, begin, end); + return er; + break; + default: + return REG_ASSERT; + break; } } /* - * ccondissect - concatenation subexpression matches (with complications) - * The retry memory stores the offset of the trial midpoint from begin, + - ccondissect - concatenation subexpression matches (with complications) + * The retry memory stores the offset of the trial midpoint from begin, * plus 1 so that 0 uniquely means "clean slate". + ^ static int ccondissect(struct vars *, struct subre *, chr *, chr *); */ -static int /* regexec return code */ -ccondissect(struct vars * v, - struct subre * t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ +static int /* regexec return code */ +ccondissect(v, t, begin, end) +struct vars *v; +struct subre *t; +chr *begin; /* beginning of relevant substring */ +chr *end; /* end of same */ { struct dfa *d; struct dfa *d2; - chr *mid; - int er; + chr *mid; + int er; assert(t->op == '.'); assert(t->left != NULL && t->left->cnfa.nstates > 0); assert(t->right != NULL && t->right->cnfa.nstates > 0); - if (t->left->flags & SHORTER) /* reverse scan */ + if (t->left->flags&SHORTER) /* reverse scan */ return crevdissect(v, t, begin, end); d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); if (ISERR()) return v->err; d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC); - if (ISERR()) - { + if (ISERR()) { freedfa(d); return v->err; } MDEBUG(("cconcat %d\n", t->retry)); /* pick a tentative midpoint */ - if (v->mem[t->retry] == 0) - { - mid = longest(v, d, begin, end, (int *) NULL); - if (mid == NULL) - { + if (v->mem[t->retry] == 0) { + mid = longest(v, d, begin, end, (int *)NULL); + if (mid == NULL) { freedfa(d); freedfa(d2); return REG_NOMATCH; } MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); v->mem[t->retry] = (mid - begin) + 1; - } - else - { + } else { mid = begin + (v->mem[t->retry] - 1); MDEBUG(("working midpoint %ld\n", LOFF(mid))); } /* iterate until satisfaction or failure */ - for (;;) - { + for (;;) { /* try this midpoint on for size */ er = cdissect(v, t->left, begin, mid); if (er == REG_OKAY && - longest(v, d2, mid, end, (int *) NULL) == end && - (er = cdissect(v, t->right, mid, end)) == - REG_OKAY) - break; /* NOTE BREAK OUT */ - if (er != REG_OKAY && er != REG_NOMATCH) - { + longest(v, d2, mid, end, (int *)NULL) == end && + (er = cdissect(v, t->right, mid, end)) == + REG_OKAY) + break; /* NOTE BREAK OUT */ + if (er != REG_OKAY && er != REG_NOMATCH) { freedfa(d); freedfa(d2); return er; } /* that midpoint didn't work, find a new one */ - if (mid == begin) - { + if (mid == begin) { /* all possibilities exhausted */ MDEBUG(("%d no midpoint\n", t->retry)); freedfa(d); freedfa(d2); return REG_NOMATCH; } - mid = longest(v, d, begin, mid - 1, (int *) NULL); - if (mid == NULL) - { + mid = longest(v, d, begin, mid-1, (int *)NULL); + if (mid == NULL) { /* failed to find a new one */ MDEBUG(("%d failed midpoint\n", t->retry)); freedfa(d); @@ -868,86 +826,79 @@ ccondissect(struct vars * v, } /* - * crevdissect - determine backref shortest-first subexpression matches - * The retry memory stores the offset of the trial midpoint from begin, + - crevdissect - determine backref shortest-first subexpression matches + * The retry memory stores the offset of the trial midpoint from begin, * plus 1 so that 0 uniquely means "clean slate". + ^ static int crevdissect(struct vars *, struct subre *, chr *, chr *); */ -static int /* regexec return code */ -crevdissect(struct vars * v, - struct subre * t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ +static int /* regexec return code */ +crevdissect(v, t, begin, end) +struct vars *v; +struct subre *t; +chr *begin; /* beginning of relevant substring */ +chr *end; /* end of same */ { struct dfa *d; struct dfa *d2; - chr *mid; - int er; + chr *mid; + int er; assert(t->op == '.'); assert(t->left != NULL && t->left->cnfa.nstates > 0); assert(t->right != NULL && t->right->cnfa.nstates > 0); - assert(t->left->flags & SHORTER); + assert(t->left->flags&SHORTER); /* concatenation -- need to split the substring between parts */ d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); if (ISERR()) return v->err; d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC); - if (ISERR()) - { + if (ISERR()) { freedfa(d); return v->err; } MDEBUG(("crev %d\n", t->retry)); /* pick a tentative midpoint */ - if (v->mem[t->retry] == 0) - { - mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL); - if (mid == NULL) - { + if (v->mem[t->retry] == 0) { + mid = shortest(v, d, begin, begin, end, (chr **)NULL, (int *)NULL); + if (mid == NULL) { freedfa(d); freedfa(d2); return REG_NOMATCH; } MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); v->mem[t->retry] = (mid - begin) + 1; - } - else - { + } else { mid = begin + (v->mem[t->retry] - 1); MDEBUG(("working midpoint %ld\n", LOFF(mid))); } /* iterate until satisfaction or failure */ - for (;;) - { + for (;;) { /* try this midpoint on for size */ er = cdissect(v, t->left, begin, mid); if (er == REG_OKAY && - longest(v, d2, mid, end, (int *) NULL) == end && - (er = cdissect(v, t->right, mid, end)) == - REG_OKAY) - break; /* NOTE BREAK OUT */ - if (er != REG_OKAY && er != REG_NOMATCH) - { + longest(v, d2, mid, end, (int *)NULL) == end && + (er = cdissect(v, t->right, mid, end)) == + REG_OKAY) + break; /* NOTE BREAK OUT */ + if (er != REG_OKAY && er != REG_NOMATCH) { freedfa(d); freedfa(d2); return er; } /* that midpoint didn't work, find a new one */ - if (mid == end) - { + if (mid == end) { /* all possibilities exhausted */ MDEBUG(("%d no midpoint\n", t->retry)); freedfa(d); freedfa(d2); return REG_NOMATCH; } - mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL); - if (mid == NULL) - { + mid = shortest(v, d, begin, mid+1, end, (chr **)NULL, (int *)NULL); + if (mid == NULL) { /* failed to find a new one */ MDEBUG(("%d failed midpoint\n", t->retry)); freedfa(d); @@ -968,27 +919,29 @@ crevdissect(struct vars * v, } /* - * cbrdissect - determine backref subexpression matches + - cbrdissect - determine backref subexpression matches + ^ static int cbrdissect(struct vars *, struct subre *, chr *, chr *); */ -static int /* regexec return code */ -cbrdissect(struct vars * v, - struct subre * t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ +static int /* regexec return code */ +cbrdissect(v, t, begin, end) +struct vars *v; +struct subre *t; +chr *begin; /* beginning of relevant substring */ +chr *end; /* end of same */ { - int i; - int n = t->subno; - size_t len; - chr *paren; - chr *p; - chr *stop; - int min = t->min; - int max = t->max; + int i; + int n = t->subno; + size_t len; + chr *paren; + chr *p; + chr *stop; + int min = t->min; + int max = t->max; assert(t != NULL); assert(t->op == 'b'); assert(n >= 0); - assert((size_t) n < v->nmatch); + assert((size_t)n < v->nmatch); MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max)); @@ -1003,8 +956,7 @@ cbrdissect(struct vars * v, v->mem[t->retry] = 1; /* special-case zero-length string */ - if (len == 0) - { + if (len == 0) { if (begin == end) return REG_OKAY; return REG_NOMATCH; @@ -1012,44 +964,43 @@ cbrdissect(struct vars * v, /* and too-short string */ assert(end >= begin); - if ((size_t) (end - begin) < len) + if ((size_t)(end - begin) < len) return REG_NOMATCH; stop = end - len; /* count occurrences */ i = 0; - for (p = begin; p <= stop && (i < max || max == INFINITY); p += len) - { - if ((*v->g->compare) (paren, p, len) != 0) - break; + for (p = begin; p <= stop && (i < max || max == INFINITY); p += len) { + if ((*v->g->compare)(paren, p, len) != 0) + break; i++; } MDEBUG(("cbackref found %d\n", i)); /* and sort it out */ - if (p != end) /* didn't consume all of it */ + if (p != end) /* didn't consume all of it */ return REG_NOMATCH; if (min <= i && (i <= max || max == INFINITY)) return REG_OKAY; - return REG_NOMATCH; /* out of range */ + return REG_NOMATCH; /* out of range */ } /* - * caltdissect - determine alternative subexpression matches (w. complications) + - caltdissect - determine alternative subexpression matches (w. complications) + ^ static int caltdissect(struct vars *, struct subre *, chr *, chr *); */ -static int /* regexec return code */ -caltdissect(struct vars * v, - struct subre * t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ +static int /* regexec return code */ +caltdissect(v, t, begin, end) +struct vars *v; +struct subre *t; +chr *begin; /* beginning of relevant substring */ +chr *end; /* end of same */ { struct dfa *d; - int er; - -#define UNTRIED 0 /* not yet tried at all */ -#define TRYING 1 /* top matched, trying submatches */ -#define TRIED 2 /* top didn't match or submatches - * exhausted */ + int er; +# define UNTRIED 0 /* not yet tried at all */ +# define TRYING 1 /* top matched, trying submatches */ +# define TRIED 2 /* top didn't match or submatches exhausted */ if (t == NULL) return REG_NOMATCH; @@ -1060,13 +1011,11 @@ caltdissect(struct vars * v, MDEBUG(("calt n%d\n", t->retry)); assert(t->left != NULL); - if (v->mem[t->retry] == UNTRIED) - { + if (v->mem[t->retry] == UNTRIED) { d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); if (ISERR()) return v->err; - if (longest(v, d, begin, end, (int *) NULL) != end) - { + if (longest(v, d, begin, end, (int *)NULL) != end) { freedfa(d); v->mem[t->retry] = TRIED; return caltdissect(v, t->right, begin, end);