X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/451458f4c989298d91b85a2bbe5337a250ec7e60..d1558f3d532c2e1c15e385d17c6200069f72714b:/src/regex/regexec.c diff --git a/src/regex/regexec.c b/src/regex/regexec.c index 41d49bdab5..dbae952f71 100644 --- a/src/regex/regexec.c +++ b/src/regex/regexec.c @@ -1,21 +1,21 @@ /* * re_*exec and friends - match REs * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * + * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. - * + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -27,6 +27,8 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * + * $Header: /projects/cvsroot/pgsql-server/src/backend/regex/regexec.c,v 1.23 2003/08/08 21:41:56 momjian Exp $ + * */ #include "regguts.h" @@ -34,151 +36,164 @@ /* lazy-DFA representation */ -struct arcp { /* "pointer" to an outarc */ +struct arcp +{ /* "pointer" to an outarc */ struct sset *ss; - color co; + color co; }; -struct sset { /* state set */ - unsigned *states; /* pointer to bitvector */ - unsigned hash; /* hash of bitvector */ -# define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw)) -# define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \ +struct sset +{ /* state set */ + unsigned *states; /* pointer to bitvector */ + unsigned hash; /* hash of bitvector */ +#define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw)) +#define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \ memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0)) - int flags; -# define STARTER 01 /* the initial state set */ -# define POSTSTATE 02 /* includes the goal state */ -# define LOCKED 04 /* locked in cache */ -# define NOPROGRESS 010 /* zero-progress state set */ - struct arcp ins; /* chain of inarcs pointing here */ - chr *lastseen; /* last entered on arrival here */ - struct sset **outs; /* outarc vector indexed by color */ - struct arcp *inchain; /* chain-pointer vector for outarcs */ + int flags; +#define STARTER 01 /* the initial state set */ +#define POSTSTATE 02 /* includes the goal state */ +#define LOCKED 04 /* locked in cache */ +#define NOPROGRESS 010 /* zero-progress state set */ + struct arcp ins; /* chain of inarcs pointing here */ + chr *lastseen; /* last entered on arrival here */ + struct sset **outs; /* outarc vector indexed by color */ + struct arcp *inchain; /* chain-pointer vector for outarcs */ }; -struct dfa { - int nssets; /* size of cache */ - int nssused; /* how many entries occupied yet */ - int nstates; /* number of states */ - int ncolors; /* length of outarc and inchain vectors */ - int wordsper; /* length of state-set bitvectors */ - struct sset *ssets; /* state-set cache */ - unsigned *statesarea; /* bitvector storage */ - unsigned *work; /* pointer to work area within statesarea */ - struct sset **outsarea; /* outarc-vector storage */ - struct arcp *incarea; /* inchain storage */ +struct dfa +{ + int nssets; /* size of cache */ + int nssused; /* how many entries occupied yet */ + int nstates; /* number of states */ + int ncolors; /* length of outarc and inchain vectors */ + int wordsper; /* length of state-set bitvectors */ + struct sset *ssets; /* state-set cache */ + unsigned *statesarea; /* bitvector storage */ + unsigned *work; /* pointer to work area within statesarea */ + struct sset **outsarea; /* outarc-vector storage */ + struct arcp *incarea; /* inchain storage */ struct cnfa *cnfa; struct colormap *cm; - chr *lastpost; /* location of last cache-flushed success */ - chr *lastnopr; /* location of last cache-flushed NOPROGRESS */ - struct sset *search; /* replacement-search-pointer memory */ - int cptsmalloced; /* were the areas individually malloced? */ - char *mallocarea; /* self, or master malloced area, or NULL */ + chr *lastpost; /* location of last cache-flushed success */ + chr *lastnopr; /* location of last cache-flushed + * NOPROGRESS */ + struct sset *search; /* replacement-search-pointer memory */ + int cptsmalloced; /* were the areas individually malloced? */ + char *mallocarea; /* self, or master malloced area, or NULL */ }; -#define WORK 1 /* number of work bitvectors needed */ +#define WORK 1 /* number of work bitvectors needed */ /* setup for non-malloc allocation for small cases */ -#define FEWSTATES 20 /* must be less than UBITS */ -#define FEWCOLORS 15 -struct smalldfa { - struct dfa dfa; - struct sset ssets[FEWSTATES*2]; - unsigned statesarea[FEWSTATES*2 + WORK]; - struct sset *outsarea[FEWSTATES*2 * FEWCOLORS]; - struct arcp incarea[FEWSTATES*2 * FEWCOLORS]; +#define FEWSTATES 20 /* must be less than UBITS */ +#define FEWCOLORS 15 +struct smalldfa +{ + struct dfa dfa; + struct sset ssets[FEWSTATES * 2]; + unsigned statesarea[FEWSTATES * 2 + WORK]; + struct sset *outsarea[FEWSTATES * 2 * FEWCOLORS]; + struct arcp incarea[FEWSTATES * 2 * FEWCOLORS]; }; -#define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */ + +#define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */ /* internal variables, bundled for easy passing around */ -struct vars { - regex_t *re; +struct vars +{ + regex_t *re; struct guts *g; - int eflags; /* copies of arguments */ - size_t nmatch; + int eflags; /* copies of arguments */ + size_t nmatch; regmatch_t *pmatch; rm_detail_t *details; - chr *start; /* start of string */ - chr *stop; /* just past end of string */ - int err; /* error code if any (0 none) */ - regoff_t *mem; /* memory vector for backtracking */ + chr *start; /* start of string */ + chr *stop; /* just past end of string */ + int err; /* error code if any (0 none) */ + regoff_t *mem; /* memory vector for backtracking */ struct smalldfa dfa1; struct smalldfa dfa2; }; -#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */ -#define ISERR() VISERR(v) -#define VERR(vv,e) (((vv)->err) ? (vv)->err : ((vv)->err = (e))) -#define ERR(e) VERR(v, e) /* record an error */ -#define NOERR() {if (ISERR()) return v->err;} /* if error seen, return it */ -#define OFF(p) ((p) - v->start) -#define LOFF(p) ((long)OFF(p)) + +#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */ +#define ISERR() VISERR(v) +#define VERR(vv,e) (((vv)->err) ? (vv)->err : ((vv)->err = (e))) +#define ERR(e) VERR(v, e) /* record an error */ +#define NOERR() {if (ISERR()) return v->err;} /* if error seen, return + * it */ +#define OFF(p) ((p) - v->start) +#define LOFF(p) ((long)OFF(p)) /* * forward declarations */ -/* =====^!^===== begin forwards =====^!^===== */ -/* automatically gathered by fwd; do not hand-edit */ /* === regexec.c === */ -int exec _ANSI_ARGS_((regex_t *, CONST chr *, size_t, rm_detail_t *, size_t, regmatch_t [], int)); -static int find _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *)); -static int cfind _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *)); -static int cfindloop _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **)); -static VOID zapsubs _ANSI_ARGS_((regmatch_t *, size_t)); -static VOID zapmem _ANSI_ARGS_((struct vars *, struct subre *)); -static VOID subset _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); -static int dissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); -static int condissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); -static int altdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); -static int cdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); -static int ccondissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); -static int crevdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); -static int cbrdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); -static int caltdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); -/* === rege_dfa.c === */ -static chr *longest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, int *)); -static chr *shortest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)); -static chr *lastcold _ANSI_ARGS_((struct vars *, struct dfa *)); -static struct dfa *newdfa _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)); -static VOID freedfa _ANSI_ARGS_((struct dfa *)); -static unsigned hash _ANSI_ARGS_((unsigned *, int)); -static struct sset *initialize _ANSI_ARGS_((struct vars *, struct dfa *, chr *)); -static struct sset *miss _ANSI_ARGS_((struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *)); -static int lacon _ANSI_ARGS_((struct vars *, struct cnfa *, chr *, pcolor)); -static struct sset *getvacant _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *)); -static struct sset *pickss _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *)); -/* automatically gathered by fwd; do not hand-edit */ -/* =====^!^===== end forwards =====^!^===== */ +static int find(struct vars *, struct cnfa *, struct colormap *); +static int cfind(struct vars *, struct cnfa *, struct colormap *); +static int cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **); +static void zapsubs(regmatch_t *, size_t); +static void zapmem(struct vars *, struct subre *); +static void subset(struct vars *, struct subre *, chr *, chr *); +static int dissect(struct vars *, struct subre *, chr *, chr *); +static int condissect(struct vars *, struct subre *, chr *, chr *); +static int altdissect(struct vars *, struct subre *, chr *, chr *); +static int cdissect(struct vars *, struct subre *, chr *, chr *); +static int ccondissect(struct vars *, struct subre *, chr *, chr *); +static int crevdissect(struct vars *, struct subre *, chr *, chr *); +static int cbrdissect(struct vars *, struct subre *, chr *, chr *); +static int caltdissect(struct vars *, struct subre *, chr *, chr *); +/* === rege_dfa.c === */ +static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *); +static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *); +static chr *lastcold(struct vars *, struct dfa *); +static struct dfa *newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *); +static void freedfa(struct dfa *); +static unsigned hash(unsigned *, int); +static struct sset *initialize(struct vars *, struct dfa *, chr *); +static struct sset *miss(struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *); +static int lacon(struct vars *, struct cnfa *, chr *, pcolor); +static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *); +static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *); /* - - exec - match regular expression - ^ int exec(regex_t *, CONST chr *, size_t, rm_detail_t *, - ^ size_t, regmatch_t [], int); + * regexec - match regular expression */ int -exec(re, string, len, details, nmatch, pmatch, flags) -regex_t *re; -CONST chr *string; -size_t len; -rm_detail_t *details; -size_t nmatch; -regmatch_t pmatch[]; -int flags; +regexec(regex_t *re, + const chr *string, + size_t nmatch, + regmatch_t pmatch[], + int flags) +{ + rm_detail_t det; + return wx_regexec(re, string, wx_strlen(string), &det, nmatch, pmatch, flags); +} +int +wx_regexec(regex_t *re, + const chr *string, + size_t len, + rm_detail_t *details, + size_t nmatch, + regmatch_t pmatch[], + int flags) { struct vars var; register struct vars *v = &var; - int st; - size_t n; - int backref; -# define LOCALMAT 20 - regmatch_t mat[LOCALMAT]; -# define LOCALMEM 40 - regoff_t mem[LOCALMEM]; + int st; + size_t n; + int backref; + +#define LOCALMAT 20 + regmatch_t mat[LOCALMAT]; + +#define LOCALMEM 40 + regoff_t mem[LOCALMEM]; /* sanity checks */ if (re == NULL || string == NULL || re->re_magic != REMAGIC) @@ -188,46 +203,51 @@ int flags; /* setup */ v->re = re; - v->g = (struct guts *)re->re_guts; - if ((v->g->cflags®_EXPECT) && details == NULL) + v->g = (struct guts *) re->re_guts; + if ((v->g->cflags & REG_EXPECT) && details == NULL) return REG_INVARG; - if (v->g->info®_UIMPOSSIBLE) + if (v->g->info & REG_UIMPOSSIBLE) return REG_NOMATCH; - backref = (v->g->info®_UBACKREF) ? 1 : 0; + backref = (v->g->info & REG_UBACKREF) ? 1 : 0; v->eflags = flags; - if (v->g->cflags®_NOSUB) - nmatch = 0; /* override client */ + if (v->g->cflags & REG_NOSUB) + nmatch = 0; /* override client */ v->nmatch = nmatch; - if (backref) { + if (backref) + { /* need work area */ if (v->g->nsub + 1 <= LOCALMAT) v->pmatch = mat; else - v->pmatch = (regmatch_t *)MALLOC((v->g->nsub + 1) * - sizeof(regmatch_t)); + v->pmatch = (regmatch_t *) MALLOC((v->g->nsub + 1) * + sizeof(regmatch_t)); if (v->pmatch == NULL) return REG_ESPACE; v->nmatch = v->g->nsub + 1; - } else + } + else v->pmatch = pmatch; v->details = details; - v->start = (chr *)string; - v->stop = (chr *)string + len; + v->start = (chr *) string; + v->stop = (chr *) string + len; v->err = 0; - if (backref) { + if (backref) + { /* need retry memory */ assert(v->g->ntree >= 0); - n = (size_t)v->g->ntree; + n = (size_t) v->g->ntree; if (n <= LOCALMEM) v->mem = mem; else - v->mem = (regoff_t *)MALLOC(n*sizeof(regoff_t)); - if (v->mem == NULL) { + v->mem = (regoff_t *) MALLOC(n * sizeof(regoff_t)); + if (v->mem == NULL) + { if (v->pmatch != pmatch && v->pmatch != mat) FREE(v->pmatch); return REG_ESPACE; } - } else + } + else v->mem = NULL; /* do it */ @@ -238,10 +258,11 @@ int flags; st = find(v, &v->g->tree->cnfa, &v->g->cmap); /* copy (portion of) match vector over if necessary */ - if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) { + if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) + { zapsubs(pmatch, nmatch); n = (nmatch < v->nmatch) ? nmatch : v->nmatch; - memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t)); + memcpy(VS(pmatch), VS(v->pmatch), n * sizeof(regmatch_t)); } /* clean up */ @@ -253,24 +274,23 @@ int flags; } /* - - find - find a match for the main NFA (no-complications case) - ^ static int find(struct vars *, struct cnfa *, struct colormap *); + * find - find a match for the main NFA (no-complications case) */ static int -find(v, cnfa, cm) -struct vars *v; -struct cnfa *cnfa; -struct colormap *cm; +find(struct vars * v, + struct cnfa * cnfa, + struct colormap * cm) { struct dfa *s; struct dfa *d; - chr *begin; - chr *end = NULL; - chr *cold; - chr *open; /* open and close of range of possible starts */ - chr *close; - int hitend; - int shorter = (v->g->tree->flags&SHORTER) ? 1 : 0; + chr *begin; + chr *end = NULL; + chr *cold; + chr *open; /* open and close of range of possible + * starts */ + chr *close; + int hitend; + int shorter = (v->g->tree->flags & SHORTER) ? 1 : 0; /* first, a shot with the search RE */ s = newdfa(v, &v->g->search, cm, &v->dfa1); @@ -278,20 +298,21 @@ struct colormap *cm; NOERR(); MDEBUG(("\nsearch at %ld\n", LOFF(v->start))); cold = NULL; - close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *)NULL); + close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *) NULL); freedfa(s); NOERR(); - if (v->g->cflags®_EXPECT) { + if (v->g->cflags & REG_EXPECT) + { assert(v->details != NULL); if (cold != NULL) v->details->rm_extend.rm_so = OFF(cold); else v->details->rm_extend.rm_so = OFF(v->stop); - v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ + v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ } - if (close == NULL) /* not found */ + if (close == NULL) /* not found */ return REG_NOMATCH; - if (v->nmatch == 0) /* found, don't need exact location */ + if (v->nmatch == 0) /* found, don't need exact location */ return REG_OKAY; /* find starting point and match */ @@ -302,18 +323,19 @@ struct colormap *cm; d = newdfa(v, cnfa, cm, &v->dfa1); assert(!(ISERR() && d != NULL)); NOERR(); - for (begin = open; begin <= close; begin++) { + for (begin = open; begin <= close; begin++) + { MDEBUG(("\nfind trying at %ld\n", LOFF(begin))); if (shorter) end = shortest(v, d, begin, begin, v->stop, - (chr **)NULL, &hitend); + (chr **) NULL, &hitend); else end = longest(v, d, begin, v->stop, &hitend); NOERR(); if (hitend && cold == NULL) cold = begin; if (end != NULL) - break; /* NOTE BREAK OUT */ + break; /* NOTE BREAK OUT */ } assert(end != NULL); /* search RE succeeded so loop should */ freedfa(d); @@ -322,14 +344,15 @@ struct colormap *cm; assert(v->nmatch > 0); v->pmatch[0].rm_so = OFF(begin); v->pmatch[0].rm_eo = OFF(end); - if (v->g->cflags®_EXPECT) { + if (v->g->cflags & REG_EXPECT) + { if (cold != NULL) v->details->rm_extend.rm_so = OFF(cold); else v->details->rm_extend.rm_so = OFF(v->stop); - v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ + v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ } - if (v->nmatch == 1) /* no need for submatches */ + if (v->nmatch == 1) /* no need for submatches */ return REG_OKAY; /* submatches */ @@ -338,24 +361,23 @@ struct colormap *cm; } /* - - cfind - find a match for the main NFA (with complications) - ^ static int cfind(struct vars *, struct cnfa *, struct colormap *); + * cfind - find a match for the main NFA (with complications) */ static int -cfind(v, cnfa, cm) -struct vars *v; -struct cnfa *cnfa; -struct colormap *cm; +cfind(struct vars * v, + struct cnfa * cnfa, + struct colormap * cm) { struct dfa *s; struct dfa *d; - chr *cold; - int ret; + chr *cold; + int ret; s = newdfa(v, &v->g->search, cm, &v->dfa1); NOERR(); d = newdfa(v, cnfa, cm, &v->dfa2); - if (ISERR()) { + if (ISERR()) + { assert(d == NULL); freedfa(s); return v->err; @@ -366,65 +388,67 @@ struct colormap *cm; freedfa(d); freedfa(s); NOERR(); - if (v->g->cflags®_EXPECT) { + if (v->g->cflags & REG_EXPECT) + { assert(v->details != NULL); if (cold != NULL) v->details->rm_extend.rm_so = OFF(cold); else v->details->rm_extend.rm_so = OFF(v->stop); - v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ + v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ } return ret; } /* - - cfindloop - the heart of cfind - ^ static int cfindloop(struct vars *, struct cnfa *, struct colormap *, - ^ struct dfa *, struct dfa *, chr **); + * cfindloop - the heart of cfind */ static int -cfindloop(v, cnfa, cm, d, s, coldp) -struct vars *v; -struct cnfa *cnfa; -struct colormap *cm; -struct dfa *d; -struct dfa *s; -chr **coldp; /* where to put coldstart pointer */ +cfindloop(struct vars * v, + struct cnfa * cnfa, + struct colormap * cm, + struct dfa * d, + struct dfa * s, + chr **coldp) /* where to put coldstart pointer */ { - chr *begin; - chr *end; - chr *cold; - chr *open; /* open and close of range of possible starts */ - chr *close; - chr *estart; - chr *estop; - int er; - int shorter = v->g->tree->flags&SHORTER; - int hitend; + chr *begin; + chr *end; + chr *cold; + chr *open; /* open and close of range of possible + * starts */ + chr *close; + chr *estart; + chr *estop; + int er; + int shorter = v->g->tree->flags & SHORTER; + int hitend; assert(d != NULL && s != NULL); cold = NULL; close = v->start; - do { + do + { MDEBUG(("\ncsearch at %ld\n", LOFF(close))); - close = shortest(v, s, close, close, v->stop, &cold, (int *)NULL); + close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL); if (close == NULL) break; /* NOTE BREAK */ assert(cold != NULL); open = cold; cold = NULL; MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close))); - for (begin = open; begin <= close; begin++) { + for (begin = open; begin <= close; begin++) + { MDEBUG(("\ncfind trying at %ld\n", LOFF(begin))); estart = begin; estop = v->stop; - for (;;) { + for (;;) + { if (shorter) end = shortest(v, d, begin, estart, - estop, (chr **)NULL, &hitend); + estop, (chr **) NULL, &hitend); else end = longest(v, d, begin, estop, - &hitend); + &hitend); if (hitend && cold == NULL) cold = begin; if (end == NULL) @@ -433,19 +457,23 @@ chr **coldp; /* where to put coldstart pointer */ zapsubs(v->pmatch, v->nmatch); zapmem(v, v->g->tree); er = cdissect(v, v->g->tree, begin, end); - if (er == REG_OKAY) { - if (v->nmatch > 0) { + if (er == REG_OKAY) + { + if (v->nmatch > 0) + { v->pmatch[0].rm_so = OFF(begin); v->pmatch[0].rm_eo = OFF(end); } *coldp = cold; return REG_OKAY; } - if (er != REG_NOMATCH) { + if (er != REG_NOMATCH) + { ERR(er); return er; } - if ((shorter) ? end == estop : end == begin) { + if ((shorter) ? end == estop : end == begin) + { /* no point in trying again */ *coldp = cold; return REG_NOMATCH; @@ -464,37 +492,35 @@ chr **coldp; /* where to put coldstart pointer */ } /* - - zapsubs - initialize the subexpression matches to "no match" - ^ static VOID zapsubs(regmatch_t *, size_t); + * zapsubs - initialize the subexpression matches to "no match" */ -static VOID -zapsubs(p, n) -regmatch_t *p; -size_t n; +static void +zapsubs(regmatch_t *p, + size_t n) { - size_t i; + size_t i; - for (i = n-1; i > 0; i--) { + for (i = n - 1; i > 0; i--) + { p[i].rm_so = -1; p[i].rm_eo = -1; } } /* - - zapmem - initialize the retry memory of a subtree to zeros - ^ static VOID zapmem(struct vars *, struct subre *); + * zapmem - initialize the retry memory of a subtree to zeros */ -static VOID -zapmem(v, t) -struct vars *v; -struct subre *t; +static void +zapmem(struct vars * v, + struct subre * t) { if (t == NULL) return; assert(v->mem != NULL); v->mem[t->retry] = 0; - if (t->op == '(') { + if (t->op == '(') + { assert(t->subno > 0); v->pmatch[t->subno].rm_so = -1; v->pmatch[t->subno].rm_eo = -1; @@ -507,20 +533,18 @@ struct subre *t; } /* - - subset - set any subexpression relevant to a successful subre - ^ static VOID subset(struct vars *, struct subre *, chr *, chr *); + * subset - set any subexpression relevant to a successful subre */ -static VOID -subset(v, sub, begin, end) -struct vars *v; -struct subre *sub; -chr *begin; -chr *end; +static void +subset(struct vars * v, + struct subre * sub, + chr *begin, + chr *end) { - int n = sub->subno; + int n = sub->subno; assert(n > 0); - if ((size_t)n >= v->nmatch) + if ((size_t) n >= v->nmatch) return; MDEBUG(("setting %d\n", n)); @@ -529,64 +553,61 @@ chr *end; } /* - - dissect - determine subexpression matches (uncomplicated case) - ^ static int dissect(struct vars *, struct subre *, chr *, chr *); + * dissect - determine subexpression matches (uncomplicated case) */ -static int /* regexec return code */ -dissect(v, t, begin, end) -struct vars *v; -struct subre *t; -chr *begin; /* beginning of relevant substring */ -chr *end; /* end of same */ +static int /* regexec return code */ +dissect(struct vars * v, + struct subre * t, + chr *begin, /* beginning of relevant substring */ + chr *end) /* end of same */ { assert(t != NULL); MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end))); - switch (t->op) { - case '=': /* terminal node */ - assert(t->left == NULL && t->right == NULL); - return REG_OKAY; /* no action, parent did the work */ - break; - case '|': /* alternation */ - assert(t->left != NULL); - return altdissect(v, t, begin, end); - break; - case 'b': /* back ref -- shouldn't be calling us! */ - return REG_ASSERT; - break; - case '.': /* concatenation */ - assert(t->left != NULL && t->right != NULL); - return condissect(v, t, begin, end); - break; - case '(': /* capturing */ - assert(t->left != NULL && t->right == NULL); - assert(t->subno > 0); - subset(v, t, begin, end); - return dissect(v, t->left, begin, end); - break; - default: - return REG_ASSERT; - break; + switch (t->op) + { + case '=': /* terminal node */ + assert(t->left == NULL && t->right == NULL); + return REG_OKAY; /* no action, parent did the work */ + break; + case '|': /* alternation */ + assert(t->left != NULL); + return altdissect(v, t, begin, end); + break; + case 'b': /* back ref -- shouldn't be calling us! */ + return REG_ASSERT; + break; + case '.': /* concatenation */ + assert(t->left != NULL && t->right != NULL); + return condissect(v, t, begin, end); + break; + case '(': /* capturing */ + assert(t->left != NULL && t->right == NULL); + assert(t->subno > 0); + subset(v, t, begin, end); + return dissect(v, t->left, begin, end); + break; + default: + return REG_ASSERT; + break; } } /* - - condissect - determine concatenation subexpression matches (uncomplicated) - ^ static int condissect(struct vars *, struct subre *, chr *, chr *); + * condissect - determine concatenation subexpression matches (uncomplicated) */ -static int /* regexec return code */ -condissect(v, t, begin, end) -struct vars *v; -struct subre *t; -chr *begin; /* beginning of relevant substring */ -chr *end; /* end of same */ +static int /* regexec return code */ +condissect(struct vars * v, + struct subre * t, + chr *begin, /* beginning of relevant substring */ + chr *end) /* end of same */ { struct dfa *d; struct dfa *d2; - chr *mid; - int i; - int shorter = (t->left->flags&SHORTER) ? 1 : 0; - chr *stop = (shorter) ? end : begin; + chr *mid; + int i; + int shorter = (t->left->flags & SHORTER) ? 1 : 0; + chr *stop = (shorter) ? end : begin; assert(t->op == '.'); assert(t->left != NULL && t->left->cnfa.nstates > 0); @@ -595,7 +616,8 @@ chr *end; /* end of same */ d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); NOERR(); d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2); - if (ISERR()) { + if (ISERR()) + { assert(d2 == NULL); freedfa(d); return v->err; @@ -603,11 +625,12 @@ chr *end; /* end of same */ /* pick a tentative midpoint */ if (shorter) - mid = shortest(v, d, begin, begin, end, (chr **)NULL, - (int *)NULL); + mid = shortest(v, d, begin, begin, end, (chr **) NULL, + (int *) NULL); else - mid = longest(v, d, begin, end, (int *)NULL); - if (mid == NULL) { + mid = longest(v, d, begin, end, (int *) NULL); + if (mid == NULL) + { freedfa(d); freedfa(d2); return REG_ASSERT; @@ -615,9 +638,11 @@ chr *end; /* end of same */ MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); /* iterate until satisfaction or failure */ - while (longest(v, d2, mid, end, (int *)NULL) != end) { + while (longest(v, d2, mid, end, (int *) NULL) != end) + { /* that midpoint didn't work, find a new one */ - if (mid == stop) { + if (mid == stop) + { /* all possibilities exhausted! */ MDEBUG(("no midpoint!\n")); freedfa(d); @@ -625,11 +650,12 @@ chr *end; /* end of same */ return REG_ASSERT; } if (shorter) - mid = shortest(v, d, begin, mid+1, end, (chr **)NULL, - (int *)NULL); + mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, + (int *) NULL); else - mid = longest(v, d, begin, mid-1, (int *)NULL); - if (mid == NULL) { + mid = longest(v, d, begin, mid - 1, (int *) NULL); + if (mid == NULL) + { /* failed to find a new one! */ MDEBUG(("failed midpoint!\n")); freedfa(d); @@ -650,162 +676,168 @@ chr *end; /* end of same */ } /* - - altdissect - determine alternative subexpression matches (uncomplicated) - ^ static int altdissect(struct vars *, struct subre *, chr *, chr *); + * altdissect - determine alternative subexpression matches (uncomplicated) */ -static int /* regexec return code */ -altdissect(v, t, begin, end) -struct vars *v; -struct subre *t; -chr *begin; /* beginning of relevant substring */ -chr *end; /* end of same */ +static int /* regexec return code */ +altdissect(struct vars * v, + struct subre * t, + chr *begin, /* beginning of relevant substring */ + chr *end) /* end of same */ { struct dfa *d; - int i; + int i; assert(t != NULL); assert(t->op == '|'); - for (i = 0; t != NULL; t = t->right, i++) { + for (i = 0; t != NULL; t = t->right, i++) + { MDEBUG(("trying %dth\n", i)); assert(t->left != NULL && t->left->cnfa.nstates > 0); d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); if (ISERR()) return v->err; - if (longest(v, d, begin, end, (int *)NULL) == end) { + if (longest(v, d, begin, end, (int *) NULL) == end) + { MDEBUG(("success\n")); freedfa(d); return dissect(v, t->left, begin, end); } freedfa(d); } - return REG_ASSERT; /* none of them matched?!? */ + return REG_ASSERT; /* none of them matched?!? */ } /* - - cdissect - determine subexpression matches (with complications) - * The retry memory stores the offset of the trial midpoint from begin, + * cdissect - determine subexpression matches (with complications) + * The retry memory stores the offset of the trial midpoint from begin, * plus 1 so that 0 uniquely means "clean slate". - ^ static int cdissect(struct vars *, struct subre *, chr *, chr *); */ -static int /* regexec return code */ -cdissect(v, t, begin, end) -struct vars *v; -struct subre *t; -chr *begin; /* beginning of relevant substring */ -chr *end; /* end of same */ +static int /* regexec return code */ +cdissect(struct vars * v, + struct subre * t, + chr *begin, /* beginning of relevant substring */ + chr *end) /* end of same */ { - int er; + int er; assert(t != NULL); MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op)); - switch (t->op) { - case '=': /* terminal node */ - assert(t->left == NULL && t->right == NULL); - return REG_OKAY; /* no action, parent did the work */ - break; - case '|': /* alternation */ - assert(t->left != NULL); - return caltdissect(v, t, begin, end); - break; - case 'b': /* back ref -- shouldn't be calling us! */ - assert(t->left == NULL && t->right == NULL); - return cbrdissect(v, t, begin, end); - break; - case '.': /* concatenation */ - assert(t->left != NULL && t->right != NULL); - return ccondissect(v, t, begin, end); - break; - case '(': /* capturing */ - assert(t->left != NULL && t->right == NULL); - assert(t->subno > 0); - er = cdissect(v, t->left, begin, end); - if (er == REG_OKAY) - subset(v, t, begin, end); - return er; - break; - default: - return REG_ASSERT; - break; + switch (t->op) + { + case '=': /* terminal node */ + assert(t->left == NULL && t->right == NULL); + return REG_OKAY; /* no action, parent did the work */ + break; + case '|': /* alternation */ + assert(t->left != NULL); + return caltdissect(v, t, begin, end); + break; + case 'b': /* back ref -- shouldn't be calling us! */ + assert(t->left == NULL && t->right == NULL); + return cbrdissect(v, t, begin, end); + break; + case '.': /* concatenation */ + assert(t->left != NULL && t->right != NULL); + return ccondissect(v, t, begin, end); + break; + case '(': /* capturing */ + assert(t->left != NULL && t->right == NULL); + assert(t->subno > 0); + er = cdissect(v, t->left, begin, end); + if (er == REG_OKAY) + subset(v, t, begin, end); + return er; + break; + default: + return REG_ASSERT; + break; } } /* - - ccondissect - concatenation subexpression matches (with complications) - * The retry memory stores the offset of the trial midpoint from begin, + * ccondissect - concatenation subexpression matches (with complications) + * The retry memory stores the offset of the trial midpoint from begin, * plus 1 so that 0 uniquely means "clean slate". - ^ static int ccondissect(struct vars *, struct subre *, chr *, chr *); */ -static int /* regexec return code */ -ccondissect(v, t, begin, end) -struct vars *v; -struct subre *t; -chr *begin; /* beginning of relevant substring */ -chr *end; /* end of same */ +static int /* regexec return code */ +ccondissect(struct vars * v, + struct subre * t, + chr *begin, /* beginning of relevant substring */ + chr *end) /* end of same */ { struct dfa *d; struct dfa *d2; - chr *mid; - int er; + chr *mid; + int er; assert(t->op == '.'); assert(t->left != NULL && t->left->cnfa.nstates > 0); assert(t->right != NULL && t->right->cnfa.nstates > 0); - if (t->left->flags&SHORTER) /* reverse scan */ + if (t->left->flags & SHORTER) /* reverse scan */ return crevdissect(v, t, begin, end); d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); if (ISERR()) return v->err; d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC); - if (ISERR()) { + if (ISERR()) + { freedfa(d); return v->err; } MDEBUG(("cconcat %d\n", t->retry)); /* pick a tentative midpoint */ - if (v->mem[t->retry] == 0) { - mid = longest(v, d, begin, end, (int *)NULL); - if (mid == NULL) { + if (v->mem[t->retry] == 0) + { + mid = longest(v, d, begin, end, (int *) NULL); + if (mid == NULL) + { freedfa(d); freedfa(d2); return REG_NOMATCH; } MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); v->mem[t->retry] = (mid - begin) + 1; - } else { + } + else + { mid = begin + (v->mem[t->retry] - 1); MDEBUG(("working midpoint %ld\n", LOFF(mid))); } /* iterate until satisfaction or failure */ - for (;;) { + for (;;) + { /* try this midpoint on for size */ er = cdissect(v, t->left, begin, mid); if (er == REG_OKAY && - longest(v, d2, mid, end, (int *)NULL) == end && - (er = cdissect(v, t->right, mid, end)) == - REG_OKAY) - break; /* NOTE BREAK OUT */ - if (er != REG_OKAY && er != REG_NOMATCH) { + longest(v, d2, mid, end, (int *) NULL) == end && + (er = cdissect(v, t->right, mid, end)) == + REG_OKAY) + break; /* NOTE BREAK OUT */ + if (er != REG_OKAY && er != REG_NOMATCH) + { freedfa(d); freedfa(d2); return er; } /* that midpoint didn't work, find a new one */ - if (mid == begin) { + if (mid == begin) + { /* all possibilities exhausted */ MDEBUG(("%d no midpoint\n", t->retry)); freedfa(d); freedfa(d2); return REG_NOMATCH; } - mid = longest(v, d, begin, mid-1, (int *)NULL); - if (mid == NULL) { + mid = longest(v, d, begin, mid - 1, (int *) NULL); + if (mid == NULL) + { /* failed to find a new one */ MDEBUG(("%d failed midpoint\n", t->retry)); freedfa(d); @@ -826,79 +858,86 @@ chr *end; /* end of same */ } /* - - crevdissect - determine backref shortest-first subexpression matches - * The retry memory stores the offset of the trial midpoint from begin, + * crevdissect - determine backref shortest-first subexpression matches + * The retry memory stores the offset of the trial midpoint from begin, * plus 1 so that 0 uniquely means "clean slate". - ^ static int crevdissect(struct vars *, struct subre *, chr *, chr *); */ -static int /* regexec return code */ -crevdissect(v, t, begin, end) -struct vars *v; -struct subre *t; -chr *begin; /* beginning of relevant substring */ -chr *end; /* end of same */ +static int /* regexec return code */ +crevdissect(struct vars * v, + struct subre * t, + chr *begin, /* beginning of relevant substring */ + chr *end) /* end of same */ { struct dfa *d; struct dfa *d2; - chr *mid; - int er; + chr *mid; + int er; assert(t->op == '.'); assert(t->left != NULL && t->left->cnfa.nstates > 0); assert(t->right != NULL && t->right->cnfa.nstates > 0); - assert(t->left->flags&SHORTER); + assert(t->left->flags & SHORTER); /* concatenation -- need to split the substring between parts */ d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); if (ISERR()) return v->err; d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC); - if (ISERR()) { + if (ISERR()) + { freedfa(d); return v->err; } MDEBUG(("crev %d\n", t->retry)); /* pick a tentative midpoint */ - if (v->mem[t->retry] == 0) { - mid = shortest(v, d, begin, begin, end, (chr **)NULL, (int *)NULL); - if (mid == NULL) { + if (v->mem[t->retry] == 0) + { + mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL); + if (mid == NULL) + { freedfa(d); freedfa(d2); return REG_NOMATCH; } MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); v->mem[t->retry] = (mid - begin) + 1; - } else { + } + else + { mid = begin + (v->mem[t->retry] - 1); MDEBUG(("working midpoint %ld\n", LOFF(mid))); } /* iterate until satisfaction or failure */ - for (;;) { + for (;;) + { /* try this midpoint on for size */ er = cdissect(v, t->left, begin, mid); if (er == REG_OKAY && - longest(v, d2, mid, end, (int *)NULL) == end && - (er = cdissect(v, t->right, mid, end)) == - REG_OKAY) - break; /* NOTE BREAK OUT */ - if (er != REG_OKAY && er != REG_NOMATCH) { + longest(v, d2, mid, end, (int *) NULL) == end && + (er = cdissect(v, t->right, mid, end)) == + REG_OKAY) + break; /* NOTE BREAK OUT */ + if (er != REG_OKAY && er != REG_NOMATCH) + { freedfa(d); freedfa(d2); return er; } /* that midpoint didn't work, find a new one */ - if (mid == end) { + if (mid == end) + { /* all possibilities exhausted */ MDEBUG(("%d no midpoint\n", t->retry)); freedfa(d); freedfa(d2); return REG_NOMATCH; } - mid = shortest(v, d, begin, mid+1, end, (chr **)NULL, (int *)NULL); - if (mid == NULL) { + mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL); + if (mid == NULL) + { /* failed to find a new one */ MDEBUG(("%d failed midpoint\n", t->retry)); freedfa(d); @@ -919,29 +958,27 @@ chr *end; /* end of same */ } /* - - cbrdissect - determine backref subexpression matches - ^ static int cbrdissect(struct vars *, struct subre *, chr *, chr *); + * cbrdissect - determine backref subexpression matches */ -static int /* regexec return code */ -cbrdissect(v, t, begin, end) -struct vars *v; -struct subre *t; -chr *begin; /* beginning of relevant substring */ -chr *end; /* end of same */ +static int /* regexec return code */ +cbrdissect(struct vars * v, + struct subre * t, + chr *begin, /* beginning of relevant substring */ + chr *end) /* end of same */ { - int i; - int n = t->subno; - size_t len; - chr *paren; - chr *p; - chr *stop; - int min = t->min; - int max = t->max; + int i; + int n = t->subno; + size_t len; + chr *paren; + chr *p; + chr *stop; + int min = t->min; + int max = t->max; assert(t != NULL); assert(t->op == 'b'); assert(n >= 0); - assert((size_t)n < v->nmatch); + assert((size_t) n < v->nmatch); MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max)); @@ -956,7 +993,8 @@ chr *end; /* end of same */ v->mem[t->retry] = 1; /* special-case zero-length string */ - if (len == 0) { + if (len == 0) + { if (begin == end) return REG_OKAY; return REG_NOMATCH; @@ -964,43 +1002,44 @@ chr *end; /* end of same */ /* and too-short string */ assert(end >= begin); - if ((size_t)(end - begin) < len) + if ((size_t) (end - begin) < len) return REG_NOMATCH; stop = end - len; /* count occurrences */ i = 0; - for (p = begin; p <= stop && (i < max || max == INFINITY); p += len) { - if ((*v->g->compare)(paren, p, len) != 0) - break; + for (p = begin; p <= stop && (i < max || max == INFINITY); p += len) + { + if ((*v->g->compare) (paren, p, len) != 0) + break; i++; } MDEBUG(("cbackref found %d\n", i)); /* and sort it out */ - if (p != end) /* didn't consume all of it */ + if (p != end) /* didn't consume all of it */ return REG_NOMATCH; if (min <= i && (i <= max || max == INFINITY)) return REG_OKAY; - return REG_NOMATCH; /* out of range */ + return REG_NOMATCH; /* out of range */ } /* - - caltdissect - determine alternative subexpression matches (w. complications) - ^ static int caltdissect(struct vars *, struct subre *, chr *, chr *); + * caltdissect - determine alternative subexpression matches (w. complications) */ -static int /* regexec return code */ -caltdissect(v, t, begin, end) -struct vars *v; -struct subre *t; -chr *begin; /* beginning of relevant substring */ -chr *end; /* end of same */ +static int /* regexec return code */ +caltdissect(struct vars * v, + struct subre * t, + chr *begin, /* beginning of relevant substring */ + chr *end) /* end of same */ { struct dfa *d; - int er; -# define UNTRIED 0 /* not yet tried at all */ -# define TRYING 1 /* top matched, trying submatches */ -# define TRIED 2 /* top didn't match or submatches exhausted */ + int er; + +#define UNTRIED 0 /* not yet tried at all */ +#define TRYING 1 /* top matched, trying submatches */ +#define TRIED 2 /* top didn't match or submatches + * exhausted */ if (t == NULL) return REG_NOMATCH; @@ -1011,11 +1050,13 @@ chr *end; /* end of same */ MDEBUG(("calt n%d\n", t->retry)); assert(t->left != NULL); - if (v->mem[t->retry] == UNTRIED) { + if (v->mem[t->retry] == UNTRIED) + { d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); if (ISERR()) return v->err; - if (longest(v, d, begin, end, (int *)NULL) != end) { + if (longest(v, d, begin, end, (int *) NULL) != end) + { freedfa(d); v->mem[t->retry] = TRIED; return caltdissect(v, t->right, begin, end);