X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/1f0db2d9942edbea6a3435cfd1a9b120138e1d14..c56ae04274fda26269c6d06be34cf59a45eb70ce:/src/regex/regexec.c?ds=inline

diff --git a/src/regex/regexec.c b/src/regex/regexec.c
index 2128927b90..41d49bdab5 100644
--- a/src/regex/regexec.c
+++ b/src/regex/regexec.c
@@ -1,136 +1,1038 @@
 /*
- * the outer shell of regexec()
+ * re_*exec and friends - match REs
  *
- * This file includes engine.c *twice*, after muchos fiddling with the
- * macros that code uses.  This lets the same code operate on two different
- * representations for state sets.
- */
-#include <sys/types.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-#include <ctype.h>
-#include <regex.h>
-
-#include "utils.h"
-#include "regex2.h"
-
-/* macros for manipulating states, small version */
-#define	states	unsigned
-#define	states1	unsigned	/* for later use in regexec() decision */
-#define	CLEAR(v)	((v) = 0)
-#define	SET0(v, n)	((v) &= ~((unsigned)1 << (n)))
-#define	SET1(v, n)	((v) |= (unsigned)1 << (n))
-#define	ISSET(v, n)	((v) & ((unsigned)1 << (n)))
-#define	ASSIGN(d, s)	((d) = (s))
-#define	EQ(a, b)	((a) == (b))
-#define	STATEVARS	int dummy	/* dummy version */
-#define	STATESETUP(m, n)	/* nothing */
-#define	STATETEARDOWN(m)	/* nothing */
-#define	SETUP(v)	((v) = 0)
-#define	onestate	unsigned
-#define	INIT(o, n)	((o) = (unsigned)1 << (n))
-#define	INC(o)	((o) <<= 1)
-#define	ISSTATEIN(v, o)	((v) & (o))
-/* some abbreviations; note that some of these know variable names! */
-/* do "if I'm here, I can also be there" etc without branches */
-#define	FWD(dst, src, n)	((dst) |= ((unsigned)(src)&(here)) << (n))
-#define	BACK(dst, src, n)	((dst) |= ((unsigned)(src)&(here)) >> (n))
-#define	ISSETBACK(v, n)	((v) & ((unsigned)here >> (n)))
-/* function names */
-#define SNAMES			/* engine.c looks after details */
-
-#include "engine.c"
-
-/* now undo things */
-#undef	states
-#undef	CLEAR
-#undef	SET0
-#undef	SET1
-#undef	ISSET
-#undef	ASSIGN
-#undef	EQ
-#undef	STATEVARS
-#undef	STATESETUP
-#undef	STATETEARDOWN
-#undef	SETUP
-#undef	onestate
-#undef	INIT
-#undef	INC
-#undef	ISSTATEIN
-#undef	FWD
-#undef	BACK
-#undef	ISSETBACK
-#undef	SNAMES
-
-/* macros for manipulating states, large version */
-#define	states	char *
-#define	CLEAR(v)	memset(v, 0, m->g->nstates)
-#define	SET0(v, n)	((v)[n] = 0)
-#define	SET1(v, n)	((v)[n] = 1)
-#define	ISSET(v, n)	((v)[n])
-#define	ASSIGN(d, s)	memcpy(d, s, m->g->nstates)
-#define	EQ(a, b)	(memcmp(a, b, m->g->nstates) == 0)
-#define	STATEVARS	int vn; char *space
-#define	STATESETUP(m, nv)	{ (m)->space = malloc((nv)*(m)->g->nstates); \
-				if ((m)->space == NULL) return(REG_ESPACE); \
-				(m)->vn = 0; }
-#define	STATETEARDOWN(m)	{ free((m)->space); }
-#define	SETUP(v)	((v) = &m->space[m->vn++ * m->g->nstates])
-#define	onestate	int
-#define	INIT(o, n)	((o) = (n))
-#define	INC(o)	((o)++)
-#define	ISSTATEIN(v, o)	((v)[o])
-/* some abbreviations; note that some of these know variable names! */
-/* do "if I'm here, I can also be there" etc without branches */
-#define	FWD(dst, src, n)	((dst)[here+(n)] |= (src)[here])
-#define	BACK(dst, src, n)	((dst)[here-(n)] |= (src)[here])
-#define	ISSETBACK(v, n)	((v)[here - (n)])
-/* function names */
-#define	LNAMES			/* flag */
-
-#include "engine.c"
-
-/*
- - regexec - interface for matching
- = extern int regexec(const regex_t *, const char *, size_t, \
- =					regmatch_t [], int);
- = #define	REG_NOTBOL	00001
- = #define	REG_NOTEOL	00002
- = #define	REG_STARTEND	00004
- = #define	REG_TRACE	00400	// tracing of execution
- = #define	REG_LARGE	01000	// force large representation
- = #define	REG_BACKR	02000	// force use of backref code
+ * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
+ * 
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results.  The author
+ * thanks all of them. 
+ * 
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ * 
+ * I'd appreciate being given credit for this package in the documentation
+ * of software which uses it, but that is not a requirement.
+ * 
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * We put this here so we can exploit knowledge of the state representation
- * when choosing which matcher to call.  Also, by this point the matchers
- * have been prototyped.
- */
-int				/* 0 success, REG_NOMATCH failure */
-regexec(preg, string, nmatch, pmatch, eflags)
-const regex_t *preg;
-const char *string;
+ */
+
+#include "regguts.h"
+
+
+
+/* lazy-DFA representation */
+struct arcp {			/* "pointer" to an outarc */
+	struct sset *ss;
+	color co;
+};
+
+struct sset {			/* state set */
+	unsigned *states;	/* pointer to bitvector */
+	unsigned hash;		/* hash of bitvector */
+#		define	HASH(bv, nw)	(((nw) == 1) ? *(bv) : hash(bv, nw))
+#	define	HIT(h,bv,ss,nw)	((ss)->hash == (h) && ((nw) == 1 || \
+		memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
+	int flags;
+#		define	STARTER		01	/* the initial state set */
+#		define	POSTSTATE	02	/* includes the goal state */
+#		define	LOCKED		04	/* locked in cache */
+#		define	NOPROGRESS	010	/* zero-progress state set */
+	struct arcp ins;	/* chain of inarcs pointing here */
+	chr *lastseen;		/* last entered on arrival here */
+	struct sset **outs;	/* outarc vector indexed by color */
+	struct arcp *inchain;	/* chain-pointer vector for outarcs */
+};
+
+struct dfa {
+	int nssets;		/* size of cache */
+	int nssused;		/* how many entries occupied yet */
+	int nstates;		/* number of states */
+	int ncolors;		/* length of outarc and inchain vectors */
+	int wordsper;		/* length of state-set bitvectors */
+	struct sset *ssets;	/* state-set cache */
+	unsigned *statesarea;	/* bitvector storage */
+	unsigned *work;		/* pointer to work area within statesarea */
+	struct sset **outsarea;	/* outarc-vector storage */
+	struct arcp *incarea;	/* inchain storage */
+	struct cnfa *cnfa;
+	struct colormap *cm;
+	chr *lastpost;		/* location of last cache-flushed success */
+	chr *lastnopr;		/* location of last cache-flushed NOPROGRESS */
+	struct sset *search;	/* replacement-search-pointer memory */
+	int cptsmalloced;	/* were the areas individually malloced? */
+	char *mallocarea;	/* self, or master malloced area, or NULL */
+};
+
+#define	WORK	1		/* number of work bitvectors needed */
+
+/* setup for non-malloc allocation for small cases */
+#define	FEWSTATES	20	/* must be less than UBITS */
+#define	FEWCOLORS	15
+struct smalldfa {
+	struct dfa dfa;
+	struct sset ssets[FEWSTATES*2];
+	unsigned statesarea[FEWSTATES*2 + WORK];
+	struct sset *outsarea[FEWSTATES*2 * FEWCOLORS];
+	struct arcp incarea[FEWSTATES*2 * FEWCOLORS];
+};
+#define	DOMALLOC	((struct smalldfa *)NULL)	/* force malloc */
+
+
+
+/* internal variables, bundled for easy passing around */
+struct vars {
+	regex_t *re;
+	struct guts *g;
+	int eflags;		/* copies of arguments */
+	size_t nmatch;
+	regmatch_t *pmatch;
+	rm_detail_t *details;
+	chr *start;		/* start of string */
+	chr *stop;		/* just past end of string */
+	int err;		/* error code if any (0 none) */
+	regoff_t *mem;		/* memory vector for backtracking */
+	struct smalldfa dfa1;
+	struct smalldfa dfa2;
+};
+#define	VISERR(vv)	((vv)->err != 0)	/* have we seen an error yet? */
+#define	ISERR()	VISERR(v)
+#define	VERR(vv,e)	(((vv)->err) ? (vv)->err : ((vv)->err = (e)))
+#define	ERR(e)	VERR(v, e)		/* record an error */
+#define	NOERR()	{if (ISERR()) return v->err;}	/* if error seen, return it */
+#define	OFF(p)	((p) - v->start)
+#define	LOFF(p)	((long)OFF(p))
+
+
+
+/*
+ * forward declarations
+ */
+/* =====^!^===== begin forwards =====^!^===== */
+/* automatically gathered by fwd; do not hand-edit */
+/* === regexec.c === */
+int exec _ANSI_ARGS_((regex_t *, CONST chr *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
+static int find _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));
+static int cfind _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *));
+static int cfindloop _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **));
+static VOID zapsubs _ANSI_ARGS_((regmatch_t *, size_t));
+static VOID zapmem _ANSI_ARGS_((struct vars *, struct subre *));
+static VOID subset _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
+static int dissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
+static int condissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
+static int altdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
+static int cdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
+static int ccondissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
+static int crevdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
+static int cbrdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
+static int caltdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *));
+/* === rege_dfa.c === */
+static chr *longest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, int *));
+static chr *shortest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *));
+static chr *lastcold _ANSI_ARGS_((struct vars *, struct dfa *));
+static struct dfa *newdfa _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct smalldfa *));
+static VOID freedfa _ANSI_ARGS_((struct dfa *));
+static unsigned hash _ANSI_ARGS_((unsigned *, int));
+static struct sset *initialize _ANSI_ARGS_((struct vars *, struct dfa *, chr *));
+static struct sset *miss _ANSI_ARGS_((struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *));
+static int lacon _ANSI_ARGS_((struct vars *, struct cnfa *, chr *, pcolor));
+static struct sset *getvacant _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *));
+static struct sset *pickss _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *));
+/* automatically gathered by fwd; do not hand-edit */
+/* =====^!^===== end forwards =====^!^===== */
+
+
+
+/*
+ - exec - match regular expression
+ ^ int exec(regex_t *, CONST chr *, size_t, rm_detail_t *,
+ ^					size_t, regmatch_t [], int);
+ */
+int
+exec(re, string, len, details, nmatch, pmatch, flags)
+regex_t *re;
+CONST chr *string;
+size_t len;
+rm_detail_t *details;
 size_t nmatch;
 regmatch_t pmatch[];
-int eflags;
+int flags;
 {
-	register struct re_guts *g = preg->re_g;
-#ifdef REDEBUG
-#	define	GOODFLAGS(f)	(f)
-#else
-#	define	GOODFLAGS(f)	((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
-#endif
-
-	if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
-		return(REG_BADPAT);
-	assert(!(g->iflags&BAD));
-	if (g->iflags&BAD)		/* backstop for no-debug case */
-		return(REG_BADPAT);
-	eflags = GOODFLAGS(eflags);
-
-	if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE))
-		return(smatcher(g, (char *)string, nmatch, pmatch, eflags));
+	struct vars var;
+	register struct vars *v = &var;
+	int st;
+	size_t n;
+	int backref;
+#	define	LOCALMAT	20
+	regmatch_t mat[LOCALMAT];
+#	define	LOCALMEM	40
+	regoff_t mem[LOCALMEM];
+
+	/* sanity checks */
+	if (re == NULL || string == NULL || re->re_magic != REMAGIC)
+		return REG_INVARG;
+	if (re->re_csize != sizeof(chr))
+		return REG_MIXED;
+
+	/* setup */
+	v->re = re;
+	v->g = (struct guts *)re->re_guts;
+	if ((v->g->cflags&REG_EXPECT) && details == NULL)
+		return REG_INVARG;
+	if (v->g->info&REG_UIMPOSSIBLE)
+		return REG_NOMATCH;
+	backref = (v->g->info&REG_UBACKREF) ? 1 : 0;
+	v->eflags = flags;
+	if (v->g->cflags&REG_NOSUB)
+		nmatch = 0;		/* override client */
+	v->nmatch = nmatch;
+	if (backref) {
+		/* need work area */
+		if (v->g->nsub + 1 <= LOCALMAT)
+			v->pmatch = mat;
+		else
+			v->pmatch = (regmatch_t *)MALLOC((v->g->nsub + 1) *
+							sizeof(regmatch_t));
+		if (v->pmatch == NULL)
+			return REG_ESPACE;
+		v->nmatch = v->g->nsub + 1;
+	} else
+		v->pmatch = pmatch;
+	v->details = details;
+	v->start = (chr *)string;
+	v->stop = (chr *)string + len;
+	v->err = 0;
+	if (backref) {
+		/* need retry memory */
+		assert(v->g->ntree >= 0);
+		n = (size_t)v->g->ntree;
+		if (n <= LOCALMEM)
+			v->mem = mem;
+		else
+			v->mem = (regoff_t *)MALLOC(n*sizeof(regoff_t));
+		if (v->mem == NULL) {
+			if (v->pmatch != pmatch && v->pmatch != mat)
+				FREE(v->pmatch);
+			return REG_ESPACE;
+		}
+	} else
+		v->mem = NULL;
+
+	/* do it */
+	assert(v->g->tree != NULL);
+	if (backref)
+		st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
 	else
-		return(lmatcher(g, (char *)string, nmatch, pmatch, eflags));
+		st = find(v, &v->g->tree->cnfa, &v->g->cmap);
+
+	/* copy (portion of) match vector over if necessary */
+	if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) {
+		zapsubs(pmatch, nmatch);
+		n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
+		memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t));
+	}
+
+	/* clean up */
+	if (v->pmatch != pmatch && v->pmatch != mat)
+		FREE(v->pmatch);
+	if (v->mem != NULL && v->mem != mem)
+		FREE(v->mem);
+	return st;
+}
+
+/*
+ - find - find a match for the main NFA (no-complications case)
+ ^ static int find(struct vars *, struct cnfa *, struct colormap *);
+ */
+static int
+find(v, cnfa, cm)
+struct vars *v;
+struct cnfa *cnfa;
+struct colormap *cm;
+{
+	struct dfa *s;
+	struct dfa *d;
+	chr *begin;
+	chr *end = NULL;
+	chr *cold;
+	chr *open;		/* open and close of range of possible starts */
+	chr *close;
+	int hitend;
+	int shorter = (v->g->tree->flags&SHORTER) ? 1 : 0;
+
+	/* first, a shot with the search RE */
+	s = newdfa(v, &v->g->search, cm, &v->dfa1);
+	assert(!(ISERR() && s != NULL));
+	NOERR();
+	MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
+	cold = NULL;
+	close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *)NULL);
+	freedfa(s);
+	NOERR();
+	if (v->g->cflags&REG_EXPECT) {
+		assert(v->details != NULL);
+		if (cold != NULL)
+			v->details->rm_extend.rm_so = OFF(cold);
+		else
+			v->details->rm_extend.rm_so = OFF(v->stop);
+		v->details->rm_extend.rm_eo = OFF(v->stop);	/* unknown */
+	}
+	if (close == NULL)		/* not found */
+		return REG_NOMATCH;
+	if (v->nmatch == 0)		/* found, don't need exact location */
+		return REG_OKAY;
+
+	/* find starting point and match */
+	assert(cold != NULL);
+	open = cold;
+	cold = NULL;
+	MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
+	d = newdfa(v, cnfa, cm, &v->dfa1);
+	assert(!(ISERR() && d != NULL));
+	NOERR();
+	for (begin = open; begin <= close; begin++) {
+		MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
+		if (shorter)
+			end = shortest(v, d, begin, begin, v->stop,
+							(chr **)NULL, &hitend);
+		else
+			end = longest(v, d, begin, v->stop, &hitend);
+		NOERR();
+		if (hitend && cold == NULL)
+			cold = begin;
+		if (end != NULL)
+			break;		/* NOTE BREAK OUT */
+	}
+	assert(end != NULL);		/* search RE succeeded so loop should */
+	freedfa(d);
+
+	/* and pin down details */
+	assert(v->nmatch > 0);
+	v->pmatch[0].rm_so = OFF(begin);
+	v->pmatch[0].rm_eo = OFF(end);
+	if (v->g->cflags&REG_EXPECT) {
+		if (cold != NULL)
+			v->details->rm_extend.rm_so = OFF(cold);
+		else
+			v->details->rm_extend.rm_so = OFF(v->stop);
+		v->details->rm_extend.rm_eo = OFF(v->stop);	/* unknown */
+	}
+	if (v->nmatch == 1)		/* no need for submatches */
+		return REG_OKAY;
+
+	/* submatches */
+	zapsubs(v->pmatch, v->nmatch);
+	return dissect(v, v->g->tree, begin, end);
+}
+
+/*
+ - cfind - find a match for the main NFA (with complications)
+ ^ static int cfind(struct vars *, struct cnfa *, struct colormap *);
+ */
+static int
+cfind(v, cnfa, cm)
+struct vars *v;
+struct cnfa *cnfa;
+struct colormap *cm;
+{
+	struct dfa *s;
+	struct dfa *d;
+	chr *cold;
+	int ret;
+
+	s = newdfa(v, &v->g->search, cm, &v->dfa1);
+	NOERR();
+	d = newdfa(v, cnfa, cm, &v->dfa2);
+	if (ISERR()) {
+		assert(d == NULL);
+		freedfa(s);
+		return v->err;
+	}
+
+	ret = cfindloop(v, cnfa, cm, d, s, &cold);
+
+	freedfa(d);
+	freedfa(s);
+	NOERR();
+	if (v->g->cflags&REG_EXPECT) {
+		assert(v->details != NULL);
+		if (cold != NULL)
+			v->details->rm_extend.rm_so = OFF(cold);
+		else
+			v->details->rm_extend.rm_so = OFF(v->stop);
+		v->details->rm_extend.rm_eo = OFF(v->stop);	/* unknown */
+	}
+	return ret;
+}
+
+/*
+ - cfindloop - the heart of cfind
+ ^ static int cfindloop(struct vars *, struct cnfa *, struct colormap *,
+ ^	struct dfa *, struct dfa *, chr **);
+ */
+static int
+cfindloop(v, cnfa, cm, d, s, coldp)
+struct vars *v;
+struct cnfa *cnfa;
+struct colormap *cm;
+struct dfa *d;
+struct dfa *s;
+chr **coldp;			/* where to put coldstart pointer */
+{
+	chr *begin;
+	chr *end;
+	chr *cold;
+	chr *open;		/* open and close of range of possible starts */
+	chr *close;
+	chr *estart;
+	chr *estop;
+	int er;
+	int shorter = v->g->tree->flags&SHORTER;
+	int hitend;
+
+	assert(d != NULL && s != NULL);
+	cold = NULL;
+	close = v->start;
+	do {
+		MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
+		close = shortest(v, s, close, close, v->stop, &cold, (int *)NULL);
+		if (close == NULL)
+			break;				/* NOTE BREAK */
+		assert(cold != NULL);
+		open = cold;
+		cold = NULL;
+		MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
+		for (begin = open; begin <= close; begin++) {
+			MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
+			estart = begin;
+			estop = v->stop;
+			for (;;) {
+				if (shorter)
+					end = shortest(v, d, begin, estart,
+						estop, (chr **)NULL, &hitend);
+				else
+					end = longest(v, d, begin, estop,
+								&hitend);
+				if (hitend && cold == NULL)
+					cold = begin;
+				if (end == NULL)
+					break;		/* NOTE BREAK OUT */
+				MDEBUG(("tentative end %ld\n", LOFF(end)));
+				zapsubs(v->pmatch, v->nmatch);
+				zapmem(v, v->g->tree);
+				er = cdissect(v, v->g->tree, begin, end);
+				if (er == REG_OKAY) {
+					if (v->nmatch > 0) {
+						v->pmatch[0].rm_so = OFF(begin);
+						v->pmatch[0].rm_eo = OFF(end);
+					}
+					*coldp = cold;
+					return REG_OKAY;
+				}
+				if (er != REG_NOMATCH) {
+					ERR(er);
+					return er;
+				}
+				if ((shorter) ? end == estop : end == begin) {
+					/* no point in trying again */
+					*coldp = cold;
+					return REG_NOMATCH;
+				}
+				/* go around and try again */
+				if (shorter)
+					estart = end + 1;
+				else
+					estop = end - 1;
+			}
+		}
+	} while (close < v->stop);
+
+	*coldp = cold;
+	return REG_NOMATCH;
+}
+
+/*
+ - zapsubs - initialize the subexpression matches to "no match"
+ ^ static VOID zapsubs(regmatch_t *, size_t);
+ */
+static VOID
+zapsubs(p, n)
+regmatch_t *p;
+size_t n;
+{
+	size_t i;
+
+	for (i = n-1; i > 0; i--) {
+		p[i].rm_so = -1;
+		p[i].rm_eo = -1;
+	}
 }
+
+/*
+ - zapmem - initialize the retry memory of a subtree to zeros
+ ^ static VOID zapmem(struct vars *, struct subre *);
+ */
+static VOID
+zapmem(v, t)
+struct vars *v;
+struct subre *t;
+{
+	if (t == NULL)
+		return;
+
+	assert(v->mem != NULL);
+	v->mem[t->retry] = 0;
+	if (t->op == '(') {
+		assert(t->subno > 0);
+		v->pmatch[t->subno].rm_so = -1;
+		v->pmatch[t->subno].rm_eo = -1;
+	}
+
+	if (t->left != NULL)
+		zapmem(v, t->left);
+	if (t->right != NULL)
+		zapmem(v, t->right);
+}
+
+/*
+ - subset - set any subexpression relevant to a successful subre
+ ^ static VOID subset(struct vars *, struct subre *, chr *, chr *);
+ */
+static VOID
+subset(v, sub, begin, end)
+struct vars *v;
+struct subre *sub;
+chr *begin;
+chr *end;
+{
+	int n = sub->subno;
+
+	assert(n > 0);
+	if ((size_t)n >= v->nmatch)
+		return;
+
+	MDEBUG(("setting %d\n", n));
+	v->pmatch[n].rm_so = OFF(begin);
+	v->pmatch[n].rm_eo = OFF(end);
+}
+
+/*
+ - dissect - determine subexpression matches (uncomplicated case)
+ ^ static int dissect(struct vars *, struct subre *, chr *, chr *);
+ */
+static int			/* regexec return code */
+dissect(v, t, begin, end)
+struct vars *v;
+struct subre *t;
+chr *begin;			/* beginning of relevant substring */
+chr *end;			/* end of same */
+{
+	assert(t != NULL);
+	MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end)));
+
+	switch (t->op) {
+	case '=':		/* terminal node */
+		assert(t->left == NULL && t->right == NULL);
+		return REG_OKAY;	/* no action, parent did the work */
+		break;
+	case '|':		/* alternation */
+		assert(t->left != NULL);
+		return altdissect(v, t, begin, end);
+		break;
+	case 'b':		/* back ref -- shouldn't be calling us! */
+		return REG_ASSERT;
+		break;
+	case '.':		/* concatenation */
+		assert(t->left != NULL && t->right != NULL);
+		return condissect(v, t, begin, end);
+		break;
+	case '(':		/* capturing */
+		assert(t->left != NULL && t->right == NULL);
+		assert(t->subno > 0);
+		subset(v, t, begin, end);
+		return dissect(v, t->left, begin, end);
+		break;
+	default:
+		return REG_ASSERT;
+		break;
+	}
+}
+
+/*
+ - condissect - determine concatenation subexpression matches (uncomplicated)
+ ^ static int condissect(struct vars *, struct subre *, chr *, chr *);
+ */
+static int			/* regexec return code */
+condissect(v, t, begin, end)
+struct vars *v;
+struct subre *t;
+chr *begin;			/* beginning of relevant substring */
+chr *end;			/* end of same */
+{
+	struct dfa *d;
+	struct dfa *d2;
+	chr *mid;
+	int i;
+	int shorter = (t->left->flags&SHORTER) ? 1 : 0;
+	chr *stop = (shorter) ? end : begin;
+
+	assert(t->op == '.');
+	assert(t->left != NULL && t->left->cnfa.nstates > 0);
+	assert(t->right != NULL && t->right->cnfa.nstates > 0);
+
+	d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
+	NOERR();
+	d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2);
+	if (ISERR()) {
+		assert(d2 == NULL);
+		freedfa(d);
+		return v->err;
+	}
+
+	/* pick a tentative midpoint */
+	if (shorter)
+		mid = shortest(v, d, begin, begin, end, (chr **)NULL,
+								(int *)NULL);
+	else
+		mid = longest(v, d, begin, end, (int *)NULL);
+	if (mid == NULL) {
+		freedfa(d);
+		freedfa(d2);
+		return REG_ASSERT;
+	}
+	MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
+
+	/* iterate until satisfaction or failure */
+	while (longest(v, d2, mid, end, (int *)NULL) != end) {
+		/* that midpoint didn't work, find a new one */
+		if (mid == stop) {
+			/* all possibilities exhausted! */
+			MDEBUG(("no midpoint!\n"));
+			freedfa(d);
+			freedfa(d2);
+			return REG_ASSERT;
+		}
+		if (shorter)
+			mid = shortest(v, d, begin, mid+1, end, (chr **)NULL,
+								(int *)NULL);
+		else
+			mid = longest(v, d, begin, mid-1, (int *)NULL);
+		if (mid == NULL) {
+			/* failed to find a new one! */
+			MDEBUG(("failed midpoint!\n"));
+			freedfa(d);
+			freedfa(d2);
+			return REG_ASSERT;
+		}
+		MDEBUG(("new midpoint %ld\n", LOFF(mid)));
+	}
+
+	/* satisfaction */
+	MDEBUG(("successful\n"));
+	freedfa(d);
+	freedfa(d2);
+	i = dissect(v, t->left, begin, mid);
+	if (i != REG_OKAY)
+		return i;
+	return dissect(v, t->right, mid, end);
+}
+
+/*
+ - altdissect - determine alternative subexpression matches (uncomplicated)
+ ^ static int altdissect(struct vars *, struct subre *, chr *, chr *);
+ */
+static int			/* regexec return code */
+altdissect(v, t, begin, end)
+struct vars *v;
+struct subre *t;
+chr *begin;			/* beginning of relevant substring */
+chr *end;			/* end of same */
+{
+	struct dfa *d;
+	int i;
+
+	assert(t != NULL);
+	assert(t->op == '|');
+
+	for (i = 0; t != NULL; t = t->right, i++) {
+		MDEBUG(("trying %dth\n", i));
+		assert(t->left != NULL && t->left->cnfa.nstates > 0);
+		d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
+		if (ISERR())
+			return v->err;
+		if (longest(v, d, begin, end, (int *)NULL) == end) {
+			MDEBUG(("success\n"));
+			freedfa(d);
+			return dissect(v, t->left, begin, end);
+		}
+		freedfa(d);
+	}
+	return REG_ASSERT;	/* none of them matched?!? */
+}
+
+/*
+ - cdissect - determine subexpression matches (with complications)
+ * The retry memory stores the offset of the trial midpoint from begin, 
+ * plus 1 so that 0 uniquely means "clean slate".
+ ^ static int cdissect(struct vars *, struct subre *, chr *, chr *);
+ */
+static int			/* regexec return code */
+cdissect(v, t, begin, end)
+struct vars *v;
+struct subre *t;
+chr *begin;			/* beginning of relevant substring */
+chr *end;			/* end of same */
+{
+	int er;
+
+	assert(t != NULL);
+	MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));
+
+	switch (t->op) {
+	case '=':		/* terminal node */
+		assert(t->left == NULL && t->right == NULL);
+		return REG_OKAY;	/* no action, parent did the work */
+		break;
+	case '|':		/* alternation */
+		assert(t->left != NULL);
+		return caltdissect(v, t, begin, end);
+		break;
+	case 'b':		/* back ref -- shouldn't be calling us! */
+		assert(t->left == NULL && t->right == NULL);
+		return cbrdissect(v, t, begin, end);
+		break;
+	case '.':		/* concatenation */
+		assert(t->left != NULL && t->right != NULL);
+		return ccondissect(v, t, begin, end);
+		break;
+	case '(':		/* capturing */
+		assert(t->left != NULL && t->right == NULL);
+		assert(t->subno > 0);
+		er = cdissect(v, t->left, begin, end);
+		if (er == REG_OKAY)
+			subset(v, t, begin, end);
+		return er;
+		break;
+	default:
+		return REG_ASSERT;
+		break;
+	}
+}
+
+/*
+ - ccondissect - concatenation subexpression matches (with complications)
+ * The retry memory stores the offset of the trial midpoint from begin, 
+ * plus 1 so that 0 uniquely means "clean slate".
+ ^ static int ccondissect(struct vars *, struct subre *, chr *, chr *);
+ */
+static int			/* regexec return code */
+ccondissect(v, t, begin, end)
+struct vars *v;
+struct subre *t;
+chr *begin;			/* beginning of relevant substring */
+chr *end;			/* end of same */
+{
+	struct dfa *d;
+	struct dfa *d2;
+	chr *mid;
+	int er;
+
+	assert(t->op == '.');
+	assert(t->left != NULL && t->left->cnfa.nstates > 0);
+	assert(t->right != NULL && t->right->cnfa.nstates > 0);
+
+	if (t->left->flags&SHORTER)		/* reverse scan */
+		return crevdissect(v, t, begin, end);
+
+	d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
+	if (ISERR())
+		return v->err;
+	d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
+	if (ISERR()) {
+		freedfa(d);
+		return v->err;
+	}
+	MDEBUG(("cconcat %d\n", t->retry));
+
+	/* pick a tentative midpoint */
+	if (v->mem[t->retry] == 0) {
+		mid = longest(v, d, begin, end, (int *)NULL);
+		if (mid == NULL) {
+			freedfa(d);
+			freedfa(d2);
+			return REG_NOMATCH;
+		}
+		MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
+		v->mem[t->retry] = (mid - begin) + 1;
+	} else {
+		mid = begin + (v->mem[t->retry] - 1);
+		MDEBUG(("working midpoint %ld\n", LOFF(mid)));
+	}
+
+	/* iterate until satisfaction or failure */
+	for (;;) {
+		/* try this midpoint on for size */
+		er = cdissect(v, t->left, begin, mid);
+		if (er == REG_OKAY &&
+				longest(v, d2, mid, end, (int *)NULL) == end &&
+				(er = cdissect(v, t->right, mid, end)) == 
+								REG_OKAY)
+			break;			/* NOTE BREAK OUT */
+		if (er != REG_OKAY && er != REG_NOMATCH) {
+			freedfa(d);
+			freedfa(d2);
+			return er;
+		}
+
+		/* that midpoint didn't work, find a new one */
+		if (mid == begin) {
+			/* all possibilities exhausted */
+			MDEBUG(("%d no midpoint\n", t->retry));
+			freedfa(d);
+			freedfa(d2);
+			return REG_NOMATCH;
+		}
+		mid = longest(v, d, begin, mid-1, (int *)NULL);
+		if (mid == NULL) {
+			/* failed to find a new one */
+			MDEBUG(("%d failed midpoint\n", t->retry));
+			freedfa(d);
+			freedfa(d2);
+			return REG_NOMATCH;
+		}
+		MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
+		v->mem[t->retry] = (mid - begin) + 1;
+		zapmem(v, t->left);
+		zapmem(v, t->right);
+	}
+
+	/* satisfaction */
+	MDEBUG(("successful\n"));
+	freedfa(d);
+	freedfa(d2);
+	return REG_OKAY;
+}
+
+/*
+ - crevdissect - determine backref shortest-first subexpression matches
+ * The retry memory stores the offset of the trial midpoint from begin, 
+ * plus 1 so that 0 uniquely means "clean slate".
+ ^ static int crevdissect(struct vars *, struct subre *, chr *, chr *);
+ */
+static int			/* regexec return code */
+crevdissect(v, t, begin, end)
+struct vars *v;
+struct subre *t;
+chr *begin;			/* beginning of relevant substring */
+chr *end;			/* end of same */
+{
+	struct dfa *d;
+	struct dfa *d2;
+	chr *mid;
+	int er;
+
+	assert(t->op == '.');
+	assert(t->left != NULL && t->left->cnfa.nstates > 0);
+	assert(t->right != NULL && t->right->cnfa.nstates > 0);
+	assert(t->left->flags&SHORTER);
+
+	/* concatenation -- need to split the substring between parts */
+	d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
+	if (ISERR())
+		return v->err;
+	d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
+	if (ISERR()) {
+		freedfa(d);
+		return v->err;
+	}
+	MDEBUG(("crev %d\n", t->retry));
+
+	/* pick a tentative midpoint */
+	if (v->mem[t->retry] == 0) {
+		mid = shortest(v, d, begin, begin, end, (chr **)NULL, (int *)NULL);
+		if (mid == NULL) {
+			freedfa(d);
+			freedfa(d2);
+			return REG_NOMATCH;
+		}
+		MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
+		v->mem[t->retry] = (mid - begin) + 1;
+	} else {
+		mid = begin + (v->mem[t->retry] - 1);
+		MDEBUG(("working midpoint %ld\n", LOFF(mid)));
+	}
+
+	/* iterate until satisfaction or failure */
+	for (;;) {
+		/* try this midpoint on for size */
+		er = cdissect(v, t->left, begin, mid);
+		if (er == REG_OKAY &&
+				longest(v, d2, mid, end, (int *)NULL) == end &&
+				(er = cdissect(v, t->right, mid, end)) == 
+								REG_OKAY)
+			break;			/* NOTE BREAK OUT */
+		if (er != REG_OKAY && er != REG_NOMATCH) {
+			freedfa(d);
+			freedfa(d2);
+			return er;
+		}
+
+		/* that midpoint didn't work, find a new one */
+		if (mid == end) {
+			/* all possibilities exhausted */
+			MDEBUG(("%d no midpoint\n", t->retry));
+			freedfa(d);
+			freedfa(d2);
+			return REG_NOMATCH;
+		}
+		mid = shortest(v, d, begin, mid+1, end, (chr **)NULL, (int *)NULL);
+		if (mid == NULL) {
+			/* failed to find a new one */
+			MDEBUG(("%d failed midpoint\n", t->retry));
+			freedfa(d);
+			freedfa(d2);
+			return REG_NOMATCH;
+		}
+		MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
+		v->mem[t->retry] = (mid - begin) + 1;
+		zapmem(v, t->left);
+		zapmem(v, t->right);
+	}
+
+	/* satisfaction */
+	MDEBUG(("successful\n"));
+	freedfa(d);
+	freedfa(d2);
+	return REG_OKAY;
+}
+
+/*
+ - cbrdissect - determine backref subexpression matches
+ ^ static int cbrdissect(struct vars *, struct subre *, chr *, chr *);
+ */
+static int			/* regexec return code */
+cbrdissect(v, t, begin, end)
+struct vars *v;
+struct subre *t;
+chr *begin;			/* beginning of relevant substring */
+chr *end;			/* end of same */
+{
+	int i;
+	int n = t->subno;
+	size_t len;
+	chr *paren;
+	chr *p;
+	chr *stop;
+	int min = t->min;
+	int max = t->max;
+
+	assert(t != NULL);
+	assert(t->op == 'b');
+	assert(n >= 0);
+	assert((size_t)n < v->nmatch);
+
+	MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max));
+
+	if (v->pmatch[n].rm_so == -1)
+		return REG_NOMATCH;
+	paren = v->start + v->pmatch[n].rm_so;
+	len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
+
+	/* no room to maneuver -- retries are pointless */
+	if (v->mem[t->retry])
+		return REG_NOMATCH;
+	v->mem[t->retry] = 1;
+
+	/* special-case zero-length string */
+	if (len == 0) {
+		if (begin == end)
+			return REG_OKAY;
+		return REG_NOMATCH;
+	}
+
+	/* and too-short string */
+	assert(end >= begin);
+	if ((size_t)(end - begin) < len)
+		return REG_NOMATCH;
+	stop = end - len;
+
+	/* count occurrences */
+	i = 0;
+	for (p = begin; p <= stop && (i < max || max == INFINITY); p += len) {
+		if ((*v->g->compare)(paren, p, len) != 0)
+				break;
+		i++;
+	}
+	MDEBUG(("cbackref found %d\n", i));
+
+	/* and sort it out */
+	if (p != end)			/* didn't consume all of it */
+		return REG_NOMATCH;
+	if (min <= i && (i <= max || max == INFINITY))
+		return REG_OKAY;
+	return REG_NOMATCH;		/* out of range */
+}
+
+/*
+ - caltdissect - determine alternative subexpression matches (w. complications)
+ ^ static int caltdissect(struct vars *, struct subre *, chr *, chr *);
+ */
+static int			/* regexec return code */
+caltdissect(v, t, begin, end)
+struct vars *v;
+struct subre *t;
+chr *begin;			/* beginning of relevant substring */
+chr *end;			/* end of same */
+{
+	struct dfa *d;
+	int er;
+#	define	UNTRIED	0	/* not yet tried at all */
+#	define	TRYING	1	/* top matched, trying submatches */
+#	define	TRIED	2	/* top didn't match or submatches exhausted */
+
+	if (t == NULL)
+		return REG_NOMATCH;
+	assert(t->op == '|');
+	if (v->mem[t->retry] == TRIED)
+		return caltdissect(v, t->right, begin, end);
+
+	MDEBUG(("calt n%d\n", t->retry));
+	assert(t->left != NULL);
+
+	if (v->mem[t->retry] == UNTRIED) {
+		d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
+		if (ISERR())
+			return v->err;
+		if (longest(v, d, begin, end, (int *)NULL) != end) {
+			freedfa(d);
+			v->mem[t->retry] = TRIED;
+			return caltdissect(v, t->right, begin, end);
+		}
+		freedfa(d);
+		MDEBUG(("calt matched\n"));
+		v->mem[t->retry] = TRYING;
+	}
+
+	er = cdissect(v, t->left, begin, end);
+	if (er != REG_NOMATCH)
+		return er;
+
+	v->mem[t->retry] = TRIED;
+	return caltdissect(v, t->right, begin, end);
+}
+
+
+
+#include "rege_dfa.c"