From: Václav Slavík Date: Thu, 19 Feb 2004 16:47:02 +0000 (+0000) Subject: reverted to TCL version of regex library X-Git-Url: https://git.saurik.com/wxWidgets.git/commitdiff_plain/3ca4086b22723db16d7e0fc23aa9021b4785096a reverted to TCL version of regex library git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@25865 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- diff --git a/src/regex/regc_color.c b/src/regex/regc_color.c index 3e93f8eae9..5aed21c630 100644 --- a/src/regex/regc_color.c +++ b/src/regex/regc_color.c @@ -2,21 +2,21 @@ * colorings of characters * This file is #included by regcomp.c. * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * + * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. - * + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -28,7 +28,6 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* - * $Header$ * * * Note that there are some incestuous relationships between this code and @@ -37,20 +36,22 @@ -#define CISERR() VISERR(cm->v) -#define CERR(e) VERR(cm->v, (e)) +#define CISERR() VISERR(cm->v) +#define CERR(e) VERR(cm->v, (e)) /* - * initcm - set up new colormap + - initcm - set up new colormap + ^ static VOID initcm(struct vars *, struct colormap *); */ -static void -initcm(struct vars * v, - struct colormap * cm) +static VOID +initcm(v, cm) +struct vars *v; +struct colormap *cm; { - int i; - int j; + int i; + int j; union tree *t; union tree *nextt; struct colordesc *cd; @@ -63,41 +64,41 @@ initcm(struct vars * v, cm->max = 0; cm->free = 0; - cd = cm->cd; /* cm->cd[WHITE] */ + cd = cm->cd; /* cm->cd[WHITE] */ cd->sub = NOSUB; cd->arcs = NULL; cd->flags = 0; cd->nchrs = CHR_MAX - CHR_MIN + 1; /* upper levels of tree */ - for (t = &cm->tree[0], j = NBYTS - 1; j > 0; t = nextt, j--) - { + for (t = &cm->tree[0], j = NBYTS-1; j > 0; t = nextt, j--) { nextt = t + 1; - for (i = BYTTAB - 1; i >= 0; i--) + for (i = BYTTAB-1; i >= 0; i--) t->tptr[i] = nextt; } /* bottom level is solid white */ - t = &cm->tree[NBYTS - 1]; - for (i = BYTTAB - 1; i >= 0; i--) + t = &cm->tree[NBYTS-1]; + for (i = BYTTAB-1; i >= 0; i--) t->tcolor[i] = WHITE; cd->block = t; } /* - * freecm - free dynamically-allocated things in a colormap + - freecm - free dynamically-allocated things in a colormap + ^ static VOID freecm(struct colormap *); */ -static void -freecm(struct colormap * cm) +static VOID +freecm(cm) +struct colormap *cm; { - size_t i; + size_t i; union tree *cb; cm->magic = 0; if (NBYTS > 1) cmtreefree(cm, cm->tree, 0); for (i = 1; i <= cm->max; i++) /* skip WHITE */ - if (!UNUSEDCOLOR(&cm->cd[i])) - { + if (!UNUSEDCOLOR(&cm->cd[i])) { cb = cm->cd[i].block; if (cb != NULL) FREE(cb); @@ -107,32 +108,29 @@ freecm(struct colormap * cm) } /* - * cmtreefree - free a non-terminal part of a colormap tree + - cmtreefree - free a non-terminal part of a colormap tree + ^ 
static VOID cmtreefree(struct colormap *, union tree *, int); */ -static void -cmtreefree(struct colormap * cm, - union tree * tree, - int level) /* level number (top == 0) of this block */ +static VOID +cmtreefree(cm, tree, level) +struct colormap *cm; +union tree *tree; +int level; /* level number (top == 0) of this block */ { - int i; + int i; union tree *t; - union tree *fillt = &cm->tree[level + 1]; + union tree *fillt = &cm->tree[level+1]; union tree *cb; - assert(level < NBYTS - 1); /* this level has pointers */ - for (i = BYTTAB - 1; i >= 0; i--) - { + assert(level < NBYTS-1); /* this level has pointers */ + for (i = BYTTAB-1; i >= 0; i--) { t = tree->tptr[i]; assert(t != NULL); - if (t != fillt) - { - if (level < NBYTS - 2) - { /* more pointer blocks below */ - cmtreefree(cm, t, level + 1); + if (t != fillt) { + if (level < NBYTS-2) { /* more pointer blocks below */ + cmtreefree(cm, t, level+1); FREE(t); - } - else - { /* color block below */ + } else { /* color block below */ cb = cm->cd[t->tcolor[0]].block; if (t != cb) /* not a solid block */ FREE(t); @@ -142,24 +140,26 @@ cmtreefree(struct colormap * cm, } /* - * setcolor - set the color of a character in a colormap + - setcolor - set the color of a character in a colormap + ^ static color setcolor(struct colormap *, pchr, pcolor); */ -static color /* previous color */ -setcolor(struct colormap * cm, - chr c, - pcolor co) +static color /* previous color */ +setcolor(cm, c, co) +struct colormap *cm; +pchr c; +pcolor co; { - uchr uc = c; - int shift; - int level; - int b; - int bottom; + uchr uc = c; + int shift; + int level; + int b; + int bottom; union tree *t; union tree *newt; union tree *fillt; union tree *lastt; union tree *cb; - color prev; + color prev; assert(cm->magic == CMMAGIC); if (CISERR() || co == COLORLESS) @@ -167,30 +167,27 @@ setcolor(struct colormap * cm, t = cm->tree; for (level = 0, shift = BYTBITS * (NBYTS - 1); shift > 0; - level++, shift -= BYTBITS) - { + level++, shift -= 
BYTBITS) { b = (uc >> shift) & BYTMASK; lastt = t; t = lastt->tptr[b]; assert(t != NULL); - fillt = &cm->tree[level + 1]; + fillt = &cm->tree[level+1]; bottom = (shift <= BYTBITS) ? 1 : 0; cb = (bottom) ? cm->cd[t->tcolor[0]].block : fillt; - if (t == fillt || t == cb) - { /* must allocate a new block */ - newt = (union tree *) MALLOC((bottom) ? - sizeof(struct colors) : sizeof(struct ptrs)); - if (newt == NULL) - { + if (t == fillt || t == cb) { /* must allocate a new block */ + newt = (union tree *)MALLOC((bottom) ? + sizeof(struct colors) : sizeof(struct ptrs)); + if (newt == NULL) { CERR(REG_ESPACE); return COLORLESS; } if (bottom) memcpy(VS(newt->tcolor), VS(t->tcolor), - BYTTAB * sizeof(color)); + BYTTAB*sizeof(color)); else memcpy(VS(newt->tptr), VS(t->tptr), - BYTTAB * sizeof(union tree *)); + BYTTAB*sizeof(union tree *)); t = newt; lastt->tptr[b] = t; } @@ -198,67 +195,63 @@ setcolor(struct colormap * cm, b = uc & BYTMASK; prev = t->tcolor[b]; - t->tcolor[b] = (color) co; + t->tcolor[b] = (color)co; return prev; } /* - * maxcolor - report largest color number in use + - maxcolor - report largest color number in use + ^ static color maxcolor(struct colormap *); */ static color -maxcolor(struct colormap * cm) +maxcolor(cm) +struct colormap *cm; { if (CISERR()) return COLORLESS; - return (color) cm->max; + return (color)cm->max; } /* - * newcolor - find a new color (must be subject of setcolor at once) - * Beware: may relocate the colordescs. + - newcolor - find a new color (must be subject of setcolor at once) + * Beware: may relocate the colordescs. 
+ ^ static color newcolor(struct colormap *); */ -static color /* COLORLESS for error */ -newcolor(struct colormap * cm) +static color /* COLORLESS for error */ +newcolor(cm) +struct colormap *cm; { struct colordesc *cd; struct colordesc *new; - size_t n; + size_t n; if (CISERR()) return COLORLESS; - if (cm->free != 0) - { + if (cm->free != 0) { assert(cm->free > 0); - assert((size_t) cm->free < cm->ncds); + assert((size_t)cm->free < cm->ncds); cd = &cm->cd[cm->free]; assert(UNUSEDCOLOR(cd)); assert(cd->arcs == NULL); cm->free = cd->sub; - } - else if (cm->max < cm->ncds - 1) - { + } else if (cm->max < cm->ncds - 1) { cm->max++; cd = &cm->cd[cm->max]; - } - else - { + } else { /* oops, must allocate more */ n = cm->ncds * 2; - if (cm->cd == cm->cdspace) - { - new = (struct colordesc *) MALLOC(n * - sizeof(struct colordesc)); + if (cm->cd == cm->cdspace) { + new = (struct colordesc *)MALLOC(n * + sizeof(struct colordesc)); if (new != NULL) memcpy(VS(new), VS(cm->cdspace), cm->ncds * - sizeof(struct colordesc)); - } - else - new = (struct colordesc *) REALLOC(cm->cd, - n * sizeof(struct colordesc)); - if (new == NULL) - { + sizeof(struct colordesc)); + } else + new = (struct colordesc *)REALLOC(cm->cd, + n * sizeof(struct colordesc)); + if (new == NULL) { CERR(REG_ESPACE); return COLORLESS; } @@ -275,19 +268,20 @@ newcolor(struct colormap * cm) cd->flags = 0; cd->block = NULL; - return (color) (cd - cm->cd); + return (color)(cd - cm->cd); } /* - * freecolor - free a color (must have no arcs or subcolor) + - freecolor - free a color (must have no arcs or subcolor) + ^ static VOID freecolor(struct colormap *, pcolor); */ -static void -freecolor(struct colormap * cm, - pcolor co) +static VOID +freecolor(cm, co) +struct colormap *cm; +pcolor co; { struct colordesc *cd = &cm->cd[co]; - color pco, - nco; /* for freelist scan */ + color pco, nco; /* for freelist scan */ assert(co >= 0); if (co == WHITE) @@ -297,53 +291,47 @@ freecolor(struct colormap * cm, assert(cd->sub == 
NOSUB); assert(cd->nchrs == 0); cd->flags = FREECOL; - if (cd->block != NULL) - { + if (cd->block != NULL) { FREE(cd->block); - cd->block = NULL; /* just paranoia */ + cd->block = NULL; /* just paranoia */ } - if ((size_t) co == cm->max) - { + if ((size_t)co == cm->max) { while (cm->max > WHITE && UNUSEDCOLOR(&cm->cd[cm->max])) cm->max--; assert(cm->free >= 0); - while ((size_t) cm->free > cm->max) + while ((size_t)cm->free > cm->max) cm->free = cm->cd[cm->free].sub; - if (cm->free > 0) - { + if (cm->free > 0) { assert(cm->free < cm->max); pco = cm->free; nco = cm->cd[pco].sub; while (nco > 0) - if ((size_t) nco > cm->max) - { + if ((size_t)nco > cm->max) { /* take this one out of freelist */ nco = cm->cd[nco].sub; cm->cd[pco].sub = nco; - } - else - { + } else { assert(nco < cm->max); pco = nco; nco = cm->cd[pco].sub; } } - } - else - { + } else { cd->sub = cm->free; - cm->free = (color) (cd - cm->cd); + cm->free = (color)(cd - cm->cd); } } /* - * pseudocolor - allocate a false color, to be managed by other means + - pseudocolor - allocate a false color, to be managed by other means + ^ static color pseudocolor(struct colormap *); */ static color -pseudocolor(struct colormap * cm) +pseudocolor(cm) +struct colormap *cm; { - color co; + color co; co = newcolor(cm); if (CISERR()) @@ -354,13 +342,16 @@ pseudocolor(struct colormap * cm) } /* - * subcolor - allocate a new subcolor (if necessary) to this chr + - subcolor - allocate a new subcolor (if necessary) to this chr + ^ static color subcolor(struct colormap *, pchr c); */ static color -subcolor(struct colormap * cm, chr c) +subcolor(cm, c) +struct colormap *cm; +pchr c; { - color co; /* current color of c */ - color sco; /* new subcolor */ + color co; /* current color of c */ + color sco; /* new subcolor */ co = GETCOLOR(cm, c); sco = newsub(cm, co); @@ -368,8 +359,8 @@ subcolor(struct colormap * cm, chr c) return COLORLESS; assert(sco != COLORLESS); - if (co == sco) /* already in an open subcolor */ - return co; 
/* rest is redundant */ + if (co == sco) /* already in an open subcolor */ + return co; /* rest is redundant */ cm->cd[co].nchrs--; cm->cd[sco].nchrs++; setcolor(cm, c, sco); @@ -377,22 +368,22 @@ subcolor(struct colormap * cm, chr c) } /* - * newsub - allocate a new subcolor (if necessary) for a color + - newsub - allocate a new subcolor (if necessary) for a color + ^ static color newsub(struct colormap *, pcolor); */ static color -newsub(struct colormap * cm, - pcolor co) +newsub(cm, co) +struct colormap *cm; +pcolor co; { - color sco; /* new subcolor */ + color sco; /* new subcolor */ sco = cm->cd[co].sub; - if (sco == NOSUB) - { /* color has no open subcolor */ - if (cm->cd[co].nchrs == 1) /* optimization */ + if (sco == NOSUB) { /* color has no open subcolor */ + if (cm->cd[co].nchrs == 1) /* optimization */ return co; - sco = newcolor(cm); /* must create subcolor */ - if (sco == COLORLESS) - { + sco = newcolor(cm); /* must create subcolor */ + if (sco == COLORLESS) { assert(CISERR()); return COLORLESS; } @@ -405,26 +396,29 @@ newsub(struct colormap * cm, } /* - * subrange - allocate new subcolors to this range of chrs, fill in arcs + - subrange - allocate new subcolors to this range of chrs, fill in arcs + ^ static VOID subrange(struct vars *, pchr, pchr, struct state *, + ^ struct state *); */ -static void -subrange(struct vars * v, - chr from, - chr to, - struct state * lp, - struct state * rp) +static VOID +subrange(v, from, to, lp, rp) +struct vars *v; +pchr from; +pchr to; +struct state *lp; +struct state *rp; { - uchr uf; - int i; + uchr uf; + int i; assert(from <= to); /* first, align "from" on a tree-block boundary */ - uf = (uchr) from; - i = (int) (((uf + BYTTAB - 1) & (uchr) ~BYTMASK) - uf); + uf = (uchr)from; + i = (int)( ((uf + BYTTAB-1) & (uchr)~BYTMASK) - uf ); for (; from <= to && i > 0; i--, from++) newarc(v->nfa, PLAIN, subcolor(v->cm, from), lp, rp); - if (from > to) /* didn't reach a boundary */ + if (from > to) /* didn't reach a boundary 
*/ return; /* deal with whole blocks */ @@ -437,28 +431,30 @@ subrange(struct vars * v, } /* - * subblock - allocate new subcolors for one tree block of chrs, fill in arcs + - subblock - allocate new subcolors for one tree block of chrs, fill in arcs + ^ static VOID subblock(struct vars *, pchr, struct state *, struct state *); */ -static void -subblock(struct vars * v, - chr start, /* first of BYTTAB chrs */ - struct state * lp, - struct state * rp) +static VOID +subblock(v, start, lp, rp) +struct vars *v; +pchr start; /* first of BYTTAB chrs */ +struct state *lp; +struct state *rp; { - uchr uc = start; + uchr uc = start; struct colormap *cm = v->cm; - int shift; - int level; - int i; - int b; + int shift; + int level; + int i; + int b; union tree *t; union tree *cb; union tree *fillt; union tree *lastt; - int previ; - int ndone; - color co; - color sco; + int previ; + int ndone; + color co; + color sco; assert((uc % BYTTAB) == 0); @@ -466,23 +462,20 @@ subblock(struct vars * v, t = cm->tree; fillt = NULL; for (level = 0, shift = BYTBITS * (NBYTS - 1); shift > 0; - level++, shift -= BYTBITS) - { + level++, shift -= BYTBITS) { b = (uc >> shift) & BYTMASK; lastt = t; t = lastt->tptr[b]; assert(t != NULL); - fillt = &cm->tree[level + 1]; - if (t == fillt && shift > BYTBITS) - { /* need new ptr block */ - t = (union tree *) MALLOC(sizeof(struct ptrs)); - if (t == NULL) - { + fillt = &cm->tree[level+1]; + if (t == fillt && shift > BYTBITS) { /* need new ptr block */ + t = (union tree *)MALLOC(sizeof(struct ptrs)); + if (t == NULL) { CERR(REG_ESPACE); return; } memcpy(VS(t->tptr), VS(fillt->tptr), - BYTTAB * sizeof(union tree *)); + BYTTAB*sizeof(union tree *)); lastt->tptr[b] = t; } } @@ -490,16 +483,13 @@ subblock(struct vars * v, /* special cases: fill block or solid block */ co = t->tcolor[0]; cb = cm->cd[co].block; - if (t == fillt || t == cb) - { + if (t == fillt || t == cb) { /* either way, we want a subcolor solid block */ sco = newsub(cm, co); t = 
cm->cd[sco].block; - if (t == NULL) - { /* must set it up */ - t = (union tree *) MALLOC(sizeof(struct colors)); - if (t == NULL) - { + if (t == NULL) { /* must set it up */ + t = (union tree *)MALLOC(sizeof(struct colors)); + if (t == NULL) { CERR(REG_ESPACE); return; } @@ -517,14 +507,12 @@ subblock(struct vars * v, /* general case, a mixed block to be altered */ i = 0; - while (i < BYTTAB) - { + while (i < BYTTAB) { co = t->tcolor[i]; sco = newsub(cm, co); newarc(v->nfa, PLAIN, sco, lp, rp); previ = i; - do - { + do { t->tcolor[i++] = sco; } while (i < BYTTAB && t->tcolor[i] == co); ndone = i - previ; @@ -534,40 +522,35 @@ subblock(struct vars * v, } /* - * okcolors - promote subcolors to full colors + - okcolors - promote subcolors to full colors + ^ static VOID okcolors(struct nfa *, struct colormap *); */ -static void -okcolors(struct nfa * nfa, - struct colormap * cm) +static VOID +okcolors(nfa, cm) +struct nfa *nfa; +struct colormap *cm; { struct colordesc *cd; struct colordesc *end = CDEND(cm); struct colordesc *scd; struct arc *a; - color co; - color sco; + color co; + color sco; - for (cd = cm->cd, co = 0; cd < end; cd++, co++) - { + for (cd = cm->cd, co = 0; cd < end; cd++, co++) { sco = cd->sub; - if (UNUSEDCOLOR(cd) || sco == NOSUB) - { + if (UNUSEDCOLOR(cd) || sco == NOSUB) { /* has no subcolor, no further action */ - } - else if (sco == co) - { + } else if (sco == co) { /* is subcolor, let parent deal with it */ - } - else if (cd->nchrs == 0) - { + } else if (cd->nchrs == 0) { /* parent empty, its arcs change color to subcolor */ cd->sub = NOSUB; scd = &cm->cd[sco]; assert(scd->nchrs > 0); assert(scd->sub == sco); scd->sub = NOSUB; - while ((a = cd->arcs) != NULL) - { + while ((a = cd->arcs) != NULL) { assert(a->co == co); /* uncolorchain(cm, a); */ cd->arcs = a->colorchain; @@ -577,17 +560,14 @@ okcolors(struct nfa * nfa, scd->arcs = a; } freecolor(cm, co); - } - else - { + } else { /* parent's arcs must gain parallel subcolor arcs */ cd->sub = 
NOSUB; scd = &cm->cd[sco]; assert(scd->nchrs > 0); assert(scd->sub == sco); scd->sub = NOSUB; - for (a = cd->arcs; a != NULL; a = a->colorchain) - { + for (a = cd->arcs; a != NULL; a = a->colorchain) { assert(a->co == co); newarc(nfa, a->type, sco, a->from, a->to); } @@ -596,11 +576,13 @@ okcolors(struct nfa * nfa, } /* - * colorchain - add this arc to the color chain of its color + - colorchain - add this arc to the color chain of its color + ^ static VOID colorchain(struct colormap *, struct arc *); */ -static void -colorchain(struct colormap * cm, - struct arc * a) +static VOID +colorchain(cm, a) +struct colormap *cm; +struct arc *a; { struct colordesc *cd = &cm->cd[a->co]; @@ -609,36 +591,39 @@ colorchain(struct colormap * cm, } /* - * uncolorchain - delete this arc from the color chain of its color + - uncolorchain - delete this arc from the color chain of its color + ^ static VOID uncolorchain(struct colormap *, struct arc *); */ -static void -uncolorchain(struct colormap * cm, - struct arc * a) +static VOID +uncolorchain(cm, a) +struct colormap *cm; +struct arc *a; { struct colordesc *cd = &cm->cd[a->co]; struct arc *aa; aa = cd->arcs; - if (aa == a) /* easy case */ + if (aa == a) /* easy case */ cd->arcs = a->colorchain; - else - { + else { for (; aa != NULL && aa->colorchain != a; aa = aa->colorchain) continue; assert(aa != NULL); aa->colorchain = a->colorchain; } - a->colorchain = NULL; /* paranoia */ + a->colorchain = NULL; /* paranoia */ } /* - * singleton - is this character in its own color? + - singleton - is this character in its own color? 
+ ^ static int singleton(struct colormap *, pchr c); */ -static int /* predicate */ -singleton(struct colormap * cm, - chr c) +static int /* predicate */ +singleton(cm, c) +struct colormap *cm; +pchr c; { - color co; /* color of c */ + color co; /* color of c */ co = GETCOLOR(cm, c); if (cm->cd[co].nchrs == 1 && cm->cd[co].sub == NOSUB) @@ -647,81 +632,90 @@ singleton(struct colormap * cm, } /* - * rainbow - add arcs of all full colors (but one) between specified states + - rainbow - add arcs of all full colors (but one) between specified states + ^ static VOID rainbow(struct nfa *, struct colormap *, int, pcolor, + ^ struct state *, struct state *); */ -static void -rainbow(struct nfa * nfa, - struct colormap * cm, - int type, - pcolor but, /* COLORLESS if no exceptions */ - struct state * from, - struct state * to) +static VOID +rainbow(nfa, cm, type, but, from, to) +struct nfa *nfa; +struct colormap *cm; +int type; +pcolor but; /* COLORLESS if no exceptions */ +struct state *from; +struct state *to; { struct colordesc *cd; struct colordesc *end = CDEND(cm); - color co; + color co; for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++) if (!UNUSEDCOLOR(cd) && cd->sub != co && co != but && - !(cd->flags & PSEUDO)) + !(cd->flags&PSEUDO)) newarc(nfa, type, co, from, to); } /* - * colorcomplement - add arcs of complementary colors - * + - colorcomplement - add arcs of complementary colors * The calling sequence ought to be reconciled with cloneouts(). 
+ ^ static VOID colorcomplement(struct nfa *, struct colormap *, int, + ^ struct state *, struct state *, struct state *); */ -static void -colorcomplement(struct nfa * nfa, - struct colormap * cm, - int type, - struct state * of, /* complements of this guy's PLAIN - * outarcs */ - struct state * from, - struct state * to) +static VOID +colorcomplement(nfa, cm, type, of, from, to) +struct nfa *nfa; +struct colormap *cm; +int type; +struct state *of; /* complements of this guy's PLAIN outarcs */ +struct state *from; +struct state *to; { struct colordesc *cd; struct colordesc *end = CDEND(cm); - color co; + color co; assert(of != from); for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++) - if (!UNUSEDCOLOR(cd) && !(cd->flags & PSEUDO)) + if (!UNUSEDCOLOR(cd) && !(cd->flags&PSEUDO)) if (findarc(of, PLAIN, co) == NULL) newarc(nfa, type, co, from, to); } + #ifdef REG_DEBUG +/* + ^ #ifdef REG_DEBUG + */ /* - * dumpcolors - debugging output + - dumpcolors - debugging output + ^ static VOID dumpcolors(struct colormap *, FILE *); */ -static void -dumpcolors(struct colormap * cm, - FILE *f) +static VOID +dumpcolors(cm, f) +struct colormap *cm; +FILE *f; { struct colordesc *cd; struct colordesc *end; - color co; - chr c; - char *has; + color co; + chr c; + char *has; - fprintf(f, "max %ld\n", (long) cm->max); + fprintf(f, "max %ld\n", (long)cm->max); if (NBYTS > 1) fillcheck(cm, cm->tree, 0, f); end = CDEND(cm); - for (cd = cm->cd + 1, co = 1; cd < end; cd++, co++) /* skip 0 */ - if (!UNUSEDCOLOR(cd)) - { + for (cd = cm->cd + 1, co = 1; cd < end; cd++, co++) /* skip 0 */ + if (!UNUSEDCOLOR(cd)) { assert(cd->nchrs > 0); has = (cd->block != NULL) ? 
"#" : ""; - if (cd->flags & PSEUDO) - fprintf(f, "#%2ld%s(ps): ", (long) co, has); + if (cd->flags&PSEUDO) + fprintf(f, "#%2ld%s(ps): ", (long)co, has); else - fprintf(f, "#%2ld%s(%2d): ", (long) co, - has, cd->nchrs); + fprintf(f, "#%2ld%s(%2d): ", (long)co, + has, cd->nchrs); /* it's hard to do this more efficiently */ for (c = CHR_MIN; c < CHR_MAX; c++) if (GETCOLOR(cm, c) == co) @@ -734,51 +728,51 @@ dumpcolors(struct colormap * cm, } /* - * fillcheck - check proper filling of a tree + - fillcheck - check proper filling of a tree + ^ static VOID fillcheck(struct colormap *, union tree *, int, FILE *); */ -static void -fillcheck(struct colormap * cm, - union tree * tree, - int level, /* level number (top == 0) of this block */ - FILE *f) +static VOID +fillcheck(cm, tree, level, f) +struct colormap *cm; +union tree *tree; +int level; /* level number (top == 0) of this block */ +FILE *f; { - int i; + int i; union tree *t; - union tree *fillt = &cm->tree[level + 1]; + union tree *fillt = &cm->tree[level+1]; - assert(level < NBYTS - 1); /* this level has pointers */ - for (i = BYTTAB - 1; i >= 0; i--) - { + assert(level < NBYTS-1); /* this level has pointers */ + for (i = BYTTAB-1; i >= 0; i--) { t = tree->tptr[i]; if (t == NULL) fprintf(f, "NULL found in filled tree!\n"); else if (t == fillt) - { - } - else if (level < NBYTS - 2) /* more pointer blocks below */ - fillcheck(cm, t, level + 1, f); + {} + else if (level < NBYTS-2) /* more pointer blocks below */ + fillcheck(cm, t, level+1, f); } } /* - * dumpchr - print a chr - * + - dumpchr - print a chr * Kind of char-centric but works well enough for debug use. 
+ ^ static VOID dumpchr(pchr, FILE *); */ -static void -dumpchr(chr c, - FILE *f) +static VOID +dumpchr(c, f) +pchr c; +FILE *f; { -#if wxUSE_UNICODE - fprintf(f, "Debugging not implemented in unicode mode"); -#else if (c == '\\') fprintf(f, "\\\\"); else if (c > ' ' && c <= '~') - putc((char) c, f); + putc((char)c, f); else - fprintf(f, "\\u%04lx", (long) c); -#endif + fprintf(f, "\\u%04lx", (long)c); } -#endif /* REG_DEBUG */ +/* + ^ #endif + */ +#endif /* ifdef REG_DEBUG */ diff --git a/src/regex/regc_cvec.c b/src/regex/regc_cvec.c index b6aa8c98f1..d2d56fc70a 100644 --- a/src/regex/regc_cvec.c +++ b/src/regex/regc_cvec.c @@ -2,21 +2,21 @@ * Utility functions for handling cvecs * This file is #included by regcomp.c. * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * + * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. - * + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -28,162 +28,181 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* - * $Header$ - * */ /* - * newcvec - allocate a new cvec + - newcvec - allocate a new cvec + ^ static struct cvec *newcvec(int, int, int); */ static struct cvec * -newcvec(int nchrs, /* to hold this many chrs... */ - int nranges, /* ... and this many ranges... */ - int nmcces) /* ... and this many MCCEs */ +newcvec(nchrs, nranges, nmcces) + int nchrs; /* to hold this many chrs... */ + int nranges; /* ... and this many ranges... */ + int nmcces; /* ... and this many MCCEs */ { - size_t n; - size_t nc; - struct cvec *cv; - - nc = (size_t) nchrs + (size_t) nmcces *(MAXMCCE + 1) + (size_t) nranges *2; - - n = sizeof(struct cvec) + (size_t) (nmcces - 1) * sizeof(chr *) - + nc * sizeof(chr); - cv = (struct cvec *) MALLOC(n); - if (cv == NULL) - return NULL; - cv->chrspace = nchrs; - cv->chrs = (chr *) &cv->mcces[nmcces]; /* chrs just after MCCE - * ptrs */ - cv->mccespace = nmcces; - cv->ranges = cv->chrs + nchrs + nmcces * (MAXMCCE + 1); - cv->rangespace = nranges; - return clearcvec(cv); + size_t n; + size_t nc; + struct cvec *cv; + + nc = (size_t)nchrs + (size_t)nmcces*(MAXMCCE+1) + (size_t)nranges*2; + n = sizeof(struct cvec) + (size_t)(nmcces-1)*sizeof(chr *) + + nc*sizeof(chr); + cv = (struct cvec *)MALLOC(n); + if (cv == NULL) { + return NULL; + } + cv->chrspace = nchrs; + cv->chrs = (chr *)&cv->mcces[nmcces]; /* chrs just after MCCE ptrs */ + cv->mccespace = nmcces; + cv->ranges = cv->chrs + nchrs + nmcces*(MAXMCCE+1); + cv->rangespace = nranges; + return clearcvec(cv); } /* - * clearcvec - clear a possibly-new cvec + - clearcvec - clear a possibly-new cvec * Returns pointer as convenience. 
+ ^ static struct cvec *clearcvec(struct cvec *); */ static struct cvec * -clearcvec(struct cvec * cv) +clearcvec(cv) + struct cvec *cv; /* character vector */ { - int i; - - assert(cv != NULL); - cv->nchrs = 0; - assert(cv->chrs == (chr *) &cv->mcces[cv->mccespace]); - cv->nmcces = 0; - cv->nmccechrs = 0; - cv->nranges = 0; - for (i = 0; i < cv->mccespace; i++) - cv->mcces[i] = NULL; - - return cv; + int i; + + assert(cv != NULL); + cv->nchrs = 0; + assert(cv->chrs == (chr *)&cv->mcces[cv->mccespace]); + cv->nmcces = 0; + cv->nmccechrs = 0; + cv->nranges = 0; + for (i = 0; i < cv->mccespace; i++) { + cv->mcces[i] = NULL; + } + + return cv; } /* - * addchr - add a chr to a cvec + - addchr - add a chr to a cvec + ^ static VOID addchr(struct cvec *, pchr); */ -static void -addchr(struct cvec * cv, /* character vector */ - chr c) /* character to add */ +static VOID +addchr(cv, c) + struct cvec *cv; /* character vector */ + pchr c; /* character to add */ { - assert(cv->nchrs < cv->chrspace - cv->nmccechrs); - cv->chrs[cv->nchrs++] = (chr) c; + assert(cv->nchrs < cv->chrspace - cv->nmccechrs); + cv->chrs[cv->nchrs++] = (chr)c; } /* - * addrange - add a range to a cvec + - addrange - add a range to a cvec + ^ static VOID addrange(struct cvec *, pchr, pchr); */ -static void -addrange(struct cvec * cv, /* character vector */ - chr from, /* first character of range */ - chr to) /* last character of range */ +static VOID +addrange(cv, from, to) + struct cvec *cv; /* character vector */ + pchr from; /* first character of range */ + pchr to; /* last character of range */ { - assert(cv->nranges < cv->rangespace); - cv->ranges[cv->nranges * 2] = (chr) from; - cv->ranges[cv->nranges * 2 + 1] = (chr) to; - cv->nranges++; + assert(cv->nranges < cv->rangespace); + cv->ranges[cv->nranges*2] = (chr)from; + cv->ranges[cv->nranges*2 + 1] = (chr)to; + cv->nranges++; } /* - * addmcce - add an MCCE to a cvec + - addmcce - add an MCCE to a cvec + ^ static VOID addmcce(struct cvec *, chr *, 
chr *); */ -static void -addmcce(struct cvec * cv, /* character vector */ - chr *startp, /* beginning of text */ - chr *endp) /* just past end of text */ +static VOID +addmcce(cv, startp, endp) + struct cvec *cv; /* character vector */ + chr *startp; /* beginning of text */ + chr *endp; /* just past end of text */ { - int len; - int i; - chr *s; - chr *d; - - if (startp == NULL && endp == NULL) - return; - len = endp - startp; - assert(len > 0); - assert(cv->nchrs + len < cv->chrspace - cv->nmccechrs); - assert(cv->nmcces < cv->mccespace); - d = &cv->chrs[cv->chrspace - cv->nmccechrs - len - 1]; - cv->mcces[cv->nmcces++] = d; - for (s = startp, i = len; i > 0; s++, i--) - *d++ = *s; - *d++ = 0; /* endmarker */ - assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]); - cv->nmccechrs += len + 1; + int len; + int i; + chr *s; + chr *d; + + if (startp == NULL && endp == NULL) { + return; + } + len = endp - startp; + assert(len > 0); + assert(cv->nchrs + len < cv->chrspace - cv->nmccechrs); + assert(cv->nmcces < cv->mccespace); + d = &cv->chrs[cv->chrspace - cv->nmccechrs - len - 1]; + cv->mcces[cv->nmcces++] = d; + for (s = startp, i = len; i > 0; s++, i--) { + *d++ = *s; + } + *d++ = 0; /* endmarker */ + assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]); + cv->nmccechrs += len + 1; } /* - * haschr - does a cvec contain this chr? + - haschr - does a cvec contain this chr? 
+ ^ static int haschr(struct cvec *, pchr); */ -static int /* predicate */ -haschr(struct cvec * cv, /* character vector */ - chr c) /* character to test for */ +static int /* predicate */ +haschr(cv, c) + struct cvec *cv; /* character vector */ + pchr c; /* character to test for */ { - int i; - chr *p; + int i; + chr *p; - for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) - { - if (*p == c) - return 1; + for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) { + if (*p == c) { + return 1; } - for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) - { - if ((*p <= c) && (c <= *(p + 1))) - return 1; + } + for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) { + if ((*p <= c) && (c <= *(p+1))) { + return 1; } - return 0; + } + return 0; } /* - * getcvec - get a cvec, remembering it as v->cv + - getcvec - get a cvec, remembering it as v->cv + ^ static struct cvec *getcvec(struct vars *, int, int, int); */ static struct cvec * -getcvec(struct vars * v, /* context */ - int nchrs, /* to hold this many chrs... */ - int nranges, /* ... and this many ranges... */ - int nmcces) /* ... and this many MCCEs */ +getcvec(v, nchrs, nranges, nmcces) + struct vars *v; /* context */ + int nchrs; /* to hold this many chrs... */ + int nranges; /* ... and this many ranges... */ + int nmcces; /* ... 
and this many MCCEs */ { - if (v->cv != NULL && nchrs <= v->cv->chrspace && - nranges <= v->cv->rangespace && nmcces <= v->cv->mccespace) - return clearcvec(v->cv); - - if (v->cv != NULL) - freecvec(v->cv); - v->cv = newcvec(nchrs, nranges, nmcces); - if (v->cv == NULL) - ERR(REG_ESPACE); - - return v->cv; + if (v->cv != NULL && nchrs <= v->cv->chrspace && + nranges <= v->cv->rangespace && nmcces <= v->cv->mccespace) { + return clearcvec(v->cv); + } + + if (v->cv != NULL) { + freecvec(v->cv); + } + v->cv = newcvec(nchrs, nranges, nmcces); + if (v->cv == NULL) { + ERR(REG_ESPACE); + } + + return v->cv; } /* - * freecvec - free a cvec + - freecvec - free a cvec + ^ static VOID freecvec(struct cvec *); */ -static void -freecvec(struct cvec * cv) +static VOID +freecvec(cv) + struct cvec *cv; /* character vector */ { - FREE(cv); + FREE(cv); } diff --git a/src/regex/regc_lex.c b/src/regex/regc_lex.c index a24290d1a1..1acc3f4cae 100644 --- a/src/regex/regc_lex.c +++ b/src/regex/regc_lex.c @@ -2,21 +2,21 @@ * lexical analyzer * This file is #included by regcomp.c. * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * + * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. 
- * + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -28,183 +28,177 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Header$ - * */ /* scanning macros (know about v) */ -#define ATEOS() (v->now >= v->stop) -#define HAVE(n) (v->stop - v->now >= (n)) -#define NEXT1(c) (!ATEOS() && *v->now == CHR(c)) -#define NEXT2(a,b) (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b)) -#define NEXT3(a,b,c) (HAVE(3) && *v->now == CHR(a) && \ +#define ATEOS() (v->now >= v->stop) +#define HAVE(n) (v->stop - v->now >= (n)) +#define NEXT1(c) (!ATEOS() && *v->now == CHR(c)) +#define NEXT2(a,b) (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b)) +#define NEXT3(a,b,c) (HAVE(3) && *v->now == CHR(a) && \ *(v->now+1) == CHR(b) && \ *(v->now+2) == CHR(c)) -#define SET(c) (v->nexttype = (c)) -#define SETV(c, n) (v->nexttype = (c), v->nextvalue = (n)) -#define RET(c) return (SET(c), 1) -#define RETV(c, n) return (SETV(c, n), 1) -#define FAILW(e) return (ERR(e), 0) /* ERR does SET(EOS) */ -#define LASTTYPE(t) (v->lasttype == (t)) +#define SET(c) (v->nexttype = (c)) +#define SETV(c, n) (v->nexttype = (c), v->nextvalue = (n)) +#define RET(c) return (SET(c), 1) +#define RETV(c, n) return (SETV(c, n), 1) +#define FAILW(e) return (ERR(e), 0) /* ERR does SET(EOS) */ +#define LASTTYPE(t) (v->lasttype == (t)) /* lexical contexts */ -#define L_ERE 1 /* mainline ERE/ARE */ -#define L_BRE 2 /* mainline BRE */ -#define L_Q 3 /* REG_QUOTE */ -#define L_EBND 4 /* ERE/ARE bound */ -#define L_BBND 5 /* BRE bound */ -#define L_BRACK 6 /* brackets */ -#define L_CEL 7 /* collating element */ -#define L_ECL 8 /* equivalence class */ -#define L_CCL 9 /* character class */ -#define INTOCON(c) (v->lexcon = (c)) -#define INCON(con) (v->lexcon == (con)) 
+#define L_ERE 1 /* mainline ERE/ARE */ +#define L_BRE 2 /* mainline BRE */ +#define L_Q 3 /* REG_QUOTE */ +#define L_EBND 4 /* ERE/ARE bound */ +#define L_BBND 5 /* BRE bound */ +#define L_BRACK 6 /* brackets */ +#define L_CEL 7 /* collating element */ +#define L_ECL 8 /* equivalence class */ +#define L_CCL 9 /* character class */ +#define INTOCON(c) (v->lexcon = (c)) +#define INCON(con) (v->lexcon == (con)) /* construct pointer past end of chr array */ -#define ENDOF(array) ((array) + sizeof(array)/sizeof(chr)) +#define ENDOF(array) ((array) + sizeof(array)/sizeof(chr)) /* - * lexstart - set up lexical stuff, scan leading options + - lexstart - set up lexical stuff, scan leading options + ^ static VOID lexstart(struct vars *); */ -static void -lexstart(struct vars * v) +static VOID +lexstart(v) +struct vars *v; { - prefixes(v); /* may turn on new type bits etc. */ + prefixes(v); /* may turn on new type bits etc. */ NOERR(); - if (v->cflags & REG_QUOTE) - { - assert(!(v->cflags & (REG_ADVANCED | REG_EXPANDED | REG_NEWLINE))); + if (v->cflags&REG_QUOTE) { + assert(!(v->cflags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE))); INTOCON(L_Q); - } - else if (v->cflags & REG_EXTENDED) - { - assert(!(v->cflags & REG_QUOTE)); + } else if (v->cflags&REG_EXTENDED) { + assert(!(v->cflags&REG_QUOTE)); INTOCON(L_ERE); - } - else - { - assert(!(v->cflags & (REG_QUOTE | REG_ADVF))); + } else { + assert(!(v->cflags&(REG_QUOTE|REG_ADVF))); INTOCON(L_BRE); } v->nexttype = EMPTY; /* remember we were at the start */ - next(v); /* set up the first token */ + next(v); /* set up the first token */ } /* - * prefixes - implement various special prefixes + - prefixes - implement various special prefixes + ^ static VOID prefixes(struct vars *); */ -static void -prefixes(struct vars * v) +static VOID +prefixes(v) +struct vars *v; { /* literal string doesn't get any of this stuff */ - if (v->cflags & REG_QUOTE) + if (v->cflags&REG_QUOTE) return; - /* initial "***" gets special things */ + /* initial "***" gets 
special things */ if (HAVE(4) && NEXT3('*', '*', '*')) - switch (*(v->now + 3)) - { - case CHR('?'): /* "***?" error, msg shows version */ - ERR(REG_BADPAT); - return; /* proceed no further */ - break; - case CHR('='): /* "***=" shifts to literal string */ - NOTE(REG_UNONPOSIX); - v->cflags |= REG_QUOTE; - v->cflags &= ~(REG_ADVANCED | REG_EXPANDED | REG_NEWLINE); - v->now += 4; - return; /* and there can be no more prefixes */ - break; - case CHR(':'): /* "***:" shifts to AREs */ - NOTE(REG_UNONPOSIX); - v->cflags |= REG_ADVANCED; - v->now += 4; - break; - default: /* otherwise *** is just an error */ - ERR(REG_BADRPT); - return; - break; + switch (*(v->now + 3)) { + case CHR('?'): /* "***?" error, msg shows version */ + ERR(REG_BADPAT); + return; /* proceed no further */ + break; + case CHR('='): /* "***=" shifts to literal string */ + NOTE(REG_UNONPOSIX); + v->cflags |= REG_QUOTE; + v->cflags &= ~(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE); + v->now += 4; + return; /* and there can be no more prefixes */ + break; + case CHR(':'): /* "***:" shifts to AREs */ + NOTE(REG_UNONPOSIX); + v->cflags |= REG_ADVANCED; + v->now += 4; + break; + default: /* otherwise *** is just an error */ + ERR(REG_BADRPT); + return; + break; } /* BREs and EREs don't get embedded options */ - if ((v->cflags & REG_ADVANCED) != REG_ADVANCED) + if ((v->cflags&REG_ADVANCED) != REG_ADVANCED) return; /* embedded options (AREs only) */ - if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) - { + if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) { NOTE(REG_UNONPOSIX); v->now += 2; for (; !ATEOS() && iscalpha(*v->now); v->now++) - switch (*v->now) - { - case CHR('b'): /* BREs (but why???) 
*/ - v->cflags &= ~(REG_ADVANCED | REG_QUOTE); - break; - case CHR('c'): /* case sensitive */ - v->cflags &= ~REG_ICASE; - break; - case CHR('e'): /* plain EREs */ - v->cflags |= REG_EXTENDED; - v->cflags &= ~(REG_ADVF | REG_QUOTE); - break; - case CHR('i'): /* case insensitive */ - v->cflags |= REG_ICASE; - break; - case CHR('m'): /* Perloid synonym for n */ - case CHR('n'): /* \n affects ^ $ . [^ */ - v->cflags |= REG_NEWLINE; - break; - case CHR('p'): /* ~Perl, \n affects . [^ */ - v->cflags |= REG_NLSTOP; - v->cflags &= ~REG_NLANCH; - break; - case CHR('q'): /* literal string */ - v->cflags |= REG_QUOTE; - v->cflags &= ~REG_ADVANCED; - break; - case CHR('s'): /* single line, \n ordinary */ - v->cflags &= ~REG_NEWLINE; - break; - case CHR('t'): /* tight syntax */ - v->cflags &= ~REG_EXPANDED; - break; - case CHR('w'): /* weird, \n affects ^ $ only */ - v->cflags &= ~REG_NLSTOP; - v->cflags |= REG_NLANCH; - break; - case CHR('x'): /* expanded syntax */ - v->cflags |= REG_EXPANDED; - break; - default: - ERR(REG_BADOPT); - return; + switch (*v->now) { + case CHR('b'): /* BREs (but why???) */ + v->cflags &= ~(REG_ADVANCED|REG_QUOTE); + break; + case CHR('c'): /* case sensitive */ + v->cflags &= ~REG_ICASE; + break; + case CHR('e'): /* plain EREs */ + v->cflags |= REG_EXTENDED; + v->cflags &= ~(REG_ADVF|REG_QUOTE); + break; + case CHR('i'): /* case insensitive */ + v->cflags |= REG_ICASE; + break; + case CHR('m'): /* Perloid synonym for n */ + case CHR('n'): /* \n affects ^ $ . [^ */ + v->cflags |= REG_NEWLINE; + break; + case CHR('p'): /* ~Perl, \n affects . 
[^ */ + v->cflags |= REG_NLSTOP; + v->cflags &= ~REG_NLANCH; + break; + case CHR('q'): /* literal string */ + v->cflags |= REG_QUOTE; + v->cflags &= ~REG_ADVANCED; + break; + case CHR('s'): /* single line, \n ordinary */ + v->cflags &= ~REG_NEWLINE; + break; + case CHR('t'): /* tight syntax */ + v->cflags &= ~REG_EXPANDED; + break; + case CHR('w'): /* weird, \n affects ^ $ only */ + v->cflags &= ~REG_NLSTOP; + v->cflags |= REG_NLANCH; + break; + case CHR('x'): /* expanded syntax */ + v->cflags |= REG_EXPANDED; + break; + default: + ERR(REG_BADOPT); + return; } - if (!NEXT1(')')) - { + if (!NEXT1(')')) { ERR(REG_BADOPT); return; } v->now++; - if (v->cflags & REG_QUOTE) - v->cflags &= ~(REG_EXPANDED | REG_NEWLINE); + if (v->cflags&REG_QUOTE) + v->cflags &= ~(REG_EXPANDED|REG_NEWLINE); } } /* - * lexnest - "call a subroutine", interpolating string at the lexical level - * + - lexnest - "call a subroutine", interpolating string at the lexical level * Note, this is not a very general facility. There are a number of * implicit assumptions about what sorts of strings can be subroutines. 
+ ^ static VOID lexnest(struct vars *, chr *, chr *); */ -static void -lexnest(struct vars * v, - chr *beginp, /* start of interpolation */ - chr *endp) /* one past end of interpolation */ +static VOID +lexnest(v, beginp, endp) +struct vars *v; +chr *beginp; /* start of interpolation */ +chr *endp; /* one past end of interpolation */ { - assert(v->savenow == NULL); /* only one level of nesting */ + assert(v->savenow == NULL); /* only one level of nesting */ v->savenow = v->now; v->savestop = v->stop; v->now = beginp; @@ -214,124 +208,123 @@ lexnest(struct vars * v, /* * string constants to interpolate as expansions of things like \d */ -static chr backd[] = { /* \d */ +static chr backd[] = { /* \d */ CHR('['), CHR('['), CHR(':'), CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), CHR(':'), CHR(']'), CHR(']') }; -static chr backD[] = { /* \D */ +static chr backD[] = { /* \D */ CHR('['), CHR('^'), CHR('['), CHR(':'), CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), CHR(':'), CHR(']'), CHR(']') }; -static chr brbackd[] = { /* \d within brackets */ +static chr brbackd[] = { /* \d within brackets */ CHR('['), CHR(':'), CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), CHR(':'), CHR(']') }; -static chr backs[] = { /* \s */ +static chr backs[] = { /* \s */ CHR('['), CHR('['), CHR(':'), CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), CHR(':'), CHR(']'), CHR(']') }; -static chr backS[] = { /* \S */ +static chr backS[] = { /* \S */ CHR('['), CHR('^'), CHR('['), CHR(':'), CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), CHR(':'), CHR(']'), CHR(']') }; -static chr brbacks[] = { /* \s within brackets */ +static chr brbacks[] = { /* \s within brackets */ CHR('['), CHR(':'), CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), CHR(':'), CHR(']') }; -static chr backw[] = { /* \w */ +static chr backw[] = { /* \w */ CHR('['), CHR('['), CHR(':'), CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), CHR(':'), CHR(']'), CHR('_'), CHR(']') }; -static chr backW[] = { /* \W */ +static chr 
backW[] = { /* \W */ CHR('['), CHR('^'), CHR('['), CHR(':'), CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), CHR(':'), CHR(']'), CHR('_'), CHR(']') }; -static chr brbackw[] = { /* \w within brackets */ +static chr brbackw[] = { /* \w within brackets */ CHR('['), CHR(':'), CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), CHR(':'), CHR(']'), CHR('_') }; /* - * lexword - interpolate a bracket expression for word characters + - lexword - interpolate a bracket expression for word characters * Possibly ought to inquire whether there is a "word" character class. + ^ static VOID lexword(struct vars *); */ -static void -lexword(struct vars * v) +static VOID +lexword(v) +struct vars *v; { lexnest(v, backw, ENDOF(backw)); } /* - * next - get next token + - next - get next token + ^ static int next(struct vars *); */ -static int /* 1 normal, 0 failure */ -next(struct vars * v) +static int /* 1 normal, 0 failure */ +next(v) +struct vars *v; { - chr c; + chr c; /* errors yield an infinite sequence of failures */ if (ISERR()) - return 0; /* the error has set nexttype to EOS */ + return 0; /* the error has set nexttype to EOS */ /* remember flavor of last token */ v->lasttype = v->nexttype; /* REG_BOSONLY */ - if (v->nexttype == EMPTY && (v->cflags & REG_BOSONLY)) - { + if (v->nexttype == EMPTY && (v->cflags&REG_BOSONLY)) { /* at start of a REG_BOSONLY RE */ RETV(SBEGIN, 0); /* same as \A */ } /* if we're nested and we've hit end, return to outer level */ - if (v->savenow != NULL && ATEOS()) - { + if (v->savenow != NULL && ATEOS()) { v->now = v->savenow; v->stop = v->savestop; v->savenow = v->savestop = NULL; } /* skip white space etc. 
if appropriate (not in literal or []) */ - if (v->cflags & REG_EXPANDED) - switch (v->lexcon) - { - case L_ERE: - case L_BRE: - case L_EBND: - case L_BBND: - skip(v); - break; + if (v->cflags&REG_EXPANDED) + switch (v->lexcon) { + case L_ERE: + case L_BRE: + case L_EBND: + case L_BBND: + skip(v); + break; } /* handle EOS, depending on context */ - if (ATEOS()) - { - switch (v->lexcon) - { - case L_ERE: - case L_BRE: - case L_Q: - RET(EOS); - break; - case L_EBND: - case L_BBND: - FAILW(REG_EBRACE); - break; - case L_BRACK: - case L_CEL: - case L_ECL: - case L_CCL: - FAILW(REG_EBRACK); - break; + if (ATEOS()) { + switch (v->lexcon) { + case L_ERE: + case L_BRE: + case L_Q: + RET(EOS); + break; + case L_EBND: + case L_BBND: + FAILW(REG_EBRACE); + break; + case L_BRACK: + case L_CEL: + case L_ECL: + case L_CCL: + FAILW(REG_EBRACK); + break; } assert(NOTREACHED); } @@ -340,365 +333,314 @@ next(struct vars * v) c = *v->now++; /* deal with the easy contexts, punt EREs to code below */ - switch (v->lexcon) - { - case L_BRE: /* punt BREs to separate function */ - return brenext(v, c); + switch (v->lexcon) { + case L_BRE: /* punt BREs to separate function */ + return brenext(v, c); + break; + case L_ERE: /* see below */ + break; + case L_Q: /* literal strings are easy */ + RETV(PLAIN, c); + break; + case L_BBND: /* bounds are fairly simple */ + case L_EBND: + switch (c) { + case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'): + case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'): + case CHR('8'): case CHR('9'): + RETV(DIGIT, (chr)DIGITVAL(c)); break; - case L_ERE: /* see below */ + case CHR(','): + RET(','); break; - case L_Q: /* literal strings are easy */ - RETV(PLAIN, c); + case CHR('}'): /* ERE bound ends with } */ + if (INCON(L_EBND)) { + INTOCON(L_ERE); + if ((v->cflags&REG_ADVF) && NEXT1('?')) { + v->now++; + NOTE(REG_UNONPOSIX); + RETV('}', 0); + } + RETV('}', 1); + } else + FAILW(REG_BADBR); break; - case L_BBND: /* bounds are fairly simple */ - case 
L_EBND: - switch (c) - { - case CHR('0'): - case CHR('1'): - case CHR('2'): - case CHR('3'): - case CHR('4'): - case CHR('5'): - case CHR('6'): - case CHR('7'): - case CHR('8'): - case CHR('9'): - RETV(DIGIT, (chr) DIGITVAL(c)); - break; - case CHR(','): - RET(','); - break; - case CHR('}'): /* ERE bound ends with } */ - if (INCON(L_EBND)) - { - INTOCON(L_ERE); - if ((v->cflags & REG_ADVF) && NEXT1('?')) - { - v->now++; - NOTE(REG_UNONPOSIX); - RETV('}', 0); - } - RETV('}', 1); - } - else - FAILW(REG_BADBR); - break; - case CHR('\\'): /* BRE bound ends with \} */ - if (INCON(L_BBND) && NEXT1('}')) - { - v->now++; - INTOCON(L_BRE); - RET('}'); - } - else - FAILW(REG_BADBR); - break; - default: - FAILW(REG_BADBR); - break; + case CHR('\\'): /* BRE bound ends with \} */ + if (INCON(L_BBND) && NEXT1('}')) { + v->now++; + INTOCON(L_BRE); + RET('}'); + } else + FAILW(REG_BADBR); + break; + default: + FAILW(REG_BADBR); + break; + } + assert(NOTREACHED); + break; + case L_BRACK: /* brackets are not too hard */ + switch (c) { + case CHR(']'): + if (LASTTYPE('[')) + RETV(PLAIN, c); + else { + INTOCON((v->cflags&REG_EXTENDED) ? + L_ERE : L_BRE); + RET(']'); } - assert(NOTREACHED); break; - case L_BRACK: /* brackets are not too hard */ - switch (c) - { - case CHR(']'): - if (LASTTYPE('[')) - RETV(PLAIN, c); - else - { - INTOCON((v->cflags & REG_EXTENDED) ? 
- L_ERE : L_BRE); - RET(']'); - } - break; - case CHR('\\'): - NOTE(REG_UBBS); - if (!(v->cflags & REG_ADVF)) - RETV(PLAIN, c); - NOTE(REG_UNONPOSIX); - if (ATEOS()) - FAILW(REG_EESCAPE); - (DISCARD) lexescape(v); - switch (v->nexttype) - { /* not all escapes okay here */ - case PLAIN: - return 1; - break; - case CCLASS: - switch (v->nextvalue) - { - case 'd': - lexnest(v, brbackd, ENDOF(brbackd)); - break; - case 's': - lexnest(v, brbacks, ENDOF(brbacks)); - break; - case 'w': - lexnest(v, brbackw, ENDOF(brbackw)); - break; - default: - FAILW(REG_EESCAPE); - break; - } - /* lexnest done, back up and try again */ - v->nexttype = v->lasttype; - return next(v); - break; - } - /* not one of the acceptable escapes */ - FAILW(REG_EESCAPE); + case CHR('\\'): + NOTE(REG_UBBS); + if (!(v->cflags&REG_ADVF)) + RETV(PLAIN, c); + NOTE(REG_UNONPOSIX); + if (ATEOS()) + FAILW(REG_EESCAPE); + (DISCARD)lexescape(v); + switch (v->nexttype) { /* not all escapes okay here */ + case PLAIN: + return 1; + break; + case CCLASS: + switch (v->nextvalue) { + case 'd': + lexnest(v, brbackd, ENDOF(brbackd)); break; - case CHR('-'): - if (LASTTYPE('[') || NEXT1(']')) - RETV(PLAIN, c); - else - RETV(RANGE, c); + case 's': + lexnest(v, brbacks, ENDOF(brbacks)); break; - case CHR('['): - if (ATEOS()) - FAILW(REG_EBRACK); - switch (*v->now++) - { - case CHR('.'): - INTOCON(L_CEL); - /* might or might not be locale-specific */ - RET(COLLEL); - break; - case CHR('='): - INTOCON(L_ECL); - NOTE(REG_ULOCALE); - RET(ECLASS); - break; - case CHR(':'): - INTOCON(L_CCL); - NOTE(REG_ULOCALE); - RET(CCLASS); - break; - default: /* oops */ - v->now--; - RETV(PLAIN, c); - break; - } - assert(NOTREACHED); + case 'w': + lexnest(v, brbackw, ENDOF(brbackw)); break; default: - RETV(PLAIN, c); + FAILW(REG_EESCAPE); break; + } + /* lexnest done, back up and try again */ + v->nexttype = v->lasttype; + return next(v); + break; } - assert(NOTREACHED); + /* not one of the acceptable escapes */ + FAILW(REG_EESCAPE); break; - 
case L_CEL: /* collating elements are easy */ - if (c == CHR('.') && NEXT1(']')) - { - v->now++; - INTOCON(L_BRACK); - RETV(END, '.'); - } - else + case CHR('-'): + if (LASTTYPE('[') || NEXT1(']')) RETV(PLAIN, c); - break; - case L_ECL: /* ditto equivalence classes */ - if (c == CHR('=') && NEXT1(']')) - { - v->now++; - INTOCON(L_BRACK); - RETV(END, '='); - } else - RETV(PLAIN, c); + RETV(RANGE, c); break; - case L_CCL: /* ditto character classes */ - if (c == CHR(':') && NEXT1(']')) - { - v->now++; - INTOCON(L_BRACK); - RETV(END, ':'); - } - else + case CHR('['): + if (ATEOS()) + FAILW(REG_EBRACK); + switch (*v->now++) { + case CHR('.'): + INTOCON(L_CEL); + /* might or might not be locale-specific */ + RET(COLLEL); + break; + case CHR('='): + INTOCON(L_ECL); + NOTE(REG_ULOCALE); + RET(ECLASS); + break; + case CHR(':'): + INTOCON(L_CCL); + NOTE(REG_ULOCALE); + RET(CCLASS); + break; + default: /* oops */ + v->now--; RETV(PLAIN, c); + break; + } + assert(NOTREACHED); break; default: - assert(NOTREACHED); + RETV(PLAIN, c); break; + } + assert(NOTREACHED); + break; + case L_CEL: /* collating elements are easy */ + if (c == CHR('.') && NEXT1(']')) { + v->now++; + INTOCON(L_BRACK); + RETV(END, '.'); + } else + RETV(PLAIN, c); + break; + case L_ECL: /* ditto equivalence classes */ + if (c == CHR('=') && NEXT1(']')) { + v->now++; + INTOCON(L_BRACK); + RETV(END, '='); + } else + RETV(PLAIN, c); + break; + case L_CCL: /* ditto character classes */ + if (c == CHR(':') && NEXT1(']')) { + v->now++; + INTOCON(L_BRACK); + RETV(END, ':'); + } else + RETV(PLAIN, c); + break; + default: + assert(NOTREACHED); + break; } /* that got rid of everything except EREs and AREs */ assert(INCON(L_ERE)); /* deal with EREs and AREs, except for backslashes */ - switch (c) - { - case CHR('|'): - RET('|'); - break; - case CHR('*'): - if ((v->cflags & REG_ADVF) && NEXT1('?')) - { - v->now++; - NOTE(REG_UNONPOSIX); - RETV('*', 0); - } - RETV('*', 1); - break; - case CHR('+'): - if ((v->cflags & 
REG_ADVF) && NEXT1('?')) - { - v->now++; - NOTE(REG_UNONPOSIX); - RETV('+', 0); - } - RETV('+', 1); - break; - case CHR('?'): - if ((v->cflags & REG_ADVF) && NEXT1('?')) - { - v->now++; - NOTE(REG_UNONPOSIX); - RETV('?', 0); - } - RETV('?', 1); - break; - case CHR('{'): /* bounds start or plain character */ - if (v->cflags & REG_EXPANDED) - skip(v); - if (ATEOS() || !iscdigit(*v->now)) - { - NOTE(REG_UBRACES); - NOTE(REG_UUNSPEC); - RETV(PLAIN, c); - } - else - { - NOTE(REG_UBOUNDS); - INTOCON(L_EBND); - RET('{'); + switch (c) { + case CHR('|'): + RET('|'); + break; + case CHR('*'): + if ((v->cflags&REG_ADVF) && NEXT1('?')) { + v->now++; + NOTE(REG_UNONPOSIX); + RETV('*', 0); + } + RETV('*', 1); + break; + case CHR('+'): + if ((v->cflags&REG_ADVF) && NEXT1('?')) { + v->now++; + NOTE(REG_UNONPOSIX); + RETV('+', 0); + } + RETV('+', 1); + break; + case CHR('?'): + if ((v->cflags&REG_ADVF) && NEXT1('?')) { + v->now++; + NOTE(REG_UNONPOSIX); + RETV('?', 0); + } + RETV('?', 1); + break; + case CHR('{'): /* bounds start or plain character */ + if (v->cflags&REG_EXPANDED) + skip(v); + if (ATEOS() || !iscdigit(*v->now)) { + NOTE(REG_UBRACES); + NOTE(REG_UUNSPEC); + RETV(PLAIN, c); + } else { + NOTE(REG_UBOUNDS); + INTOCON(L_EBND); + RET('{'); + } + assert(NOTREACHED); + break; + case CHR('('): /* parenthesis, or advanced extension */ + if ((v->cflags&REG_ADVF) && NEXT1('?')) { + NOTE(REG_UNONPOSIX); + v->now++; + switch (*v->now++) { + case CHR(':'): /* non-capturing paren */ + RETV('(', 0); + break; + case CHR('#'): /* comment */ + while (!ATEOS() && *v->now != CHR(')')) + v->now++; + if (!ATEOS()) + v->now++; + assert(v->nexttype == v->lasttype); + return next(v); + break; + case CHR('='): /* positive lookahead */ + NOTE(REG_ULOOKAHEAD); + RETV(LACON, 1); + break; + case CHR('!'): /* negative lookahead */ + NOTE(REG_ULOOKAHEAD); + RETV(LACON, 0); + break; + default: + FAILW(REG_BADRPT); + break; } assert(NOTREACHED); - break; - case CHR('('): /* parenthesis, or advanced 
extension */ - 
if ((v->cflags & REG_ADVF) && NEXT1('?')) - { - NOTE(REG_UNONPOSIX); - v->now++; - switch (*v->now++) - { - case CHR(':'): /* non-capturing paren */ - RETV('(', 0); - break; - case CHR('#'): /* comment */ - while (!ATEOS() && *v->now != CHR(')')) - v->now++; - if (!ATEOS()) - v->now++; - assert(v->nexttype == v->lasttype); - return next(v); - break; - case CHR('='): /* positive lookahead */ - NOTE(REG_ULOOKAHEAD); - RETV(LACON, 1); - break; - case CHR('!'): /* negative lookahead */ - NOTE(REG_ULOOKAHEAD); - RETV(LACON, 0); - break; - default: - FAILW(REG_BADRPT); - break; - } - assert(NOTREACHED); - } - if (v->cflags & REG_NOSUB) - RETV('(', 0); /* all parens non-capturing */ - else - RETV('(', 1); - break; - case CHR(')'): - if (LASTTYPE('(')) - NOTE(REG_UUNSPEC); - RETV(')', c); - break; - case CHR('['): /* easy except for [[:<:]] and [[:>:]] */ - if (HAVE(6) && *(v->now + 0) == CHR('[') && - *(v->now + 1) == CHR(':') && - (*(v->now + 2) == CHR('<') || - *(v->now + 2) == CHR('>')) && - *(v->now + 3) == CHR(':') && - *(v->now + 4) == CHR(']') && - *(v->now + 5) == CHR(']')) - { - c = *(v->now + 2); - v->now += 6; - NOTE(REG_UNONPOSIX); - RET((c == CHR('<')) ? 
'<' : '>'); - } - INTOCON(L_BRACK); - if (NEXT1('^')) - { - v->now++; - RETV('[', 0); - } - RETV('[', 1); - break; - case CHR('.'): - RET('.'); - break; - case CHR('^'): - RET('^'); - break; - case CHR('$'): - RET('$'); - break; - case CHR('\\'): /* mostly punt backslashes to code below */ - if (ATEOS()) - FAILW(REG_EESCAPE); - break; - default: /* ordinary character */ - RETV(PLAIN, c); - break; + } + if (v->cflags&REG_NOSUB) + RETV('(', 0); /* all parens non-capturing */ + else + RETV('(', 1); + break; + case CHR(')'): + if (LASTTYPE('(')) { + NOTE(REG_UUNSPEC); + } + RETV(')', c); + break; + case CHR('['): /* easy except for [[:<:]] and [[:>:]] */ + if (HAVE(6) && *(v->now+0) == CHR('[') && + *(v->now+1) == CHR(':') && + (*(v->now+2) == CHR('<') || + *(v->now+2) == CHR('>')) && + *(v->now+3) == CHR(':') && + *(v->now+4) == CHR(']') && + *(v->now+5) == CHR(']')) { + c = *(v->now+2); + v->now += 6; + NOTE(REG_UNONPOSIX); + RET((c == CHR('<')) ? '<' : '>'); + } + INTOCON(L_BRACK); + if (NEXT1('^')) { + v->now++; + RETV('[', 0); + } + RETV('[', 1); + break; + case CHR('.'): + RET('.'); + break; + case CHR('^'): + RET('^'); + break; + case CHR('$'): + RET('$'); + break; + case CHR('\\'): /* mostly punt backslashes to code below */ + if (ATEOS()) + FAILW(REG_EESCAPE); + break; + default: /* ordinary character */ + RETV(PLAIN, c); + break; } /* ERE/ARE backslash handling; backslash already eaten */ assert(!ATEOS()); - if (!(v->cflags & REG_ADVF)) - { /* only AREs have non-trivial escapes */ - if (iscalnum(*v->now)) - { + if (!(v->cflags&REG_ADVF)) { /* only AREs have non-trivial escapes */ + if (iscalnum(*v->now)) { NOTE(REG_UBSALNUM); NOTE(REG_UUNSPEC); } RETV(PLAIN, *v->now++); } - (DISCARD) lexescape(v); + (DISCARD)lexescape(v); if (ISERR()) FAILW(REG_EESCAPE); - if (v->nexttype == CCLASS) - { /* fudge at lexical level */ - switch (v->nextvalue) - { - case 'd': - lexnest(v, backd, ENDOF(backd)); - break; - case 'D': - lexnest(v, backD, ENDOF(backD)); - break; - case 
's': 
- lexnest(v, backs, ENDOF(backs)); - break; - case 'S': - lexnest(v, backS, ENDOF(backS)); - break; - case 'w': - lexnest(v, backw, ENDOF(backw)); - break; - case 'W': - lexnest(v, backW, ENDOF(backW)); - break; - default: - assert(NOTREACHED); - FAILW(REG_ASSERT); - break; + if (v->nexttype == CCLASS) { /* fudge at lexical level */ + switch (v->nextvalue) { + case 'd': lexnest(v, backd, ENDOF(backd)); break; + case 'D': lexnest(v, backD, ENDOF(backD)); break; + case 's': lexnest(v, backs, ENDOF(backs)); break; + case 'S': lexnest(v, backS, ENDOF(backS)); break; + case 'w': lexnest(v, backw, ENDOF(backw)); break; + case 'W': lexnest(v, backW, ENDOF(backW)); break; + default: + assert(NOTREACHED); + FAILW(REG_ASSERT); + break; } /* lexnest done, back up and try again */ v->nexttype = v->lasttype; @@ -709,23 +651,24 @@ next(struct vars * v) } /* - * lexescape - parse an ARE backslash escape (backslash already eaten) + - lexescape - parse an ARE backslash escape (backslash already eaten) * Note slightly nonstandard use of the CCLASS type code. 
+ ^ static int lexescape(struct vars *); */ -static int /* not actually used, but convenient for - * RETV */ -lexescape(struct vars * v) +static int /* not actually used, but convenient for RETV */ +lexescape(v) +struct vars *v; { - chr c; - static chr alert[] = { + chr c; + static chr alert[] = { CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t') }; - static chr esc[] = { + static chr esc[] = { CHR('E'), CHR('S'), CHR('C') }; - chr *save; + chr *save; - assert(v->cflags & REG_ADVF); + assert(v->cflags&REG_ADVF); assert(!ATEOS()); c = *v->now++; @@ -733,295 +676,255 @@ lexescape(struct vars * v) RETV(PLAIN, c); NOTE(REG_UNONPOSIX); - switch (c) - { - case CHR('a'): - RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007'))); - break; - case CHR('A'): - RETV(SBEGIN, 0); - break; - case CHR('b'): - RETV(PLAIN, CHR('\b')); - break; - case CHR('B'): - RETV(PLAIN, CHR('\\')); - break; - case CHR('c'): - NOTE(REG_UUNPORT); - if (ATEOS()) - FAILW(REG_EESCAPE); - RETV(PLAIN, (chr) (*v->now++ & 037)); - break; - case CHR('d'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'd'); - break; - case CHR('D'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'D'); - break; - case CHR('e'): - NOTE(REG_UUNPORT); - RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033'))); - break; - case CHR('f'): - RETV(PLAIN, CHR('\f')); - break; - case CHR('m'): - RET('<'); - break; - case CHR('M'): - RET('>'); - break; - case CHR('n'): - RETV(PLAIN, CHR('\n')); - break; - case CHR('r'): - RETV(PLAIN, CHR('\r')); - break; - case CHR('s'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 's'); - break; - case CHR('S'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'S'); - break; - case CHR('t'): - RETV(PLAIN, CHR('\t')); - break; - case CHR('u'): - c = lexdigits(v, 16, 4, 4); - if (ISERR()) - FAILW(REG_EESCAPE); - RETV(PLAIN, c); - break; - case CHR('U'): - c = lexdigits(v, 16, 8, 8); - if (ISERR()) - FAILW(REG_EESCAPE); - RETV(PLAIN, c); - break; - case CHR('v'): - RETV(PLAIN, CHR('\v')); - break; - case CHR('w'): - NOTE(REG_ULOCALE); - 
RETV(CCLASS, 'w'); - break; - case CHR('W'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'W'); - break; - case CHR('x'): - NOTE(REG_UUNPORT); - c = lexdigits(v, 16, 1, 255); /* REs >255 long outside - * spec */ - if (ISERR()) - FAILW(REG_EESCAPE); - RETV(PLAIN, c); - break; - case CHR('y'): - NOTE(REG_ULOCALE); - RETV(WBDRY, 0); - break; - case CHR('Y'): - NOTE(REG_ULOCALE); - RETV(NWBDRY, 0); - break; - case CHR('Z'): - RETV(SEND, 0); - break; - case CHR('1'): - case CHR('2'): - case CHR('3'): - case CHR('4'): - case CHR('5'): - case CHR('6'): - case CHR('7'): - case CHR('8'): - case CHR('9'): - save = v->now; - v->now--; /* put first digit back */ - c = lexdigits(v, 10, 1, 255); /* REs >255 long outside - * spec */ - if (ISERR()) - FAILW(REG_EESCAPE); - /* ugly heuristic (first test is "exactly 1 digit?") */ - if (v->now - save == 0 || (int) c <= v->nsubexp) - { - NOTE(REG_UBACKREF); - RETV(BACKREF, (chr) c); - } - /* oops, doesn't look like it's a backref after all... */ - v->now = save; - /* and fall through into octal number */ - case CHR('0'): - NOTE(REG_UUNPORT); - v->now--; /* put first digit back */ - c = lexdigits(v, 8, 1, 3); - if (ISERR()) - FAILW(REG_EESCAPE); - RETV(PLAIN, c); - break; - default: - assert(iscalpha(c)); - FAILW(REG_EESCAPE); /* unknown alphabetic escape */ - break; + switch (c) { + case CHR('a'): + RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007'))); + break; + case CHR('A'): + RETV(SBEGIN, 0); + break; + case CHR('b'): + RETV(PLAIN, CHR('\b')); + break; + case CHR('B'): + RETV(PLAIN, CHR('\\')); + break; + case CHR('c'): + NOTE(REG_UUNPORT); + if (ATEOS()) + FAILW(REG_EESCAPE); + RETV(PLAIN, (chr)(*v->now++ & 037)); + break; + case CHR('d'): + NOTE(REG_ULOCALE); + RETV(CCLASS, 'd'); + break; + case CHR('D'): + NOTE(REG_ULOCALE); + RETV(CCLASS, 'D'); + break; + case CHR('e'): + NOTE(REG_UUNPORT); + RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033'))); + break; + case CHR('f'): + RETV(PLAIN, CHR('\f')); + break; + case CHR('m'): + 
RET('<'); + break; + case CHR('M'): + RET('>'); + break; + case CHR('n'): + RETV(PLAIN, CHR('\n')); + break; + case CHR('r'): + RETV(PLAIN, CHR('\r')); + break; + case CHR('s'): + NOTE(REG_ULOCALE); + RETV(CCLASS, 's'); + break; + case CHR('S'): + NOTE(REG_ULOCALE); + RETV(CCLASS, 'S'); + break; + case CHR('t'): + RETV(PLAIN, CHR('\t')); + break; + case CHR('u'): + c = lexdigits(v, 16, 4, 4); + if (ISERR()) + FAILW(REG_EESCAPE); + RETV(PLAIN, c); + break; + case CHR('U'): + c = lexdigits(v, 16, 8, 8); + if (ISERR()) + FAILW(REG_EESCAPE); + RETV(PLAIN, c); + break; + case CHR('v'): + RETV(PLAIN, CHR('\v')); + break; + case CHR('w'): + NOTE(REG_ULOCALE); + RETV(CCLASS, 'w'); + break; + case CHR('W'): + NOTE(REG_ULOCALE); + RETV(CCLASS, 'W'); + break; + case CHR('x'): + NOTE(REG_UUNPORT); + c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */ + if (ISERR()) + FAILW(REG_EESCAPE); + RETV(PLAIN, c); + break; + case CHR('y'): + NOTE(REG_ULOCALE); + RETV(WBDRY, 0); + break; + case CHR('Y'): + NOTE(REG_ULOCALE); + RETV(NWBDRY, 0); + break; + case CHR('Z'): + RETV(SEND, 0); + break; + case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'): + case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'): + case CHR('9'): + save = v->now; + v->now--; /* put first digit back */ + c = lexdigits(v, 10, 1, 255); /* REs >255 long outside spec */ + if (ISERR()) + FAILW(REG_EESCAPE); + /* ugly heuristic (first test is "exactly 1 digit?") */ + if (v->now - save == 0 || (int)c <= v->nsubexp) { + NOTE(REG_UBACKREF); + RETV(BACKREF, (chr)c); + } + /* oops, doesn't look like it's a backref after all... 
*/ + v->now = save; + /* and fall through into octal number */ + case CHR('0'): + NOTE(REG_UUNPORT); + v->now--; /* put first digit back */ + c = lexdigits(v, 8, 1, 3); + if (ISERR()) + FAILW(REG_EESCAPE); + RETV(PLAIN, c); + break; + default: + assert(iscalpha(c)); + FAILW(REG_EESCAPE); /* unknown alphabetic escape */ + break; } assert(NOTREACHED); } /* - * lexdigits - slurp up digits and return chr value + - lexdigits - slurp up digits and return chr value + ^ static chr lexdigits(struct vars *, int, int, int); */ -static chr /* chr value; errors signalled via ERR */ -lexdigits(struct vars * v, - int base, - int minlen, - int maxlen) +static chr /* chr value; errors signalled via ERR */ +lexdigits(v, base, minlen, maxlen) +struct vars *v; +int base; +int minlen; +int maxlen; { - uchr n; /* unsigned to avoid overflow misbehavior */ - int len; - chr c; - int d; - const uchr ub = (uchr) base; + uchr n; /* unsigned to avoid overflow misbehavior */ + int len; + chr c; + int d; + CONST uchr ub = (uchr) base; n = 0; - for (len = 0; len < maxlen && !ATEOS(); len++) - { + for (len = 0; len < maxlen && !ATEOS(); len++) { c = *v->now++; - switch (c) - { - case CHR('0'): - case CHR('1'): - case CHR('2'): - case CHR('3'): - case CHR('4'): - case CHR('5'): - case CHR('6'): - case CHR('7'): - case CHR('8'): - case CHR('9'): - d = DIGITVAL(c); - break; - case CHR('a'): - case CHR('A'): - d = 10; - break; - case CHR('b'): - case CHR('B'): - d = 11; - break; - case CHR('c'): - case CHR('C'): - d = 12; - break; - case CHR('d'): - case CHR('D'): - d = 13; - break; - case CHR('e'): - case CHR('E'): - d = 14; - break; - case CHR('f'): - case CHR('F'): - d = 15; - break; - default: - v->now--; /* oops, not a digit at all */ - d = -1; - break; + switch (c) { + case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'): + case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'): + case CHR('8'): case CHR('9'): + d = DIGITVAL(c); + break; + case CHR('a'): case CHR('A'): d = 10; break; + 
case CHR('b'): case CHR('B'): d = 11; break; + case CHR('c'): case CHR('C'): d = 12; break; + case CHR('d'): case CHR('D'): d = 13; break; + case CHR('e'): case CHR('E'): d = 14; break; + case CHR('f'): case CHR('F'): d = 15; break; + default: + v->now--; /* oops, not a digit at all */ + d = -1; + break; } - if (d >= base) - { /* not a plausible digit */ + if (d >= base) { /* not a plausible digit */ v->now--; d = -1; } if (d < 0) - break; /* NOTE BREAK OUT */ - n = n * ub + (uchr) d; + break; /* NOTE BREAK OUT */ + n = n*ub + (uchr)d; } if (len < minlen) ERR(REG_EESCAPE); - return (chr) n; + return (chr)n; } /* - * brenext - get next BRE token - * + - brenext - get next BRE token * This is much like EREs except for all the stupid backslashes and the * context-dependency of some things. + ^ static int brenext(struct vars *, pchr); */ -static int /* 1 normal, 0 failure */ -brenext(struct vars * v, - chr pc) +static int /* 1 normal, 0 failure */ +brenext(v, pc) +struct vars *v; +pchr pc; { - chr c = (chr) pc; + chr c = (chr)pc; - switch (c) - { - case CHR('*'): - if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^')) - RETV(PLAIN, c); - RET('*'); - break; - case CHR('['): - if (HAVE(6) && *(v->now + 0) == CHR('[') && - *(v->now + 1) == CHR(':') && - (*(v->now + 2) == CHR('<') || - *(v->now + 2) == CHR('>')) && - *(v->now + 3) == CHR(':') && - *(v->now + 4) == CHR(']') && - *(v->now + 5) == CHR(']')) - { - c = *(v->now + 2); - v->now += 6; - NOTE(REG_UNONPOSIX); - RET((c == CHR('<')) ? 
'<' : '>'); - } - INTOCON(L_BRACK); - if (NEXT1('^')) - { - v->now++; - RETV('[', 0); - } - RETV('[', 1); - break; - case CHR('.'): - RET('.'); - break; - case CHR('^'): - if (LASTTYPE(EMPTY)) - RET('^'); - if (LASTTYPE('(')) - { - NOTE(REG_UUNSPEC); - RET('^'); - } - RETV(PLAIN, c); - break; - case CHR('$'): - if (v->cflags & REG_EXPANDED) - skip(v); - if (ATEOS()) - RET('$'); - if (NEXT2('\\', ')')) - { - NOTE(REG_UUNSPEC); - RET('$'); - } + switch (c) { + case CHR('*'): + if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^')) RETV(PLAIN, c); - break; - case CHR('\\'): - break; /* see below */ - default: - RETV(PLAIN, c); - break; + RET('*'); + break; + case CHR('['): + if (HAVE(6) && *(v->now+0) == CHR('[') && + *(v->now+1) == CHR(':') && + (*(v->now+2) == CHR('<') || + *(v->now+2) == CHR('>')) && + *(v->now+3) == CHR(':') && + *(v->now+4) == CHR(']') && + *(v->now+5) == CHR(']')) { + c = *(v->now+2); + v->now += 6; + NOTE(REG_UNONPOSIX); + RET((c == CHR('<')) ? '<' : '>'); + } + INTOCON(L_BRACK); + if (NEXT1('^')) { + v->now++; + RETV('[', 0); + } + RETV('[', 1); + break; + case CHR('.'): + RET('.'); + break; + case CHR('^'): + if (LASTTYPE(EMPTY)) + RET('^'); + if (LASTTYPE('(')) { + NOTE(REG_UUNSPEC); + RET('^'); + } + RETV(PLAIN, c); + break; + case CHR('$'): + if (v->cflags&REG_EXPANDED) + skip(v); + if (ATEOS()) + RET('$'); + if (NEXT2('\\', ')')) { + NOTE(REG_UUNSPEC); + RET('$'); + } + RETV(PLAIN, c); + break; + case CHR('\\'): + break; /* see below */ + default: + RETV(PLAIN, c); + break; } assert(c == CHR('\\')); @@ -1030,64 +933,57 @@ brenext(struct vars * v, FAILW(REG_EESCAPE); c = *v->now++; - switch (c) - { - case CHR('{'): - INTOCON(L_BBND); - NOTE(REG_UBOUNDS); - RET('{'); - break; - case CHR('('): - RETV('(', 1); - break; - case CHR(')'): - RETV(')', c); - break; - case CHR('<'): - NOTE(REG_UNONPOSIX); - RET('<'); - break; - case CHR('>'): - NOTE(REG_UNONPOSIX); - RET('>'); - break; - case CHR('1'): - case CHR('2'): - case CHR('3'): - case
CHR('4'): - case CHR('5'): - case CHR('6'): - case CHR('7'): - case CHR('8'): - case CHR('9'): - NOTE(REG_UBACKREF); - RETV(BACKREF, (chr) DIGITVAL(c)); - break; - default: - if (iscalnum(c)) - { - NOTE(REG_UBSALNUM); - NOTE(REG_UUNSPEC); - } - RETV(PLAIN, c); - break; + switch (c) { + case CHR('{'): + INTOCON(L_BBND); + NOTE(REG_UBOUNDS); + RET('{'); + break; + case CHR('('): + RETV('(', 1); + break; + case CHR(')'): + RETV(')', c); + break; + case CHR('<'): + NOTE(REG_UNONPOSIX); + RET('<'); + break; + case CHR('>'): + NOTE(REG_UNONPOSIX); + RET('>'); + break; + case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'): + case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'): + case CHR('9'): + NOTE(REG_UBACKREF); + RETV(BACKREF, (chr)DIGITVAL(c)); + break; + default: + if (iscalnum(c)) { + NOTE(REG_UBSALNUM); + NOTE(REG_UUNSPEC); + } + RETV(PLAIN, c); + break; } assert(NOTREACHED); } /* - * skip - skip white space and comments in expanded form + - skip - skip white space and comments in expanded form + ^ static VOID skip(struct vars *); */ -static void -skip(struct vars * v) +static VOID +skip(v) +struct vars *v; { - chr *start = v->now; + chr *start = v->now; - assert(v->cflags & REG_EXPANDED); + assert(v->cflags&REG_EXPANDED); - for (;;) - { + for (;;) { while (!ATEOS() && iscspace(*v->now)) v->now++; if (ATEOS() || *v->now != CHR('#')) @@ -1103,31 +999,50 @@ skip(struct vars * v) } /* - * newline - return the chr for a newline - * + - newline - return the chr for a newline * This helps confine use of CHR to this source file. + ^ static chr newline(NOPARMS); */ static chr -newline(void) +newline() { return CHR('\n'); } /* - * chrnamed - return the chr known by a given (chr string) name - * + - ch - return the chr sequence for regc_locale.c's fake collating element ch + * This helps confine use of CHR to this source file. Beware that the caller + * knows how long the sequence is.
+ ^ #ifdef REG_DEBUG + ^ static chr *ch(NOPARMS); + ^ #endif + */ +#ifdef REG_DEBUG +static chr * +ch() +{ + static chr chstr[] = { CHR('c'), CHR('h'), CHR('\0') }; + + return chstr; +} +#endif + +/* + - chrnamed - return the chr known by a given (chr string) name * The code is a bit clumsy, but this routine gets only such specialized * use that it hardly matters. + ^ static chr chrnamed(struct vars *, chr *, chr *, pchr); */ static chr -chrnamed(struct vars * v, - chr *startp, /* start of name */ - chr *endp, /* just past end of name */ - chr lastresort) /* what to return if name lookup fails */ +chrnamed(v, startp, endp, lastresort) +struct vars *v; +chr *startp; /* start of name */ +chr *endp; /* just past end of name */ +pchr lastresort; /* what to return if name lookup fails */ { - celt c; - int errsave; - int e; + celt c; + int errsave; + int e; struct cvec *cv; errsave = v->err; @@ -1137,10 +1052,10 @@ chrnamed(struct vars * v, v->err = errsave; if (e != 0) - return (chr) lastresort; + return (chr)lastresort; cv = range(v, c, c, 0); if (cv->nchrs == 0) - return (chr) lastresort; + return (chr)lastresort; return cv->chrs[0]; } diff --git a/src/regex/regc_locale.c b/src/regex/regc_locale.c index 664611c6b5..695b665b1f 100644 --- a/src/regex/regc_locale.c +++ b/src/regex/regc_locale.c @@ -1,800 +1,989 @@ -/* +/* * regc_locale.c -- * - * This file contains locale-specific regexp routines. + * This file contains the Unicode locale specific regexp routines. * This file is #included by regcomp.c. * * Copyright (c) 1998 by Scriptics Corporation. * - * This software is copyrighted by the Regents of the University of - * California, Sun Microsystems, Inc., Scriptics Corporation, ActiveState - * Corporation and other parties. The following terms apply to all files - * associated with the software unless explicitly disclaimed in - * individual files. 
- * - * The authors hereby grant permission to use, copy, modify, distribute, - * and license this software and its documentation for any purpose, provided - * that existing copyright notices are retained in all copies and that this - * notice is included verbatim in any distributions. No written agreement, - * license, or royalty fee is required for any of the authorized uses. - * Modifications to this software may be copyrighted by their authors - * and need not follow the licensing terms described here, provided that - * the new terms are clearly indicated on the first page of each file where - * they apply. - * - * IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY - * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES - * ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY - * DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - * - * THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE - * IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE - * NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR - * MODIFICATIONS. + * See the file "license.terms" for information on usage and redistribution + * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * GOVERNMENT USE: If you are acquiring this software on behalf of the - * U.S. government, the Government shall have only "Restricted Rights" - * in the software and related documentation as defined in the Federal - * Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). 
If you - * are acquiring the software on behalf of the Department of Defense, the - * software shall be classified as "Commercial Computer Software" and the - * Government shall have only "Restricted Rights" as defined in Clause - * 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the - * authors grant the U.S. Government and others acting in its behalf - * permission to use and distribute the software in accordance with the - * terms specified in this license. - * - * $Header$ + * RCS: @(#) $Id$ */ -int char_and_wchar_strncmp (const char* cp, const wx_wchar* wp, size_t nNum) -{ - while(*cp++ == (const char)*wp++ && --nNum){} - return nNum; -} +/* ASCII character-name table */ -int wx_isdigit(wx_wchar c) {return wxIsdigit(c);} -int wx_isalpha(wx_wchar c) {return wxIsalpha(c);} -int wx_isalnum(wx_wchar c) {return wxIsalnum(c);} -int wx_isupper(wx_wchar c) {return wxIsupper(c);} -int wx_islower(wx_wchar c) {return wxIslower(c);} -int wx_isgraph(wx_wchar c) {return wxIsgraph(c);} -int wx_ispunct(wx_wchar c) {return wxIspunct(c);} -int wx_isspace(wx_wchar c) {return wxIsspace(c);} +static struct cname { + char *name; + char code; +} cnames[] = { + {"NUL", '\0'}, + {"SOH", '\001'}, + {"STX", '\002'}, + {"ETX", '\003'}, + {"EOT", '\004'}, + {"ENQ", '\005'}, + {"ACK", '\006'}, + {"BEL", '\007'}, + {"alert", '\007'}, + {"BS", '\010'}, + {"backspace", '\b'}, + {"HT", '\011'}, + {"tab", '\t'}, + {"LF", '\012'}, + {"newline", '\n'}, + {"VT", '\013'}, + {"vertical-tab", '\v'}, + {"FF", '\014'}, + {"form-feed", '\f'}, + {"CR", '\015'}, + {"carriage-return", '\r'}, + {"SO", '\016'}, + {"SI", '\017'}, + {"DLE", '\020'}, + {"DC1", '\021'}, + {"DC2", '\022'}, + {"DC3", '\023'}, + {"DC4", '\024'}, + {"NAK", '\025'}, + {"SYN", '\026'}, + {"ETB", '\027'}, + {"CAN", '\030'}, + {"EM", '\031'}, + {"SUB", '\032'}, + {"ESC", '\033'}, + {"IS4", '\034'}, + {"FS", '\034'}, + {"IS3", '\035'}, + {"GS", '\035'}, + {"IS2", '\036'}, + {"RS", '\036'}, + {"IS1", '\037'}, + {"US", 
'\037'}, + {"space", ' '}, + {"exclamation-mark",'!'}, + {"quotation-mark", '"'}, + {"number-sign", '#'}, + {"dollar-sign", '$'}, + {"percent-sign", '%'}, + {"ampersand", '&'}, + {"apostrophe", '\''}, + {"left-parenthesis",'('}, + {"right-parenthesis", ')'}, + {"asterisk", '*'}, + {"plus-sign", '+'}, + {"comma", ','}, + {"hyphen", '-'}, + {"hyphen-minus", '-'}, + {"period", '.'}, + {"full-stop", '.'}, + {"slash", '/'}, + {"solidus", '/'}, + {"zero", '0'}, + {"one", '1'}, + {"two", '2'}, + {"three", '3'}, + {"four", '4'}, + {"five", '5'}, + {"six", '6'}, + {"seven", '7'}, + {"eight", '8'}, + {"nine", '9'}, + {"colon", ':'}, + {"semicolon", ';'}, + {"less-than-sign", '<'}, + {"equals-sign", '='}, + {"greater-than-sign", '>'}, + {"question-mark", '?'}, + {"commercial-at", '@'}, + {"left-square-bracket", '['}, + {"backslash", '\\'}, + {"reverse-solidus", '\\'}, + {"right-square-bracket", ']'}, + {"circumflex", '^'}, + {"circumflex-accent", '^'}, + {"underscore", '_'}, + {"low-line", '_'}, + {"grave-accent", '`'}, + {"left-brace", '{'}, + {"left-curly-bracket", '{'}, + {"vertical-line", '|'}, + {"right-brace", '}'}, + {"right-curly-bracket", '}'}, + {"tilde", '~'}, + {"DEL", '\177'}, + {NULL, 0} +}; -wx_wchar wx_toupper(wx_wchar c) -{ - return wxToupper(c); -} +/* Unicode character-class tables */ -wx_wchar wx_tolower(wx_wchar c) -{ - return wxTolower(c); -} +typedef struct crange { + chr start; + chr end; +} crange; -int wx_strlen(const wx_wchar* szString) -{ - /* - Generic -- note that some clib functions also test for eol character '^Z' - - int nLength = 0; - for (; *(szString + nLength) != '\0'; nLength++); - return nLength; - */ - return szString == NULL ? 0 : wxStrlen_(szString); -} -/* ASCII character-name table */ +/* + * Declarations of Unicode character ranges. This code + * is automatically generated by the tools/uniClass.tcl script + * and used in generic/regc_locale.c. Do not modify by hand. 
+ */ -static struct cname -{ - char *name; - char code; -} cnames[] = +/* Unicode: alphabetic characters */ + +static crange alphaRangeTable[] = { + {0x0041, 0x005a}, {0x0061, 0x007a}, {0x00c0, 0x00d6}, {0x00d8, 0x00f6}, + {0x00f8, 0x021f}, {0x0222, 0x0233}, {0x0250, 0x02ad}, {0x02b0, 0x02b8}, + {0x02bb, 0x02c1}, {0x02e0, 0x02e4}, {0x0388, 0x038a}, {0x038e, 0x03a1}, + {0x03a3, 0x03ce}, {0x03d0, 0x03d7}, {0x03da, 0x03f5}, {0x0400, 0x0481}, + {0x048c, 0x04c4}, {0x04d0, 0x04f5}, {0x0531, 0x0556}, {0x0561, 0x0587}, + {0x05d0, 0x05ea}, {0x05f0, 0x05f2}, {0x0621, 0x063a}, {0x0640, 0x064a}, + {0x0671, 0x06d3}, {0x06fa, 0x06fc}, {0x0712, 0x072c}, {0x0780, 0x07a5}, + {0x0905, 0x0939}, {0x0958, 0x0961}, {0x0985, 0x098c}, {0x0993, 0x09a8}, + {0x09aa, 0x09b0}, {0x09b6, 0x09b9}, {0x09df, 0x09e1}, {0x0a05, 0x0a0a}, + {0x0a13, 0x0a28}, {0x0a2a, 0x0a30}, {0x0a59, 0x0a5c}, {0x0a72, 0x0a74}, + {0x0a85, 0x0a8b}, {0x0a8f, 0x0a91}, {0x0a93, 0x0aa8}, {0x0aaa, 0x0ab0}, + {0x0ab5, 0x0ab9}, {0x0b05, 0x0b0c}, {0x0b13, 0x0b28}, {0x0b2a, 0x0b30}, + {0x0b36, 0x0b39}, {0x0b5f, 0x0b61}, {0x0b85, 0x0b8a}, {0x0b8e, 0x0b90}, + {0x0b92, 0x0b95}, {0x0ba8, 0x0baa}, {0x0bae, 0x0bb5}, {0x0bb7, 0x0bb9}, + {0x0c05, 0x0c0c}, {0x0c0e, 0x0c10}, {0x0c12, 0x0c28}, {0x0c2a, 0x0c33}, + {0x0c35, 0x0c39}, {0x0c85, 0x0c8c}, {0x0c8e, 0x0c90}, {0x0c92, 0x0ca8}, + {0x0caa, 0x0cb3}, {0x0cb5, 0x0cb9}, {0x0d05, 0x0d0c}, {0x0d0e, 0x0d10}, + {0x0d12, 0x0d28}, {0x0d2a, 0x0d39}, {0x0d85, 0x0d96}, {0x0d9a, 0x0db1}, + {0x0db3, 0x0dbb}, {0x0dc0, 0x0dc6}, {0x0e01, 0x0e30}, {0x0e40, 0x0e46}, + {0x0e94, 0x0e97}, {0x0e99, 0x0e9f}, {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb0}, + {0x0ec0, 0x0ec4}, {0x0f40, 0x0f47}, {0x0f49, 0x0f6a}, {0x0f88, 0x0f8b}, + {0x1000, 0x1021}, {0x1023, 0x1027}, {0x1050, 0x1055}, {0x10a0, 0x10c5}, + {0x10d0, 0x10f6}, {0x1100, 0x1159}, {0x115f, 0x11a2}, {0x11a8, 0x11f9}, + {0x1200, 0x1206}, {0x1208, 0x1246}, {0x124a, 0x124d}, {0x1250, 0x1256}, + {0x125a, 0x125d}, {0x1260, 0x1286}, {0x128a, 0x128d}, {0x1290, 0x12ae}, + 
{0x12b2, 0x12b5}, {0x12b8, 0x12be}, {0x12c2, 0x12c5}, {0x12c8, 0x12ce}, + {0x12d0, 0x12d6}, {0x12d8, 0x12ee}, {0x12f0, 0x130e}, {0x1312, 0x1315}, + {0x1318, 0x131e}, {0x1320, 0x1346}, {0x1348, 0x135a}, {0x13a0, 0x13f4}, + {0x1401, 0x166c}, {0x166f, 0x1676}, {0x1681, 0x169a}, {0x16a0, 0x16ea}, + {0x1780, 0x17b3}, {0x1820, 0x1877}, {0x1880, 0x18a8}, {0x1e00, 0x1e9b}, + {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45}, + {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, + {0x1fb6, 0x1fbc}, {0x1fc2, 0x1fc4}, {0x1fc6, 0x1fcc}, {0x1fd0, 0x1fd3}, + {0x1fd6, 0x1fdb}, {0x1fe0, 0x1fec}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffc}, + {0x210a, 0x2113}, {0x2119, 0x211d}, {0x212a, 0x212d}, {0x212f, 0x2131}, + {0x2133, 0x2139}, {0x3031, 0x3035}, {0x3041, 0x3094}, {0x30a1, 0x30fa}, + {0x30fc, 0x30fe}, {0x3105, 0x312c}, {0x3131, 0x318e}, {0x31a0, 0x31b7}, + {0x3400, 0x4db5}, {0x4e00, 0x9fa5}, {0xa000, 0xa48c}, {0xac00, 0xd7a3}, + {0xf900, 0xfa2d}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1f, 0xfb28}, + {0xfb2a, 0xfb36}, {0xfb38, 0xfb3c}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfd3d}, + {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb}, {0xfe70, 0xfe72}, + {0xfe76, 0xfefc}, {0xff21, 0xff3a}, {0xff41, 0xff5a}, {0xff66, 0xffbe}, + {0xffc2, 0xffc7}, {0xffca, 0xffcf}, {0xffd2, 0xffd7}, {0xffda, 0xffdc} +}; -{ - { - "NUL", '\0' - }, - { - "SOH", '\001' - }, - { - "STX", '\002' - }, - { - "ETX", '\003' - }, - { - "EOT", '\004' - }, - { - "ENQ", '\005' - }, - { - "ACK", '\006' - }, - { - "BEL", '\007' - }, - { - "alert", '\007' - }, - { - "BS", '\010' - }, - { - "backspace", '\b' - }, - { - "HT", '\011' - }, - { - "tab", '\t' - }, - { - "LF", '\012' - }, - { - "newline", '\n' - }, - { - "VT", '\013' - }, - { - "vertical-tab", '\v' - }, - { - "FF", '\014' - }, - { - "form-feed", '\f' - }, - { - "CR", '\015' - }, - { - "carriage-return", '\r' - }, - { - "SO", '\016' - }, - { - "SI", '\017' - }, - { - "DLE", '\020' - }, - { - "DC1", '\021' - }, - { - "DC2", '\022' - 
}, - { - "DC3", '\023' - }, - { - "DC4", '\024' - }, - { - "NAK", '\025' - }, - { - "SYN", '\026' - }, - { - "ETB", '\027' - }, - { - "CAN", '\030' - }, - { - "EM", '\031' - }, - { - "SUB", '\032' - }, - { - "ESC", '\033' - }, - { - "IS4", '\034' - }, - { - "FS", '\034' - }, - { - "IS3", '\035' - }, - { - "GS", '\035' - }, - { - "IS2", '\036' - }, - { - "RS", '\036' - }, - { - "IS1", '\037' - }, - { - "US", '\037' - }, - { - "space", ' ' - }, - { - "exclamation-mark", '!' - }, - { - "quotation-mark", '"' - }, - { - "number-sign", '#' - }, - { - "dollar-sign", '$' - }, - { - "percent-sign", '%' - }, - { - "ampersand", '&' - }, - { - "apostrophe", '\'' - }, - { - "left-parenthesis", '(' - }, - { - "right-parenthesis", ')' - }, - { - "asterisk", '*' - }, - { - "plus-sign", '+' - }, - { - "comma", ',' - }, - { - "hyphen", '-' - }, - { - "hyphen-minus", '-' - }, - { - "period", '.' - }, - { - "full-stop", '.' - }, - { - "slash", '/' - }, - { - "solidus", '/' - }, - { - "zero", '0' - }, - { - "one", '1' - }, - { - "two", '2' - }, - { - "three", '3' - }, - { - "four", '4' - }, - { - "five", '5' - }, - { - "six", '6' - }, - { - "seven", '7' - }, - { - "eight", '8' - }, - { - "nine", '9' - }, - { - "colon", ':' - }, - { - "semicolon", ';' - }, - { - "less-than-sign", '<' - }, - { - "equals-sign", '=' - }, - { - "greater-than-sign", '>' - }, - { - "question-mark", '?' 
- }, - { - "commercial-at", '@' - }, - { - "left-square-bracket", '[' - }, - { - "backslash", '\\' - }, - { - "reverse-solidus", '\\' - }, - { - "right-square-bracket", ']' - }, - { - "circumflex", '^' - }, - { - "circumflex-accent", '^' - }, - { - "underscore", '_' - }, - { - "low-line", '_' - }, - { - "grave-accent", '`' - }, - { - "left-brace", '{' - }, - { - "left-curly-bracket", '{' - }, - { - "vertical-line", '|' - }, - { - "right-brace", '}' - }, - { - "right-curly-bracket", '}' - }, - { - "tilde", '~' - }, - { - "DEL", '\177' - }, - { - NULL, 0 - } +#define NUM_ALPHA_RANGE (sizeof(alphaRangeTable)/sizeof(crange)) + +static chr alphaCharTable[] = { + 0x00aa, 0x00b5, 0x00ba, 0x02d0, 0x02d1, 0x02ee, 0x037a, 0x0386, 0x038c, + 0x04c7, 0x04c8, 0x04cb, 0x04cc, 0x04f8, 0x04f9, 0x0559, 0x06d5, 0x06e5, + 0x06e6, 0x0710, 0x093d, 0x0950, 0x098f, 0x0990, 0x09b2, 0x09dc, 0x09dd, + 0x09f0, 0x09f1, 0x0a0f, 0x0a10, 0x0a32, 0x0a33, 0x0a35, 0x0a36, 0x0a38, + 0x0a39, 0x0a5e, 0x0a8d, 0x0ab2, 0x0ab3, 0x0abd, 0x0ad0, 0x0ae0, 0x0b0f, + 0x0b10, 0x0b32, 0x0b33, 0x0b3d, 0x0b5c, 0x0b5d, 0x0b99, 0x0b9a, 0x0b9c, + 0x0b9e, 0x0b9f, 0x0ba3, 0x0ba4, 0x0c60, 0x0c61, 0x0cde, 0x0ce0, 0x0ce1, + 0x0d60, 0x0d61, 0x0dbd, 0x0e32, 0x0e33, 0x0e81, 0x0e82, 0x0e84, 0x0e87, + 0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0eb2, 0x0eb3, + 0x0ebd, 0x0ec6, 0x0edc, 0x0edd, 0x0f00, 0x1029, 0x102a, 0x1248, 0x1258, + 0x1288, 0x12b0, 0x12c0, 0x1310, 0x1f59, 0x1f5b, 0x1f5d, 0x1fbe, 0x207f, + 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x3005, 0x3006, 0x309d, + 0x309e, 0xfb1d, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfe74, 0xfffe +}; + +#define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(chr)) + +/* Unicode: decimal digit characters */ + +static crange digitRangeTable[] = { + {0x0030, 0x0039}, {0x0660, 0x0669}, {0x06f0, 0x06f9}, {0x0966, 0x096f}, + {0x09e6, 0x09ef}, {0x0a66, 0x0a6f}, {0x0ae6, 0x0aef}, {0x0b66, 0x0b6f}, + {0x0be7, 0x0bef}, {0x0c66, 0x0c6f}, {0x0ce6, 0x0cef}, {0x0d66, 0x0d6f}, + 
{0x0e50, 0x0e59}, {0x0ed0, 0x0ed9}, {0x0f20, 0x0f29}, {0x1040, 0x1049}, + {0x1369, 0x1371}, {0x17e0, 0x17e9}, {0x1810, 0x1819}, {0xff10, 0xff19} +}; + +#define NUM_DIGIT_RANGE (sizeof(digitRangeTable)/sizeof(crange)) + +/* no singletons of digit characters */ + +/* Unicode: punctuation characters */ + +static crange punctRangeTable[] = { + {0x0021, 0x0023}, {0x0025, 0x002a}, {0x002c, 0x002f}, {0x005b, 0x005d}, + {0x055a, 0x055f}, {0x066a, 0x066d}, {0x0700, 0x070d}, {0x0f04, 0x0f12}, + {0x0f3a, 0x0f3d}, {0x104a, 0x104f}, {0x1361, 0x1368}, {0x16eb, 0x16ed}, + {0x17d4, 0x17da}, {0x1800, 0x180a}, {0x2010, 0x2027}, {0x2030, 0x2043}, + {0x2048, 0x204d}, {0x3001, 0x3003}, {0x3008, 0x3011}, {0x3014, 0x301f}, + {0xfe30, 0xfe44}, {0xfe49, 0xfe52}, {0xfe54, 0xfe61}, {0xff01, 0xff03}, + {0xff05, 0xff0a}, {0xff0c, 0xff0f}, {0xff3b, 0xff3d}, {0xff61, 0xff65} +}; + +#define NUM_PUNCT_RANGE (sizeof(punctRangeTable)/sizeof(crange)) + +static chr punctCharTable[] = { + 0x003a, 0x003b, 0x003f, 0x0040, 0x005f, 0x007b, 0x007d, 0x00a1, 0x00ab, + 0x00ad, 0x00b7, 0x00bb, 0x00bf, 0x037e, 0x0387, 0x0589, 0x058a, 0x05be, + 0x05c0, 0x05c3, 0x05f3, 0x05f4, 0x060c, 0x061b, 0x061f, 0x06d4, 0x0964, + 0x0965, 0x0970, 0x0df4, 0x0e4f, 0x0e5a, 0x0e5b, 0x0f85, 0x10fb, 0x166d, + 0x166e, 0x169b, 0x169c, 0x17dc, 0x2045, 0x2046, 0x207d, 0x207e, 0x208d, + 0x208e, 0x2329, 0x232a, 0x3030, 0x30fb, 0xfd3e, 0xfd3f, 0xfe63, 0xfe68, + 0xfe6a, 0xfe6b, 0xff1a, 0xff1b, 0xff1f, 0xff20, 0xff3f, 0xff5b, 0xff5d +}; + +#define NUM_PUNCT_CHAR (sizeof(punctCharTable)/sizeof(chr)) + +/* Unicode: white space characters */ + +static crange spaceRangeTable[] = { + {0x0009, 0x000d}, {0x2000, 0x200b} +}; + +#define NUM_SPACE_RANGE (sizeof(spaceRangeTable)/sizeof(crange)) + +static chr spaceCharTable[] = { + 0x0020, 0x00a0, 0x1680, 0x2028, 0x2029, 0x202f, 0x3000 +}; + +#define NUM_SPACE_CHAR (sizeof(spaceCharTable)/sizeof(chr)) + +/* Unicode: lowercase characters */ + +static crange lowerRangeTable[] = { + {0x0061, 0x007a}, 
{0x00df, 0x00f6}, {0x00f8, 0x00ff}, {0x017e, 0x0180}, + {0x0199, 0x019b}, {0x01bd, 0x01bf}, {0x0250, 0x02ad}, {0x03ac, 0x03ce}, + {0x03d5, 0x03d7}, {0x03ef, 0x03f3}, {0x0430, 0x045f}, {0x0561, 0x0587}, + {0x1e95, 0x1e9b}, {0x1f00, 0x1f07}, {0x1f10, 0x1f15}, {0x1f20, 0x1f27}, + {0x1f30, 0x1f37}, {0x1f40, 0x1f45}, {0x1f50, 0x1f57}, {0x1f60, 0x1f67}, + {0x1f70, 0x1f7d}, {0x1f80, 0x1f87}, {0x1f90, 0x1f97}, {0x1fa0, 0x1fa7}, + {0x1fb0, 0x1fb4}, {0x1fc2, 0x1fc4}, {0x1fd0, 0x1fd3}, {0x1fe0, 0x1fe7}, + {0x1ff2, 0x1ff4}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xff41, 0xff5a} +}; + +#define NUM_LOWER_RANGE (sizeof(lowerRangeTable)/sizeof(crange)) + +static chr lowerCharTable[] = { + 0x00aa, 0x00b5, 0x00ba, 0x0101, 0x0103, 0x0105, 0x0107, 0x0109, 0x010b, + 0x010d, 0x010f, 0x0111, 0x0113, 0x0115, 0x0117, 0x0119, 0x011b, 0x011d, + 0x011f, 0x0121, 0x0123, 0x0125, 0x0127, 0x0129, 0x012b, 0x012d, 0x012f, + 0x0131, 0x0133, 0x0135, 0x0137, 0x0138, 0x013a, 0x013c, 0x013e, 0x0140, + 0x0142, 0x0144, 0x0146, 0x0148, 0x0149, 0x014b, 0x014d, 0x014f, 0x0151, + 0x0153, 0x0155, 0x0157, 0x0159, 0x015b, 0x015d, 0x015f, 0x0161, 0x0163, + 0x0165, 0x0167, 0x0169, 0x016b, 0x016d, 0x016f, 0x0171, 0x0173, 0x0175, + 0x0177, 0x017a, 0x017c, 0x0183, 0x0185, 0x0188, 0x018c, 0x018d, 0x0192, + 0x0195, 0x019e, 0x01a1, 0x01a3, 0x01a5, 0x01a8, 0x01aa, 0x01ab, 0x01ad, + 0x01b0, 0x01b4, 0x01b6, 0x01b9, 0x01ba, 0x01c6, 0x01c9, 0x01cc, 0x01ce, + 0x01d0, 0x01d2, 0x01d4, 0x01d6, 0x01d8, 0x01da, 0x01dc, 0x01dd, 0x01df, + 0x01e1, 0x01e3, 0x01e5, 0x01e7, 0x01e9, 0x01eb, 0x01ed, 0x01ef, 0x01f0, + 0x01f3, 0x01f5, 0x01f9, 0x01fb, 0x01fd, 0x01ff, 0x0201, 0x0203, 0x0205, + 0x0207, 0x0209, 0x020b, 0x020d, 0x020f, 0x0211, 0x0213, 0x0215, 0x0217, + 0x0219, 0x021b, 0x021d, 0x021f, 0x0223, 0x0225, 0x0227, 0x0229, 0x022b, + 0x022d, 0x022f, 0x0231, 0x0233, 0x0390, 0x03d0, 0x03d1, 0x03db, 0x03dd, + 0x03df, 0x03e1, 0x03e3, 0x03e5, 0x03e7, 0x03e9, 0x03eb, 0x03ed, 0x03f5, + 0x0461, 0x0463, 0x0465, 0x0467, 0x0469, 0x046b, 0x046d, 
0x046f, 0x0471, + 0x0473, 0x0475, 0x0477, 0x0479, 0x047b, 0x047d, 0x047f, 0x0481, 0x048d, + 0x048f, 0x0491, 0x0493, 0x0495, 0x0497, 0x0499, 0x049b, 0x049d, 0x049f, + 0x04a1, 0x04a3, 0x04a5, 0x04a7, 0x04a9, 0x04ab, 0x04ad, 0x04af, 0x04b1, + 0x04b3, 0x04b5, 0x04b7, 0x04b9, 0x04bb, 0x04bd, 0x04bf, 0x04c2, 0x04c4, + 0x04c8, 0x04cc, 0x04d1, 0x04d3, 0x04d5, 0x04d7, 0x04d9, 0x04db, 0x04dd, + 0x04df, 0x04e1, 0x04e3, 0x04e5, 0x04e7, 0x04e9, 0x04eb, 0x04ed, 0x04ef, + 0x04f1, 0x04f3, 0x04f5, 0x04f9, 0x1e01, 0x1e03, 0x1e05, 0x1e07, 0x1e09, + 0x1e0b, 0x1e0d, 0x1e0f, 0x1e11, 0x1e13, 0x1e15, 0x1e17, 0x1e19, 0x1e1b, + 0x1e1d, 0x1e1f, 0x1e21, 0x1e23, 0x1e25, 0x1e27, 0x1e29, 0x1e2b, 0x1e2d, + 0x1e2f, 0x1e31, 0x1e33, 0x1e35, 0x1e37, 0x1e39, 0x1e3b, 0x1e3d, 0x1e3f, + 0x1e41, 0x1e43, 0x1e45, 0x1e47, 0x1e49, 0x1e4b, 0x1e4d, 0x1e4f, 0x1e51, + 0x1e53, 0x1e55, 0x1e57, 0x1e59, 0x1e5b, 0x1e5d, 0x1e5f, 0x1e61, 0x1e63, + 0x1e65, 0x1e67, 0x1e69, 0x1e6b, 0x1e6d, 0x1e6f, 0x1e71, 0x1e73, 0x1e75, + 0x1e77, 0x1e79, 0x1e7b, 0x1e7d, 0x1e7f, 0x1e81, 0x1e83, 0x1e85, 0x1e87, + 0x1e89, 0x1e8b, 0x1e8d, 0x1e8f, 0x1e91, 0x1e93, 0x1ea1, 0x1ea3, 0x1ea5, + 0x1ea7, 0x1ea9, 0x1eab, 0x1ead, 0x1eaf, 0x1eb1, 0x1eb3, 0x1eb5, 0x1eb7, + 0x1eb9, 0x1ebb, 0x1ebd, 0x1ebf, 0x1ec1, 0x1ec3, 0x1ec5, 0x1ec7, 0x1ec9, + 0x1ecb, 0x1ecd, 0x1ecf, 0x1ed1, 0x1ed3, 0x1ed5, 0x1ed7, 0x1ed9, 0x1edb, + 0x1edd, 0x1edf, 0x1ee1, 0x1ee3, 0x1ee5, 0x1ee7, 0x1ee9, 0x1eeb, 0x1eed, + 0x1eef, 0x1ef1, 0x1ef3, 0x1ef5, 0x1ef7, 0x1ef9, 0x1fb6, 0x1fb7, 0x1fbe, + 0x1fc6, 0x1fc7, 0x1fd6, 0x1fd7, 0x1ff6, 0x1ff7, 0x207f, 0x210a, 0x210e, + 0x210f, 0x2113, 0x212f, 0x2134, 0x2139 +}; + +#define NUM_LOWER_CHAR (sizeof(lowerCharTable)/sizeof(chr)) + +/* Unicode: uppercase characters */ + +static crange upperRangeTable[] = { + {0x0041, 0x005a}, {0x00c0, 0x00d6}, {0x00d8, 0x00de}, {0x0189, 0x018b}, + {0x018e, 0x0191}, {0x0196, 0x0198}, {0x01b1, 0x01b3}, {0x01f6, 0x01f8}, + {0x0388, 0x038a}, {0x0391, 0x03a1}, {0x03a3, 0x03ab}, {0x03d2, 0x03d4}, + {0x0400, 0x042f}, 
{0x0531, 0x0556}, {0x10a0, 0x10c5}, {0x1f08, 0x1f0f}, + {0x1f18, 0x1f1d}, {0x1f28, 0x1f2f}, {0x1f38, 0x1f3f}, {0x1f48, 0x1f4d}, + {0x1f68, 0x1f6f}, {0x1fb8, 0x1fbb}, {0x1fc8, 0x1fcb}, {0x1fd8, 0x1fdb}, + {0x1fe8, 0x1fec}, {0x1ff8, 0x1ffb}, {0x210b, 0x210d}, {0x2110, 0x2112}, + {0x2119, 0x211d}, {0x212a, 0x212d}, {0xff21, 0xff3a} +}; + +#define NUM_UPPER_RANGE (sizeof(upperRangeTable)/sizeof(crange)) + +static chr upperCharTable[] = { + 0x0100, 0x0102, 0x0104, 0x0106, 0x0108, 0x010a, 0x010c, 0x010e, 0x0110, + 0x0112, 0x0114, 0x0116, 0x0118, 0x011a, 0x011c, 0x011e, 0x0120, 0x0122, + 0x0124, 0x0126, 0x0128, 0x012a, 0x012c, 0x012e, 0x0130, 0x0132, 0x0134, + 0x0136, 0x0139, 0x013b, 0x013d, 0x013f, 0x0141, 0x0143, 0x0145, 0x0147, + 0x014a, 0x014c, 0x014e, 0x0150, 0x0152, 0x0154, 0x0156, 0x0158, 0x015a, + 0x015c, 0x015e, 0x0160, 0x0162, 0x0164, 0x0166, 0x0168, 0x016a, 0x016c, + 0x016e, 0x0170, 0x0172, 0x0174, 0x0176, 0x0178, 0x0179, 0x017b, 0x017d, + 0x0181, 0x0182, 0x0184, 0x0186, 0x0187, 0x0193, 0x0194, 0x019c, 0x019d, + 0x019f, 0x01a0, 0x01a2, 0x01a4, 0x01a6, 0x01a7, 0x01a9, 0x01ac, 0x01ae, + 0x01af, 0x01b5, 0x01b7, 0x01b8, 0x01bc, 0x01c4, 0x01c7, 0x01ca, 0x01cd, + 0x01cf, 0x01d1, 0x01d3, 0x01d5, 0x01d7, 0x01d9, 0x01db, 0x01de, 0x01e0, + 0x01e2, 0x01e4, 0x01e6, 0x01e8, 0x01ea, 0x01ec, 0x01ee, 0x01f1, 0x01f4, + 0x01fa, 0x01fc, 0x01fe, 0x0200, 0x0202, 0x0204, 0x0206, 0x0208, 0x020a, + 0x020c, 0x020e, 0x0210, 0x0212, 0x0214, 0x0216, 0x0218, 0x021a, 0x021c, + 0x021e, 0x0222, 0x0224, 0x0226, 0x0228, 0x022a, 0x022c, 0x022e, 0x0230, + 0x0232, 0x0386, 0x038c, 0x038e, 0x038f, 0x03da, 0x03dc, 0x03de, 0x03e0, + 0x03e2, 0x03e4, 0x03e6, 0x03e8, 0x03ea, 0x03ec, 0x03ee, 0x03f4, 0x0460, + 0x0462, 0x0464, 0x0466, 0x0468, 0x046a, 0x046c, 0x046e, 0x0470, 0x0472, + 0x0474, 0x0476, 0x0478, 0x047a, 0x047c, 0x047e, 0x0480, 0x048c, 0x048e, + 0x0490, 0x0492, 0x0494, 0x0496, 0x0498, 0x049a, 0x049c, 0x049e, 0x04a0, + 0x04a2, 0x04a4, 0x04a6, 0x04a8, 0x04aa, 0x04ac, 0x04ae, 0x04b0, 0x04b2, + 
0x04b4, 0x04b6, 0x04b8, 0x04ba, 0x04bc, 0x04be, 0x04c0, 0x04c1, 0x04c3, + 0x04c7, 0x04cb, 0x04d0, 0x04d2, 0x04d4, 0x04d6, 0x04d8, 0x04da, 0x04dc, + 0x04de, 0x04e0, 0x04e2, 0x04e4, 0x04e6, 0x04e8, 0x04ea, 0x04ec, 0x04ee, + 0x04f0, 0x04f2, 0x04f4, 0x04f8, 0x1e00, 0x1e02, 0x1e04, 0x1e06, 0x1e08, + 0x1e0a, 0x1e0c, 0x1e0e, 0x1e10, 0x1e12, 0x1e14, 0x1e16, 0x1e18, 0x1e1a, + 0x1e1c, 0x1e1e, 0x1e20, 0x1e22, 0x1e24, 0x1e26, 0x1e28, 0x1e2a, 0x1e2c, + 0x1e2e, 0x1e30, 0x1e32, 0x1e34, 0x1e36, 0x1e38, 0x1e3a, 0x1e3c, 0x1e3e, + 0x1e40, 0x1e42, 0x1e44, 0x1e46, 0x1e48, 0x1e4a, 0x1e4c, 0x1e4e, 0x1e50, + 0x1e52, 0x1e54, 0x1e56, 0x1e58, 0x1e5a, 0x1e5c, 0x1e5e, 0x1e60, 0x1e62, + 0x1e64, 0x1e66, 0x1e68, 0x1e6a, 0x1e6c, 0x1e6e, 0x1e70, 0x1e72, 0x1e74, + 0x1e76, 0x1e78, 0x1e7a, 0x1e7c, 0x1e7e, 0x1e80, 0x1e82, 0x1e84, 0x1e86, + 0x1e88, 0x1e8a, 0x1e8c, 0x1e8e, 0x1e90, 0x1e92, 0x1e94, 0x1ea0, 0x1ea2, + 0x1ea4, 0x1ea6, 0x1ea8, 0x1eaa, 0x1eac, 0x1eae, 0x1eb0, 0x1eb2, 0x1eb4, + 0x1eb6, 0x1eb8, 0x1eba, 0x1ebc, 0x1ebe, 0x1ec0, 0x1ec2, 0x1ec4, 0x1ec6, + 0x1ec8, 0x1eca, 0x1ecc, 0x1ece, 0x1ed0, 0x1ed2, 0x1ed4, 0x1ed6, 0x1ed8, + 0x1eda, 0x1edc, 0x1ede, 0x1ee0, 0x1ee2, 0x1ee4, 0x1ee6, 0x1ee8, 0x1eea, + 0x1eec, 0x1eee, 0x1ef0, 0x1ef2, 0x1ef4, 0x1ef6, 0x1ef8, 0x1f59, 0x1f5b, + 0x1f5d, 0x1f5f, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x2130, + 0x2131, 0x2133 }; +#define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr)) + +/* Unicode: unicode print characters excluding space */ + +static crange graphRangeTable[] = { + {0x0021, 0x007e}, {0x00a0, 0x011f}, {0x0121, 0x021f}, {0x0222, 0x0233}, + {0x0250, 0x02ad}, {0x02b0, 0x02ee}, {0x0300, 0x031f}, {0x0321, 0x034e}, + {0x0360, 0x0362}, {0x0384, 0x038a}, {0x038e, 0x03a1}, {0x03a3, 0x03ce}, + {0x03d0, 0x03d7}, {0x03da, 0x03f5}, {0x0400, 0x041f}, {0x0421, 0x0486}, + {0x048c, 0x04c4}, {0x04d0, 0x04f5}, {0x0531, 0x0556}, {0x0559, 0x055f}, + {0x0561, 0x0587}, {0x0591, 0x05a1}, {0x05a3, 0x05b9}, {0x05bb, 0x05c4}, + {0x05d0, 0x05ea}, {0x05f0, 0x05f4}, {0x0621, 
0x063a}, {0x0640, 0x0655}, + {0x0660, 0x066d}, {0x0670, 0x06ed}, {0x06f0, 0x06fe}, {0x0700, 0x070d}, + {0x0710, 0x071f}, {0x0721, 0x072c}, {0x0730, 0x074a}, {0x0780, 0x07b0}, + {0x0901, 0x0903}, {0x0905, 0x091f}, {0x0921, 0x0939}, {0x093c, 0x094d}, + {0x0950, 0x0954}, {0x0958, 0x0970}, {0x0981, 0x0983}, {0x0985, 0x098c}, + {0x0993, 0x09a8}, {0x09aa, 0x09b0}, {0x09b6, 0x09b9}, {0x09be, 0x09c4}, + {0x09cb, 0x09cd}, {0x09df, 0x09e3}, {0x09e6, 0x09fa}, {0x0a05, 0x0a0a}, + {0x0a13, 0x0a1f}, {0x0a21, 0x0a28}, {0x0a2a, 0x0a30}, {0x0a3e, 0x0a42}, + {0x0a4b, 0x0a4d}, {0x0a59, 0x0a5c}, {0x0a66, 0x0a74}, {0x0a81, 0x0a83}, + {0x0a85, 0x0a8b}, {0x0a8f, 0x0a91}, {0x0a93, 0x0aa8}, {0x0aaa, 0x0ab0}, + {0x0ab5, 0x0ab9}, {0x0abc, 0x0ac5}, {0x0ac7, 0x0ac9}, {0x0acb, 0x0acd}, + {0x0ae6, 0x0aef}, {0x0b01, 0x0b03}, {0x0b05, 0x0b0c}, {0x0b13, 0x0b1f}, + {0x0b21, 0x0b28}, {0x0b2a, 0x0b30}, {0x0b36, 0x0b39}, {0x0b3c, 0x0b43}, + {0x0b4b, 0x0b4d}, {0x0b5f, 0x0b61}, {0x0b66, 0x0b70}, {0x0b85, 0x0b8a}, + {0x0b8e, 0x0b90}, {0x0b92, 0x0b95}, {0x0ba8, 0x0baa}, {0x0bae, 0x0bb5}, + {0x0bb7, 0x0bb9}, {0x0bbe, 0x0bc2}, {0x0bc6, 0x0bc8}, {0x0bca, 0x0bcd}, + {0x0be7, 0x0bf2}, {0x0c01, 0x0c03}, {0x0c05, 0x0c0c}, {0x0c0e, 0x0c10}, + {0x0c12, 0x0c1f}, {0x0c21, 0x0c28}, {0x0c2a, 0x0c33}, {0x0c35, 0x0c39}, + {0x0c3e, 0x0c44}, {0x0c46, 0x0c48}, {0x0c4a, 0x0c4d}, {0x0c66, 0x0c6f}, + {0x0c85, 0x0c8c}, {0x0c8e, 0x0c90}, {0x0c92, 0x0ca8}, {0x0caa, 0x0cb3}, + {0x0cb5, 0x0cb9}, {0x0cbe, 0x0cc4}, {0x0cc6, 0x0cc8}, {0x0cca, 0x0ccd}, + {0x0ce6, 0x0cef}, {0x0d05, 0x0d0c}, {0x0d0e, 0x0d10}, {0x0d12, 0x0d1f}, + {0x0d21, 0x0d28}, {0x0d2a, 0x0d39}, {0x0d3e, 0x0d43}, {0x0d46, 0x0d48}, + {0x0d4a, 0x0d4d}, {0x0d66, 0x0d6f}, {0x0d85, 0x0d96}, {0x0d9a, 0x0db1}, + {0x0db3, 0x0dbb}, {0x0dc0, 0x0dc6}, {0x0dcf, 0x0dd4}, {0x0dd8, 0x0ddf}, + {0x0df2, 0x0df4}, {0x0e01, 0x0e1f}, {0x0e21, 0x0e3a}, {0x0e3f, 0x0e5b}, + {0x0e94, 0x0e97}, {0x0e99, 0x0e9f}, {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb9}, + {0x0ebb, 0x0ebd}, {0x0ec0, 0x0ec4}, {0x0ec8, 
0x0ecd}, {0x0ed0, 0x0ed9}, + {0x0f00, 0x0f1f}, {0x0f21, 0x0f47}, {0x0f49, 0x0f6a}, {0x0f71, 0x0f8b}, + {0x0f90, 0x0f97}, {0x0f99, 0x0fbc}, {0x0fbe, 0x0fcc}, {0x1000, 0x101f}, + {0x1023, 0x1027}, {0x102c, 0x1032}, {0x1036, 0x1039}, {0x1040, 0x1059}, + {0x10a0, 0x10c5}, {0x10d0, 0x10f6}, {0x1100, 0x111f}, {0x1121, 0x1159}, + {0x115f, 0x11a2}, {0x11a8, 0x11f9}, {0x1200, 0x1206}, {0x1208, 0x121f}, + {0x1221, 0x1246}, {0x124a, 0x124d}, {0x1250, 0x1256}, {0x125a, 0x125d}, + {0x1260, 0x1286}, {0x128a, 0x128d}, {0x1290, 0x12ae}, {0x12b2, 0x12b5}, + {0x12b8, 0x12be}, {0x12c2, 0x12c5}, {0x12c8, 0x12ce}, {0x12d0, 0x12d6}, + {0x12d8, 0x12ee}, {0x12f0, 0x130e}, {0x1312, 0x1315}, {0x1318, 0x131e}, + {0x1321, 0x1346}, {0x1348, 0x135a}, {0x1361, 0x137c}, {0x13a0, 0x13f4}, + {0x1401, 0x141f}, {0x1421, 0x151f}, {0x1521, 0x161f}, {0x1621, 0x1676}, + {0x1680, 0x169c}, {0x16a0, 0x16f0}, {0x1780, 0x17dc}, {0x17e0, 0x17e9}, + {0x1800, 0x180a}, {0x1810, 0x1819}, {0x1821, 0x1877}, {0x1880, 0x18a9}, + {0x1e00, 0x1e1f}, {0x1e21, 0x1e9b}, {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15}, + {0x1f18, 0x1f1d}, {0x1f21, 0x1f45}, {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, + {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, {0x1fb6, 0x1fc4}, {0x1fc6, 0x1fd3}, + {0x1fd6, 0x1fdb}, {0x1fdd, 0x1fef}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffe}, + {0x2000, 0x200b}, {0x2010, 0x201f}, {0x2021, 0x2029}, {0x202f, 0x2046}, + {0x2048, 0x204d}, {0x2074, 0x208e}, {0x20a0, 0x20af}, {0x20d0, 0x20e3}, + {0x2100, 0x211f}, {0x2121, 0x213a}, {0x2153, 0x2183}, {0x2190, 0x21f3}, + {0x2200, 0x221f}, {0x2221, 0x22f1}, {0x2300, 0x231f}, {0x2321, 0x237b}, + {0x237d, 0x239a}, {0x2400, 0x241f}, {0x2421, 0x2426}, {0x2440, 0x244a}, + {0x2460, 0x24ea}, {0x2500, 0x251f}, {0x2521, 0x2595}, {0x25a0, 0x25f7}, + {0x2600, 0x2613}, {0x2619, 0x261f}, {0x2621, 0x2671}, {0x2701, 0x2704}, + {0x2706, 0x2709}, {0x270c, 0x271f}, {0x2721, 0x2727}, {0x2729, 0x274b}, + {0x274f, 0x2752}, {0x2758, 0x275e}, {0x2761, 0x2767}, {0x2776, 0x2794}, + {0x2798, 0x27af}, {0x27b1, 0x27be}, {0x2800, 
0x281f}, {0x2821, 0x28ff}, + {0x2e80, 0x2e99}, {0x2e9b, 0x2ef3}, {0x2f00, 0x2f1f}, {0x2f21, 0x2fd5}, + {0x2ff0, 0x2ffb}, {0x3000, 0x301f}, {0x3021, 0x303a}, {0x3041, 0x3094}, + {0x3099, 0x309e}, {0x30a1, 0x30fe}, {0x3105, 0x311f}, {0x3121, 0x312c}, + {0x3131, 0x318e}, {0x3190, 0x31b7}, {0x3200, 0x321c}, {0x3221, 0x3243}, + {0x3260, 0x327b}, {0x327f, 0x32b0}, {0x32c0, 0x32cb}, {0x32d0, 0x32fe}, + {0x3300, 0x331f}, {0x3321, 0x3376}, {0x337b, 0x33dd}, {0x33e0, 0x33fe}, + {0x3400, 0x341f}, {0x3421, 0x351f}, {0x3521, 0x361f}, {0x3621, 0x371f}, + {0x3721, 0x381f}, {0x3821, 0x391f}, {0x3921, 0x3a1f}, {0x3a21, 0x3b1f}, + {0x3b21, 0x3c1f}, {0x3c21, 0x3d1f}, {0x3d21, 0x3e1f}, {0x3e21, 0x3f1f}, + {0x3f21, 0x401f}, {0x4021, 0x411f}, {0x4121, 0x421f}, {0x4221, 0x431f}, + {0x4321, 0x441f}, {0x4421, 0x451f}, {0x4521, 0x461f}, {0x4621, 0x471f}, + {0x4721, 0x481f}, {0x4821, 0x491f}, {0x4921, 0x4a1f}, {0x4a21, 0x4b1f}, + {0x4b21, 0x4c1f}, {0x4c21, 0x4d1f}, {0x4d21, 0x4db5}, {0x4e00, 0x4e1f}, + {0x4e21, 0x4f1f}, {0x4f21, 0x501f}, {0x5021, 0x511f}, {0x5121, 0x521f}, + {0x5221, 0x531f}, {0x5321, 0x541f}, {0x5421, 0x551f}, {0x5521, 0x561f}, + {0x5621, 0x571f}, {0x5721, 0x581f}, {0x5821, 0x591f}, {0x5921, 0x5a1f}, + {0x5a21, 0x5b1f}, {0x5b21, 0x5c1f}, {0x5c21, 0x5d1f}, {0x5d21, 0x5e1f}, + {0x5e21, 0x5f1f}, {0x5f21, 0x601f}, {0x6021, 0x611f}, {0x6121, 0x621f}, + {0x6221, 0x631f}, {0x6321, 0x641f}, {0x6421, 0x651f}, {0x6521, 0x661f}, + {0x6621, 0x671f}, {0x6721, 0x681f}, {0x6821, 0x691f}, {0x6921, 0x6a1f}, + {0x6a21, 0x6b1f}, {0x6b21, 0x6c1f}, {0x6c21, 0x6d1f}, {0x6d21, 0x6e1f}, + {0x6e21, 0x6f1f}, {0x6f21, 0x701f}, {0x7021, 0x711f}, {0x7121, 0x721f}, + {0x7221, 0x731f}, {0x7321, 0x741f}, {0x7421, 0x751f}, {0x7521, 0x761f}, + {0x7621, 0x771f}, {0x7721, 0x781f}, {0x7821, 0x791f}, {0x7921, 0x7a1f}, + {0x7a21, 0x7b1f}, {0x7b21, 0x7c1f}, {0x7c21, 0x7d1f}, {0x7d21, 0x7e1f}, + {0x7e21, 0x7f1f}, {0x7f21, 0x801f}, {0x8021, 0x811f}, {0x8121, 0x821f}, + {0x8221, 0x831f}, {0x8321, 0x841f}, {0x8421, 
0x851f}, {0x8521, 0x861f}, + {0x8621, 0x871f}, {0x8721, 0x881f}, {0x8821, 0x891f}, {0x8921, 0x8a1f}, + {0x8a21, 0x8b1f}, {0x8b21, 0x8c1f}, {0x8c21, 0x8d1f}, {0x8d21, 0x8e1f}, + {0x8e21, 0x8f1f}, {0x8f21, 0x901f}, {0x9021, 0x911f}, {0x9121, 0x921f}, + {0x9221, 0x931f}, {0x9321, 0x941f}, {0x9421, 0x951f}, {0x9521, 0x961f}, + {0x9621, 0x971f}, {0x9721, 0x981f}, {0x9821, 0x991f}, {0x9921, 0x9a1f}, + {0x9a21, 0x9b1f}, {0x9b21, 0x9c1f}, {0x9c21, 0x9d1f}, {0x9d21, 0x9e1f}, + {0x9e21, 0x9f1f}, {0x9f21, 0x9fa5}, {0xa000, 0xa01f}, {0xa021, 0xa11f}, + {0xa121, 0xa21f}, {0xa221, 0xa31f}, {0xa321, 0xa41f}, {0xa421, 0xa48c}, + {0xa490, 0xa4a1}, {0xa4a4, 0xa4b3}, {0xa4b5, 0xa4c0}, {0xa4c2, 0xa4c4}, + {0xac00, 0xac1f}, {0xac21, 0xad1f}, {0xad21, 0xae1f}, {0xae21, 0xaf1f}, + {0xaf21, 0xb01f}, {0xb021, 0xb11f}, {0xb121, 0xb21f}, {0xb221, 0xb31f}, + {0xb321, 0xb41f}, {0xb421, 0xb51f}, {0xb521, 0xb61f}, {0xb621, 0xb71f}, + {0xb721, 0xb81f}, {0xb821, 0xb91f}, {0xb921, 0xba1f}, {0xba21, 0xbb1f}, + {0xbb21, 0xbc1f}, {0xbc21, 0xbd1f}, {0xbd21, 0xbe1f}, {0xbe21, 0xbf1f}, + {0xbf21, 0xc01f}, {0xc021, 0xc11f}, {0xc121, 0xc21f}, {0xc221, 0xc31f}, + {0xc321, 0xc41f}, {0xc421, 0xc51f}, {0xc521, 0xc61f}, {0xc621, 0xc71f}, + {0xc721, 0xc81f}, {0xc821, 0xc91f}, {0xc921, 0xca1f}, {0xca21, 0xcb1f}, + {0xcb21, 0xcc1f}, {0xcc21, 0xcd1f}, {0xcd21, 0xce1f}, {0xce21, 0xcf1f}, + {0xcf21, 0xd01f}, {0xd021, 0xd11f}, {0xd121, 0xd21f}, {0xd221, 0xd31f}, + {0xd321, 0xd41f}, {0xd421, 0xd51f}, {0xd521, 0xd61f}, {0xd621, 0xd71f}, + {0xd721, 0xd7a3}, {0xf900, 0xf91f}, {0xf921, 0xfa1f}, {0xfa21, 0xfa2d}, + {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1d, 0xfb1f}, {0xfb21, 0xfb36}, + {0xfb38, 0xfb3c}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfc1f}, {0xfc21, 0xfd1f}, + {0xfd21, 0xfd3f}, {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb}, + {0xfe21, 0xfe23}, {0xfe30, 0xfe44}, {0xfe49, 0xfe52}, {0xfe54, 0xfe66}, + {0xfe68, 0xfe6b}, {0xfe70, 0xfe72}, {0xfe76, 0xfefc}, {0xff01, 0xff1f}, + {0xff21, 0xff5e}, {0xff61, 0xffbe}, {0xffc2, 
0xffc7}, {0xffca, 0xffcf}, + {0xffd2, 0xffd7}, {0xffda, 0xffdc}, {0xffe0, 0xffe6}, {0xffe8, 0xffee}, + {0xfffc, 0xffff} +}; + +#define NUM_GRAPH_RANGE (sizeof(graphRangeTable)/sizeof(crange)) + +static chr graphCharTable[] = { + 0x0374, 0x0375, 0x037a, 0x037e, 0x038c, 0x0488, 0x0489, 0x04c7, 0x04c8, + 0x04cb, 0x04cc, 0x04f8, 0x04f9, 0x0589, 0x058a, 0x060c, 0x061b, 0x061f, + 0x098f, 0x0990, 0x09b2, 0x09bc, 0x09c7, 0x09c8, 0x09d7, 0x09dc, 0x09dd, + 0x0a02, 0x0a0f, 0x0a10, 0x0a32, 0x0a33, 0x0a35, 0x0a36, 0x0a38, 0x0a39, + 0x0a3c, 0x0a47, 0x0a48, 0x0a5e, 0x0a8d, 0x0ab2, 0x0ab3, 0x0ad0, 0x0ae0, + 0x0b0f, 0x0b10, 0x0b32, 0x0b33, 0x0b47, 0x0b48, 0x0b56, 0x0b57, 0x0b5c, + 0x0b5d, 0x0b82, 0x0b83, 0x0b99, 0x0b9a, 0x0b9c, 0x0b9e, 0x0b9f, 0x0ba3, + 0x0ba4, 0x0bd7, 0x0c55, 0x0c56, 0x0c60, 0x0c61, 0x0c82, 0x0c83, 0x0cd5, + 0x0cd6, 0x0cde, 0x0ce0, 0x0ce1, 0x0d02, 0x0d03, 0x0d57, 0x0d60, 0x0d61, + 0x0d82, 0x0d83, 0x0dbd, 0x0dca, 0x0dd6, 0x0e81, 0x0e82, 0x0e84, 0x0e87, + 0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0ec6, 0x0edc, + 0x0edd, 0x0fcf, 0x1021, 0x1029, 0x102a, 0x10fb, 0x1248, 0x1258, 0x1288, + 0x12b0, 0x12c0, 0x1310, 0x1f59, 0x1f5b, 0x1f5d, 0x2070, 0x274d, 0x2756, + 0x303e, 0x303f, 0xa4c6, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfe74 +}; + +#define NUM_GRAPH_CHAR (sizeof(graphCharTable)/sizeof(chr)) + +/* + * End of auto-generated Unicode character ranges declarations. + */ + +#define CH NOCELT /* - * nmcces - how many distinct MCCEs are there? + - nmcces - how many distinct MCCEs are there? + ^ static int nmcces(struct vars *); */ static int -nmcces(struct vars * v) +nmcces(v) + struct vars *v; /* context */ { - /* - * No multi-character collating elements defined at the moment. - */ - return 0; + /* + * No multi-character collating elements defined at the moment. + */ + return 0; } /* - * nleaders - how many chrs can be first chrs of MCCEs? + - nleaders - how many chrs can be first chrs of MCCEs? 
+ ^ static int nleaders(struct vars *); */ static int -nleaders(struct vars * v) +nleaders(v) + struct vars *v; /* context */ { - return 0; + return 0; } /* - * allmcces - return a cvec with all the MCCEs of the locale + - allmcces - return a cvec with all the MCCEs of the locale + ^ static struct cvec *allmcces(struct vars *, struct cvec *); */ static struct cvec * -allmcces(struct vars * v, /* context */ - struct cvec * cv) /* this is supposed to have enough room */ +allmcces(v, cv) + struct vars *v; /* context */ + struct cvec *cv; /* this is supposed to have enough room */ { - return clearcvec(cv); + return clearcvec(cv); } /* - * element - map collating-element name to celt + - element - map collating-element name to celt + ^ static celt element(struct vars *, chr *, chr *); */ static celt -element(struct vars * v, /* context */ - chr *startp, /* points to start of name */ - chr *endp) /* points just past end of name */ +element(v, startp, endp) + struct vars *v; /* context */ + chr *startp; /* points to start of name */ + chr *endp; /* points just past end of name */ { - struct cname *cn; - size_t len; - - /* generic: one-chr names stand for themselves */ - assert(startp < endp); - len = endp - startp; - if (len == 1) - return *startp; - - NOTE(REG_ULOCALE); - - /* search table */ - for (cn = cnames; cn->name != NULL; cn++) - { - if (strlen(cn->name) == len && - char_and_wchar_strncmp(cn->name, startp, len) == 0) - { - break; /* NOTE BREAK OUT */ - } + struct cname *cn; + size_t len; + Tcl_DString ds; + CONST char *np; + + /* generic: one-chr names stand for themselves */ + assert(startp < endp); + len = endp - startp; + if (len == 1) { + return *startp; + } + + NOTE(REG_ULOCALE); + + /* search table */ + Tcl_DStringInit(&ds); + np = Tcl_UniCharToUtfDString(startp, (int)len, &ds); + for (cn=cnames; cn->name!=NULL; cn++) { + if (strlen(cn->name)==len && strncmp(cn->name, np, len)==0) { + break; /* NOTE BREAK OUT */ } - if (cn->name != NULL) - return 
CHR(cn->code); - - /* couldn't find it */ - ERR(REG_ECOLLATE); - return 0; + } + Tcl_DStringFree(&ds); + if (cn->name != NULL) { + return CHR(cn->code); + } + + /* couldn't find it */ + ERR(REG_ECOLLATE); + return 0; } /* - * range - supply cvec for a range, including legality check + - range - supply cvec for a range, including legality check + ^ static struct cvec *range(struct vars *, celt, celt, int); */ static struct cvec * -range(struct vars * v, /* context */ - celt a, /* range start */ - celt b, /* range end, might equal a */ - int cases) /* case-independent? */ +range(v, a, b, cases) + struct vars *v; /* context */ + celt a; /* range start */ + celt b; /* range end, might equal a */ + int cases; /* case-independent? */ { - int nchrs; - struct cvec *cv; - celt c, - lc, - uc; - - if (a != b && !before(a, b)) - { - ERR(REG_ERANGE); - return NULL; - } + int nchrs; + struct cvec *cv; + celt c, lc, uc, tc; - if (!cases) - { /* easy version */ - cv = getcvec(v, 0, 1, 0); - NOERRN(); - addrange(cv, a, b); - return cv; - } + if (a != b && !before(a, b)) { + ERR(REG_ERANGE); + return NULL; + } - /* - * When case-independent, it's hard to decide when cvec ranges are - * usable, so for now at least, we won't try. We allocate enough - * space for two case variants plus a little extra for the two title - * case variants. - */ - - nchrs = (b - a + 1) * 2 + 4; - - cv = getcvec(v, nchrs, 0, 0); + if (!cases) { /* easy version */ + cv = getcvec(v, 0, 1, 0); NOERRN(); - - for (c = a; c <= b; c++) - { - addchr(cv, c); - lc = wx_tolower((chr) c); - if (c != lc) - addchr(cv, lc); - uc = wx_toupper((chr) c); - if (c != uc) - addchr(cv, uc); + addrange(cv, a, b); + return cv; + } + + /* + * When case-independent, it's hard to decide when cvec ranges are + * usable, so for now at least, we won't try. We allocate enough + * space for two case variants plus a little extra for the two + * title case variants. 
+ */ + + nchrs = (b - a + 1)*2 + 4; + + cv = getcvec(v, nchrs, 0, 0); + NOERRN(); + + for (c=a; c<=b; c++) { + addchr(cv, c); + lc = Tcl_UniCharToLower((chr)c); + uc = Tcl_UniCharToUpper((chr)c); + tc = Tcl_UniCharToTitle((chr)c); + if (c != lc) { + addchr(cv, lc); + } + if (c != uc) { + addchr(cv, uc); + } + if (c != tc && tc != uc) { + addchr(cv, tc); } + } - return cv; + return cv; } /* - * before - is celt x before celt y, for purposes of range legality? + - before - is celt x before celt y, for purposes of range legality? + ^ static int before(celt, celt); */ -static int /* predicate */ -before(celt x, celt y) +static int /* predicate */ +before(x, y) + celt x, y; /* collating elements */ { - /* trivial because no MCCEs */ - if (x < y) - return 1; - return 0; + /* trivial because no MCCEs */ + if (x < y) { + return 1; + } + return 0; } /* - * eclass - supply cvec for an equivalence class + - eclass - supply cvec for an equivalence class * Must include case counterparts on request. + ^ static struct cvec *eclass(struct vars *, celt, int); */ static struct cvec * -eclass(struct vars * v, /* context */ - celt c, /* Collating element representing the - * equivalence class. */ - int cases) /* all cases? */ +eclass(v, c, cases) + struct vars *v; /* context */ + celt c; /* Collating element representing + * the equivalence class. */ + int cases; /* all cases? 
*/ { - struct cvec *cv; - - /* crude fake equivalence class for testing */ - if ((v->cflags & REG_FAKE) && c == 'x') - { - cv = getcvec(v, 4, 0, 0); - addchr(cv, (chr) 'x'); - addchr(cv, (chr) 'y'); - if (cases) - { - addchr(cv, (chr) 'X'); - addchr(cv, (chr) 'Y'); - } - return cv; + struct cvec *cv; + + /* crude fake equivalence class for testing */ + if ((v->cflags&REG_FAKE) && c == 'x') { + cv = getcvec(v, 4, 0, 0); + addchr(cv, (chr)'x'); + addchr(cv, (chr)'y'); + if (cases) { + addchr(cv, (chr)'X'); + addchr(cv, (chr)'Y'); } - - /* otherwise, none */ - if (cases) - return allcases(v, c); - cv = getcvec(v, 1, 0, 0); - assert(cv != NULL); - addchr(cv, (chr) c); return cv; + } + + /* otherwise, none */ + if (cases) { + return allcases(v, c); + } + cv = getcvec(v, 1, 0, 0); + assert(cv != NULL); + addchr(cv, (chr)c); + return cv; } /* - * cclass - supply cvec for a character class - * + - cclass - supply cvec for a character class * Must include case counterparts on request. + ^ static struct cvec *cclass(struct vars *, chr *, chr *, int); */ static struct cvec * -cclass(struct vars * v, /* context */ - chr *startp, /* where the name starts */ - chr *endp, /* just past the end of the name */ - int cases) /* case-independent? */ +cclass(v, startp, endp, cases) + struct vars *v; /* context */ + chr *startp; /* where the name starts */ + chr *endp; /* just past the end of the name */ + int cases; /* case-independent? */ { - size_t len; - struct cvec *cv = NULL; - char **namePtr; - int i, - index; - - /* - * The following arrays define the valid character class names. - */ + size_t len; + struct cvec *cv = NULL; + Tcl_DString ds; + CONST char *np; + char **namePtr; + int i, index; + + /* + * The following arrays define the valid character class names. 
+ */ + + static char *classNames[] = { + "alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph", + "lower", "print", "punct", "space", "upper", "xdigit", NULL + }; + + enum classes { + CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH, + CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT + }; + - static char *classNames[] = { - "alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph", - "lower", "print", "punct", "space", "upper", "xdigit", NULL - }; + /* + * Extract the class name + */ - enum classes - { - CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH, - CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT - }; + len = endp - startp; + Tcl_DStringInit(&ds); + np = Tcl_UniCharToUtfDString(startp, (int)len, &ds); - /* - * Map the name to the corresponding enumerated value. - */ - len = endp - startp; - index = -1; - for (namePtr = classNames, i = 0; *namePtr != NULL; namePtr++, i++) - { - if (strlen(*namePtr) == len && - char_and_wchar_strncmp(*namePtr, startp, len) == 0) - { - index = i; - break; - } - } - if (index == -1) - { - ERR(REG_ECTYPE); - return NULL; - } + /* + * Remap lower and upper to alpha if the match is case insensitive. + */ - /* - * Remap lower and upper to alpha if the match is case insensitive. - */ + if (cases && len == 5 && (strncmp("lower", np, 5) == 0 + || strncmp("upper", np, 5) == 0)) { + np = "alpha"; + } - if (cases && - ((enum classes) index == CC_LOWER || - (enum classes) index == CC_UPPER)) - index = (int) CC_ALPHA; + /* + * Map the name to the corresponding enumerated value. + */ + index = -1; + for (namePtr=classNames,i=0 ; *namePtr!=NULL ; namePtr++,i++) { + if ((strlen(*namePtr) == len) && (strncmp(*namePtr, np, len) == 0)) { + index = i; + break; + } + } + Tcl_DStringInit(&ds); + if (index == -1) { + ERR(REG_ECTYPE); + return NULL; + } + + /* + * Now compute the character class contents. 
+ */ + + switch((enum classes) index) { + case CC_PRINT: + case CC_ALNUM: + cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE, 0); + if (cv) { + for (i=0 ; i 0; len--, x++, y++) - { - if ((*x != *y) && (wx_tolower(*x) != wx_tolower(*y))) - return 1; + for (; len > 0; len--, x++, y++) { + if ((*x!=*y) && (Tcl_UniCharToLower(*x) != Tcl_UniCharToLower(*y))) { + return 1; } - return 0; + } + return 0; } diff --git a/src/regex/regc_nfa.c b/src/regex/regc_nfa.c index cc9f6ea2f9..9881cd4304 100644 --- a/src/regex/regc_nfa.c +++ b/src/regex/regc_nfa.c @@ -2,21 +2,21 @@ * NFA utilities. * This file is #included by regcomp.c. * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * + * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. - * + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -28,7 +28,6 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Header$ * * * One or two things that technically ought to be in here @@ -36,21 +35,23 @@ * the color chains. 
*/ -#define NISERR() VISERR(nfa->v) -#define NERR(e) VERR(nfa->v, (e)) +#define NISERR() VISERR(nfa->v) +#define NERR(e) VERR(nfa->v, (e)) /* - * newnfa - set up an NFA + - newnfa - set up an NFA + ^ static struct nfa *newnfa(struct vars *, struct colormap *, struct nfa *); */ -static struct nfa * /* the NFA, or NULL */ -newnfa(struct vars * v, - struct colormap * cm, - struct nfa * parent) /* NULL if primary NFA */ +static struct nfa * /* the NFA, or NULL */ +newnfa(v, cm, parent) +struct vars *v; +struct colormap *cm; +struct nfa *parent; /* NULL if primary NFA */ { struct nfa *nfa; - nfa = (struct nfa *) MALLOC(sizeof(struct nfa)); + nfa = (struct nfa *)MALLOC(sizeof(struct nfa)); if (nfa == NULL) return NULL; @@ -66,10 +67,9 @@ newnfa(struct vars * v, nfa->pre = newfstate(nfa, '>'); /* number 1 */ nfa->parent = parent; - nfa->init = newstate(nfa); /* may become invalid later */ + nfa->init = newstate(nfa); /* may become invalid later */ nfa->final = newstate(nfa); - if (ISERR()) - { + if (ISERR()) { freenfa(nfa); return NULL; } @@ -80,8 +80,7 @@ newnfa(struct vars * v, newarc(nfa, '$', 1, nfa->final, nfa->post); newarc(nfa, '$', 0, nfa->final, nfa->post); - if (ISERR()) - { + if (ISERR()) { freenfa(nfa); return NULL; } @@ -89,20 +88,20 @@ newnfa(struct vars * v, } /* - * freenfa - free an entire NFA + - freenfa - free an entire NFA + ^ static VOID freenfa(struct nfa *); */ -static void -freenfa(struct nfa * nfa) +static VOID +freenfa(nfa) +struct nfa *nfa; { struct state *s; - while ((s = nfa->states) != NULL) - { - s->nins = s->nouts = 0; /* don't worry about arcs */ + while ((s = nfa->states) != NULL) { + s->nins = s->nouts = 0; /* don't worry about arcs */ freestate(nfa, s); } - while ((s = nfa->free) != NULL) - { + while ((s = nfa->free) != NULL) { nfa->free = s->next; destroystate(nfa, s); } @@ -115,23 +114,21 @@ freenfa(struct nfa * nfa) } /* - * newstate - allocate an NFA state, with zero flag value + - newstate - allocate an NFA state, with zero flag 
value + ^ static struct state *newstate(struct nfa *); */ -static struct state * /* NULL on error */ -newstate(struct nfa * nfa) +static struct state * /* NULL on error */ +newstate(nfa) +struct nfa *nfa; { struct state *s; - if (nfa->free != NULL) - { + if (nfa->free != NULL) { s = nfa->free; nfa->free = s->next; - } - else - { - s = (struct state *) MALLOC(sizeof(struct state)); - if (s == NULL) - { + } else { + s = (struct state *)MALLOC(sizeof(struct state)); + if (s == NULL) { NERR(REG_ESPACE); return NULL; } @@ -151,8 +148,7 @@ newstate(struct nfa * nfa) s->outs = NULL; s->tmp = NULL; s->next = NULL; - if (nfa->slast != NULL) - { + if (nfa->slast != NULL) { assert(nfa->slast->next == NULL); nfa->slast->next = s; } @@ -162,25 +158,30 @@ newstate(struct nfa * nfa) } /* - * newfstate - allocate an NFA state with a specified flag value + - newfstate - allocate an NFA state with a specified flag value + ^ static struct state *newfstate(struct nfa *, int flag); */ -static struct state * /* NULL on error */ -newfstate(struct nfa * nfa, int flag) +static struct state * /* NULL on error */ +newfstate(nfa, flag) +struct nfa *nfa; +int flag; { struct state *s; s = newstate(nfa); if (s != NULL) - s->flag = (char) flag; + s->flag = (char)flag; return s; } /* - * dropstate - delete a state's inarcs and outarcs and free it + - dropstate - delete a state's inarcs and outarcs and free it + ^ static VOID dropstate(struct nfa *, struct state *); */ -static void -dropstate(struct nfa * nfa, - struct state * s) +static VOID +dropstate(nfa, s) +struct nfa *nfa; +struct state *s; { struct arc *a; @@ -192,11 +193,13 @@ dropstate(struct nfa * nfa, } /* - * freestate - free a state, which has no in-arcs or out-arcs + - freestate - free a state, which has no in-arcs or out-arcs + ^ static VOID freestate(struct nfa *, struct state *); */ -static void -freestate(struct nfa * nfa, - struct state * s) +static VOID +freestate(nfa, s) +struct nfa *nfa; +struct state *s; { assert(s != NULL); 
assert(s->nins == 0 && s->nouts == 0); @@ -205,37 +208,35 @@ freestate(struct nfa * nfa, s->flag = 0; if (s->next != NULL) s->next->prev = s->prev; - else - { + else { assert(s == nfa->slast); nfa->slast = s->prev; } if (s->prev != NULL) s->prev->next = s->next; - else - { + else { assert(s == nfa->states); nfa->states = s->next; } s->prev = NULL; - s->next = nfa->free; /* don't delete it, put it on the free - * list */ + s->next = nfa->free; /* don't delete it, put it on the free list */ nfa->free = s; } /* - * destroystate - really get rid of an already-freed state + - destroystate - really get rid of an already-freed state + ^ static VOID destroystate(struct nfa *, struct state *); */ -static void -destroystate(struct nfa * nfa, - struct state * s) +static VOID +destroystate(nfa, s) +struct nfa *nfa; +struct state *s; { struct arcbatch *ab; struct arcbatch *abnext; assert(s->no == FREESTATE); - for (ab = s->oas.next; ab != NULL; ab = abnext) - { + for (ab = s->oas.next; ab != NULL; ab = abnext) { abnext = ab->next; FREE(ab); } @@ -246,14 +247,17 @@ destroystate(struct nfa * nfa, } /* - * newarc - set up a new arc within an NFA + - newarc - set up a new arc within an NFA + ^ static VOID newarc(struct nfa *, int, pcolor, struct state *, + ^ struct state *); */ -static void -newarc(struct nfa * nfa, - int t, - pcolor co, - struct state * from, - struct state * to) +static VOID +newarc(nfa, t, co, from, to) +struct nfa *nfa; +int t; +pcolor co; +struct state *from; +struct state *to; { struct arc *a; @@ -270,13 +274,13 @@ newarc(struct nfa * nfa, assert(a != NULL); a->type = t; - a->co = (color) co; + a->co = (color)co; a->to = to; a->from = from; /* - * Put the new arc on the beginning, not the end, of the chains. Not - * only is this easier, it has the very useful side effect that + * Put the new arc on the beginning, not the end, of the chains. 
+ * Not only is this easier, it has the very useful side effect that * deleting the most-recently-added arc is the cheapest case rather * than the most expensive one. */ @@ -295,42 +299,40 @@ newarc(struct nfa * nfa, } /* - * allocarc - allocate a new out-arc within a state + - allocarc - allocate a new out-arc within a state + ^ static struct arc *allocarc(struct nfa *, struct state *); */ -static struct arc * /* NULL for failure */ -allocarc(struct nfa * nfa, - struct state * s) +static struct arc * /* NULL for failure */ +allocarc(nfa, s) +struct nfa *nfa; +struct state *s; { struct arc *a; struct arcbatch *new; - int i; + int i; /* shortcut */ - if (s->free == NULL && s->noas < ABSIZE) - { + if (s->free == NULL && s->noas < ABSIZE) { a = &s->oas.a[s->noas]; s->noas++; return a; } /* if none at hand, get more */ - if (s->free == NULL) - { - new = (struct arcbatch *) MALLOC(sizeof(struct arcbatch)); - if (new == NULL) - { + if (s->free == NULL) { + new = (struct arcbatch *)MALLOC(sizeof(struct arcbatch)); + if (new == NULL) { NERR(REG_ESPACE); return NULL; } new->next = s->oas.next; s->oas.next = new; - for (i = 0; i < ABSIZE; i++) - { + for (i = 0; i < ABSIZE; i++) { new->a[i].type = 0; - new->a[i].freechain = &new->a[i + 1]; + new->a[i].freechain = &new->a[i+1]; } - new->a[ABSIZE - 1].freechain = NULL; + new->a[ABSIZE-1].freechain = NULL; s->free = &new->a[0]; } assert(s->free != NULL); @@ -341,11 +343,13 @@ allocarc(struct nfa * nfa, } /* - * freearc - free an arc + - freearc - free an arc + ^ static VOID freearc(struct nfa *, struct arc *); */ -static void -freearc(struct nfa * nfa, - struct arc * victim) +static VOID +freearc(nfa, victim) +struct nfa *nfa; +struct arc *victim; { struct state *from = victim->from; struct state *to = victim->to; @@ -361,10 +365,9 @@ freearc(struct nfa * nfa, assert(from != NULL); assert(from->outs != NULL); a = from->outs; - if (a == victim) /* simple case: first in chain */ + if (a == victim) /* simple case: first in chain */ 
from->outs = victim->outchain; - else - { + else { for (; a != NULL && a->outchain != victim; a = a->outchain) continue; assert(a != NULL); @@ -376,10 +379,9 @@ freearc(struct nfa * nfa, assert(to != NULL); assert(to->ins != NULL); a = to->ins; - if (a == victim) /* simple case: first in chain */ + if (a == victim) /* simple case: first in chain */ to->ins = victim->inchain; - else - { + else { for (; a != NULL && a->inchain != victim; a = a->inchain) continue; assert(a != NULL); @@ -398,13 +400,15 @@ freearc(struct nfa * nfa, } /* - * findarc - find arc, if any, from given source with given type and color + - findarc - find arc, if any, from given source with given type and color * If there is more than one such arc, the result is random. + ^ static struct arc *findarc(struct state *, int, pcolor); */ static struct arc * -findarc(struct state * s, - int type, - pcolor co) +findarc(s, type, co) +struct state *s; +int type; +pcolor co; { struct arc *a; @@ -415,36 +419,39 @@ findarc(struct state * s, } /* - * cparc - allocate a new arc within an NFA, copying details from old one + - cparc - allocate a new arc within an NFA, copying details from old one + ^ static VOID cparc(struct nfa *, struct arc *, struct state *, + ^ struct state *); */ -static void -cparc(struct nfa * nfa, - struct arc * oa, - struct state * from, - struct state * to) +static VOID +cparc(nfa, oa, from, to) +struct nfa *nfa; +struct arc *oa; +struct state *from; +struct state *to; { newarc(nfa, oa->type, oa->co, from, to); } /* - * moveins - move all in arcs of a state to another state - * + - moveins - move all in arcs of a state to another state * You might think this could be done better by just updating the * existing arcs, and you would be right if it weren't for the desire * for duplicate suppression, which makes it easier to just make new * ones to exploit the suppression built into newarc. 
+ ^ static VOID moveins(struct nfa *, struct state *, struct state *); */ -static void -moveins(struct nfa * nfa, - struct state * old, - struct state * new) +static VOID +moveins(nfa, old, new) +struct nfa *nfa; +struct state *old; +struct state *new; { struct arc *a; assert(old != new); - while ((a = old->ins) != NULL) - { + while ((a = old->ins) != NULL) { cparc(nfa, a, a->from, new); freearc(nfa, a); } @@ -453,12 +460,14 @@ moveins(struct nfa * nfa, } /* - * copyins - copy all in arcs of a state to another state + - copyins - copy all in arcs of a state to another state + ^ static VOID copyins(struct nfa *, struct state *, struct state *); */ -static void -copyins(struct nfa * nfa, - struct state * old, - struct state * new) +static VOID +copyins(nfa, old, new) +struct nfa *nfa; +struct state *old; +struct state *new; { struct arc *a; @@ -469,31 +478,34 @@ copyins(struct nfa * nfa, } /* - * moveouts - move all out arcs of a state to another state + - moveouts - move all out arcs of a state to another state + ^ static VOID moveouts(struct nfa *, struct state *, struct state *); */ -static void -moveouts(struct nfa * nfa, - struct state * old, - struct state * new) +static VOID +moveouts(nfa, old, new) +struct nfa *nfa; +struct state *old; +struct state *new; { struct arc *a; assert(old != new); - while ((a = old->outs) != NULL) - { + while ((a = old->outs) != NULL) { cparc(nfa, a, new, a->to); freearc(nfa, a); } } /* - * copyouts - copy all out arcs of a state to another state + - copyouts - copy all out arcs of a state to another state + ^ static VOID copyouts(struct nfa *, struct state *, struct state *); */ -static void -copyouts(struct nfa * nfa, - struct state * old, - struct state * new) +static VOID +copyouts(nfa, old, new) +struct nfa *nfa; +struct state *old; +struct state *new; { struct arc *a; @@ -504,14 +516,17 @@ copyouts(struct nfa * nfa, } /* - * cloneouts - copy out arcs of a state to another state pair, modifying type + - cloneouts - copy out 
arcs of a state to another state pair, modifying type + ^ static VOID cloneouts(struct nfa *, struct state *, struct state *, + ^ struct state *, int); */ -static void -cloneouts(struct nfa * nfa, - struct state * old, - struct state * from, - struct state * to, - int type) +static VOID +cloneouts(nfa, old, from, to, type) +struct nfa *nfa; +struct state *old; +struct state *from; +struct state *to; +int type; { struct arc *a; @@ -522,83 +537,85 @@ cloneouts(struct nfa * nfa, } /* - * delsub - delete a sub-NFA, updating subre pointers if necessary - * + - delsub - delete a sub-NFA, updating subre pointers if necessary * This uses a recursive traversal of the sub-NFA, marking already-seen * states using their tmp pointer. + ^ static VOID delsub(struct nfa *, struct state *, struct state *); */ -static void -delsub(struct nfa * nfa, - struct state * lp, /* the sub-NFA goes from here... */ - struct state * rp) /* ...to here, *not* inclusive */ +static VOID +delsub(nfa, lp, rp) +struct nfa *nfa; +struct state *lp; /* the sub-NFA goes from here... */ +struct state *rp; /* ...to here, *not* inclusive */ { assert(lp != rp); - rp->tmp = rp; /* mark end */ + rp->tmp = rp; /* mark end */ deltraverse(nfa, lp, lp); assert(lp->nouts == 0 && rp->nins == 0); /* did the job */ - assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */ + assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */ - rp->tmp = NULL; /* unmark end */ - lp->tmp = NULL; /* and begin, marked by deltraverse */ + rp->tmp = NULL; /* unmark end */ + lp->tmp = NULL; /* and begin, marked by deltraverse */ } /* - * deltraverse - the recursive heart of delsub + - deltraverse - the recursive heart of delsub * This routine's basic job is to destroy all out-arcs of the state. 
+ ^ static VOID deltraverse(struct nfa *, struct state *, struct state *); */ -static void -deltraverse(struct nfa * nfa, - struct state * leftend, - struct state * s) +static VOID +deltraverse(nfa, leftend, s) +struct nfa *nfa; +struct state *leftend; +struct state *s; { struct arc *a; struct state *to; if (s->nouts == 0) - return; /* nothing to do */ + return; /* nothing to do */ if (s->tmp != NULL) - return; /* already in progress */ + return; /* already in progress */ - s->tmp = s; /* mark as in progress */ + s->tmp = s; /* mark as in progress */ - while ((a = s->outs) != NULL) - { + while ((a = s->outs) != NULL) { to = a->to; deltraverse(nfa, leftend, to); assert(to->nouts == 0 || to->tmp != NULL); freearc(nfa, a); - if (to->nins == 0 && to->tmp == NULL) - { + if (to->nins == 0 && to->tmp == NULL) { assert(to->nouts == 0); freestate(nfa, to); } } - assert(s->no != FREESTATE); /* we're still here */ - assert(s == leftend || s->nins != 0); /* and still reachable */ + assert(s->no != FREESTATE); /* we're still here */ + assert(s == leftend || s->nins != 0); /* and still reachable */ assert(s->nouts == 0); /* but have no outarcs */ - s->tmp = NULL; /* we're done here */ + s->tmp = NULL; /* we're done here */ } /* - * dupnfa - duplicate sub-NFA - * + - dupnfa - duplicate sub-NFA * Another recursive traversal, this time using tmp to point to duplicates * as well as mark already-seen states. (You knew there was a reason why * it's a state pointer, didn't you? 
:-)) + ^ static VOID dupnfa(struct nfa *, struct state *, struct state *, + ^ struct state *, struct state *); */ -static void -dupnfa(struct nfa * nfa, - struct state * start, /* duplicate of subNFA starting here */ - struct state * stop, /* and stopping here */ - struct state * from, /* stringing duplicate from here */ - struct state * to) /* to here */ +static VOID +dupnfa(nfa, start, stop, from, to) +struct nfa *nfa; +struct state *start; /* duplicate of subNFA starting here */ +struct state *stop; /* and stopping here */ +struct state *from; /* stringing duplicate from here */ +struct state *to; /* to here */ { - if (start == stop) - { + if (start == stop) { newarc(nfa, EMPTY, 0, from, to); return; } @@ -612,39 +629,41 @@ dupnfa(struct nfa * nfa, } /* - * duptraverse - recursive heart of dupnfa + - duptraverse - recursive heart of dupnfa + ^ static VOID duptraverse(struct nfa *, struct state *, struct state *); */ -static void -duptraverse(struct nfa * nfa, - struct state * s, - struct state * stmp) /* s's duplicate, or NULL */ +static VOID +duptraverse(nfa, s, stmp) +struct nfa *nfa; +struct state *s; +struct state *stmp; /* s's duplicate, or NULL */ { struct arc *a; if (s->tmp != NULL) - return; /* already done */ + return; /* already done */ s->tmp = (stmp == NULL) ? 
newstate(nfa) : stmp; - if (s->tmp == NULL) - { + if (s->tmp == NULL) { assert(NISERR()); return; } - for (a = s->outs; a != NULL && !NISERR(); a = a->outchain) - { - duptraverse(nfa, a->to, (struct state *) NULL); + for (a = s->outs; a != NULL && !NISERR(); a = a->outchain) { + duptraverse(nfa, a->to, (struct state *)NULL); assert(a->to->tmp != NULL); cparc(nfa, a, s->tmp, a->to->tmp); } } /* - * cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set + - cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set + ^ static VOID cleartraverse(struct nfa *, struct state *); */ -static void -cleartraverse(struct nfa * nfa, - struct state * s) +static VOID +cleartraverse(nfa, s) +struct nfa *nfa; +struct state *s; { struct arc *a; @@ -657,21 +676,20 @@ cleartraverse(struct nfa * nfa, } /* - * specialcolors - fill in special colors for an NFA + - specialcolors - fill in special colors for an NFA + ^ static VOID specialcolors(struct nfa *); */ -static void -specialcolors(struct nfa * nfa) +static VOID +specialcolors(nfa) +struct nfa *nfa; { /* false colors for BOS, BOL, EOS, EOL */ - if (nfa->parent == NULL) - { + if (nfa->parent == NULL) { nfa->bos[0] = pseudocolor(nfa->cm); nfa->bos[1] = pseudocolor(nfa->cm); nfa->eos[0] = pseudocolor(nfa->cm); nfa->eos[1] = pseudocolor(nfa->cm); - } - else - { + } else { assert(nfa->parent->bos[0] != COLORLESS); nfa->bos[0] = nfa->parent->bos[0]; assert(nfa->parent->bos[1] != COLORLESS); @@ -684,62 +702,55 @@ specialcolors(struct nfa * nfa) } /* - * optimize - optimize an NFA + - optimize - optimize an NFA + ^ static long optimize(struct nfa *, FILE *); */ -static long /* re_info bits */ -optimize(struct nfa * nfa, - FILE *f) /* for debug output; NULL none */ +static long /* re_info bits */ +optimize(nfa, f) +struct nfa *nfa; +FILE *f; /* for debug output; NULL none */ { -#ifdef REG_DEBUG - int verbose = (f != NULL) ? 1 : 0; + int verbose = (f != NULL) ? 
1 : 0; if (verbose) fprintf(f, "\ninitial cleanup:\n"); -#endif - cleanup(nfa); /* may simplify situation */ -#ifdef REG_DEBUG + cleanup(nfa); /* may simplify situation */ if (verbose) dumpnfa(nfa, f); if (verbose) fprintf(f, "\nempties:\n"); -#endif - fixempties(nfa, f); /* get rid of EMPTY arcs */ -#ifdef REG_DEBUG + fixempties(nfa, f); /* get rid of EMPTY arcs */ if (verbose) fprintf(f, "\nconstraints:\n"); -#endif - pullback(nfa, f); /* pull back constraints backward */ - pushfwd(nfa, f); /* push fwd constraints forward */ -#ifdef REG_DEBUG + pullback(nfa, f); /* pull back constraints backward */ + pushfwd(nfa, f); /* push fwd constraints forward */ if (verbose) fprintf(f, "\nfinal cleanup:\n"); -#endif - cleanup(nfa); /* final tidying */ - return analyze(nfa); /* and analysis */ + cleanup(nfa); /* final tidying */ + return analyze(nfa); /* and analysis */ } /* - * pullback - pull back constraints backward to (with luck) eliminate them + - pullback - pull back constraints backward to (with luck) eliminate them + ^ static VOID pullback(struct nfa *, FILE *); */ -static void -pullback(struct nfa * nfa, - FILE *f) /* for debug output; NULL none */ +static VOID +pullback(nfa, f) +struct nfa *nfa; +FILE *f; /* for debug output; NULL none */ { struct state *s; struct state *nexts; struct arc *a; struct arc *nexta; - int progress; + int progress; /* find and pull until there are no more */ - do - { + do { progress = 0; - for (s = nfa->states; s != NULL && !NISERR(); s = nexts) - { + for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { nexts = s->next; - for (a = s->outs; a != NULL && !NISERR(); a = nexta) - { + for (a = s->outs; a != NULL && !NISERR(); a = nexta) { nexta = a->outchain; if (a->type == '^' || a->type == BEHIND) if (pull(nfa, a)) @@ -753,11 +764,9 @@ pullback(struct nfa * nfa, if (NISERR()) return; - for (a = nfa->pre->outs; a != NULL; a = nexta) - { + for (a = nfa->pre->outs; a != NULL; a = nexta) { nexta = a->outchain; - if (a->type == '^') - { + 
if (a->type == '^') { assert(a->co == 0 || a->co == 1); newarc(nfa, PLAIN, nfa->bos[a->co], a->from, a->to); freearc(nfa, a); @@ -766,14 +775,16 @@ pullback(struct nfa * nfa, } /* - * pull - pull a back constraint backward past its source state + - pull - pull a back constraint backward past its source state * A significant property of this function is that it deletes at most * one state -- the constraint's from state -- and only if the constraint * was that state's last outarc. + ^ static int pull(struct nfa *, struct arc *); */ -static int /* 0 couldn't, 1 could */ -pull(struct nfa * nfa, - struct arc * con) +static int /* 0 couldn't, 1 could */ +pull(nfa, con) +struct nfa *nfa; +struct arc *con; { struct state *from = con->from; struct state *to = con->to; @@ -781,28 +792,25 @@ pull(struct nfa * nfa, struct arc *nexta; struct state *s; - if (from == to) - { /* circular constraint is pointless */ + if (from == to) { /* circular constraint is pointless */ freearc(nfa, con); return 1; } - if (from->flag) /* can't pull back beyond start */ + if (from->flag) /* can't pull back beyond start */ return 0; - if (from->nins == 0) - { /* unreachable */ + if (from->nins == 0) { /* unreachable */ freearc(nfa, con); return 1; } /* first, clone from state if necessary to avoid other outarcs */ - if (from->nouts > 1) - { + if (from->nouts > 1) { s = newstate(nfa); if (NISERR()) return 0; assert(to != from); /* con is not an inarc */ - copyins(nfa, from, s); /* duplicate inarcs */ - cparc(nfa, con, s, to); /* move constraint arc */ + copyins(nfa, from, s); /* duplicate inarcs */ + cparc(nfa, con, s, to); /* move constraint arc */ freearc(nfa, con); from = s; con = from->outs; @@ -810,29 +818,27 @@ pull(struct nfa * nfa, assert(from->nouts == 1); /* propagate the constraint into the from state's inarcs */ - for (a = from->ins; a != NULL; a = nexta) - { + for (a = from->ins; a != NULL; a = nexta) { nexta = a->inchain; - switch (combine(con, a)) - { - case INCOMPATIBLE: /* destroy 
the arc */ - freearc(nfa, a); - break; - case SATISFIED: /* no action needed */ - break; - case COMPATIBLE: /* swap the two arcs, more or less */ - s = newstate(nfa); - if (NISERR()) - return 0; - cparc(nfa, a, s, to); /* anticipate move */ - cparc(nfa, con, a->from, s); - if (NISERR()) - return 0; - freearc(nfa, a); - break; - default: - assert(NOTREACHED); - break; + switch (combine(con, a)) { + case INCOMPATIBLE: /* destroy the arc */ + freearc(nfa, a); + break; + case SATISFIED: /* no action needed */ + break; + case COMPATIBLE: /* swap the two arcs, more or less */ + s = newstate(nfa); + if (NISERR()) + return 0; + cparc(nfa, a, s, to); /* anticipate move */ + cparc(nfa, con, a->from, s); + if (NISERR()) + return 0; + freearc(nfa, a); + break; + default: + assert(NOTREACHED); + break; } } @@ -843,27 +849,26 @@ pull(struct nfa * nfa, } /* - * pushfwd - push forward constraints forward to (with luck) eliminate them + - pushfwd - push forward constraints forward to (with luck) eliminate them + ^ static VOID pushfwd(struct nfa *, FILE *); */ -static void -pushfwd(struct nfa * nfa, - FILE *f) /* for debug output; NULL none */ +static VOID +pushfwd(nfa, f) +struct nfa *nfa; +FILE *f; /* for debug output; NULL none */ { struct state *s; struct state *nexts; struct arc *a; struct arc *nexta; - int progress; + int progress; /* find and push until there are no more */ - do - { + do { progress = 0; - for (s = nfa->states; s != NULL && !NISERR(); s = nexts) - { + for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { nexts = s->next; - for (a = s->ins; a != NULL && !NISERR(); a = nexta) - { + for (a = s->ins; a != NULL && !NISERR(); a = nexta) { nexta = a->inchain; if (a->type == '$' || a->type == AHEAD) if (push(nfa, a)) @@ -877,11 +882,9 @@ pushfwd(struct nfa * nfa, if (NISERR()) return; - for (a = nfa->post->ins; a != NULL; a = nexta) - { + for (a = nfa->post->ins; a != NULL; a = nexta) { nexta = a->inchain; - if (a->type == '$') - { + if (a->type == '$') { 
assert(a->co == 0 || a->co == 1); newarc(nfa, PLAIN, nfa->eos[a->co], a->from, a->to); freearc(nfa, a); @@ -890,14 +893,16 @@ pushfwd(struct nfa * nfa, } /* - * push - push a forward constraint forward past its destination state + - push - push a forward constraint forward past its destination state * A significant property of this function is that it deletes at most * one state -- the constraint's to state -- and only if the constraint * was that state's last inarc. + ^ static int push(struct nfa *, struct arc *); */ -static int /* 0 couldn't, 1 could */ -push(struct nfa * nfa, - struct arc * con) +static int /* 0 couldn't, 1 could */ +push(nfa, con) +struct nfa *nfa; +struct arc *con; { struct state *from = con->from; struct state *to = con->to; @@ -905,27 +910,24 @@ push(struct nfa * nfa, struct arc *nexta; struct state *s; - if (to == from) - { /* circular constraint is pointless */ + if (to == from) { /* circular constraint is pointless */ freearc(nfa, con); return 1; } - if (to->flag) /* can't push forward beyond end */ + if (to->flag) /* can't push forward beyond end */ return 0; - if (to->nouts == 0) - { /* dead end */ + if (to->nouts == 0) { /* dead end */ freearc(nfa, con); return 1; } /* first, clone to state if necessary to avoid other inarcs */ - if (to->nins > 1) - { + if (to->nins > 1) { s = newstate(nfa); if (NISERR()) return 0; - copyouts(nfa, to, s); /* duplicate outarcs */ - cparc(nfa, con, from, s); /* move constraint */ + copyouts(nfa, to, s); /* duplicate outarcs */ + cparc(nfa, con, from, s); /* move constraint */ freearc(nfa, con); to = s; con = to->ins; @@ -933,118 +935,115 @@ push(struct nfa * nfa, assert(to->nins == 1); /* propagate the constraint into the to state's outarcs */ - for (a = to->outs; a != NULL; a = nexta) - { + for (a = to->outs; a != NULL; a = nexta) { nexta = a->outchain; - switch (combine(con, a)) - { - case INCOMPATIBLE: /* destroy the arc */ - freearc(nfa, a); - break; - case SATISFIED: /* no action needed */ - break; 
- case COMPATIBLE: /* swap the two arcs, more or less */ - s = newstate(nfa); - if (NISERR()) - return 0; - cparc(nfa, con, s, a->to); /* anticipate move */ - cparc(nfa, a, from, s); - if (NISERR()) - return 0; - freearc(nfa, a); - break; - default: - assert(NOTREACHED); - break; + switch (combine(con, a)) { + case INCOMPATIBLE: /* destroy the arc */ + freearc(nfa, a); + break; + case SATISFIED: /* no action needed */ + break; + case COMPATIBLE: /* swap the two arcs, more or less */ + s = newstate(nfa); + if (NISERR()) + return 0; + cparc(nfa, con, s, a->to); /* anticipate move */ + cparc(nfa, a, from, s); + if (NISERR()) + return 0; + freearc(nfa, a); + break; + default: + assert(NOTREACHED); + break; } } /* remaining outarcs, if any, incorporate the constraint */ moveouts(nfa, to, from); - dropstate(nfa, to); /* will free the constraint */ + dropstate(nfa, to); /* will free the constraint */ return 1; } /* - * combine - constraint lands on an arc, what happens? - * - * #def INCOMPATIBLE 1 // destroys arc - * #def SATISFIED 2 // constraint satisfied - * #def COMPATIBLE 3 // compatible but not satisfied yet + - combine - constraint lands on an arc, what happens? 
+ ^ #def INCOMPATIBLE 1 // destroys arc + ^ #def SATISFIED 2 // constraint satisfied + ^ #def COMPATIBLE 3 // compatible but not satisfied yet + ^ static int combine(struct arc *, struct arc *); */ static int -combine(struct arc * con, - struct arc * a) +combine(con, a) +struct arc *con; +struct arc *a; { -#define CA(ct,at) (((ct)<type, a->type)) - { - case CA('^', PLAIN): /* newlines are handled separately */ - case CA('$', PLAIN): - return INCOMPATIBLE; - break; - case CA(AHEAD, PLAIN): /* color constraints meet colors */ - case CA(BEHIND, PLAIN): - if (con->co == a->co) - return SATISFIED; - return INCOMPATIBLE; - break; - case CA('^', '^'): /* collision, similar constraints */ - case CA('$', '$'): - case CA(AHEAD, AHEAD): - case CA(BEHIND, BEHIND): - if (con->co == a->co) /* true duplication */ - return SATISFIED; - return INCOMPATIBLE; - break; - case CA('^', BEHIND): /* collision, dissimilar constraints */ - case CA(BEHIND, '^'): - case CA('$', AHEAD): - case CA(AHEAD, '$'): - return INCOMPATIBLE; - break; - case CA('^', '$'): /* constraints passing each other */ - case CA('^', AHEAD): - case CA(BEHIND, '$'): - case CA(BEHIND, AHEAD): - case CA('$', '^'): - case CA('$', BEHIND): - case CA(AHEAD, '^'): - case CA(AHEAD, BEHIND): - case CA('^', LACON): - case CA(BEHIND, LACON): - case CA('$', LACON): - case CA(AHEAD, LACON): - return COMPATIBLE; - break; +# define CA(ct,at) (((ct)<type, a->type)) { + case CA('^', PLAIN): /* newlines are handled separately */ + case CA('$', PLAIN): + return INCOMPATIBLE; + break; + case CA(AHEAD, PLAIN): /* color constraints meet colors */ + case CA(BEHIND, PLAIN): + if (con->co == a->co) + return SATISFIED; + return INCOMPATIBLE; + break; + case CA('^', '^'): /* collision, similar constraints */ + case CA('$', '$'): + case CA(AHEAD, AHEAD): + case CA(BEHIND, BEHIND): + if (con->co == a->co) /* true duplication */ + return SATISFIED; + return INCOMPATIBLE; + break; + case CA('^', BEHIND): /* collision, dissimilar constraints */ + 
case CA(BEHIND, '^'): + case CA('$', AHEAD): + case CA(AHEAD, '$'): + return INCOMPATIBLE; + break; + case CA('^', '$'): /* constraints passing each other */ + case CA('^', AHEAD): + case CA(BEHIND, '$'): + case CA(BEHIND, AHEAD): + case CA('$', '^'): + case CA('$', BEHIND): + case CA(AHEAD, '^'): + case CA(AHEAD, BEHIND): + case CA('^', LACON): + case CA(BEHIND, LACON): + case CA('$', LACON): + case CA(AHEAD, LACON): + return COMPATIBLE; + break; } assert(NOTREACHED); return INCOMPATIBLE; /* for benefit of blind compilers */ } /* - * fixempties - get rid of EMPTY arcs + - fixempties - get rid of EMPTY arcs + ^ static VOID fixempties(struct nfa *, FILE *); */ -static void -fixempties(struct nfa * nfa, - FILE *f) /* for debug output; NULL none */ +static VOID +fixempties(nfa, f) +struct nfa *nfa; +FILE *f; /* for debug output; NULL none */ { struct state *s; struct state *nexts; struct arc *a; struct arc *nexta; - int progress; + int progress; /* find and eliminate empties until there are no more */ - do - { + do { progress = 0; - for (s = nfa->states; s != NULL && !NISERR(); s = nexts) - { + for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { nexts = s->next; - for (a = s->outs; a != NULL && !NISERR(); a = nexta) - { + for (a = s->outs; a != NULL && !NISERR(); a = nexta) { nexta = a->outchain; if (a->type == EMPTY && unempty(nfa, a)) progress = 1; @@ -1057,60 +1056,52 @@ fixempties(struct nfa * nfa, } /* - * unempty - optimize out an EMPTY arc, if possible - * + - unempty - optimize out an EMPTY arc, if possible * Actually, as it stands this function always succeeds, but the return * value is kept with an eye on possible future changes. 
+ ^ static int unempty(struct nfa *, struct arc *); */ -static int /* 0 couldn't, 1 could */ -unempty(struct nfa * nfa, - struct arc * a) +static int /* 0 couldn't, 1 could */ +unempty(nfa, a) +struct nfa *nfa; +struct arc *a; { struct state *from = a->from; struct state *to = a->to; - int usefrom; /* work on from, as opposed to to? */ + int usefrom; /* work on from, as opposed to to? */ assert(a->type == EMPTY); assert(from != nfa->pre && to != nfa->post); - if (from == to) - { /* vacuous loop */ + if (from == to) { /* vacuous loop */ freearc(nfa, a); return 1; } /* decide which end to work on */ - usefrom = 1; /* default: attack from */ + usefrom = 1; /* default: attack from */ if (from->nouts > to->nins) usefrom = 0; - else if (from->nouts == to->nins) - { + else if (from->nouts == to->nins) { /* decide on secondary issue: move/copy fewest arcs */ if (from->nins > to->nouts) usefrom = 0; } - + freearc(nfa, a); - if (usefrom) - { - if (from->nouts == 0) - { + if (usefrom) { + if (from->nouts == 0) { /* was the state's only outarc */ moveins(nfa, from, to); freestate(nfa, from); - } - else + } else copyins(nfa, from, to); - } - else - { - if (to->nins == 0) - { + } else { + if (to->nins == 0) { /* was the state's only inarc */ moveouts(nfa, to, from); freestate(nfa, to); - } - else + } else copyouts(nfa, to, from); } @@ -1118,21 +1109,22 @@ unempty(struct nfa * nfa, } /* - * cleanup - clean up NFA after optimizations + - cleanup - clean up NFA after optimizations + ^ static VOID cleanup(struct nfa *); */ -static void -cleanup(struct nfa * nfa) +static VOID +cleanup(nfa) +struct nfa *nfa; { struct state *s; struct state *nexts; - int n; + int n; /* clear out unreachable or dead-end states */ /* use pre to mark reachable, then post to mark can-reach-post */ - markreachable(nfa, nfa->pre, (struct state *) NULL, nfa->pre); + markreachable(nfa, nfa->pre, (struct state *)NULL, nfa->pre); markcanreach(nfa, nfa->post, nfa->pre, nfa->post); - for (s = nfa->states; s != 
NULL; s = nexts) - { + for (s = nfa->states; s != NULL; s = nexts) { nexts = s->next; if (s->tmp != nfa->post && !s->flag) dropstate(nfa, s); @@ -1150,14 +1142,16 @@ cleanup(struct nfa * nfa) } /* - * markreachable - recursive marking of reachable states + - markreachable - recursive marking of reachable states + ^ static VOID markreachable(struct nfa *, struct state *, struct state *, + ^ struct state *); */ -static void -markreachable(struct nfa * nfa, - struct state * s, - struct state * okay, /* consider only states with this - * mark */ - struct state * mark) /* the value to mark with */ +static VOID +markreachable(nfa, s, okay, mark) +struct nfa *nfa; +struct state *s; +struct state *okay; /* consider only states with this mark */ +struct state *mark; /* the value to mark with */ { struct arc *a; @@ -1170,14 +1164,16 @@ markreachable(struct nfa * nfa, } /* - * markcanreach - recursive marking of states which can reach here + - markcanreach - recursive marking of states which can reach here + ^ static VOID markcanreach(struct nfa *, struct state *, struct state *, + ^ struct state *); */ -static void -markcanreach(struct nfa * nfa, - struct state * s, - struct state * okay, /* consider only states with this - * mark */ - struct state * mark) /* the value to mark with */ +static VOID +markcanreach(nfa, s, okay, mark) +struct nfa *nfa; +struct state *s; +struct state *okay; /* consider only states with this mark */ +struct state *mark; /* the value to mark with */ { struct arc *a; @@ -1190,10 +1186,12 @@ markcanreach(struct nfa * nfa, } /* - * analyze - ascertain potentially-useful facts about an optimized NFA + - analyze - ascertain potentially-useful facts about an optimized NFA + ^ static long analyze(struct nfa *); */ -static long /* re_info bits to be ORed in */ -analyze(struct nfa * nfa) +static long /* re_info bits to be ORed in */ +analyze(nfa) +struct nfa *nfa; { struct arc *a; struct arc *aa; @@ -1208,34 +1206,34 @@ analyze(struct nfa * nfa) } /* - * 
compact - compact an NFA + - compact - compact an NFA + ^ static VOID compact(struct nfa *, struct cnfa *); */ -static void -compact(struct nfa * nfa, - struct cnfa * cnfa) +static VOID +compact(nfa, cnfa) +struct nfa *nfa; +struct cnfa *cnfa; { struct state *s; struct arc *a; - size_t nstates; - size_t narcs; + size_t nstates; + size_t narcs; struct carc *ca; struct carc *first; - assert(!NISERR()); + assert (!NISERR()); nstates = 0; narcs = 0; - for (s = nfa->states; s != NULL; s = s->next) - { + for (s = nfa->states; s != NULL; s = s->next) { nstates++; narcs += 1 + s->nouts + 1; /* 1 as a fake for flags, nouts for arcs, 1 as endmarker */ } - cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *)); - cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc)); - if (cnfa->states == NULL || cnfa->arcs == NULL) - { + cnfa->states = (struct carc **)MALLOC(nstates * sizeof(struct carc *)); + cnfa->arcs = (struct carc *)MALLOC(narcs * sizeof(struct carc)); + if (cnfa->states == NULL || cnfa->arcs == NULL) { if (cnfa->states != NULL) FREE(cnfa->states); if (cnfa->arcs != NULL) @@ -1254,33 +1252,31 @@ compact(struct nfa * nfa, cnfa->flags = 0; ca = cnfa->arcs; - for (s = nfa->states; s != NULL; s = s->next) - { - assert((size_t) s->no < nstates); + for (s = nfa->states; s != NULL; s = s->next) { + assert((size_t)s->no < nstates); cnfa->states[s->no] = ca; - ca->co = 0; /* clear and skip flags "arc" */ + ca->co = 0; /* clear and skip flags "arc" */ ca++; first = ca; for (a = s->outs; a != NULL; a = a->outchain) - switch (a->type) - { - case PLAIN: - ca->co = a->co; - ca->to = a->to->no; - ca++; - break; - case LACON: - assert(s->no != cnfa->pre); - ca->co = (color) (cnfa->ncolors + a->co); - ca->to = a->to->no; - ca++; - cnfa->flags |= HASLACONS; - break; - default: - assert(NOTREACHED); - break; + switch (a->type) { + case PLAIN: + ca->co = a->co; + ca->to = a->to->no; + ca++; + break; + case LACON: + assert(s->no != cnfa->pre); + ca->co = 
(color)(cnfa->ncolors + a->co); + ca->to = a->to->no; + ca++; + cnfa->flags |= HASLACONS; + break; + default: + assert(NOTREACHED); + break; } - carcsort(first, ca - 1); + carcsort(first, ca-1); ca->co = COLORLESS; ca->to = 0; ca++; @@ -1295,14 +1291,15 @@ compact(struct nfa * nfa, } /* - * carcsort - sort compacted-NFA arcs by color - * + - carcsort - sort compacted-NFA arcs by color * Really dumb algorithm, but if the list is long enough for that to matter, * you're in real trouble anyway. + ^ static VOID carcsort(struct carc *, struct carc *); */ -static void -carcsort(struct carc * first, - struct carc * last) +static VOID +carcsort(first, last) +struct carc *first; +struct carc *last; { struct carc *p; struct carc *q; @@ -1314,8 +1311,7 @@ carcsort(struct carc * first, for (p = first; p <= last; p++) for (q = p; q <= last; q++) if (p->co > q->co || - (p->co == q->co && p->to > q->to)) - { + (p->co == q->co && p->to > q->to)) { assert(p != q); tmp = *p; *p = *q; @@ -1324,36 +1320,40 @@ carcsort(struct carc * first, } /* - * freecnfa - free a compacted NFA + - freecnfa - free a compacted NFA + ^ static VOID freecnfa(struct cnfa *); */ -static void -freecnfa(struct cnfa * cnfa) +static VOID +freecnfa(cnfa) +struct cnfa *cnfa; { - assert(cnfa->nstates != 0); /* not empty already */ + assert(cnfa->nstates != 0); /* not empty already */ cnfa->nstates = 0; FREE(cnfa->states); FREE(cnfa->arcs); } /* - * dumpnfa - dump an NFA in human-readable form + - dumpnfa - dump an NFA in human-readable form + ^ static VOID dumpnfa(struct nfa *, FILE *); */ -static void -dumpnfa(struct nfa * nfa, - FILE *f) +static VOID +dumpnfa(nfa, f) +struct nfa *nfa; +FILE *f; { #ifdef REG_DEBUG struct state *s; fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no); if (nfa->bos[0] != COLORLESS) - fprintf(f, ", bos [%ld]", (long) nfa->bos[0]); + fprintf(f, ", bos [%ld]", (long)nfa->bos[0]); if (nfa->bos[1] != COLORLESS) - fprintf(f, ", bol [%ld]", (long) nfa->bos[1]); + fprintf(f, ", bol 
[%ld]", (long)nfa->bos[1]); if (nfa->eos[0] != COLORLESS) - fprintf(f, ", eos [%ld]", (long) nfa->eos[0]); + fprintf(f, ", eos [%ld]", (long)nfa->eos[0]); if (nfa->eos[1] != COLORLESS) - fprintf(f, ", eol [%ld]", (long) nfa->eos[1]); + fprintf(f, ", eol [%ld]", (long)nfa->eos[1]); fprintf(f, "\n"); for (s = nfa->states; s != NULL; s = s->next) dumpstate(s, f); @@ -1363,19 +1363,24 @@ dumpnfa(struct nfa * nfa, #endif } -#ifdef REG_DEBUG /* subordinates of dumpnfa */ +#ifdef REG_DEBUG /* subordinates of dumpnfa */ +/* + ^ #ifdef REG_DEBUG + */ /* - * dumpstate - dump an NFA state in human-readable form + - dumpstate - dump an NFA state in human-readable form + ^ static VOID dumpstate(struct state *, FILE *); */ -static void -dumpstate(struct state * s, - FILE *f) +static VOID +dumpstate(s, f) +struct state *s; +FILE *f; { struct arc *a; fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "", - (s->flag) ? s->flag : '.'); + (s->flag) ? s->flag : '.'); if (s->prev != NULL && s->prev->next != s) fprintf(f, "\tstate chain bad\n"); if (s->nouts == 0) @@ -1383,8 +1388,7 @@ dumpstate(struct state * s, else dumparcs(s, f); fflush(f); - for (a = s->ins; a != NULL; a = a->inchain) - { + for (a = s->ins; a != NULL; a = a->inchain) { if (a->to != s) fprintf(f, "\tlink from %d to %d on %d's in-chain\n", a->from->no, a->to->no, s->no); @@ -1392,13 +1396,15 @@ dumpstate(struct state * s, } /* - * dumparcs - dump out-arcs in human-readable form + - dumparcs - dump out-arcs in human-readable form + ^ static VOID dumparcs(struct state *, FILE *); */ -static void -dumparcs(struct state * s, - FILE *f) +static VOID +dumparcs(s, f) +struct state *s; +FILE *f; { - int pos; + int pos; assert(s->nouts > 0); /* printing arcs in reverse order is usually clearer */ @@ -1408,147 +1414,154 @@ dumparcs(struct state * s, } /* - * dumprarcs - dump remaining outarcs, recursively, in reverse order + - dumprarcs - dump remaining outarcs, recursively, in reverse order + ^ static int dumprarcs(struct 
arc *, struct state *, FILE *, int); */ -static int /* resulting print position */ -dumprarcs(struct arc * a, - struct state * s, - FILE *f, - int pos) /* initial print position */ +static int /* resulting print position */ +dumprarcs(a, s, f, pos) +struct arc *a; +struct state *s; +FILE *f; +int pos; /* initial print position */ { if (a->outchain != NULL) pos = dumprarcs(a->outchain, s, f, pos); dumparc(a, s, f); - if (pos == 5) - { + if (pos == 5) { fprintf(f, "\n"); pos = 1; - } - else + } else pos++; return pos; } /* - * dumparc - dump one outarc in readable form, including prefixing tab + - dumparc - dump one outarc in readable form, including prefixing tab + ^ static VOID dumparc(struct arc *, struct state *, FILE *); */ -static void -dumparc(struct arc * a, - struct state * s, - FILE *f) +static VOID +dumparc(a, s, f) +struct arc *a; +struct state *s; +FILE *f; { struct arc *aa; struct arcbatch *ab; fprintf(f, "\t"); - switch (a->type) - { - case PLAIN: - fprintf(f, "[%ld]", (long) a->co); - break; - case AHEAD: - fprintf(f, ">%ld>", (long) a->co); - break; - case BEHIND: - fprintf(f, "<%ld<", (long) a->co); - break; - case LACON: - fprintf(f, ":%ld:", (long) a->co); - break; - case '^': - case '$': - fprintf(f, "%c%d", a->type, (int) a->co); - break; - case EMPTY: - break; - default: - fprintf(f, "0x%x/0%lo", a->type, (long) a->co); - break; + switch (a->type) { + case PLAIN: + fprintf(f, "[%ld]", (long)a->co); + break; + case AHEAD: + fprintf(f, ">%ld>", (long)a->co); + break; + case BEHIND: + fprintf(f, "<%ld<", (long)a->co); + break; + case LACON: + fprintf(f, ":%ld:", (long)a->co); + break; + case '^': + case '$': + fprintf(f, "%c%d", a->type, (int)a->co); + break; + case EMPTY: + break; + default: + fprintf(f, "0x%x/0%lo", a->type, (long)a->co); + break; } if (a->from != s) fprintf(f, "?%d?", a->from->no); - for (ab = &a->from->oas; ab != NULL; ab = ab->next) - { + for (ab = &a->from->oas; ab != NULL; ab = ab->next) { for (aa = &ab->a[0]; aa < 
&ab->a[ABSIZE]; aa++) if (aa == a) - break; /* NOTE BREAK OUT */ + break; /* NOTE BREAK OUT */ if (aa < &ab->a[ABSIZE]) /* propagate break */ - break; /* NOTE BREAK OUT */ + break; /* NOTE BREAK OUT */ } if (ab == NULL) - fprintf(f, "?!?"); /* not in allocated space */ + fprintf(f, "?!?"); /* not in allocated space */ fprintf(f, "->"); - if (a->to == NULL) - { + if (a->to == NULL) { fprintf(f, "NULL"); return; } fprintf(f, "%d", a->to->no); for (aa = a->to->ins; aa != NULL; aa = aa->inchain) if (aa == a) - break; /* NOTE BREAK OUT */ + break; /* NOTE BREAK OUT */ if (aa == NULL) - fprintf(f, "?!?"); /* missing from in-chain */ + fprintf(f, "?!?"); /* missing from in-chain */ } -#endif /* REG_DEBUG */ /* - * dumpcnfa - dump a compacted NFA in human-readable form + ^ #endif */ -#ifdef REG_DEBUG -static void -dumpcnfa(struct cnfa * cnfa, - FILE *f) +#endif /* ifdef REG_DEBUG */ + +/* + - dumpcnfa - dump a compacted NFA in human-readable form + ^ static VOID dumpcnfa(struct cnfa *, FILE *); + */ +static VOID +dumpcnfa(cnfa, f) +struct cnfa *cnfa; +FILE *f; { - int st; +#ifdef REG_DEBUG + int st; fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post); if (cnfa->bos[0] != COLORLESS) - fprintf(f, ", bos [%ld]", (long) cnfa->bos[0]); + fprintf(f, ", bos [%ld]", (long)cnfa->bos[0]); if (cnfa->bos[1] != COLORLESS) - fprintf(f, ", bol [%ld]", (long) cnfa->bos[1]); + fprintf(f, ", bol [%ld]", (long)cnfa->bos[1]); if (cnfa->eos[0] != COLORLESS) - fprintf(f, ", eos [%ld]", (long) cnfa->eos[0]); + fprintf(f, ", eos [%ld]", (long)cnfa->eos[0]); if (cnfa->eos[1] != COLORLESS) - fprintf(f, ", eol [%ld]", (long) cnfa->eos[1]); - if (cnfa->flags & HASLACONS) + fprintf(f, ", eol [%ld]", (long)cnfa->eos[1]); + if (cnfa->flags&HASLACONS) fprintf(f, ", haslacons"); fprintf(f, "\n"); for (st = 0; st < cnfa->nstates; st++) dumpcstate(st, cnfa->states[st], cnfa, f); fflush(f); -} #endif +} -#ifdef REG_DEBUG /* subordinates of dumpcnfa */ +#ifdef REG_DEBUG /* subordinates of dumpcnfa */ +/* + ^ 
#ifdef REG_DEBUG + */ /* - * dumpcstate - dump a compacted-NFA state in human-readable form + - dumpcstate - dump a compacted-NFA state in human-readable form + ^ static VOID dumpcstate(int, struct carc *, struct cnfa *, FILE *); */ -static void -dumpcstate(int st, - struct carc * ca, - struct cnfa * cnfa, - FILE *f) +static VOID +dumpcstate(st, ca, cnfa, f) +int st; +struct carc *ca; +struct cnfa *cnfa; +FILE *f; { - int i; - int pos; + int i; + int pos; fprintf(f, "%d%s", st, (ca[0].co) ? ":" : "."); pos = 1; - for (i = 1; ca[i].co != COLORLESS; i++) - { + for (i = 1; ca[i].co != COLORLESS; i++) { if (ca[i].co < cnfa->ncolors) - fprintf(f, "\t[%ld]->%d", (long) ca[i].co, ca[i].to); + fprintf(f, "\t[%ld]->%d", (long)ca[i].co, ca[i].to); else - fprintf(f, "\t:%ld:->%d", (long) ca[i].co - cnfa->ncolors, - ca[i].to); - if (pos == 5) - { + fprintf(f, "\t:%ld:->%d", (long)ca[i].co-cnfa->ncolors, + ca[i].to); + if (pos == 5) { fprintf(f, "\n"); pos = 1; - } - else + } else pos++; } if (i == 1 || pos != 1) @@ -1556,4 +1569,7 @@ dumpcstate(int st, fflush(f); } -#endif /* REG_DEBUG */ +/* + ^ #endif + */ +#endif /* ifdef REG_DEBUG */ diff --git a/src/regex/regcomp.c b/src/regex/regcomp.c index 69f9517447..2a7fd6e0b9 100644 --- a/src/regex/regcomp.c +++ b/src/regex/regcomp.c @@ -2,21 +2,21 @@ * re_*comp and friends - compile REs * This file #includes several others (see the bottom). * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. 
+ * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * + * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. - * + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -28,8 +28,6 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Header: /projects/cvsroot/pgsql-server/src/backend/regex/regcomp.c,v 1.38 2003/08/08 21:41:56 momjian Exp $ - * */ #include "regguts.h" @@ -37,301 +35,279 @@ /* * forward declarations, up here so forward datatypes etc. 
are defined early */ +/* =====^!^===== begin forwards =====^!^===== */ +/* automatically gathered by fwd; do not hand-edit */ /* === regcomp.c === */ -static void moresubs(struct vars *, int); -static int freev(struct vars *, int); -static void makesearch(struct vars *, struct nfa *); -static struct subre *parse(struct vars *, int, int, struct state *, struct state *); -static struct subre *parsebranch(struct vars *, int, int, struct state *, struct state *, int); -static void parseqatom(struct vars *, int, int, struct state *, struct state *, struct subre *); -static void nonword(struct vars *, int, struct state *, struct state *); -static void word(struct vars *, int, struct state *, struct state *); -static int scannum(struct vars *); -static void repeat(struct vars *, struct state *, struct state *, int, int); -static void bracket(struct vars *, struct state *, struct state *); -static void cbracket(struct vars *, struct state *, struct state *); -static void brackpart(struct vars *, struct state *, struct state *); -static chr *scanplain(struct vars *); -static void leaders(struct vars *, struct cvec *); -static void onechr(struct vars *, chr, struct state *, struct state *); -static void dovec(struct vars *, struct cvec *, struct state *, struct state *); -static celt nextleader(struct vars *, chr, chr); -static void wordchrs(struct vars *); -static struct subre *subre(struct vars *, int, int, struct state *, struct state *); -static void freesubre(struct vars *, struct subre *); -static void freesrnode(struct vars *, struct subre *); -static void optst(struct vars *, struct subre *); -static int numst(struct subre *, int); -static void markst(struct subre *); -static void cleanst(struct vars *); -static long nfatree(struct vars *, struct subre *, FILE *); -static long nfanode(struct vars *, struct subre *, FILE *); -static int newlacon(struct vars *, struct state *, struct state *, int); -static void freelacons(struct subre *, int); -static void 
rfree(regex_t *); - +int compile _ANSI_ARGS_((regex_t *, CONST chr *, size_t, int)); +static VOID moresubs _ANSI_ARGS_((struct vars *, int)); +static int freev _ANSI_ARGS_((struct vars *, int)); +static VOID makesearch _ANSI_ARGS_((struct vars *, struct nfa *)); +static struct subre *parse _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *)); +static struct subre *parsebranch _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *, int)); +static VOID parseqatom _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *, struct subre *)); +static VOID nonword _ANSI_ARGS_((struct vars *, int, struct state *, struct state *)); +static VOID word _ANSI_ARGS_((struct vars *, int, struct state *, struct state *)); +static int scannum _ANSI_ARGS_((struct vars *)); +static VOID repeat _ANSI_ARGS_((struct vars *, struct state *, struct state *, int, int)); +static VOID bracket _ANSI_ARGS_((struct vars *, struct state *, struct state *)); +static VOID cbracket _ANSI_ARGS_((struct vars *, struct state *, struct state *)); +static VOID brackpart _ANSI_ARGS_((struct vars *, struct state *, struct state *)); +static chr *scanplain _ANSI_ARGS_((struct vars *)); +static VOID leaders _ANSI_ARGS_((struct vars *, struct cvec *)); +static VOID onechr _ANSI_ARGS_((struct vars *, pchr, struct state *, struct state *)); +static VOID dovec _ANSI_ARGS_((struct vars *, struct cvec *, struct state *, struct state *)); +static celt nextleader _ANSI_ARGS_((struct vars *, pchr, pchr)); +static VOID wordchrs _ANSI_ARGS_((struct vars *)); +static struct subre *subre _ANSI_ARGS_((struct vars *, int, int, struct state *, struct state *)); +static VOID freesubre _ANSI_ARGS_((struct vars *, struct subre *)); +static VOID freesrnode _ANSI_ARGS_((struct vars *, struct subre *)); +static VOID optst _ANSI_ARGS_((struct vars *, struct subre *)); +static int numst _ANSI_ARGS_((struct subre *, int)); +static VOID markst _ANSI_ARGS_((struct subre *)); +static VOID 
cleanst _ANSI_ARGS_((struct vars *)); +static long nfatree _ANSI_ARGS_((struct vars *, struct subre *, FILE *)); +static long nfanode _ANSI_ARGS_((struct vars *, struct subre *, FILE *)); +static int newlacon _ANSI_ARGS_((struct vars *, struct state *, struct state *, int)); +static VOID freelacons _ANSI_ARGS_((struct subre *, int)); +static VOID rfree _ANSI_ARGS_((regex_t *)); +static VOID dump _ANSI_ARGS_((regex_t *, FILE *)); +static VOID dumpst _ANSI_ARGS_((struct subre *, FILE *, int)); +static VOID stdump _ANSI_ARGS_((struct subre *, FILE *, int)); +static char *stid _ANSI_ARGS_((struct subre *, char *, size_t)); +/* === regc_lex.c === */ +static VOID lexstart _ANSI_ARGS_((struct vars *)); +static VOID prefixes _ANSI_ARGS_((struct vars *)); +static VOID lexnest _ANSI_ARGS_((struct vars *, chr *, chr *)); +static VOID lexword _ANSI_ARGS_((struct vars *)); +static int next _ANSI_ARGS_((struct vars *)); +static int lexescape _ANSI_ARGS_((struct vars *)); +static chr lexdigits _ANSI_ARGS_((struct vars *, int, int, int)); +static int brenext _ANSI_ARGS_((struct vars *, pchr)); +static VOID skip _ANSI_ARGS_((struct vars *)); +static chr newline _ANSI_ARGS_((NOPARMS)); #ifdef REG_DEBUG -static void dump(regex_t *, FILE *); -static void dumpst(struct subre *, FILE *, int); -static void stdump(struct subre *, FILE *, int); -static char *stid(struct subre *, char *, size_t); +static chr *ch _ANSI_ARGS_((NOPARMS)); #endif -/* === regc_lex.c === */ -static void lexstart(struct vars *); -static void prefixes(struct vars *); -static void lexnest(struct vars *, chr *, chr *); -static void lexword(struct vars *); -static int next(struct vars *); -static int lexescape(struct vars *); -static chr lexdigits(struct vars *, int, int, int); -static int brenext(struct vars *, chr); -static void skip(struct vars *); -static chr newline(void); -static chr chrnamed(struct vars *, chr *, chr *, chr); - +static chr chrnamed _ANSI_ARGS_((struct vars *, chr *, chr *, pchr)); /* === 
regc_color.c === */ -static void initcm(struct vars *, struct colormap *); -static void freecm(struct colormap *); -static void cmtreefree(struct colormap *, union tree *, int); -static color setcolor(struct colormap *, chr, pcolor); -static color maxcolor(struct colormap *); -static color newcolor(struct colormap *); -static void freecolor(struct colormap *, pcolor); -static color pseudocolor(struct colormap *); -static color subcolor(struct colormap *, chr c); -static color newsub(struct colormap *, pcolor); -static void subrange(struct vars *, chr, chr, struct state *, struct state *); -static void subblock(struct vars *, chr, struct state *, struct state *); -static void okcolors(struct nfa *, struct colormap *); -static void colorchain(struct colormap *, struct arc *); -static void uncolorchain(struct colormap *, struct arc *); -static int singleton(struct colormap *, chr c); -static void rainbow(struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *); -static void colorcomplement(struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *); - +static VOID initcm _ANSI_ARGS_((struct vars *, struct colormap *)); +static VOID freecm _ANSI_ARGS_((struct colormap *)); +static VOID cmtreefree _ANSI_ARGS_((struct colormap *, union tree *, int)); +static color setcolor _ANSI_ARGS_((struct colormap *, pchr, pcolor)); +static color maxcolor _ANSI_ARGS_((struct colormap *)); +static color newcolor _ANSI_ARGS_((struct colormap *)); +static VOID freecolor _ANSI_ARGS_((struct colormap *, pcolor)); +static color pseudocolor _ANSI_ARGS_((struct colormap *)); +static color subcolor _ANSI_ARGS_((struct colormap *, pchr c)); +static color newsub _ANSI_ARGS_((struct colormap *, pcolor)); +static VOID subrange _ANSI_ARGS_((struct vars *, pchr, pchr, struct state *, struct state *)); +static VOID subblock _ANSI_ARGS_((struct vars *, pchr, struct state *, struct state *)); +static VOID okcolors _ANSI_ARGS_((struct nfa *, struct 
colormap *)); +static VOID colorchain _ANSI_ARGS_((struct colormap *, struct arc *)); +static VOID uncolorchain _ANSI_ARGS_((struct colormap *, struct arc *)); +static int singleton _ANSI_ARGS_((struct colormap *, pchr c)); +static VOID rainbow _ANSI_ARGS_((struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *)); +static VOID colorcomplement _ANSI_ARGS_((struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *)); #ifdef REG_DEBUG -static void dumpcolors(struct colormap *, FILE *); -static void fillcheck(struct colormap *, union tree *, int, FILE *); -static void dumpchr(chr, FILE *); +static VOID dumpcolors _ANSI_ARGS_((struct colormap *, FILE *)); +static VOID fillcheck _ANSI_ARGS_((struct colormap *, union tree *, int, FILE *)); +static VOID dumpchr _ANSI_ARGS_((pchr, FILE *)); #endif /* === regc_nfa.c === */ -static struct nfa *newnfa(struct vars *, struct colormap *, struct nfa *); -static void freenfa(struct nfa *); -static struct state *newstate(struct nfa *); -static struct state *newfstate(struct nfa *, int flag); -static void dropstate(struct nfa *, struct state *); -static void freestate(struct nfa *, struct state *); -static void destroystate(struct nfa *, struct state *); -static void newarc(struct nfa *, int, pcolor, struct state *, struct state *); -static struct arc *allocarc(struct nfa *, struct state *); -static void freearc(struct nfa *, struct arc *); -static struct arc *findarc(struct state *, int, pcolor); -static void cparc(struct nfa *, struct arc *, struct state *, struct state *); -static void moveins(struct nfa *, struct state *, struct state *); -static void copyins(struct nfa *, struct state *, struct state *); -static void moveouts(struct nfa *, struct state *, struct state *); -static void copyouts(struct nfa *, struct state *, struct state *); -static void cloneouts(struct nfa *, struct state *, struct state *, struct state *, int); -static void delsub(struct nfa *, struct state *, 
struct state *); -static void deltraverse(struct nfa *, struct state *, struct state *); -static void dupnfa(struct nfa *, struct state *, struct state *, struct state *, struct state *); -static void duptraverse(struct nfa *, struct state *, struct state *); -static void cleartraverse(struct nfa *, struct state *); -static void specialcolors(struct nfa *); -static long optimize(struct nfa *, FILE *); -static void pullback(struct nfa *, FILE *); -static int pull(struct nfa *, struct arc *); -static void pushfwd(struct nfa *, FILE *); -static int push(struct nfa *, struct arc *); - -#define INCOMPATIBLE 1 /* destroys arc */ -#define SATISFIED 2 /* constraint satisfied */ -#define COMPATIBLE 3 /* compatible but not satisfied yet */ -static int combine(struct arc *, struct arc *); -static void fixempties(struct nfa *, FILE *); -static int unempty(struct nfa *, struct arc *); -static void cleanup(struct nfa *); -static void markreachable(struct nfa *, struct state *, struct state *, struct state *); -static void markcanreach(struct nfa *, struct state *, struct state *, struct state *); -static long analyze(struct nfa *); -static void compact(struct nfa *, struct cnfa *); -static void carcsort(struct carc *, struct carc *); -static void freecnfa(struct cnfa *); -static void dumpnfa(struct nfa *, FILE *); - +static struct nfa *newnfa _ANSI_ARGS_((struct vars *, struct colormap *, struct nfa *)); +static VOID freenfa _ANSI_ARGS_((struct nfa *)); +static struct state *newstate _ANSI_ARGS_((struct nfa *)); +static struct state *newfstate _ANSI_ARGS_((struct nfa *, int flag)); +static VOID dropstate _ANSI_ARGS_((struct nfa *, struct state *)); +static VOID freestate _ANSI_ARGS_((struct nfa *, struct state *)); +static VOID destroystate _ANSI_ARGS_((struct nfa *, struct state *)); +static VOID newarc _ANSI_ARGS_((struct nfa *, int, pcolor, struct state *, struct state *)); +static struct arc *allocarc _ANSI_ARGS_((struct nfa *, struct state *)); +static VOID freearc 
_ANSI_ARGS_((struct nfa *, struct arc *)); +static struct arc *findarc _ANSI_ARGS_((struct state *, int, pcolor)); +static VOID cparc _ANSI_ARGS_((struct nfa *, struct arc *, struct state *, struct state *)); +static VOID moveins _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); +static VOID copyins _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); +static VOID moveouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); +static VOID copyouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); +static VOID cloneouts _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *, int)); +static VOID delsub _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); +static VOID deltraverse _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); +static VOID dupnfa _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *, struct state *)); +static VOID duptraverse _ANSI_ARGS_((struct nfa *, struct state *, struct state *)); +static VOID cleartraverse _ANSI_ARGS_((struct nfa *, struct state *)); +static VOID specialcolors _ANSI_ARGS_((struct nfa *)); +static long optimize _ANSI_ARGS_((struct nfa *, FILE *)); +static VOID pullback _ANSI_ARGS_((struct nfa *, FILE *)); +static int pull _ANSI_ARGS_((struct nfa *, struct arc *)); +static VOID pushfwd _ANSI_ARGS_((struct nfa *, FILE *)); +static int push _ANSI_ARGS_((struct nfa *, struct arc *)); +#define INCOMPATIBLE 1 /* destroys arc */ +#define SATISFIED 2 /* constraint satisfied */ +#define COMPATIBLE 3 /* compatible but not satisfied yet */ +static int combine _ANSI_ARGS_((struct arc *, struct arc *)); +static VOID fixempties _ANSI_ARGS_((struct nfa *, FILE *)); +static int unempty _ANSI_ARGS_((struct nfa *, struct arc *)); +static VOID cleanup _ANSI_ARGS_((struct nfa *)); +static VOID markreachable _ANSI_ARGS_((struct nfa *, struct state *, struct state *, struct state *)); +static VOID markcanreach _ANSI_ARGS_((struct nfa *, struct state *, 
struct state *, struct state *)); +static long analyze _ANSI_ARGS_((struct nfa *)); +static VOID compact _ANSI_ARGS_((struct nfa *, struct cnfa *)); +static VOID carcsort _ANSI_ARGS_((struct carc *, struct carc *)); +static VOID freecnfa _ANSI_ARGS_((struct cnfa *)); +static VOID dumpnfa _ANSI_ARGS_((struct nfa *, FILE *)); #ifdef REG_DEBUG -static void dumpstate(struct state *, FILE *); -static void dumparcs(struct state *, FILE *); -static int dumprarcs(struct arc *, struct state *, FILE *, int); -static void dumparc(struct arc *, struct state *, FILE *); -static void dumpcnfa(struct cnfa *, FILE *); -static void dumpcstate(int, struct carc *, struct cnfa *, FILE *); +static VOID dumpstate _ANSI_ARGS_((struct state *, FILE *)); +static VOID dumparcs _ANSI_ARGS_((struct state *, FILE *)); +static int dumprarcs _ANSI_ARGS_((struct arc *, struct state *, FILE *, int)); +static VOID dumparc _ANSI_ARGS_((struct arc *, struct state *, FILE *)); +#endif +static VOID dumpcnfa _ANSI_ARGS_((struct cnfa *, FILE *)); +#ifdef REG_DEBUG +static VOID dumpcstate _ANSI_ARGS_((int, struct carc *, struct cnfa *, FILE *)); #endif /* === regc_cvec.c === */ -static struct cvec *newcvec(int, int, int); -static struct cvec *clearcvec(struct cvec *); -static void addchr(struct cvec *, chr); -static void addrange(struct cvec *, chr, chr); -static void addmcce(struct cvec *, chr *, chr *); -static int haschr(struct cvec *, chr); -static struct cvec *getcvec(struct vars *, int, int, int); -static void freecvec(struct cvec *); - +static struct cvec *newcvec _ANSI_ARGS_((int, int, int)); +static struct cvec *clearcvec _ANSI_ARGS_((struct cvec *)); +static VOID addchr _ANSI_ARGS_((struct cvec *, pchr)); +static VOID addrange _ANSI_ARGS_((struct cvec *, pchr, pchr)); +static VOID addmcce _ANSI_ARGS_((struct cvec *, chr *, chr *)); +static int haschr _ANSI_ARGS_((struct cvec *, pchr)); +static struct cvec *getcvec _ANSI_ARGS_((struct vars *, int, int, int)); +static VOID freecvec 
_ANSI_ARGS_((struct cvec *)); /* === regc_locale.c === */ -extern int wx_isdigit(wx_wchar c); -extern int wx_isalpha(wx_wchar c); -extern int wx_isalnum(wx_wchar c); -extern int wx_isupper(wx_wchar c); -extern int wx_islower(wx_wchar c); -extern int wx_isgraph(wx_wchar c); -extern int wx_ispunct(wx_wchar c); -extern int wx_isspace(wx_wchar c); -extern wx_wchar wx_toupper(wx_wchar c); -extern wx_wchar wx_tolower(wx_wchar c); -static int nmcces(struct vars *); -static int nleaders(struct vars *); -static struct cvec *allmcces(struct vars *, struct cvec *); -static celt element(struct vars *, chr *, chr *); -static struct cvec *range(struct vars *, celt, celt, int); -static int before(celt, celt); -static struct cvec *eclass(struct vars *, celt, int); -static struct cvec *cclass(struct vars *, chr *, chr *, int); -static struct cvec *allcases(struct vars *, chr); -static int cmp(const chr *, const chr *, size_t); -static int casecmp(const chr *, const chr *, size_t); +static int nmcces _ANSI_ARGS_((struct vars *)); +static int nleaders _ANSI_ARGS_((struct vars *)); +static struct cvec *allmcces _ANSI_ARGS_((struct vars *, struct cvec *)); +static celt element _ANSI_ARGS_((struct vars *, chr *, chr *)); +static struct cvec *range _ANSI_ARGS_((struct vars *, celt, celt, int)); +static int before _ANSI_ARGS_((celt, celt)); +static struct cvec *eclass _ANSI_ARGS_((struct vars *, celt, int)); +static struct cvec *cclass _ANSI_ARGS_((struct vars *, chr *, chr *, int)); +static struct cvec *allcases _ANSI_ARGS_((struct vars *, pchr)); +static int cmp _ANSI_ARGS_((CONST chr *, CONST chr *, size_t)); +static int casecmp _ANSI_ARGS_((CONST chr *, CONST chr *, size_t)); +/* automatically gathered by fwd; do not hand-edit */ +/* =====^!^===== end forwards =====^!^===== */ + /* internal variables, bundled for easy passing around */ -struct vars -{ - regex_t *re; - chr *now; /* scan pointer into string */ - chr *stop; /* end of string */ - chr *savenow; /* saved now and stop for 
"subroutine - * call" */ - chr *savestop; - int err; /* error code (0 if none) */ - int cflags; /* copy of compile flags */ - int lasttype; /* type of previous token */ - int nexttype; /* type of next token */ - chr nextvalue; /* value (if any) of next token */ - int lexcon; /* lexical context type (see lex.c) */ - int nsubexp; /* subexpression count */ - struct subre **subs; /* subRE pointer vector */ - size_t nsubs; /* length of vector */ +struct vars { + regex_t *re; + chr *now; /* scan pointer into string */ + chr *stop; /* end of string */ + chr *savenow; /* saved now and stop for "subroutine call" */ + chr *savestop; + int err; /* error code (0 if none) */ + int cflags; /* copy of compile flags */ + int lasttype; /* type of previous token */ + int nexttype; /* type of next token */ + chr nextvalue; /* value (if any) of next token */ + int lexcon; /* lexical context type (see lex.c) */ + int nsubexp; /* subexpression count */ + struct subre **subs; /* subRE pointer vector */ + size_t nsubs; /* length of vector */ struct subre *sub10[10]; /* initial vector, enough for most */ - struct nfa *nfa; /* the NFA */ - struct colormap *cm; /* character color map */ - color nlcolor; /* color of newline */ - struct state *wordchrs; /* state in nfa holding word-char outarcs */ - struct subre *tree; /* subexpression tree */ + struct nfa *nfa; /* the NFA */ + struct colormap *cm; /* character color map */ + color nlcolor; /* color of newline */ + struct state *wordchrs; /* state in nfa holding word-char outarcs */ + struct subre *tree; /* subexpression tree */ struct subre *treechain; /* all tree nodes allocated */ struct subre *treefree; /* any free tree nodes */ - int ntree; /* number of tree nodes */ - struct cvec *cv; /* interface cvec */ - struct cvec *cv2; /* utility cvec */ - struct cvec *mcces; /* collating-element information */ -#define ISCELEADER(v,c) (v->mcces != NULL && haschr(v->mcces, (c))) + int ntree; /* number of tree nodes */ + struct cvec *cv; /* 
interface cvec */ + struct cvec *cv2; /* utility cvec */ + struct cvec *mcces; /* collating-element information */ +# define ISCELEADER(v,c) (v->mcces != NULL && haschr(v->mcces, (c))) struct state *mccepbegin; /* in nfa, start of MCCE prototypes */ - struct state *mccepend; /* in nfa, end of MCCE prototypes */ - struct subre *lacons; /* lookahead-constraint vector */ - int nlacons; /* size of lacons */ + struct state *mccepend; /* in nfa, end of MCCE prototypes */ + struct subre *lacons; /* lookahead-constraint vector */ + int nlacons; /* size of lacons */ }; /* parsing macros; most know that `v' is the struct vars pointer */ -#define NEXT() (next(v)) /* advance by one token */ -#define SEE(t) (v->nexttype == (t)) /* is next token this? */ -#define EAT(t) (SEE(t) && next(v)) /* if next is this, swallow it */ -#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */ -#define ISERR() VISERR(v) -#define VERR(vv,e) ((vv)->nexttype = EOS, ((vv)->err) ? (vv)->err :\ +#define NEXT() (next(v)) /* advance by one token */ +#define SEE(t) (v->nexttype == (t)) /* is next token this? */ +#define EAT(t) (SEE(t) && next(v)) /* if next is this, swallow it */ +#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */ +#define ISERR() VISERR(v) +#define VERR(vv,e) ((vv)->nexttype = EOS, ((vv)->err) ? (vv)->err :\ ((vv)->err = (e))) -#define ERR(e) VERR(v, e) /* record an error */ -#define NOERR() {if (ISERR()) return;} /* if error seen, return */ -#define NOERRN() {if (ISERR()) return NULL;} /* NOERR with retval */ -#define NOERRZ() {if (ISERR()) return 0;} /* NOERR with retval */ -#define INSIST(c, e) ((c) ? 
0 : ERR(e)) /* if condition false, - * error */ -#define NOTE(b) (v->re->re_info |= (b)) /* note visible condition */ -#define EMPTYARC(x, y) newarc(v->nfa, EMPTY, 0, x, y) +#define ERR(e) VERR(v, e) /* record an error */ +#define NOERR() {if (ISERR()) return;} /* if error seen, return */ +#define NOERRN() {if (ISERR()) return NULL;} /* NOERR with retval */ +#define NOERRZ() {if (ISERR()) return 0;} /* NOERR with retval */ +#define INSIST(c, e) ((c) ? 0 : ERR(e)) /* if condition false, error */ +#define NOTE(b) (v->re->re_info |= (b)) /* note visible condition */ +#define EMPTYARC(x, y) newarc(v->nfa, EMPTY, 0, x, y) /* token type codes, some also used as NFA arc types */ -#define EMPTY 'n' /* no token present */ -#define EOS 'e' /* end of string */ -#define PLAIN 'p' /* ordinary character */ -#define DIGIT 'd' /* digit (in bound) */ -#define BACKREF 'b' /* back reference */ -#define COLLEL 'I' /* start of [. */ -#define ECLASS 'E' /* start of [= */ -#define CCLASS 'C' /* start of [: */ -#define END 'X' /* end of [. [= [: */ -#define RANGE 'R' /* - within [] which might be range delim. */ -#define LACON 'L' /* lookahead constraint subRE */ -#define AHEAD 'a' /* color-lookahead arc */ -#define BEHIND 'r' /* color-lookbehind arc */ -#define WBDRY 'w' /* word boundary constraint */ -#define NWBDRY 'W' /* non-word-boundary constraint */ -#define SBEGIN 'A' /* beginning of string (even if not BOL) */ -#define SEND 'Z' /* end of string (even if not EOL) */ -#define PREFER 'P' /* length preference */ +#define EMPTY 'n' /* no token present */ +#define EOS 'e' /* end of string */ +#define PLAIN 'p' /* ordinary character */ +#define DIGIT 'd' /* digit (in bound) */ +#define BACKREF 'b' /* back reference */ +#define COLLEL 'I' /* start of [. */ +#define ECLASS 'E' /* start of [= */ +#define CCLASS 'C' /* start of [: */ +#define END 'X' /* end of [. [= [: */ +#define RANGE 'R' /* - within [] which might be range delim. 
*/ +#define LACON 'L' /* lookahead constraint subRE */ +#define AHEAD 'a' /* color-lookahead arc */ +#define BEHIND 'r' /* color-lookbehind arc */ +#define WBDRY 'w' /* word boundary constraint */ +#define NWBDRY 'W' /* non-word-boundary constraint */ +#define SBEGIN 'A' /* beginning of string (even if not BOL) */ +#define SEND 'Z' /* end of string (even if not EOL) */ +#define PREFER 'P' /* length preference */ /* is an arc colored, and hence on a color chain? */ -#define COLORED(a) ((a)->type == PLAIN || (a)->type == AHEAD || \ +#define COLORED(a) ((a)->type == PLAIN || (a)->type == AHEAD || \ (a)->type == BEHIND) /* static function list */ static struct fns functions = { - rfree, /* regfree insides */ + rfree, /* regfree insides */ }; /* - * regcomp - compile regular expression + - compile - compile regular expression + ^ int compile(regex_t *, CONST chr *, size_t, int); */ int -regcomp(regex_t *re, - const chr *string, - int flags) -{ - return wx_regcomp(re, string, wx_strlen(string), flags); -} -int -wx_regcomp(regex_t *re, - const chr *string, - size_t len, - int flags) +compile(re, string, len, flags) +regex_t *re; +CONST chr *string; +size_t len; +int flags; { struct vars var; struct vars *v = &var; struct guts *g; - int i; - size_t j; - -#ifdef REG_DEBUG - FILE *debug = (flags & REG_PROGRESS) ? stdout : (FILE *) NULL; - -#else - FILE *debug = (FILE *) NULL; -#endif - -#define CNOERR() { if (ISERR()) return freev(v, v->err); } + int i; + size_t j; + FILE *debug = (flags&REG_PROGRESS) ? 
stdout : (FILE *)NULL; +# define CNOERR() { if (ISERR()) return freev(v, v->err); } /* sanity checks */ if (re == NULL || string == NULL) return REG_INVARG; - if ((flags & REG_QUOTE) && - (flags & (REG_ADVANCED | REG_EXPANDED | REG_NEWLINE))) + if ((flags&REG_QUOTE) && + (flags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE))) return REG_INVARG; - if (!(flags & REG_EXTENDED) && (flags & REG_ADVF)) + if (!(flags&REG_EXTENDED) && (flags&REG_ADVF)) return REG_INVARG; /* initial setup (after which freev() is callable) */ v->re = re; - v->now = (chr *) string; + v->now = (chr *)string; v->stop = v->now + len; v->savenow = v->savestop = NULL; v->err = 0; @@ -354,7 +330,7 @@ wx_regcomp(regex_t *re, v->lacons = NULL; v->nlacons = 0; re->re_magic = REMAGIC; - re->re_info = 0; /* bits get set during parse */ + re->re_info = 0; /* bits get set during parse */ re->re_csize = sizeof(chr); re->re_guts = NULL; re->re_fns = VS(&functions); @@ -363,89 +339,76 @@ wx_regcomp(regex_t *re, re->re_guts = VS(MALLOC(sizeof(struct guts))); if (re->re_guts == NULL) return freev(v, REG_ESPACE); - g = (struct guts *) re->re_guts; + g = (struct guts *)re->re_guts; g->tree = NULL; initcm(v, &g->cmap); v->cm = &g->cmap; g->lacons = NULL; g->nlacons = 0; ZAPCNFA(g->search); - v->nfa = newnfa(v, v->cm, (struct nfa *) NULL); + v->nfa = newnfa(v, v->cm, (struct nfa *)NULL); CNOERR(); v->cv = newcvec(100, 20, 10); if (v->cv == NULL) return freev(v, REG_ESPACE); i = nmcces(v); - if (i > 0) - { + if (i > 0) { v->mcces = newcvec(nleaders(v), 0, i); CNOERR(); v->mcces = allmcces(v, v->mcces); leaders(v, v->mcces); - addmcce(v->mcces, (chr *) NULL, (chr *) NULL); /* dummy */ + addmcce(v->mcces, (chr *)NULL, (chr *)NULL); /* dummy */ } CNOERR(); /* parsing */ - lexstart(v); /* also handles prefixes */ - if ((v->cflags & REG_NLSTOP) || (v->cflags & REG_NLANCH)) - { + lexstart(v); /* also handles prefixes */ + if ((v->cflags&REG_NLSTOP) || (v->cflags&REG_NLANCH)) { /* assign newline a unique color */ v->nlcolor = subcolor(v->cm, 
newline()); okcolors(v->nfa, v->cm); } CNOERR(); v->tree = parse(v, EOS, PLAIN, v->nfa->init, v->nfa->final); - assert(SEE(EOS)); /* even if error; ISERR() => SEE(EOS) */ + assert(SEE(EOS)); /* even if error; ISERR() => SEE(EOS) */ CNOERR(); assert(v->tree != NULL); /* finish setup of nfa and its subre tree */ specialcolors(v->nfa); CNOERR(); -#ifdef REG_DEBUG - if (debug != NULL) - { + if (debug != NULL) { fprintf(debug, "\n\n\n========= RAW ==========\n"); dumpnfa(v->nfa, debug); dumpst(v->tree, debug, 1); - } -#endif + } optst(v, v->tree); v->ntree = numst(v->tree, 1); markst(v->tree); cleanst(v); -#ifdef REG_DEBUG - if (debug != NULL) - { + if (debug != NULL) { fprintf(debug, "\n\n\n========= TREE FIXED ==========\n"); dumpst(v->tree, debug, 1); } -#endif /* build compacted NFAs for tree and lacons */ re->re_info |= nfatree(v, v->tree, debug); CNOERR(); assert(v->nlacons == 0 || v->lacons != NULL); - for (i = 1; i < v->nlacons; i++) - { -#ifdef REG_DEBUG + for (i = 1; i < v->nlacons; i++) { if (debug != NULL) fprintf(debug, "\n\n\n========= LA%d ==========\n", i); -#endif nfanode(v, &v->lacons[i], debug); } CNOERR(); - if (v->tree->flags & SHORTER) + if (v->tree->flags&SHORTER) NOTE(REG_USHORTEST); /* build compacted NFAs for tree, lacons, fast search */ -#ifdef REG_DEBUG if (debug != NULL) fprintf(debug, "\n\n\n========= SEARCH ==========\n"); -#endif /* can sacrifice main NFA now, so use it as work area */ - (DISCARD) optimize(v->nfa, debug); + (DISCARD)optimize(v->nfa, debug); CNOERR(); makesearch(v, v->nfa); CNOERR(); @@ -454,7 +417,7 @@ wx_regcomp(regex_t *re, /* looks okay, package it up */ re->re_nsub = v->nsubexp; - v->re = NULL; /* freev no longer frees re */ + v->re = NULL; /* freev no longer frees re */ g->magic = GUTSMAGIC; g->cflags = v->cflags; g->info = re->re_info; @@ -462,44 +425,40 @@ wx_regcomp(regex_t *re, g->tree = v->tree; v->tree = NULL; g->ntree = v->ntree; - g->compare = (v->cflags & REG_ICASE) ? 
casecmp : cmp; + g->compare = (v->cflags&REG_ICASE) ? casecmp : cmp; g->lacons = v->lacons; v->lacons = NULL; g->nlacons = v->nlacons; -#ifdef REG_DEBUG - if (flags & REG_DUMP) + if (flags&REG_DUMP) dump(re, stdout); -#endif assert(v->err == 0); return freev(v, 0); } /* - * moresubs - enlarge subRE vector + - moresubs - enlarge subRE vector + ^ static VOID moresubs(struct vars *, int); */ -static void -moresubs(struct vars * v, - int wanted) /* want enough room for this one */ +static VOID +moresubs(v, wanted) +struct vars *v; +int wanted; /* want enough room for this one */ { struct subre **p; - size_t n; + size_t n; - assert(wanted > 0 && (size_t) wanted >= v->nsubs); - n = (size_t) wanted *3 / 2 + 1; - - if (v->subs == v->sub10) - { - p = (struct subre **) MALLOC(n * sizeof(struct subre *)); + assert(wanted > 0 && (size_t)wanted >= v->nsubs); + n = (size_t)wanted * 3 / 2 + 1; + if (v->subs == v->sub10) { + p = (struct subre **)MALLOC(n * sizeof(struct subre *)); if (p != NULL) memcpy(VS(p), VS(v->subs), - v->nsubs * sizeof(struct subre *)); - } - else - p = (struct subre **) REALLOC(v->subs, n * sizeof(struct subre *)); - if (p == NULL) - { + v->nsubs * sizeof(struct subre *)); + } else + p = (struct subre **)REALLOC(v->subs, n*sizeof(struct subre *)); + if (p == NULL) { ERR(REG_ESPACE); return; } @@ -507,18 +466,19 @@ moresubs(struct vars * v, for (p = &v->subs[v->nsubs]; v->nsubs < n; p++, v->nsubs++) *p = NULL; assert(v->nsubs == n); - assert((size_t) wanted < v->nsubs); + assert((size_t)wanted < v->nsubs); } /* - * freev - free vars struct's substructures where necessary - * + - freev - free vars struct's substructures where necessary * Optionally does error-number setting, and always returns error code * (if any), to make error-handling code terser. 
+ ^ static int freev(struct vars *, int); */ static int -freev(struct vars * v, - int err) +freev(v, err) +struct vars *v; +int err; { if (v->re != NULL) rfree(v->re); @@ -538,18 +498,20 @@ freev(struct vars * v, freecvec(v->mcces); if (v->lacons != NULL) freelacons(v->lacons, v->nlacons); - ERR(err); /* nop if err==0 */ + ERR(err); /* nop if err==0 */ return v->err; } /* - * makesearch - turn an NFA into a search NFA (implicit prepend of .*?) + - makesearch - turn an NFA into a search NFA (implicit prepend of .*?) * NFA must have been optimize()d already. + ^ static VOID makesearch(struct vars *, struct nfa *); */ -static void -makesearch(struct vars * v, - struct nfa * nfa) +static VOID +makesearch(v, nfa) +struct vars *v; +struct nfa *nfa; { struct arc *a; struct arc *b; @@ -559,14 +521,12 @@ makesearch(struct vars * v, struct state *slist; /* no loops are needed if it's anchored */ - for (a = pre->outs; a != NULL; a = a->outchain) - { + for (a = pre->outs; a != NULL; a = a->outchain) { assert(a->type == PLAIN); if (a->co != nfa->bos[0] && a->co != nfa->bos[1]) break; } - if (a != NULL) - { + if (a != NULL) { /* add implicit .* in front */ rainbow(nfa, v->cm, PLAIN, COLORLESS, pre, pre); @@ -577,68 +537,69 @@ makesearch(struct vars * v, /* * Now here's the subtle part. Because many REs have no lookback - * constraints, often knowing when you were in the pre state tells you - * little; it's the next state(s) that are informative. But some of - * them may have other inarcs, i.e. it may be possible to make actual - * progress and then return to one of them. We must de-optimize such - * cases, splitting each such state into progress and no-progress - * states. - */ + * constraints, often knowing when you were in the pre state tells + * you little; it's the next state(s) that are informative. But + * some of them may have other inarcs, i.e. it may be possible to + * make actual progress and then return to one of them. 
We must + * de-optimize such cases, splitting each such state into progress + * and no-progress states. + */ /* first, make a list of the states */ slist = NULL; - for (a = pre->outs; a != NULL; a = a->outchain) - { + for (a = pre->outs; a != NULL; a = a->outchain) { s = a->to; for (b = s->ins; b != NULL; b = b->inchain) if (b->from != pre) - break; - if (b != NULL) - { /* must be split */ - s->tmp = slist; - slist = s; - } + break; + if (b != NULL) { /* must be split */ + if (s->tmp == NULL) { /* if not already in the list */ + /* (fixes bugs 505048, 230589, */ + /* 840258, 504785) */ + s->tmp = slist; + slist = s; + } } + } /* do the splits */ - for (s = slist; s != NULL; s = s2) - { + for (s = slist; s != NULL; s = s2) { s2 = newstate(nfa); copyouts(nfa, s, s2); - for (a = s->ins; a != NULL; a = b) - { + for (a = s->ins; a != NULL; a = b) { b = a->inchain; - if (a->from != pre) - { + if (a->from != pre) { cparc(nfa, a, a->from, s2); freearc(nfa, a); } } s2 = s->tmp; - s->tmp = NULL; /* clean up while we're at it */ + s->tmp = NULL; /* clean up while we're at it */ } } /* - * parse - parse an RE - * + - parse - parse an RE * This is actually just the top level, which parses a bunch of branches - * tied together with '|'. They appear in the tree as the left children + * tied together with '|'. They appear in the tree as the left children * of a chain of '|' subres. 
+ ^ static struct subre *parse(struct vars *, int, int, struct state *, + ^ struct state *); */ static struct subre * -parse(struct vars * v, - int stopper, /* EOS or ')' */ - int type, /* LACON (lookahead subRE) or PLAIN */ - struct state * init, /* initial state */ - struct state * final) /* final state */ +parse(v, stopper, type, init, final) +struct vars *v; +int stopper; /* EOS or ')' */ +int type; /* LACON (lookahead subRE) or PLAIN */ +struct state *init; /* initial state */ +struct state *final; /* final state */ { - struct state *left; /* scaffolding for branch */ + struct state *left; /* scaffolding for branch */ struct state *right; - struct subre *branches; /* top level */ - struct subre *branch; /* current branch */ - struct subre *t; /* temporary */ - int firstbranch; /* is this the first branch? */ + struct subre *branches; /* top level */ + struct subre *branch; /* current branch */ + struct subre *t; /* temporary */ + int firstbranch; /* is this the first branch? */ assert(stopper == ')' || stopper == EOS); @@ -646,10 +607,8 @@ parse(struct vars * v, NOERRN(); branch = branches; firstbranch = 1; - do - { /* a branch */ - if (!firstbranch) - { + do { /* a branch */ + if (!firstbranch) { /* need a place to hang it */ branch->right = subre(v, '|', LONGER, init, final); NOERRN(); @@ -665,29 +624,25 @@ parse(struct vars * v, branch->left = parsebranch(v, stopper, type, left, right, 0); NOERRN(); branch->flags |= UP(branch->flags | branch->left->flags); - if ((branch->flags & ~branches->flags) != 0) /* new flags */ + if ((branch->flags &~ branches->flags) != 0) /* new flags */ for (t = branches; t != branch; t = t->right) t->flags |= branch->flags; } while (EAT('|')); assert(SEE(stopper) || SEE(EOS)); - if (!SEE(stopper)) - { + if (!SEE(stopper)) { assert(stopper == ')' && SEE(EOS)); ERR(REG_EPAREN); } /* optimize out simple cases */ - if (branch == branches) - { /* only one branch */ + if (branch == branches) { /* only one branch */ assert(branch->right 
== NULL); t = branch->left; branch->left = NULL; freesubre(v, branches); branches = t; - } - else if (!MESSY(branches->flags)) - { /* no interesting innards */ + } else if (!MESSY(branches->flags)) { /* no interesting innards */ freesubre(v, branches->left); branches->left = NULL; freesubre(v, branches->right); @@ -699,44 +654,43 @@ parse(struct vars * v, } /* - * parsebranch - parse one branch of an RE - * + - parsebranch - parse one branch of an RE * This mostly manages concatenation, working closely with parseqatom(). * Concatenated things are bundled up as much as possible, with separate * ',' nodes introduced only when necessary due to substructure. + ^ static struct subre *parsebranch(struct vars *, int, int, struct state *, + ^ struct state *, int); */ static struct subre * -parsebranch(struct vars * v, - int stopper, /* EOS or ')' */ - int type, /* LACON (lookahead subRE) or PLAIN */ - struct state * left, /* leftmost state */ - struct state * right, /* rightmost state */ - int partial) /* is this only part of a branch? */ +parsebranch(v, stopper, type, left, right, partial) +struct vars *v; +int stopper; /* EOS or ')' */ +int type; /* LACON (lookahead subRE) or PLAIN */ +struct state *left; /* leftmost state */ +struct state *right; /* rightmost state */ +int partial; /* is this only part of a branch? */ { - struct state *lp; /* left end of current construct */ - int seencontent; /* is there anything in this branch yet? */ + struct state *lp; /* left end of current construct */ + int seencontent; /* is there anything in this branch yet? 
*/ struct subre *t; lp = left; seencontent = 0; t = subre(v, '=', 0, left, right); /* op '=' is tentative */ NOERRN(); - while (!SEE('|') && !SEE(stopper) && !SEE(EOS)) - { - if (seencontent) - { /* implicit concat operator */ + while (!SEE('|') && !SEE(stopper) && !SEE(EOS)) { + if (seencontent) { /* implicit concat operator */ lp = newstate(v->nfa); NOERRN(); moveins(v->nfa, right, lp); - } + } seencontent = 1; /* NB, recursion in parseqatom() may swallow rest of branch */ parseqatom(v, stopper, type, lp, right, t); } - if (!seencontent) - { /* empty branch */ + if (!seencontent) { /* empty branch */ if (!partial) NOTE(REG_UUNSPEC); assert(lp == left); @@ -747,280 +701,268 @@ parsebranch(struct vars * v, } /* - * parseqatom - parse one quantified atom or constraint of an RE - * + - parseqatom - parse one quantified atom or constraint of an RE * The bookkeeping near the end cooperates very closely with parsebranch(); * in particular, it contains a recursion that can involve parsing the rest * of the branch, making this function's name somewhat inaccurate. 
+ ^ static VOID parseqatom(struct vars *, int, int, struct state *, + ^ struct state *, struct subre *); */ -static void -parseqatom(struct vars * v, - int stopper, /* EOS or ')' */ - int type, /* LACON (lookahead subRE) or PLAIN */ - struct state * lp, /* left state to hang it on */ - struct state * rp, /* right state to hang it on */ - struct subre * top) /* subtree top */ +static VOID +parseqatom(v, stopper, type, lp, rp, top) +struct vars *v; +int stopper; /* EOS or ')' */ +int type; /* LACON (lookahead subRE) or PLAIN */ +struct state *lp; /* left state to hang it on */ +struct state *rp; /* right state to hang it on */ +struct subre *top; /* subtree top */ { - struct state *s; /* temporaries for new states */ + struct state *s; /* temporaries for new states */ struct state *s2; - -#define ARCV(t, val) newarc(v->nfa, t, val, lp, rp) - int m, - n; - struct subre *atom; /* atom's subtree */ +# define ARCV(t, val) newarc(v->nfa, t, val, lp, rp) + int m, n; + struct subre *atom; /* atom's subtree */ struct subre *t; - int cap; /* capturing parens? */ - int pos; /* positive lookahead? */ - int subno; /* capturing-parens or backref number */ - int atomtype; - int qprefer; /* quantifier short/long preference */ - int f; - struct subre **atomp; /* where the pointer to atom is */ + int cap; /* capturing parens? */ + int pos; /* positive lookahead? */ + int subno; /* capturing-parens or backref number */ + int atomtype; + int qprefer; /* quantifier short/long preference */ + int f; + struct subre **atomp; /* where the pointer to atom is */ /* initial bookkeeping */ atom = NULL; - assert(lp->nouts == 0); /* must string new code */ - assert(rp->nins == 0); /* between lp and rp */ - subno = 0; /* just to shut lint up */ + assert(lp->nouts == 0); /* must string new code */ + assert(rp->nins == 0); /* between lp and rp */ + subno = 0; /* just to shut lint up */ /* an atom or constraint... 
*/ atomtype = v->nexttype; - switch (atomtype) - { - /* first, constraints, which end by returning */ - case '^': - ARCV('^', 1); - if (v->cflags & REG_NLANCH) - ARCV(BEHIND, v->nlcolor); - NEXT(); - return; - break; - case '$': - ARCV('$', 1); - if (v->cflags & REG_NLANCH) - ARCV(AHEAD, v->nlcolor); - NEXT(); - return; - break; - case SBEGIN: - ARCV('^', 1); /* BOL */ - ARCV('^', 0); /* or BOS */ - NEXT(); - return; - break; - case SEND: - ARCV('$', 1); /* EOL */ - ARCV('$', 0); /* or EOS */ - NEXT(); - return; - break; - case '<': - wordchrs(v); /* does NEXT() */ - s = newstate(v->nfa); - NOERR(); - nonword(v, BEHIND, lp, s); - word(v, AHEAD, s, rp); - return; - break; - case '>': - wordchrs(v); /* does NEXT() */ - s = newstate(v->nfa); - NOERR(); - word(v, BEHIND, lp, s); - nonword(v, AHEAD, s, rp); - return; + switch (atomtype) { + /* first, constraints, which end by returning */ + case '^': + ARCV('^', 1); + if (v->cflags&REG_NLANCH) + ARCV(BEHIND, v->nlcolor); + NEXT(); + return; break; - case WBDRY: - wordchrs(v); /* does NEXT() */ - s = newstate(v->nfa); - NOERR(); - nonword(v, BEHIND, lp, s); - word(v, AHEAD, s, rp); - s = newstate(v->nfa); - NOERR(); - word(v, BEHIND, lp, s); - nonword(v, AHEAD, s, rp); - return; - break; - case NWBDRY: - wordchrs(v); /* does NEXT() */ - s = newstate(v->nfa); - NOERR(); - word(v, BEHIND, lp, s); - word(v, AHEAD, s, rp); - s = newstate(v->nfa); - NOERR(); - nonword(v, BEHIND, lp, s); - nonword(v, AHEAD, s, rp); - return; + case '$': + ARCV('$', 1); + if (v->cflags&REG_NLANCH) + ARCV(AHEAD, v->nlcolor); + NEXT(); + return; + break; + case SBEGIN: + ARCV('^', 1); /* BOL */ + ARCV('^', 0); /* or BOS */ + NEXT(); + return; + break; + case SEND: + ARCV('$', 1); /* EOL */ + ARCV('$', 0); /* or EOS */ + NEXT(); + return; + break; + case '<': + wordchrs(v); /* does NEXT() */ + s = newstate(v->nfa); + NOERR(); + nonword(v, BEHIND, lp, s); + word(v, AHEAD, s, rp); + return; + break; + case '>': + wordchrs(v); /* does NEXT() */ + s =
newstate(v->nfa); + NOERR(); + word(v, BEHIND, lp, s); + nonword(v, AHEAD, s, rp); + return; + break; + case WBDRY: + wordchrs(v); /* does NEXT() */ + s = newstate(v->nfa); + NOERR(); + nonword(v, BEHIND, lp, s); + word(v, AHEAD, s, rp); + s = newstate(v->nfa); + NOERR(); + word(v, BEHIND, lp, s); + nonword(v, AHEAD, s, rp); + return; + break; + case NWBDRY: + wordchrs(v); /* does NEXT() */ + s = newstate(v->nfa); + NOERR(); + word(v, BEHIND, lp, s); + word(v, AHEAD, s, rp); + s = newstate(v->nfa); + NOERR(); + nonword(v, BEHIND, lp, s); + nonword(v, AHEAD, s, rp); + return; + break; + case LACON: /* lookahead constraint */ + pos = v->nextvalue; + NEXT(); + s = newstate(v->nfa); + s2 = newstate(v->nfa); + NOERR(); + t = parse(v, ')', LACON, s, s2); + freesubre(v, t); /* internal structure irrelevant */ + assert(SEE(')') || ISERR()); + NEXT(); + n = newlacon(v, s, s2, pos); + NOERR(); + ARCV(LACON, n); + return; + break; + /* then errors, to get them out of the way */ + case '*': + case '+': + case '?': + case '{': + ERR(REG_BADRPT); + return; break; - case LACON: /* lookahead constraint */ - pos = v->nextvalue; - NEXT(); - s = newstate(v->nfa); - s2 = newstate(v->nfa); - NOERR(); - t = parse(v, ')', LACON, s, s2); - freesubre(v, t); /* internal structure irrelevant */ - assert(SEE(')') || ISERR()); - NEXT(); - n = newlacon(v, s, s2, pos); - NOERR(); - ARCV(LACON, n); - return; - break; - /* then errors, to get them out of the way */ - case '*': - case '+': - case '?': - case '{': - ERR(REG_BADRPT); - return; - break; default: - ERR(REG_ASSERT); + ERR(REG_ASSERT); + return; + break; + /* then plain characters, and minor variants on that theme */ + case ')': /* unbalanced paren */ + if ((v->cflags&REG_ADVANCED) != REG_EXTENDED) { + ERR(REG_EPAREN); return; + } + /* legal in EREs due to specification botch */ + NOTE(REG_UPBOTCH); + /* fallthrough into case PLAIN */ + case PLAIN: + onechr(v, v->nextvalue, lp, rp); + okcolors(v->nfa, v->cm); + NOERR(); + NEXT(); break; -
/* then plain characters, and minor variants on that theme */ - case ')': /* unbalanced paren */ - if ((v->cflags & REG_ADVANCED) != REG_EXTENDED) - { - ERR(REG_EPAREN); - return; - } - /* legal in EREs due to specification botch */ - NOTE(REG_UPBOTCH); - /* fallthrough into case PLAIN */ - case PLAIN: - onechr(v, v->nextvalue, lp, rp); - okcolors(v->nfa, v->cm); - NOERR(); - NEXT(); + case '[': + if (v->nextvalue == 1) + bracket(v, lp, rp); + else + cbracket(v, lp, rp); + assert(SEE(']') || ISERR()); + NEXT(); break; - case '[': - if (v->nextvalue == 1) - bracket(v, lp, rp); - else - cbracket(v, lp, rp); - assert(SEE(']') || ISERR()); - NEXT(); + case '.': + rainbow(v->nfa, v->cm, PLAIN, + (v->cflags&REG_NLSTOP) ? v->nlcolor : COLORLESS, + lp, rp); + NEXT(); break; - case '.': - rainbow(v->nfa, v->cm, PLAIN, - (v->cflags & REG_NLSTOP) ? v->nlcolor : COLORLESS, - lp, rp); - NEXT(); - break; - /* and finally the ugly stuff */ - case '(': /* value flags as capturing or non */ - cap = (type == LACON) ?
0 : v->nextvalue; - if (cap) - { - v->nsubexp++; - subno = v->nsubexp; - if ((size_t) subno >= v->nsubs) - moresubs(v, subno); - assert((size_t) subno < v->nsubs); - } - else - atomtype = PLAIN; /* something that's not '(' */ - NEXT(); - /* need new endpoints because tree will contain pointers */ - s = newstate(v->nfa); - s2 = newstate(v->nfa); - NOERR(); - EMPTYARC(lp, s); - EMPTYARC(s2, rp); - NOERR(); - atom = parse(v, ')', PLAIN, s, s2); - assert(SEE(')') || ISERR()); - NEXT(); - NOERR(); - if (cap) - { - v->subs[subno] = atom; - t = subre(v, '(', atom->flags | CAP, lp, rp); - NOERR(); - t->subno = subno; - t->left = atom; - atom = t; - } - /* postpone everything else pending possible {0} */ - break; - case BACKREF: /* the Feature From The Black Lagoon */ - INSIST(type != LACON, REG_ESUBREG); - INSIST(v->nextvalue < v->nsubs, REG_ESUBREG); - INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG); + /* and finally the ugly stuff */ + case '(': /* value flags as capturing or non */ + cap = (type == LACON) ? 
0 : v->nextvalue; + if (cap) { + v->nsubexp++; + subno = v->nsubexp; + if ((size_t)subno >= v->nsubs) + moresubs(v, subno); + assert((size_t)subno < v->nsubs); + } else + atomtype = PLAIN; /* something that's not '(' */ + NEXT(); + /* need new endpoints because tree will contain pointers */ + s = newstate(v->nfa); + s2 = newstate(v->nfa); + NOERR(); + EMPTYARC(lp, s); + EMPTYARC(s2, rp); + NOERR(); + atom = parse(v, ')', PLAIN, s, s2); + assert(SEE(')') || ISERR()); + NEXT(); + NOERR(); + if (cap) { + v->subs[subno] = atom; + t = subre(v, '(', atom->flags|CAP, lp, rp); NOERR(); - assert(v->nextvalue > 0); - atom = subre(v, 'b', BACKR, lp, rp); - subno = v->nextvalue; - atom->subno = subno; - EMPTYARC(lp, rp); /* temporarily, so there's something */ - NEXT(); + t->subno = subno; + t->left = atom; + atom = t; + } + /* postpone everything else pending possible {0} */ + break; + case BACKREF: /* the Feature From The Black Lagoon */ + INSIST(type != LACON, REG_ESUBREG); + INSIST(v->nextvalue < v->nsubs, REG_ESUBREG); + INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG); + NOERR(); + assert(v->nextvalue > 0); + atom = subre(v, 'b', BACKR, lp, rp); + subno = v->nextvalue; + atom->subno = subno; + EMPTYARC(lp, rp); /* temporarily, so there's something */ + NEXT(); break; } /* ...and an atom may be followed by a quantifier */ - switch (v->nexttype) - { - case '*': - m = 0; - n = INFINITY; - qprefer = (v->nextvalue) ? LONGER : SHORTER; - NEXT(); - break; - case '+': - m = 1; - n = INFINITY; - qprefer = (v->nextvalue) ? LONGER : SHORTER; - NEXT(); - break; - case '?': - m = 0; - n = 1; - qprefer = (v->nextvalue) ? LONGER : SHORTER; + switch (v->nexttype) { + case '*': + m = 0; + n = INFINITY; + qprefer = (v->nextvalue) ? 
LONGER : SHORTER; NEXT(); - break; - case '{': - NEXT(); - m = scannum(v); - if (EAT(',')) - { - if (SEE(DIGIT)) - n = scannum(v); - else - n = INFINITY; - if (m > n) - { - ERR(REG_BADBR); - return; - } - /* {m,n} exercises preference, even if it's {m,m} */ - qprefer = (v->nextvalue) ? LONGER : SHORTER; - } + break; + case '+': + m = 1; + n = INFINITY; + qprefer = (v->nextvalue) ? LONGER : SHORTER; + NEXT(); + break; + case '?': + m = 0; + n = 1; + qprefer = (v->nextvalue) ? LONGER : SHORTER; + NEXT(); + break; + case '{': + NEXT(); + m = scannum(v); + if (EAT(',')) { + if (SEE(DIGIT)) + n = scannum(v); else - { - n = m; - /* {m} passes operand's preference through */ - qprefer = 0; - } - if (!SEE('}')) - { /* catches errors too */ + n = INFINITY; + if (m > n) { ERR(REG_BADBR); return; } - NEXT(); - break; - default: /* no quantifier */ - m = n = 1; + /* {m,n} exercises preference, even if it's {m,m} */ + qprefer = (v->nextvalue) ? LONGER : SHORTER; + } else { + n = m; + /* {m} passes operand's preference through */ qprefer = 0; - break; + } + if (!SEE('}')) { /* catches errors too */ + ERR(REG_BADBR); + return; + } + NEXT(); + break; + default: /* no quantifier */ + m = n = 1; + qprefer = 0; + break; } /* annoying special case: {0} or {0,0} cancels everything */ - if (m == 0 && n == 0) - { + if (m == 0 && n == 0) { if (atom != NULL) freesubre(v, atom); if (atomtype == '(') @@ -1033,8 +975,7 @@ parseqatom(struct vars * v, /* if not a messy case, avoid hard part */ assert(!MESSY(top->flags)); f = top->flags | qprefer | ((atom != NULL) ? atom->flags : 0); - if (atomtype != '(' && atomtype != BACKREF && !MESSY(UP(f))) - { + if (atomtype != '(' && atomtype != BACKREF && !MESSY(UP(f))) { if (!(m == 1 && n == 1)) repeat(v, lp, rp, m, n); if (atom != NULL) @@ -1044,14 +985,13 @@ parseqatom(struct vars * v, } /* - * hard part: something messy That is, capturing parens, back - * reference, short/long clash, or an atom with substructure - * containing one of those. 
- */ + * hard part: something messy + * That is, capturing parens, back reference, short/long clash, or + * an atom with substructure containing one of those. + */ /* now we'll need a subre for the contents even if they're boring */ - if (atom == NULL) - { + if (atom == NULL) { atom = subre(v, '=', 0, lp, rp); NOERR(); } @@ -1059,11 +999,12 @@ parseqatom(struct vars * v, /* * prepare a general-purpose state skeleton * - * ---> [s] ---prefix---> [begin] ---atom---> [end] ----rest---> [rp] / / + * ---> [s] ---prefix---> [begin] ---atom---> [end] ----rest---> [rp] + * / / * [lp] ----> [s2] ----bypass--------------------- * * where bypass is an empty, and prefix is some repetitions of atom - */ + */ s = newstate(v->nfa); /* first, new endpoints for the atom */ s2 = newstate(v->nfa); NOERR(); @@ -1092,23 +1033,21 @@ parseqatom(struct vars * v, top->right = t; /* if it's a backref, now is the time to replicate the subNFA */ - if (atomtype == BACKREF) - { - assert(atom->begin->nouts == 1); /* just the EMPTY */ + if (atomtype == BACKREF) { + assert(atom->begin->nouts == 1); /* just the EMPTY */ delsub(v->nfa, atom->begin, atom->end); assert(v->subs[subno] != NULL); /* and here's why the recursion got postponed: it must */ /* wait until the skeleton is filled in, because it may */ /* hit a backref that wants to copy the filled-in skeleton */ dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end, - atom->begin, atom->end); + atom->begin, atom->end); NOERR(); } /* it's quantifier time; first, turn x{0,...} into x{1,...}|empty */ - if (m == 0) - { - EMPTYARC(s2, atom->end); /* the bypass */ + if (m == 0) { + EMPTYARC(s2, atom->end); /* the bypass */ assert(PREF(qprefer) != 0); f = COMBINE(qprefer, atom->flags); t = subre(v, '|', f, lp, atom->end); @@ -1124,30 +1063,25 @@ parseqatom(struct vars * v, } /* deal with the rest of the quantifier */ - if (atomtype == BACKREF) - { + if (atomtype == BACKREF) { /* special case: backrefs have internal quantifiers */ - EMPTYARC(s, 
atom->begin); /* empty prefix */ + EMPTYARC(s, atom->begin); /* empty prefix */ /* just stuff everything into atom */ repeat(v, atom->begin, atom->end, m, n); - atom->min = (short) m; - atom->max = (short) n; + atom->min = (short)m; + atom->max = (short)n; atom->flags |= COMBINE(qprefer, atom->flags); - } - else if (m == 1 && n == 1) - { + } else if (m == 1 && n == 1) { /* no/vacuous quantifier: done */ - EMPTYARC(s, atom->begin); /* empty prefix */ - } - else - { + EMPTYARC(s, atom->begin); /* empty prefix */ + } else { /* turn x{m,n} into x{m-1,n-1}x, with capturing */ - /* parens in only second x */ + /* parens in only second x */ dupnfa(v->nfa, atom->begin, atom->end, s, atom->begin); assert(m >= 1 && m != INFINITY && n >= 1); - repeat(v, s, atom->begin, m - 1, (n == INFINITY) ? n : n - 1); + repeat(v, s, atom->begin, m-1, (n == INFINITY) ? n : n-1); f = COMBINE(qprefer, atom->flags); - t = subre(v, '.', f, s, atom->end); /* prefix and atom */ + t = subre(v, '.', f, s, atom->end); /* prefix and atom */ NOERR(); t->left = subre(v, '=', PREF(f), s, atom->begin); NOERR(); @@ -1159,8 +1093,7 @@ parseqatom(struct vars * v, t = top->right; if (!(SEE('|') || SEE(stopper) || SEE(EOS))) t->right = parsebranch(v, stopper, type, atom->end, rp, 1); - else - { + else { EMPTYARC(atom->end, rp); t->right = subre(v, '=', 0, atom->end, rp); } @@ -1170,15 +1103,17 @@ parseqatom(struct vars * v, } /* - * nonword - generate arcs for non-word-character ahead or behind + - nonword - generate arcs for non-word-character ahead or behind + ^ static VOID nonword(struct vars *, int, struct state *, struct state *); */ -static void -nonword(struct vars * v, - int dir, /* AHEAD or BEHIND */ - struct state * lp, - struct state * rp) +static VOID +nonword(v, dir, lp, rp) +struct vars *v; +int dir; /* AHEAD or BEHIND */ +struct state *lp; +struct state *rp; { - int anchor = (dir == AHEAD) ? '$' : '^'; + int anchor = (dir == AHEAD) ? 
'$' : '^'; assert(dir == AHEAD || dir == BEHIND); newarc(v->nfa, anchor, 1, lp, rp); @@ -1188,13 +1123,15 @@ nonword(struct vars * v, } /* - * word - generate arcs for word character ahead or behind + - word - generate arcs for word character ahead or behind + ^ static VOID word(struct vars *, int, struct state *, struct state *); */ -static void -word(struct vars * v, - int dir, /* AHEAD or BEHIND */ - struct state * lp, - struct state * rp) +static VOID +word(v, dir, lp, rp) +struct vars *v; +int dir; /* AHEAD or BEHIND */ +struct state *lp; +struct state *rp; { assert(dir == AHEAD || dir == BEHIND); cloneouts(v->nfa, v->wordchrs, lp, rp, dir); @@ -1202,20 +1139,20 @@ word(struct vars * v, } /* - * scannum - scan a number + - scannum - scan a number + ^ static int scannum(struct vars *); */ -static int /* value, <= DUPMAX */ -scannum(struct vars * v) +static int /* value, <= DUPMAX */ +scannum(v) +struct vars *v; { - int n = 0; + int n = 0; - while (SEE(DIGIT) && n < DUPMAX) - { - n = n * 10 + v->nextvalue; + while (SEE(DIGIT) && n < DUPMAX) { + n = n*10 + v->nextvalue; NEXT(); } - if (SEE(DIGIT) || n > DUPMAX) - { + if (SEE(DIGIT) || n > DUPMAX) { ERR(REG_BADBR); return 0; } @@ -1223,105 +1160,107 @@ scannum(struct vars * v) } /* - * repeat - replicate subNFA for quantifiers - * + - repeat - replicate subNFA for quantifiers * The duplication sequences used here are chosen carefully so that any * pointers starting out pointing into the subexpression end up pointing into * the last occurrence. (Note that it may not be strung between the same * left and right end states, however!) This used to be important for the * subRE tree, although the important bits are now handled by the in-line * code in parse(), and when this is called, it doesn't matter any more. 
+ ^ static VOID repeat(struct vars *, struct state *, struct state *, int, int); */ -static void -repeat(struct vars * v, - struct state * lp, - struct state * rp, - int m, - int n) +static VOID +repeat(v, lp, rp, m, n) +struct vars *v; +struct state *lp; +struct state *rp; +int m; +int n; { -#define SOME 2 -#define INF 3 -#define PAIR(x, y) ((x)*4 + (y)) -#define REDUCE(x) ( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) ) - const int rm = REDUCE(m); - const int rn = REDUCE(n); +# define SOME 2 +# define INF 3 +# define PAIR(x, y) ((x)*4 + (y)) +# define REDUCE(x) ( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) ) + CONST int rm = REDUCE(m); + CONST int rn = REDUCE(n); struct state *s; struct state *s2; - switch (PAIR(rm, rn)) - { - case PAIR(0, 0): /* empty string */ - delsub(v->nfa, lp, rp); - EMPTYARC(lp, rp); - break; - case PAIR(0, 1): /* do as x| */ - EMPTYARC(lp, rp); - break; - case PAIR(0, SOME): /* do as x{1,n}| */ - repeat(v, lp, rp, 1, n); - NOERR(); - EMPTYARC(lp, rp); - break; - case PAIR(0, INF): /* loop x around */ - s = newstate(v->nfa); - NOERR(); - moveouts(v->nfa, lp, s); - moveins(v->nfa, rp, s); - EMPTYARC(lp, s); - EMPTYARC(s, rp); - break; - case PAIR(1, 1): /* no action required */ - break; - case PAIR(1, SOME): /* do as x{0,n-1}x = (x{1,n-1}|)x */ - s = newstate(v->nfa); - NOERR(); - moveouts(v->nfa, lp, s); - dupnfa(v->nfa, s, rp, lp, s); - NOERR(); - repeat(v, lp, s, 1, n - 1); - NOERR(); - EMPTYARC(lp, s); - break; - case PAIR(1, INF): /* add loopback arc */ - s = newstate(v->nfa); - s2 = newstate(v->nfa); - NOERR(); - moveouts(v->nfa, lp, s); - moveins(v->nfa, rp, s2); - EMPTYARC(lp, s); - EMPTYARC(s2, rp); - EMPTYARC(s2, s); - break; - case PAIR(SOME, SOME): /* do as x{m-1,n-1}x */ - s = newstate(v->nfa); - NOERR(); - moveouts(v->nfa, lp, s); - dupnfa(v->nfa, s, rp, lp, s); - NOERR(); - repeat(v, lp, s, m - 1, n - 1); - break; - case PAIR(SOME, INF): /* do as x{m-1,}x */ - s = newstate(v->nfa); - NOERR(); - moveouts(v->nfa, 
lp, s); - dupnfa(v->nfa, s, rp, lp, s); - NOERR(); - repeat(v, lp, s, m - 1, n); - break; - default: - ERR(REG_ASSERT); - break; + switch (PAIR(rm, rn)) { + case PAIR(0, 0): /* empty string */ + delsub(v->nfa, lp, rp); + EMPTYARC(lp, rp); + break; + case PAIR(0, 1): /* do as x| */ + EMPTYARC(lp, rp); + break; + case PAIR(0, SOME): /* do as x{1,n}| */ + repeat(v, lp, rp, 1, n); + NOERR(); + EMPTYARC(lp, rp); + break; + case PAIR(0, INF): /* loop x around */ + s = newstate(v->nfa); + NOERR(); + moveouts(v->nfa, lp, s); + moveins(v->nfa, rp, s); + EMPTYARC(lp, s); + EMPTYARC(s, rp); + break; + case PAIR(1, 1): /* no action required */ + break; + case PAIR(1, SOME): /* do as x{0,n-1}x = (x{1,n-1}|)x */ + s = newstate(v->nfa); + NOERR(); + moveouts(v->nfa, lp, s); + dupnfa(v->nfa, s, rp, lp, s); + NOERR(); + repeat(v, lp, s, 1, n-1); + NOERR(); + EMPTYARC(lp, s); + break; + case PAIR(1, INF): /* add loopback arc */ + s = newstate(v->nfa); + s2 = newstate(v->nfa); + NOERR(); + moveouts(v->nfa, lp, s); + moveins(v->nfa, rp, s2); + EMPTYARC(lp, s); + EMPTYARC(s2, rp); + EMPTYARC(s2, s); + break; + case PAIR(SOME, SOME): /* do as x{m-1,n-1}x */ + s = newstate(v->nfa); + NOERR(); + moveouts(v->nfa, lp, s); + dupnfa(v->nfa, s, rp, lp, s); + NOERR(); + repeat(v, lp, s, m-1, n-1); + break; + case PAIR(SOME, INF): /* do as x{m-1,}x */ + s = newstate(v->nfa); + NOERR(); + moveouts(v->nfa, lp, s); + dupnfa(v->nfa, s, rp, lp, s); + NOERR(); + repeat(v, lp, s, m-1, n); + break; + default: + ERR(REG_ASSERT); + break; } } /* - * bracket - handle non-complemented bracket expression + - bracket - handle non-complemented bracket expression * Also called from cbracket for complemented bracket expressions. 
+ ^ static VOID bracket(struct vars *, struct state *, struct state *); */ -static void -bracket(struct vars * v, - struct state * lp, - struct state * rp) +static VOID +bracket(v, lp, rp) +struct vars *v; +struct state *lp; +struct state *rp; { assert(SEE('[')); NEXT(); @@ -1332,29 +1271,31 @@ bracket(struct vars * v, } /* - * cbracket - handle complemented bracket expression + - cbracket - handle complemented bracket expression * We do it by calling bracket() with dummy endpoints, and then complementing - * the result. The alternative would be to invoke rainbow(), and then delete + * the result. The alternative would be to invoke rainbow(), and then delete * arcs as the b.e. is seen... but that gets messy. + ^ static VOID cbracket(struct vars *, struct state *, struct state *); */ -static void -cbracket(struct vars * v, - struct state * lp, - struct state * rp) +static VOID +cbracket(v, lp, rp) +struct vars *v; +struct state *lp; +struct state *rp; { struct state *left = newstate(v->nfa); struct state *right = newstate(v->nfa); struct state *s; - struct arc *a; /* arc from lp */ - struct arc *ba; /* arc from left, from bracket() */ - struct arc *pa; /* MCCE-prototype arc */ - color co; - chr *p; - int i; + struct arc *a; /* arc from lp */ + struct arc *ba; /* arc from left, from bracket() */ + struct arc *pa; /* MCCE-prototype arc */ + color co; + chr *p; + int i; NOERR(); bracket(v, left, right); - if (v->cflags & REG_NLSTOP) + if (v->cflags&REG_NLSTOP) newarc(v->nfa, PLAIN, v->nlcolor, left, right); NOERR(); @@ -1363,8 +1304,7 @@ cbracket(struct vars * v, /* easy part of complementing */ colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp); NOERR(); - if (v->mcces == NULL) - { /* no MCCEs -- we're done */ + if (v->mcces == NULL) { /* no MCCEs -- we're done */ dropstate(v->nfa, left); assert(right->nins == 0); freestate(v->nfa, right); @@ -1373,39 +1313,33 @@ cbracket(struct vars * v, /* but complementing gets messy in the presence of MCCEs...
*/ NOTE(REG_ULOCALE); - for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--) - { + for (p = v->mcces->chrs, i = v->mcces->nchrs; i > 0; p++, i--) { co = GETCOLOR(v->cm, *p); a = findarc(lp, PLAIN, co); ba = findarc(left, PLAIN, co); - if (ba == NULL) - { + if (ba == NULL) { assert(a != NULL); freearc(v->nfa, a); - } - else + } else { assert(a == NULL); + } s = newstate(v->nfa); NOERR(); newarc(v->nfa, PLAIN, co, lp, s); NOERR(); pa = findarc(v->mccepbegin, PLAIN, co); assert(pa != NULL); - if (ba == NULL) - { /* easy case, need all of them */ + if (ba == NULL) { /* easy case, need all of them */ cloneouts(v->nfa, pa->to, s, rp, PLAIN); newarc(v->nfa, '$', 1, s, rp); newarc(v->nfa, '$', 0, s, rp); colorcomplement(v->nfa, v->cm, AHEAD, pa->to, s, rp); - } - else - { /* must be selective */ - if (findarc(ba->to, '$', 1) == NULL) - { + } else { /* must be selective */ + if (findarc(ba->to, '$', 1) == NULL) { newarc(v->nfa, '$', 1, s, rp); newarc(v->nfa, '$', 0, s, rp); colorcomplement(v->nfa, v->cm, AHEAD, pa->to, - s, rp); + s, rp); } for (pa = pa->to->outs; pa != NULL; pa = pa->outchain) if (findarc(ba->to, PLAIN, pa->co) == NULL) @@ -1422,135 +1356,132 @@ cbracket(struct vars * v, assert(right->nins == 0); freestate(v->nfa, right); } - + /* - * brackpart - handle one item (or range) within a bracket expression + - brackpart - handle one item (or range) within a bracket expression + ^ static VOID brackpart(struct vars *, struct state *, struct state *); */ -static void -brackpart(struct vars * v, - struct state * lp, - struct state * rp) +static VOID +brackpart(v, lp, rp) +struct vars *v; +struct state *lp; +struct state *rp; { - celt startc; - celt endc; + celt startc; + celt endc; struct cvec *cv; - chr *startp; - chr *endp; - chr c[1]; + chr *startp; + chr *endp; + chr c[1]; /* parse something, get rid of special cases, take shortcuts */ - switch (v->nexttype) - { - case RANGE: /* a-b-c or other botch */ - ERR(REG_ERANGE); + switch (v->nexttype) { + case 
RANGE: /* a-b-c or other botch */ + ERR(REG_ERANGE); + return; + break; + case PLAIN: + c[0] = v->nextvalue; + NEXT(); + /* shortcut for ordinary chr (not range, not MCCE leader) */ + if (!SEE(RANGE) && !ISCELEADER(v, c[0])) { + onechr(v, c[0], lp, rp); return; + } + startc = element(v, c, c+1); + NOERR(); + break; + case COLLEL: + startp = v->now; + endp = scanplain(v); + INSIST(startp < endp, REG_ECOLLATE); + NOERR(); + startc = element(v, startp, endp); + NOERR(); + break; + case ECLASS: + startp = v->now; + endp = scanplain(v); + INSIST(startp < endp, REG_ECOLLATE); + NOERR(); + startc = element(v, startp, endp); + NOERR(); + cv = eclass(v, startc, (v->cflags&REG_ICASE)); + NOERR(); + dovec(v, cv, lp, rp); + return; break; + case CCLASS: + startp = v->now; + endp = scanplain(v); + INSIST(startp < endp, REG_ECTYPE); + NOERR(); + cv = cclass(v, startp, endp, (v->cflags&REG_ICASE)); + NOERR(); + dovec(v, cv, lp, rp); + return; + break; + default: + ERR(REG_ASSERT); + return; + break; + } + + if (SEE(RANGE)) { + NEXT(); + switch (v->nexttype) { case PLAIN: + case RANGE: c[0] = v->nextvalue; NEXT(); - /* shortcut for ordinary chr (not range, not MCCE leader) */ - if (!SEE(RANGE) && !ISCELEADER(v, c[0])) - { - onechr(v, c[0], lp, rp); - return; - } - startc = element(v, c, c + 1); + endc = element(v, c, c+1); NOERR(); - break; + break; case COLLEL: startp = v->now; endp = scanplain(v); INSIST(startp < endp, REG_ECOLLATE); NOERR(); - startc = element(v, startp, endp); + endc = element(v, startp, endp); NOERR(); break; - case ECLASS: - startp = v->now; - endp = scanplain(v); - INSIST(startp < endp, REG_ECOLLATE); - NOERR(); - startc = element(v, startp, endp); - NOERR(); - cv = eclass(v, startc, (v->cflags & REG_ICASE)); - NOERR(); - dovec(v, cv, lp, rp); - return; - break; - case CCLASS: - startp = v->now; - endp = scanplain(v); - INSIST(startp < endp, REG_ECTYPE); - NOERR(); - cv = cclass(v, startp, endp, (v->cflags & REG_ICASE)); - NOERR(); - dovec(v, cv, lp, rp); - 
return; - break; default: - ERR(REG_ASSERT); + ERR(REG_ERANGE); return; - break; - } - - if (SEE(RANGE)) - { - NEXT(); - switch (v->nexttype) - { - case PLAIN: - case RANGE: - c[0] = v->nextvalue; - NEXT(); - endc = element(v, c, c + 1); - NOERR(); - break; - case COLLEL: - startp = v->now; - endp = scanplain(v); - INSIST(startp < endp, REG_ECOLLATE); - NOERR(); - endc = element(v, startp, endp); - NOERR(); - break; - default: - ERR(REG_ERANGE); - return; - break; - } - } - else + break; + } + } else endc = startc; /* - * Ranges are unportable. Actually, standard C does guarantee that - * digits are contiguous, but making that an exception is just too - * complicated. - */ + * Ranges are unportable. Actually, standard C does + * guarantee that digits are contiguous, but making + * that an exception is just too complicated. + */ if (startc != endc) NOTE(REG_UUNPORT); - cv = range(v, startc, endc, (v->cflags & REG_ICASE)); + cv = range(v, startc, endc, (v->cflags&REG_ICASE)); NOERR(); dovec(v, cv, lp, rp); } /* - * scanplain - scan PLAIN contents of [. etc. - * + - scanplain - scan PLAIN contents of [. etc. * Certain bits of trickery in lex.c know that this code does not try * to look past the final bracket of the [. etc. + ^ static chr *scanplain(struct vars *); */ -static chr * /* just after end of sequence */ -scanplain(struct vars * v) +static chr * /* just after end of sequence */ +scanplain(v) +struct vars *v; { - chr *endp; + chr *endp; assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS)); NEXT(); endp = v->now; - while (SEE(PLAIN)) - { + while (SEE(PLAIN)) { endp = v->now; NEXT(); } @@ -1562,17 +1493,19 @@ scanplain(struct vars * v) } /* - * leaders - process a cvec of collating elements to also include leaders + - leaders - process a cvec of collating elements to also include leaders * Also gives all characters involved their own colors, which is almost * certainly necessary, and sets up little disconnected subNFA. 
+ ^ static VOID leaders(struct vars *, struct cvec *); */ -static void -leaders(struct vars * v, - struct cvec * cv) +static VOID +leaders(v, cv) +struct vars *v; +struct cvec *cv; { - int mcce; - chr *p; - chr leader; + int mcce; + chr *p; + chr leader; struct state *s; struct arc *a; @@ -1580,20 +1513,16 @@ leaders(struct vars * v, v->mccepend = newstate(v->nfa); NOERR(); - for (mcce = 0; mcce < cv->nmcces; mcce++) - { + for (mcce = 0; mcce < cv->nmcces; mcce++) { p = cv->mcces[mcce]; leader = *p; - if (!haschr(cv, leader)) - { + if (!haschr(cv, leader)) { addchr(cv, leader); s = newstate(v->nfa); newarc(v->nfa, PLAIN, subcolor(v->cm, leader), - v->mccepbegin, s); + v->mccepbegin, s); okcolors(v->nfa, v->cm); - } - else - { + } else { a = findarc(v->mccepbegin, PLAIN, GETCOLOR(v->cm, leader)); assert(a != NULL); @@ -1601,25 +1530,25 @@ leaders(struct vars * v, assert(s != v->mccepend); } p++; - assert(*p != 0 && *(p + 1) == 0); /* only 2-char MCCEs for - * now */ + assert(*p != 0 && *(p+1) == 0); /* only 2-char MCCEs for now */ newarc(v->nfa, PLAIN, subcolor(v->cm, *p), s, v->mccepend); okcolors(v->nfa, v->cm); } } /* - * onechr - fill in arcs for a plain character, and possible case complements + - onechr - fill in arcs for a plain character, and possible case complements * This is mostly a shortcut for efficient handling of the common case. + ^ static VOID onechr(struct vars *, pchr, struct state *, struct state *); */ -static void -onechr(struct vars * v, - chr c, - struct state * lp, - struct state * rp) +static VOID +onechr(v, c, lp, rp) +struct vars *v; +pchr c; +struct state *lp; +struct state *rp; { - if (!(v->cflags & REG_ICASE)) - { + if (!(v->cflags&REG_ICASE)) { newarc(v->nfa, PLAIN, subcolor(v->cm, c), lp, rp); return; } @@ -1629,68 +1558,61 @@ onechr(struct vars * v, } /* - * dovec - fill in arcs for each element of a cvec + - dovec - fill in arcs for each element of a cvec * This one has to handle the messy cases, like MCCEs and MCCE leaders. 
+ ^ static VOID dovec(struct vars *, struct cvec *, struct state *, + ^ struct state *); */ -static void -dovec(struct vars * v, - struct cvec * cv, - struct state * lp, - struct state * rp) +static VOID +dovec(v, cv, lp, rp) +struct vars *v; +struct cvec *cv; +struct state *lp; +struct state *rp; { - chr ch, - from, - to; - celt ce; - chr *p; - int i; - color co; + chr ch, from, to; + celt ce; + chr *p; + int i; + color co; struct cvec *leads; struct arc *a; - struct arc *pa; /* arc in prototype */ + struct arc *pa; /* arc in prototype */ struct state *s; - struct state *ps; /* state in prototype */ + struct state *ps; /* state in prototype */ /* need a place to store leaders, if any */ - if (nmcces(v) > 0) - { + if (nmcces(v) > 0) { assert(v->mcces != NULL); - if (v->cv2 == NULL || v->cv2->nchrs < v->mcces->nchrs) - { + if (v->cv2 == NULL || v->cv2->nchrs < v->mcces->nchrs) { if (v->cv2 != NULL) free(v->cv2); v->cv2 = newcvec(v->mcces->nchrs, 0, v->mcces->nmcces); NOERR(); leads = v->cv2; - } - else + } else leads = clearcvec(v->cv2); - } - else + } else leads = NULL; /* first, get the ordinary characters out of the way */ - for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) - { + for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) { ch = *p; if (!ISCELEADER(v, ch)) newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp); - else - { + else { assert(singleton(v->cm, ch)); assert(leads != NULL); if (!haschr(leads, ch)) addchr(leads, ch); - } + } } /* and the ranges */ - for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) - { + for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) { from = *p; - to = *(p + 1); - while (from <= to && (ce = nextleader(v, from, to)) != NOCELT) - { + to = *(p+1); + while (from <= to && (ce = nextleader(v, from, to)) != NOCELT) { if (from < ce) subrange(v, from, ce - 1, lp, rp); assert(singleton(v->cm, ce)); @@ -1708,14 +1630,12 @@ dovec(struct vars * v, /* deal with the MCCE leaders */ NOTE(REG_ULOCALE); - for (p = leads->chrs, i = 
leads->nchrs; i > 0; p++, i--) - { + for (p = leads->chrs, i = leads->nchrs; i > 0; p++, i--) { co = GETCOLOR(v->cm, *p); a = findarc(lp, PLAIN, co); if (a != NULL) s = a->to; - else - { + else { s = newstate(v->nfa); NOERR(); newarc(v->nfa, PLAIN, co, lp, s); @@ -1731,12 +1651,10 @@ dovec(struct vars * v, } /* and the MCCEs */ - for (i = 0; i < cv->nmcces; i++) - { + for (i = 0; i < cv->nmcces; i++) { p = cv->mcces[i]; assert(singleton(v->cm, *p)); - if (!singleton(v->cm, *p)) - { + if (!singleton(v->cm, *p)) { ERR(REG_ASSERT); return; } @@ -1745,41 +1663,41 @@ dovec(struct vars * v, a = findarc(lp, PLAIN, co); if (a != NULL) s = a->to; - else - { + else { s = newstate(v->nfa); NOERR(); newarc(v->nfa, PLAIN, co, lp, s); NOERR(); } - assert(*p != 0); /* at least two chars */ + assert(*p != 0); /* at least two chars */ assert(singleton(v->cm, *p)); ch = *p++; co = GETCOLOR(v->cm, ch); - assert(*p == 0); /* and only two, for now */ + assert(*p == 0); /* and only two, for now */ newarc(v->nfa, PLAIN, co, s, rp); NOERR(); } } /* - * nextleader - find next MCCE leader within range + - nextleader - find next MCCE leader within range + ^ static celt nextleader(struct vars *, pchr, pchr); */ -static celt /* NOCELT means none */ -nextleader(struct vars * v, - chr from, - chr to) +static celt /* NOCELT means none */ +nextleader(v, from, to) +struct vars *v; +pchr from; +pchr to; { - int i; - chr *p; - chr ch; - celt it = NOCELT; + int i; + chr *p; + chr ch; + celt it = NOCELT; if (v->mcces == NULL) return it; - for (i = v->mcces->nchrs, p = v->mcces->chrs; i > 0; i--, p++) - { + for (i = v->mcces->nchrs, p = v->mcces->chrs; i > 0; i--, p++) { ch = *p; if (from <= ch && ch <= to) if (it == NOCELT || ch < it) @@ -1789,30 +1707,30 @@ nextleader(struct vars * v, } /* - * wordchrs - set up word-chr list for word-boundary stuff, if needed - * + - wordchrs - set up word-chr list for word-boundary stuff, if needed * The list is kept as a bunch of arcs between two dummy states; it's 
* disposed of by the unreachable-states sweep in NFA optimization. * Does NEXT(). Must not be called from any unusual lexical context. * This should be reconciled with the \w etc. handling in lex.c, and * should be cleaned up to reduce dependencies on input scanning. + ^ static VOID wordchrs(struct vars *); */ -static void -wordchrs(struct vars * v) +static VOID +wordchrs(v) +struct vars *v; { struct state *left; struct state *right; - if (v->wordchrs != NULL) - { - NEXT(); /* for consistency */ + if (v->wordchrs != NULL) { + NEXT(); /* for consistency */ return; } left = newstate(v->nfa); right = newstate(v->nfa); NOERR(); - /* fine point: implemented with [::], and lexer will set REG_ULOCALE */ + /* fine point: implemented with [::], and lexer will set REG_ULOCALE */ lexword(v); NEXT(); assert(v->savenow != NULL && SEE('[')); @@ -1824,28 +1742,29 @@ wordchrs(struct vars * v) } /* - * subre - allocate a subre + - subre - allocate a subre + ^ static struct subre *subre(struct vars *, int, int, struct state *, + ^ struct state *); */ static struct subre * -subre(struct vars * v, - int op, - int flags, - struct state * begin, - struct state * end) +subre(v, op, flags, begin, end) +struct vars *v; +int op; +int flags; +struct state *begin; +struct state *end; { struct subre *ret; ret = v->treefree; if (ret != NULL) v->treefree = ret->left; - else - { - ret = (struct subre *) MALLOC(sizeof(struct subre)); - if (ret == NULL) - { + else { + ret = (struct subre *)MALLOC(sizeof(struct subre)); + if (ret == NULL) { ERR(REG_ESPACE); return NULL; - } + } ret->chain = v->treechain; v->treechain = ret; } @@ -1867,11 +1786,13 @@ subre(struct vars * v, } /* - * freesubre - free a subRE subtree + - freesubre - free a subRE subtree + ^ static VOID freesubre(struct vars *, struct subre *); */ -static void -freesubre(struct vars * v, /* might be NULL */ - struct subre * sr) +static VOID +freesubre(v, sr) +struct vars *v; /* might be NULL */ +struct subre *sr; { if (sr == NULL) 
return; @@ -1885,11 +1806,13 @@ freesubre(struct vars * v, /* might be NULL */ } /* - * freesrnode - free one node in a subRE subtree + - freesrnode - free one node in a subRE subtree + ^ static VOID freesrnode(struct vars *, struct subre *); */ -static void -freesrnode(struct vars * v, /* might be NULL */ - struct subre * sr) +static VOID +freesrnode(v, sr) +struct vars *v; /* might be NULL */ +struct subre *sr; { if (sr == NULL) return; @@ -1898,21 +1821,21 @@ freesrnode(struct vars * v, /* might be NULL */ freecnfa(&sr->cnfa); sr->flags = 0; - if (v != NULL) - { + if (v != NULL) { sr->left = v->treefree; v->treefree = sr; - } - else + } else FREE(sr); } /* - * optst - optimize a subRE subtree + - optst - optimize a subRE subtree + ^ static VOID optst(struct vars *, struct subre *); */ -static void -optst(struct vars * v, - struct subre * t) +static VOID +optst(v, t) +struct vars *v; +struct subre *t; { if (t == NULL) return; @@ -1925,18 +1848,20 @@ optst(struct vars * v, } /* - * numst - number tree nodes (assigning retry indexes) + - numst - number tree nodes (assigning retry indexes) + ^ static int numst(struct subre *, int); */ -static int /* next number */ -numst(struct subre * t, - int start) /* starting point for subtree numbers */ +static int /* next number */ +numst(t, start) +struct subre *t; +int start; /* starting point for subtree numbers */ { - int i; + int i; assert(t != NULL); i = start; - t->retry = (short) i++; + t->retry = (short)i++; if (t->left != NULL) i = numst(t->left, i); if (t->right != NULL) @@ -1945,10 +1870,12 @@ numst(struct subre * t, } /* - * markst - mark tree nodes as INUSE + - markst - mark tree nodes as INUSE + ^ static VOID markst(struct subre *); */ -static void -markst(struct subre * t) +static VOID +markst(t) +struct subre *t; { assert(t != NULL); @@ -1960,69 +1887,68 @@ markst(struct subre * t) } /* - * cleanst - free any tree nodes not marked INUSE + - cleanst - free any tree nodes not marked INUSE + ^ static VOID 
cleanst(struct vars *); */ -static void -cleanst(struct vars * v) +static VOID +cleanst(v) +struct vars *v; { struct subre *t; struct subre *next; - for (t = v->treechain; t != NULL; t = next) - { + for (t = v->treechain; t != NULL; t = next) { next = t->chain; - if (!(t->flags & INUSE)) + if (!(t->flags&INUSE)) FREE(t); - } + } v->treechain = NULL; - v->treefree = NULL; /* just on general principles */ + v->treefree = NULL; /* just on general principles */ } /* - * nfatree - turn a subRE subtree into a tree of compacted NFAs + - nfatree - turn a subRE subtree into a tree of compacted NFAs + ^ static long nfatree(struct vars *, struct subre *, FILE *); */ -static long /* optimize results from top node */ -nfatree(struct vars * v, - struct subre * t, - FILE *f) /* for debug output */ +static long /* optimize results from top node */ +nfatree(v, t, f) +struct vars *v; +struct subre *t; +FILE *f; /* for debug output */ { assert(t != NULL && t->begin != NULL); if (t->left != NULL) - (DISCARD) nfatree(v, t->left, f); + (DISCARD)nfatree(v, t->left, f); if (t->right != NULL) - (DISCARD) nfatree(v, t->right, f); + (DISCARD)nfatree(v, t->right, f); return nfanode(v, t, f); } /* - * nfanode - do one NFA for nfatree + - nfanode - do one NFA for nfatree + ^ static long nfanode(struct vars *, struct subre *, FILE *); */ -static long /* optimize results */ -nfanode(struct vars * v, - struct subre * t, - FILE *f) /* for debug output */ +static long /* optimize results */ +nfanode(v, t, f) +struct vars *v; +struct subre *t; +FILE *f; /* for debug output */ { struct nfa *nfa; - long ret = 0; + long ret = 0; + char idbuf[50]; assert(t->begin != NULL); -#ifdef REG_DEBUG if (f != NULL) - { - char idbuf[50]; - fprintf(f, "\n\n\n========= TREE NODE %s ==========\n", - stid(t, idbuf, sizeof(idbuf))); - } -#endif + stid(t, idbuf, sizeof(idbuf))); nfa = newnfa(v, v->cm, v->nfa); NOERRZ(); dupnfa(nfa, t->begin, t->end, nfa->init, nfa->final); - if (!ISERR()) - { + if (!ISERR()) { 
specialcolors(nfa); ret = optimize(nfa, f); } @@ -2034,31 +1960,29 @@ nfanode(struct vars * v, } /* - * newlacon - allocate a lookahead-constraint subRE + - newlacon - allocate a lookahead-constraint subRE + ^ static int newlacon(struct vars *, struct state *, struct state *, int); */ -static int /* lacon number */ -newlacon(struct vars * v, - struct state * begin, - struct state * end, - int pos) +static int /* lacon number */ +newlacon(v, begin, end, pos) +struct vars *v; +struct state *begin; +struct state *end; +int pos; { - int n; + int n; struct subre *sub; - if (v->nlacons == 0) - { - v->lacons = (struct subre *) MALLOC(2 * sizeof(struct subre)); - n = 1; /* skip 0th */ + if (v->nlacons == 0) { + v->lacons = (struct subre *)MALLOC(2 * sizeof(struct subre)); + n = 1; /* skip 0th */ v->nlacons = 2; - } - else - { - v->lacons = (struct subre *) REALLOC(v->lacons, - (v->nlacons + 1) * sizeof(struct subre)); + } else { + v->lacons = (struct subre *)REALLOC(v->lacons, + (v->nlacons+1)*sizeof(struct subre)); n = v->nlacons++; - } - if (v->lacons == NULL) - { + } + if (v->lacons == NULL) { ERR(REG_ESPACE); return 0; } @@ -2071,14 +1995,16 @@ newlacon(struct vars * v, } /* - * freelacons - free lookahead-constraint subRE vector + - freelacons - free lookahead-constraint subRE vector + ^ static VOID freelacons(struct subre *, int); */ -static void -freelacons(struct subre * subs, - int n) +static VOID +freelacons(subs, n) +struct subre *subs; +int n; { struct subre *sub; - int i; + int i; assert(n > 0); for (sub = subs + 1, i = n - 1; i > 0; sub++, i--) /* no 0th */ @@ -2088,24 +2014,26 @@ freelacons(struct subre * subs, } /* - * rfree - free a whole RE (insides of regfree) + - rfree - free a whole RE (insides of regfree) + ^ static VOID rfree(regex_t *); */ -static void -rfree(regex_t *re) +static VOID +rfree(re) +regex_t *re; { struct guts *g; if (re == NULL || re->re_magic != REMAGIC) return; - re->re_magic = 0; /* invalidate RE */ - g = (struct guts *) 
re->re_guts; + re->re_magic = 0; /* invalidate RE */ + g = (struct guts *)re->re_guts; re->re_guts = NULL; re->re_fns = NULL; g->magic = 0; freecm(&g->cmap); if (g->tree != NULL) - freesubre((struct vars *) NULL, g->tree); + freesubre((struct vars *)NULL, g->tree); if (g->lacons != NULL) freelacons(g->lacons, g->nlacons); if (!NULLCNFA(g->search)) @@ -2113,58 +2041,59 @@ rfree(regex_t *re) FREE(g); } -#ifdef REG_DEBUG - /* - * dump - dump an RE in human-readable form + - dump - dump an RE in human-readable form + ^ static VOID dump(regex_t *, FILE *); */ -static void -dump(regex_t *re, - FILE *f) +static VOID +dump(re, f) +regex_t *re; +FILE *f; { +#ifdef REG_DEBUG struct guts *g; - int i; + int i; if (re->re_magic != REMAGIC) fprintf(f, "bad magic number (0x%x not 0x%x)\n", re->re_magic, - REMAGIC); - if (re->re_guts == NULL) - { + REMAGIC); + if (re->re_guts == NULL) { fprintf(f, "NULL guts!!!\n"); return; } - g = (struct guts *) re->re_guts; + g = (struct guts *)re->re_guts; if (g->magic != GUTSMAGIC) fprintf(f, "bad guts magic number (0x%x not 0x%x)\n", g->magic, - GUTSMAGIC); + GUTSMAGIC); fprintf(f, "\n\n\n========= DUMP ==========\n"); - fprintf(f, "nsub %d, info 0%lo, csize %d, ntree %d\n", - re->re_nsub, re->re_info, re->re_csize, g->ntree); + fprintf(f, "nsub %d, info 0%lo, csize %d, ntree %d\n", + re->re_nsub, re->re_info, re->re_csize, g->ntree); dumpcolors(&g->cmap, f); - if (!NULLCNFA(g->search)) - { + if (!NULLCNFA(g->search)) { printf("\nsearch:\n"); dumpcnfa(&g->search, f); } - for (i = 1; i < g->nlacons; i++) - { + for (i = 1; i < g->nlacons; i++) { fprintf(f, "\nla%d (%s):\n", i, (g->lacons[i].subno) ? "positive" : "negative"); dumpcnfa(&g->lacons[i].cnfa, f); } fprintf(f, "\n"); dumpst(g->tree, f, 0); +#endif } /* - * dumpst - dump a subRE tree + - dumpst - dump a subRE tree + ^ static VOID dumpst(struct subre *, FILE *, int); */ -static void -dumpst(struct subre * t, - FILE *f, - int nfapresent) /* is the original NFA still around? 
*/ +static VOID +dumpst(t, f, nfapresent) +struct subre *t; +FILE *f; +int nfapresent; /* is the original NFA still around? */ { if (t == NULL) fprintf(f, "null tree\n"); @@ -2174,45 +2103,45 @@ dumpst(struct subre * t, } /* - * stdump - recursive guts of dumpst + - stdump - recursive guts of dumpst + ^ static VOID stdump(struct subre *, FILE *, int); */ -static void -stdump(struct subre * t, - FILE *f, - int nfapresent) /* is the original NFA still around? */ +static VOID +stdump(t, f, nfapresent) +struct subre *t; +FILE *f; +int nfapresent; /* is the original NFA still around? */ { - char idbuf[50]; + char idbuf[50]; fprintf(f, "%s. `%c'", stid(t, idbuf, sizeof(idbuf)), t->op); - if (t->flags & LONGER) + if (t->flags&LONGER) fprintf(f, " longest"); - if (t->flags & SHORTER) + if (t->flags&SHORTER) fprintf(f, " shortest"); - if (t->flags & MIXED) + if (t->flags&MIXED) fprintf(f, " hasmixed"); - if (t->flags & CAP) + if (t->flags&CAP) fprintf(f, " hascapture"); - if (t->flags & BACKR) + if (t->flags&BACKR) fprintf(f, " hasbackref"); - if (!(t->flags & INUSE)) + if (!(t->flags&INUSE)) fprintf(f, " UNUSED"); if (t->subno != 0) fprintf(f, " (#%d)", t->subno); - if (t->min != 1 || t->max != 1) - { + if (t->min != 1 || t->max != 1) { fprintf(f, " {%d,", t->min); if (t->max != INFINITY) fprintf(f, "%d", t->max); fprintf(f, "}"); } if (nfapresent) - fprintf(f, " %ld-%ld", (long) t->begin->no, (long) t->end->no); + fprintf(f, " %ld-%ld", (long)t->begin->no, (long)t->end->no); if (t->left != NULL) fprintf(f, " L:%s", stid(t->left, idbuf, sizeof(idbuf))); if (t->right != NULL) fprintf(f, " R:%s", stid(t->right, idbuf, sizeof(idbuf))); - if (!NULLCNFA(t->cnfa)) - { + if (!NULLCNFA(t->cnfa)) { fprintf(f, "\n"); dumpcnfa(&t->cnfa, f); fprintf(f, "\n"); @@ -2224,24 +2153,24 @@ stdump(struct subre * t, } /* - * stid - identify a subtree node for dumping + - stid - identify a subtree node for dumping + ^ static char *stid(struct subre *, char *, size_t); */ -static char * /* 
points to buf or constant string */ -stid(struct subre * t, - char *buf, - size_t bufsize) +static char * /* points to buf or constant string */ +stid(t, buf, bufsize) +struct subre *t; +char *buf; +size_t bufsize; { /* big enough for hex int or decimal t->retry? */ - if (bufsize < sizeof(int) * 2 + 3 || bufsize < sizeof(t->retry) * 3 + 1) + if (bufsize < sizeof(int)*2 + 3 || bufsize < sizeof(t->retry)*3 + 1) return "unable"; if (t->retry != 0) sprintf(buf, "%d", t->retry); else - sprintf(buf, "0x%x", (int) t); /* may lose bits, that's okay */ + sprintf(buf, "0x%x", (int)t); /* may lose bits, that's okay */ return buf; } -#endif /* REG_DEBUG */ - #include "regc_lex.c" #include "regc_color.c" diff --git a/src/regex/regcustom.h b/src/regex/regcustom.h index 8f51782d53..e258acd540 100644 --- a/src/regex/regcustom.h +++ b/src/regex/regcustom.h @@ -1,22 +1,19 @@ -#ifndef _REGEX_CUSTOM_H_ -#define _REGEX_CUSTOM_H_ - /* - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * + * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. - * + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL @@ -27,72 +24,97 @@ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $Id$ */ /* headers if any */ +#include "tclInt.h" -/* FreeBSD, Watcom and DMars require this, CW doesn't have nor need it. */ -/* Others also don't seem to need it. If you have an error related to */ -/* (not) including please report details to */ -/* wx-dev@lists.wxwindows.org */ -#if defined(__UNIX__) || defined(__WATCOMC__) || defined(__DIGITALMARS__) -# include -#endif - -#include -#include -#include -#include -#include +/* overrides for regguts.h definitions, if any */ +#define FUNCPTR(name, args) (*name) _ANSI_ARGS_(args) +#define MALLOC(n) ckalloc(n) +#define FREE(p) ckfree(VS(p)) +#define REALLOC(p,n) ckrealloc(VS(p),n) -#include "wx/wxchar.h" -/** -* -* wx_wchar == wxChar -* -*/ -#define wx_wchar wxChar -/* overrides for regguts.h definitions, if any */ -#define FUNCPTR(name, args) (*name) args -#define MALLOC(n) malloc(n) -#define FREE(p) free(VS(p)) -#define REALLOC(p,n) realloc(VS(p),n) +/* + * Do not insert extras between the "begin" and "end" lines -- this + * chunk is automatically extracted to be fitted into regex.h. + */ +/* --- begin --- */ +/* ensure certain things don't sneak in from system headers */ +#ifdef __REG_WIDE_T +#undef __REG_WIDE_T +#endif +#ifdef __REG_WIDE_COMPILE +#undef __REG_WIDE_COMPILE +#endif +#ifdef __REG_WIDE_EXEC +#undef __REG_WIDE_EXEC +#endif +#ifdef __REG_REGOFF_T +#undef __REG_REGOFF_T +#endif +#ifdef __REG_VOID_T +#undef __REG_VOID_T +#endif +#ifdef __REG_CONST +#undef __REG_CONST +#endif +#ifdef __REG_NOFRONT +#undef __REG_NOFRONT +#endif +#ifdef __REG_NOCHAR +#undef __REG_NOCHAR +#endif +/* interface types */ +#define __REG_WIDE_T Tcl_UniChar +#define __REG_REGOFF_T long /* not really right, but good enough... 
*/ +#define __REG_VOID_T VOID +#define __REG_CONST CONST +/* names and declarations */ +#define __REG_WIDE_COMPILE TclReComp +#define __REG_WIDE_EXEC TclReExec +#define __REG_NOFRONT /* don't want regcomp() and regexec() */ +#define __REG_NOCHAR /* or the char versions */ +#define regfree TclReFree +#define regerror TclReError +/* --- end --- */ -/* internal character type and related */ -typedef wx_wchar chr; /* the type itself */ -typedef unsigned long uchr; /* unsigned type that will hold a chr */ -typedef long celt; /* type to hold chr, MCCE number, or - * NOCELT */ -#define NOCELT (-1) /* celt value which is not valid chr or - * MCCE */ -#define CHR(c) ((unsigned char) (c)) /* turn char literal into chr - * literal */ -#define DIGITVAL(c) ((c)-'0') /* turn chr digit into its value */ -#if wxUSE_WCHAR_T -# define CHRBITS (SIZEOF_WCHAR_T << 3) /* bits in a chr; must not use sizeof */ -# define CHR_MAX ((1 << CHRBITS) - 1) -# define CHR_MIN 0x00000000 /* smallest and largest chr; the value */ -#else /*ANSI*/ -# define CHRBITS 8 -# define CHR_MAX 0xFF -# define CHR_MIN 0x00 -#endif /*wxUSE_WCHAR_T*/ +/* internal character type and related */ +typedef Tcl_UniChar chr; /* the type itself */ +typedef int pchr; /* what it promotes to */ +typedef unsigned uchr; /* unsigned type that will hold a chr */ +typedef int celt; /* type to hold chr, MCCE number, or NOCELT */ +#define NOCELT (-1) /* celt value which is not valid chr or MCCE */ +#define CHR(c) (UCHAR(c)) /* turn char literal into chr literal */ +#define DIGITVAL(c) ((c)-'0') /* turn chr digit into its value */ +#if TCL_UTF_MAX > 3 +#define CHRBITS 32 /* bits in a chr; must not use sizeof */ +#define CHR_MIN 0x00000000 /* smallest and largest chr; the value */ +#define CHR_MAX 0xffffffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */ +#else +#define CHRBITS 16 /* bits in a chr; must not use sizeof */ +#define CHR_MIN 0x0000 /* smallest and largest chr; the value */ +#define CHR_MAX 0xffff /* CHR_MAX-CHR_MIN+1 should 
fit in uchr */ +#endif /* functions operating on chr */ -#define iscalnum(x) wx_isalnum(x) -#define iscalpha(x) wx_isalpha(x) -#define iscdigit(x) wx_isdigit(x) -#define iscspace(x) wx_isspace(x) +#define iscalnum(x) Tcl_UniCharIsAlnum(x) +#define iscalpha(x) Tcl_UniCharIsAlpha(x) +#define iscdigit(x) Tcl_UniCharIsDigit(x) +#define iscspace(x) Tcl_UniCharIsSpace(x) -extern int wx_strlen(const wx_wchar* szString); +/* name the external functions */ +#define compile TclReComp +#define exec TclReExec + +/* enable/disable debugging code (by whether REG_DEBUG is defined or not) */ +#if 0 /* no debug unless requested by makefile */ +#define REG_DEBUG /* */ +#endif /* and pick up the standard header */ #include "regex.h" - -#endif /* _REGEX_CUSTOM_H_ */ diff --git a/src/regex/rege_dfa.c b/src/regex/rege_dfa.c index 5347b90d73..313892cc8f 100644 --- a/src/regex/rege_dfa.c +++ b/src/regex/rege_dfa.c @@ -2,21 +2,21 @@ * DFA routines * This file is #included by regexec.c. * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * + * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. 
- * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -28,27 +28,27 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Header$ - * */ /* - * longest - longest-preferred matching engine + - longest - longest-preferred matching engine + ^ static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *); */ -static chr * /* endpoint, or NULL */ -longest(struct vars * v, /* used only for debug and exec flags */ - struct dfa * d, - chr *start, /* where the match should start */ - chr *stop, /* match must end at or before here */ - int *hitstopp) /* record whether hit v->stop, if non-NULL */ +static chr * /* endpoint, or NULL */ +longest(v, d, start, stop, hitstopp) +struct vars *v; /* used only for debug and exec flags */ +struct dfa *d; +chr *start; /* where the match should start */ +chr *stop; /* match must end at or before here */ +int *hitstopp; /* record whether hit v->stop, if non-NULL */ { - chr *cp; - chr *realstop = (stop == v->stop) ? stop : stop + 1; - color co; + chr *cp; + chr *realstop = (stop == v->stop) ? stop : stop + 1; + color co; struct sset *css; struct sset *ss; - chr *post; - int i; + chr *post; + int i; struct colormap *cm = d->cm; /* initialize */ @@ -59,15 +59,12 @@ longest(struct vars * v, /* used only for debug and exec flags */ /* startup */ FDEBUG(("+++ startup +++\n")); - if (cp == v->start) - { - co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1]; - FDEBUG(("color %ld\n", (long) co)); - } - else - { + if (cp == v->start) { + co = d->cnfa->bos[(v->eflags&REG_NOTBOL) ? 
0 : 1]; + FDEBUG(("color %ld\n", (long)co)); + } else { co = GETCOLOR(cm, *(cp - 1)); - FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co)); + FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co)); } css = miss(v, d, css, co, cp, start); if (css == NULL) @@ -75,33 +72,29 @@ longest(struct vars * v, /* used only for debug and exec flags */ css->lastseen = cp; /* main loop */ - if (v->eflags & REG_FTRACE) - while (cp < realstop) - { + if (v->eflags&REG_FTRACE) + while (cp < realstop) { FDEBUG(("+++ at c%d +++\n", css - d->ssets)); co = GETCOLOR(cm, *cp); - FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co)); + FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co)); ss = css->outs[co]; - if (ss == NULL) - { - ss = miss(v, d, css, co, cp + 1, start); + if (ss == NULL) { + ss = miss(v, d, css, co, cp+1, start); if (ss == NULL) - break; /* NOTE BREAK OUT */ + break; /* NOTE BREAK OUT */ } cp++; ss->lastseen = cp; css = ss; } else - while (cp < realstop) - { + while (cp < realstop) { co = GETCOLOR(cm, *cp); ss = css->outs[co]; - if (ss == NULL) - { - ss = miss(v, d, css, co, cp + 1, start); + if (ss == NULL) { + ss = miss(v, d, css, co, cp+1, start); if (ss == NULL) - break; /* NOTE BREAK OUT */ + break; /* NOTE BREAK OUT */ } cp++; ss->lastseen = cp; @@ -110,15 +103,14 @@ longest(struct vars * v, /* used only for debug and exec flags */ /* shutdown */ FDEBUG(("+++ shutdown at c%d +++\n", css - d->ssets)); - if (cp == v->stop && stop == v->stop) - { + if (cp == v->stop && stop == v->stop) { if (hitstopp != NULL) *hitstopp = 1; - co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1]; - FDEBUG(("color %ld\n", (long) co)); + co = d->cnfa->eos[(v->eflags&REG_NOTEOL) ? 0 : 1]; + FDEBUG(("color %ld\n", (long)co)); ss = miss(v, d, css, co, cp, start); /* special case: match ended at eol? 
*/ - if (ss != NULL && (ss->flags & POSTSTATE)) + if (ss != NULL && (ss->flags&POSTSTATE)) return cp; else if (ss != NULL) ss->lastseen = cp; /* to be tidy */ @@ -127,32 +119,34 @@ longest(struct vars * v, /* used only for debug and exec flags */ /* find last match, if any */ post = d->lastpost; for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--) - if ((ss->flags & POSTSTATE) && post != ss->lastseen && - (post == NULL || post < ss->lastseen)) + if ((ss->flags&POSTSTATE) && post != ss->lastseen && + (post == NULL || post < ss->lastseen)) post = ss->lastseen; - if (post != NULL) /* found one */ + if (post != NULL) /* found one */ return post - 1; return NULL; } /* - * shortest - shortest-preferred matching engine + - shortest - shortest-preferred matching engine + ^ static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, + ^ chr **, int *); */ -static chr * /* endpoint, or NULL */ -shortest(struct vars * v, - struct dfa * d, - chr *start, /* where the match should start */ - chr *min, /* match must end at or after here */ - chr *max, /* match must end at or before here */ - chr **coldp, /* store coldstart pointer here, if - * nonNULL */ - int *hitstopp) /* record whether hit v->stop, if non-NULL */ +static chr * /* endpoint, or NULL */ +shortest(v, d, start, min, max, coldp, hitstopp) +struct vars *v; +struct dfa *d; +chr *start; /* where the match should start */ +chr *min; /* match must end at or after here */ +chr *max; /* match must end at or before here */ +chr **coldp; /* store coldstart pointer here, if nonNULL */ +int *hitstopp; /* record whether hit v->stop, if non-NULL */ { - chr *cp; - chr *realmin = (min == v->stop) ? min : min + 1; - chr *realmax = (max == v->stop) ? max : max + 1; - color co; + chr *cp; + chr *realmin = (min == v->stop) ? min : min + 1; + chr *realmax = (max == v->stop) ? 
max : max + 1; + color co; struct sset *css; struct sset *ss; struct colormap *cm = d->cm; @@ -165,15 +159,12 @@ shortest(struct vars * v, /* startup */ FDEBUG(("--- startup ---\n")); - if (cp == v->start) - { - co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1]; - FDEBUG(("color %ld\n", (long) co)); - } - else - { + if (cp == v->start) { + co = d->cnfa->bos[(v->eflags&REG_NOTBOL) ? 0 : 1]; + FDEBUG(("color %ld\n", (long)co)); + } else { co = GETCOLOR(cm, *(cp - 1)); - FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co)); + FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co)); } css = miss(v, d, css, co, cp, start); if (css == NULL) @@ -182,116 +173,110 @@ shortest(struct vars * v, ss = css; /* main loop */ - if (v->eflags & REG_FTRACE) - while (cp < realmax) - { + if (v->eflags&REG_FTRACE) + while (cp < realmax) { FDEBUG(("--- at c%d ---\n", css - d->ssets)); co = GETCOLOR(cm, *cp); - FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co)); + FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co)); ss = css->outs[co]; - if (ss == NULL) - { - ss = miss(v, d, css, co, cp + 1, start); + if (ss == NULL) { + ss = miss(v, d, css, co, cp+1, start); if (ss == NULL) - break; /* NOTE BREAK OUT */ + break; /* NOTE BREAK OUT */ } cp++; ss->lastseen = cp; css = ss; - if ((ss->flags & POSTSTATE) && cp >= realmin) - break; /* NOTE BREAK OUT */ + if ((ss->flags&POSTSTATE) && cp >= realmin) + break; /* NOTE BREAK OUT */ } else - while (cp < realmax) - { + while (cp < realmax) { co = GETCOLOR(cm, *cp); ss = css->outs[co]; - if (ss == NULL) - { - ss = miss(v, d, css, co, cp + 1, start); + if (ss == NULL) { + ss = miss(v, d, css, co, cp+1, start); if (ss == NULL) - break; /* NOTE BREAK OUT */ + break; /* NOTE BREAK OUT */ } cp++; ss->lastseen = cp; css = ss; - if ((ss->flags & POSTSTATE) && cp >= realmin) - break; /* NOTE BREAK OUT */ + if ((ss->flags&POSTSTATE) && cp >= realmin) + break; /* NOTE BREAK OUT */ } if (ss == NULL) return NULL; - if (coldp != NULL) /* report last 
no-progress state set, if - * any */ + if (coldp != NULL) /* report last no-progress state set, if any */ *coldp = lastcold(v, d); - if ((ss->flags & POSTSTATE) && cp > min) - { + if ((ss->flags&POSTSTATE) && cp > min) { assert(cp >= realmin); cp--; - } - else if (cp == v->stop && max == v->stop) - { - co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1]; - FDEBUG(("color %ld\n", (long) co)); + } else if (cp == v->stop && max == v->stop) { + co = d->cnfa->eos[(v->eflags&REG_NOTEOL) ? 0 : 1]; + FDEBUG(("color %ld\n", (long)co)); ss = miss(v, d, css, co, cp, start); /* match might have ended at eol */ - if ((ss == NULL || !(ss->flags & POSTSTATE)) && hitstopp != NULL) + if ((ss == NULL || !(ss->flags&POSTSTATE)) && hitstopp != NULL) *hitstopp = 1; } - if (ss == NULL || !(ss->flags & POSTSTATE)) + if (ss == NULL || !(ss->flags&POSTSTATE)) return NULL; return cp; } /* - * lastcold - determine last point at which no progress had been made + - lastcold - determine last point at which no progress had been made + ^ static chr *lastcold(struct vars *, struct dfa *); */ -static chr * /* endpoint, or NULL */ -lastcold(struct vars * v, - struct dfa * d) +static chr * /* endpoint, or NULL */ +lastcold(v, d) +struct vars *v; +struct dfa *d; { struct sset *ss; - chr *nopr; - int i; + chr *nopr; + int i; nopr = d->lastnopr; if (nopr == NULL) nopr = v->start; for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--) - if ((ss->flags & NOPROGRESS) && nopr < ss->lastseen) + if ((ss->flags&NOPROGRESS) && nopr < ss->lastseen) nopr = ss->lastseen; return nopr; } /* - * newdfa - set up a fresh DFA + - newdfa - set up a fresh DFA + ^ static struct dfa *newdfa(struct vars *, struct cnfa *, + ^ struct colormap *, struct smalldfa *); */ static struct dfa * -newdfa(struct vars * v, - struct cnfa * cnfa, - struct colormap * cm, - struct smalldfa * small) /* preallocated space, may be NULL */ +newdfa(v, cnfa, cm, small) +struct vars *v; +struct cnfa *cnfa; +struct colormap *cm; +struct smalldfa *small; 
/* preallocated space, may be NULL */ { struct dfa *d; - size_t nss = cnfa->nstates * 2; - int wordsper = (cnfa->nstates + UBITS - 1) / UBITS; + size_t nss = cnfa->nstates * 2; + int wordsper = (cnfa->nstates + UBITS - 1) / UBITS; struct smalldfa *smallwas = small; assert(cnfa != NULL && cnfa->nstates != 0); - if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS) - { + if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS) { assert(wordsper == 1); - if (small == NULL) - { - small = (struct smalldfa *) MALLOC( - sizeof(struct smalldfa)); - if (small == NULL) - { + if (small == NULL) { + small = (struct smalldfa *)MALLOC( + sizeof(struct smalldfa)); + if (small == NULL) { ERR(REG_ESPACE); return NULL; } @@ -303,36 +288,32 @@ newdfa(struct vars * v, d->outsarea = small->outsarea; d->incarea = small->incarea; d->cptsmalloced = 0; - d->mallocarea = (smallwas == NULL) ? (char *) small : NULL; - } - else - { - d = (struct dfa *) MALLOC(sizeof(struct dfa)); - if (d == NULL) - { + d->mallocarea = (smallwas == NULL) ? 
(char *)small : NULL; + } else { + d = (struct dfa *)MALLOC(sizeof(struct dfa)); + if (d == NULL) { ERR(REG_ESPACE); return NULL; } - d->ssets = (struct sset *) MALLOC(nss * sizeof(struct sset)); - d->statesarea = (unsigned *) MALLOC((nss + WORK) * wordsper * - sizeof(unsigned)); + d->ssets = (struct sset *)MALLOC(nss * sizeof(struct sset)); + d->statesarea = (unsigned *)MALLOC((nss+WORK) * wordsper * + sizeof(unsigned)); d->work = &d->statesarea[nss * wordsper]; - d->outsarea = (struct sset **) MALLOC(nss * cnfa->ncolors * - sizeof(struct sset *)); - d->incarea = (struct arcp *) MALLOC(nss * cnfa->ncolors * - sizeof(struct arcp)); + d->outsarea = (struct sset **)MALLOC(nss * cnfa->ncolors * + sizeof(struct sset *)); + d->incarea = (struct arcp *)MALLOC(nss * cnfa->ncolors * + sizeof(struct arcp)); d->cptsmalloced = 1; - d->mallocarea = (char *) d; + d->mallocarea = (char *)d; if (d->ssets == NULL || d->statesarea == NULL || - d->outsarea == NULL || d->incarea == NULL) - { + d->outsarea == NULL || d->incarea == NULL) { freedfa(d); ERR(REG_ESPACE); return NULL; } } - d->nssets = (v->eflags & REG_SMALL) ? 7 : nss; + d->nssets = (v->eflags&REG_SMALL) ? 7 : nss; d->nssused = 0; d->nstates = cnfa->nstates; d->ncolors = cnfa->ncolors; @@ -349,13 +330,14 @@ newdfa(struct vars * v, } /* - * freedfa - free a DFA + - freedfa - free a DFA + ^ static VOID freedfa(struct dfa *); */ -static void -freedfa(struct dfa * d) +static VOID +freedfa(d) +struct dfa *d; { - if (d->cptsmalloced) - { + if (d->cptsmalloced) { if (d->ssets != NULL) FREE(d->ssets); if (d->statesarea != NULL) @@ -371,16 +353,17 @@ freedfa(struct dfa * d) } /* - * hash - construct a hash code for a bitvector - * + - hash - construct a hash code for a bitvector * There are probably better ways, but they're more expensive. 
+ ^ static unsigned hash(unsigned *, int); */ static unsigned -hash(unsigned *uv, - int n) +hash(uv, n) +unsigned *uv; +int n; { - int i; - unsigned h; + int i; + unsigned h; h = 0; for (i = 0; i < n; i++) @@ -389,28 +372,29 @@ hash(unsigned *uv, } /* - * initialize - hand-craft a cache entry for startup, otherwise get ready + - initialize - hand-craft a cache entry for startup, otherwise get ready + ^ static struct sset *initialize(struct vars *, struct dfa *, chr *); */ static struct sset * -initialize(struct vars * v, /* used only for debug flags */ - struct dfa * d, - chr *start) +initialize(v, d, start) +struct vars *v; /* used only for debug flags */ +struct dfa *d; +chr *start; { struct sset *ss; - int i; + int i; /* is previous one still there? */ - if (d->nssused > 0 && (d->ssets[0].flags & STARTER)) + if (d->nssused > 0 && (d->ssets[0].flags&STARTER)) ss = &d->ssets[0]; - else - { /* no, must (re)build it */ + else { /* no, must (re)build it */ ss = getvacant(v, d, start, start); for (i = 0; i < d->wordsper; i++) ss->states[i] = 0; BSET(ss->states, d->cnfa->pre); ss->hash = HASH(ss->states, d->wordsper); assert(d->cnfa->pre != d->cnfa->post); - ss->flags = STARTER | LOCKED | NOPROGRESS; + ss->flags = STARTER|LOCKED|NOPROGRESS; /* lastseen dealt with below */ } @@ -423,30 +407,32 @@ initialize(struct vars * v, /* used only for debug flags */ } /* - * miss - handle a cache miss + - miss - handle a cache miss + ^ static struct sset *miss(struct vars *, struct dfa *, struct sset *, + ^ pcolor, chr *, chr *); */ -static struct sset * /* NULL if goes to empty set */ -miss(struct vars * v, /* used only for debug flags */ - struct dfa * d, - struct sset * css, - pcolor co, - chr *cp, /* next chr */ - chr *start) /* where the attempt got started */ +static struct sset * /* NULL if goes to empty set */ +miss(v, d, css, co, cp, start) +struct vars *v; /* used only for debug flags */ +struct dfa *d; +struct sset *css; +pcolor co; +chr *cp; /* next chr */ +chr *start; 
/* where the attempt got started */ { struct cnfa *cnfa = d->cnfa; - int i; - unsigned h; + int i; + unsigned h; struct carc *ca; struct sset *p; - int ispost; - int noprogress; - int gotstate; - int dolacons; - int sawlacons; + int ispost; + int noprogress; + int gotstate; + int dolacons; + int sawlacons; /* for convenience, we can be called even if it might not be a miss */ - if (css->outs[co] != NULL) - { + if (css->outs[co] != NULL) { FDEBUG(("hit\n")); return css->outs[co]; } @@ -460,9 +446,8 @@ miss(struct vars * v, /* used only for debug flags */ gotstate = 0; for (i = 0; i < d->nstates; i++) if (ISBSET(css->states, i)) - for (ca = cnfa->states[i] + 1; ca->co != COLORLESS; ca++) - if (ca->co == co) - { + for (ca = cnfa->states[i]+1; ca->co != COLORLESS; ca++) + if (ca->co == co) { BSET(d->work, ca->to); gotstate = 1; if (ca->to == cnfa->post) @@ -471,23 +456,21 @@ miss(struct vars * v, /* used only for debug flags */ noprogress = 0; FDEBUG(("%d -> %d\n", i, ca->to)); } - dolacons = (gotstate) ? (cnfa->flags & HASLACONS) : 0; + dolacons = (gotstate) ? (cnfa->flags&HASLACONS) : 0; sawlacons = 0; - while (dolacons) - { /* transitive closure */ + while (dolacons) { /* transitive closure */ dolacons = 0; for (i = 0; i < d->nstates; i++) if (ISBSET(d->work, i)) - for (ca = cnfa->states[i] + 1; ca->co != COLORLESS; - ca++) - { + for (ca = cnfa->states[i]+1; ca->co != COLORLESS; + ca++) { if (ca->co <= cnfa->ncolors) - continue; /* NOTE CONTINUE */ + continue; /* NOTE CONTINUE */ sawlacons = 1; if (ISBSET(d->work, ca->to)) - continue; /* NOTE CONTINUE */ + continue; /* NOTE CONTINUE */ if (!lacon(v, cnfa, cp, ca->co)) - continue; /* NOTE CONTINUE */ + continue; /* NOTE CONTINUE */ BSET(d->work, ca->to); dolacons = 1; if (ca->to == cnfa->post) @@ -503,13 +486,11 @@ miss(struct vars * v, /* used only for debug flags */ /* next, is that in the cache? 
*/ for (p = d->ssets, i = d->nssused; i > 0; p++, i--) - if (HIT(h, d->work, p, d->wordsper)) - { + if (HIT(h, d->work, p, d->wordsper)) { FDEBUG(("cached c%d\n", p - d->ssets)); - break; /* NOTE BREAK OUT */ + break; /* NOTE BREAK OUT */ } - if (i == 0) - { /* nope, need a new cache entry */ + if (i == 0) { /* nope, need a new cache entry */ p = getvacant(v, d, cp, start); assert(p != css); for (i = 0; i < d->wordsper; i++) @@ -521,97 +502,96 @@ miss(struct vars * v, /* used only for debug flags */ /* lastseen to be dealt with by caller */ } - if (!sawlacons) - { /* lookahead conds. always cache miss */ + if (!sawlacons) { /* lookahead conds. always cache miss */ FDEBUG(("c%d[%d]->c%d\n", css - d->ssets, co, p - d->ssets)); css->outs[co] = p; css->inchain[co] = p->ins; p->ins.ss = css; - p->ins.co = (color) co; + p->ins.co = (color)co; } return p; } /* - * lacon - lookahead-constraint checker for miss() + - lacon - lookahead-constraint checker for miss() + ^ static int lacon(struct vars *, struct cnfa *, chr *, pcolor); */ -static int /* predicate: constraint satisfied? */ -lacon(struct vars * v, - struct cnfa * pcnfa, /* parent cnfa */ - chr *cp, - pcolor co) /* "color" of the lookahead constraint */ +static int /* predicate: constraint satisfied? */ +lacon(v, pcnfa, cp, co) +struct vars *v; +struct cnfa *pcnfa; /* parent cnfa */ +chr *cp; +pcolor co; /* "color" of the lookahead constraint */ { - int n; + int n; struct subre *sub; struct dfa *d; struct smalldfa sd; - chr *end; + chr *end; n = co - pcnfa->ncolors; assert(n < v->g->nlacons && v->g->lacons != NULL); FDEBUG(("=== testing lacon %d\n", n)); sub = &v->g->lacons[n]; d = newdfa(v, &sub->cnfa, &v->g->cmap, &sd); - if (d == NULL) - { + if (d == NULL) { ERR(REG_ESPACE); return 0; } - end = longest(v, d, cp, v->stop, (int *) NULL); + end = longest(v, d, cp, v->stop, (int *)NULL); freedfa(d); FDEBUG(("=== lacon %d match %d\n", n, (end != NULL))); return (sub->subno) ? 
(end != NULL) : (end == NULL); } /* - * getvacant - get a vacant state set + - getvacant - get a vacant state set * This routine clears out the inarcs and outarcs, but does not otherwise * clear the innards of the state set -- that's up to the caller. + ^ static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *); */ static struct sset * -getvacant(struct vars * v, /* used only for debug flags */ - struct dfa * d, - chr *cp, - chr *start) +getvacant(v, d, cp, start) +struct vars *v; /* used only for debug flags */ +struct dfa *d; +chr *cp; +chr *start; { - int i; + int i; struct sset *ss; struct sset *p; struct arcp ap; struct arcp lastap; - color co; + color co; ss = pickss(v, d, cp, start); - assert(!(ss->flags & LOCKED)); + assert(!(ss->flags&LOCKED)); /* clear out its inarcs, including self-referential ones */ ap = ss->ins; - while ((p = ap.ss) != NULL) - { + while ((p = ap.ss) != NULL) { co = ap.co; - FDEBUG(("zapping c%d's %ld outarc\n", p - d->ssets, (long) co)); + FDEBUG(("zapping c%d's %ld outarc\n", p - d->ssets, (long)co)); p->outs[co] = NULL; ap = p->inchain[co]; - p->inchain[co].ss = NULL; /* paranoia */ + p->inchain[co].ss = NULL; /* paranoia */ } ss->ins.ss = NULL; /* take it off the inarc chains of the ssets reached by its outarcs */ - for (i = 0; i < d->ncolors; i++) - { + for (i = 0; i < d->ncolors; i++) { p = ss->outs[i]; assert(p != ss); /* not self-referential */ if (p == NULL) - continue; /* NOTE CONTINUE */ + continue; /* NOTE CONTINUE */ FDEBUG(("del outarc %d from c%d's in chn\n", i, p - d->ssets)); if (p->ins.ss == ss && p->ins.co == i) p->ins = ss->inchain[i]; - else - { + else { assert(p->ins.ss != NULL); for (ap = p->ins; ap.ss != NULL && - !(ap.ss == ss && ap.co == i); - ap = ap.ss->inchain[ap.co]) + !(ap.ss == ss && ap.co == i); + ap = ap.ss->inchain[ap.co]) lastap = ap; assert(ap.ss != NULL); lastap.ss->inchain[lastap.co] = ss->inchain[i]; @@ -621,35 +601,36 @@ getvacant(struct vars * v, /* used only for debug flags */ 
} /* if ss was a success state, may need to remember location */ - if ((ss->flags & POSTSTATE) && ss->lastseen != d->lastpost && - (d->lastpost == NULL || d->lastpost < ss->lastseen)) + if ((ss->flags&POSTSTATE) && ss->lastseen != d->lastpost && + (d->lastpost == NULL || d->lastpost < ss->lastseen)) d->lastpost = ss->lastseen; /* likewise for a no-progress state */ - if ((ss->flags & NOPROGRESS) && ss->lastseen != d->lastnopr && - (d->lastnopr == NULL || d->lastnopr < ss->lastseen)) + if ((ss->flags&NOPROGRESS) && ss->lastseen != d->lastnopr && + (d->lastnopr == NULL || d->lastnopr < ss->lastseen)) d->lastnopr = ss->lastseen; return ss; } /* - * pickss - pick the next stateset to be used + - pickss - pick the next stateset to be used + ^ static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *); */ static struct sset * -pickss(struct vars * v, /* used only for debug flags */ - struct dfa * d, - chr *cp, - chr *start) +pickss(v, d, cp, start) +struct vars *v; /* used only for debug flags */ +struct dfa *d; +chr *cp; +chr *start; { - int i; + int i; struct sset *ss; struct sset *end; - chr *ancient; + chr *ancient; /* shortcut for cases where cache isn't full */ - if (d->nssused < d->nssets) - { + if (d->nssused < d->nssets) { i = d->nssused; d->nssused++; ss = &d->ssets[i]; @@ -661,8 +642,7 @@ pickss(struct vars * v, /* used only for debug flags */ ss->ins.co = WHITE; /* give it some value */ ss->outs = &d->outsarea[i * d->ncolors]; ss->inchain = &d->incarea[i * d->ncolors]; - for (i = 0; i < d->ncolors; i++) - { + for (i = 0; i < d->ncolors; i++) { ss->outs[i] = NULL; ss->inchain[i].ss = NULL; } @@ -670,22 +650,20 @@ pickss(struct vars * v, /* used only for debug flags */ } /* look for oldest, or old enough anyway */ - if (cp - start > d->nssets * 2 / 3) /* oldest 33% are expendable */ - ancient = cp - d->nssets * 2 / 3; + if (cp - start > d->nssets*2/3) /* oldest 33% are expendable */ + ancient = cp - d->nssets*2/3; else ancient = start; for (ss = 
d->search, end = &d->ssets[d->nssets]; ss < end; ss++) if ((ss->lastseen == NULL || ss->lastseen < ancient) && - !(ss->flags & LOCKED)) - { + !(ss->flags&LOCKED)) { d->search = ss + 1; FDEBUG(("replacing c%d\n", ss - d->ssets)); return ss; } for (ss = d->ssets, end = d->search; ss < end; ss++) if ((ss->lastseen == NULL || ss->lastseen < ancient) && - !(ss->flags & LOCKED)) - { + !(ss->flags&LOCKED)) { d->search = ss + 1; FDEBUG(("replacing c%d\n", ss - d->ssets)); return ss; diff --git a/src/regex/regerror.c b/src/regex/regerror.c index f923ba572d..aca13aade0 100644 --- a/src/regex/regerror.c +++ b/src/regex/regerror.c @@ -1,21 +1,21 @@ /* * regerror - error-code expansion * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * + * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. - * + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -27,8 +27,6 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* - * $Header: /projects/cvsroot/pgsql-server/src/backend/regex/regerror.c,v 1.26 2003/08/04 00:43:21 momjian Exp $ - * */ #include "regguts.h" @@ -37,88 +35,73 @@ static char unk[] = "*** unknown regex error code 0x%x ***"; /* struct to map among codes, code names, and explanations */ -static struct rerr -{ +static struct rerr { int code; char *name; char *explain; -} rerrs[] = - -{ +} rerrs[] = { /* the actual table is built from regex.h */ -#include "regerrs.h" - { - -1, "", "oops" - }, /* explanation special-cased in code */ +# include "regerrs.h" + { -1, "", "oops" }, /* explanation special-cased in code */ }; -size_t -regerror(int errcode, /* error code, or REG_ATOI or REG_ITOA */ - const regex_t *preg, /* associated regex_t (unused at present) */ - char *errbuf, /* result buffer (unless errbuf_size==0) */ - size_t errbuf_size) /* available space in errbuf, can be 0 */ - { return wx_regerror(errcode, preg, errbuf, errbuf_size); } /* - * pg_regerror - the interface to error numbers + - regerror - the interface to error numbers */ /* ARGSUSED */ -size_t /* actual space needed (including NUL) */ -wx_regerror(int errcode, /* error code, or REG_ATOI or REG_ITOA */ - const regex_t *preg, /* associated regex_t (unused at present) */ - char *errbuf, /* result buffer (unless errbuf_size==0) */ - size_t errbuf_size) /* available space in errbuf, can be 0 */ +size_t /* actual space needed (including NUL) */ +regerror(errcode, preg, errbuf, errbuf_size) +int errcode; /* error code, or REG_ATOI or REG_ITOA */ +CONST regex_t *preg; /* associated regex_t (unused at present) */ +char *errbuf; /* result buffer (unless errbuf_size==0) */ +size_t errbuf_size; /* available space in errbuf, can be 0 */ { struct rerr *r; - char *msg; - char convbuf[sizeof(unk) + 50]; /* 50 = plenty for int */ - size_t len; - int icode; + char *msg; + char convbuf[sizeof(unk)+50]; /* 50 = plenty for int */ + size_t len; + int icode; - switch (errcode) - { - case REG_ATOI: /* convert name to number 
*/ + switch (errcode) { + case REG_ATOI: /* convert name to number */ for (r = rerrs; r->code >= 0; r++) - if (strcmp(r->name, errbuf) == 0) + if (strcmp(r->name, errbuf) == 0) break; - sprintf(convbuf, "%d", r->code); /* -1 for unknown */ + sprintf(convbuf, "%d", r->code); /* -1 for unknown */ + msg = convbuf; + break; + case REG_ITOA: /* convert number to name */ + icode = atoi(errbuf); /* not our problem if this fails */ + for (r = rerrs; r->code >= 0; r++) + if (r->code == icode) + break; + if (r->code >= 0) + msg = r->name; + else { /* unknown; tell him the number */ + sprintf(convbuf, "REG_%u", (unsigned)icode); msg = convbuf; - break; - case REG_ITOA: /* convert number to name */ - icode = atoi(errbuf); /* not our problem if this fails */ - for (r = rerrs; r->code >= 0; r++) - if (r->code == icode) - break; - if (r->code >= 0) - msg = r->name; - else - { /* unknown; tell him the number */ - sprintf(convbuf, "REG_%u", (unsigned) icode); - msg = convbuf; - } - break; - default: /* a real, normal error code */ - for (r = rerrs; r->code >= 0; r++) - if (r->code == errcode) - break; - if (r->code >= 0) - msg = r->explain; - else - { /* unknown; say so */ - sprintf(convbuf, unk, errcode); - msg = convbuf; - } - break; + } + break; + default: /* a real, normal error code */ + for (r = rerrs; r->code >= 0; r++) + if (r->code == errcode) + break; + if (r->code >= 0) + msg = r->explain; + else { /* unknown; say so */ + sprintf(convbuf, unk, errcode); + msg = convbuf; + } + break; } len = strlen(msg) + 1; /* space needed, including NUL */ - if (errbuf_size > 0) - { + if (errbuf_size > 0) { if (errbuf_size > len) strcpy(errbuf, msg); - else - { /* truncate to fit */ - memcpy(errbuf, msg, errbuf_size - 1); /*RN - was strncpy*/ - errbuf[errbuf_size - 1] = '\0'; + else { /* truncate to fit */ + strncpy(errbuf, msg, errbuf_size-1); + errbuf[errbuf_size-1] = '\0'; } } diff --git a/src/regex/regerrs.h b/src/regex/regerrs.h index f99dbf4f73..a3d98b6818 100644 --- 
a/src/regex/regerrs.h +++ b/src/regex/regerrs.h @@ -1,75 +1,18 @@ -/* - * $Id$ - */ - -{ - REG_OKAY, "REG_OKAY", "no errors detected" -}, - -{ - REG_NOMATCH, "REG_NOMATCH", "failed to match" -}, - -{ - REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.8)" -}, - -{ - REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" -}, - -{ - REG_ECTYPE, "REG_ECTYPE", "invalid character class" -}, - -{ - REG_EESCAPE, "REG_EESCAPE", "invalid escape \\ sequence" -}, - -{ - REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" -}, - -{ - REG_EBRACK, "REG_EBRACK", "brackets [] not balanced" -}, - -{ - REG_EPAREN, "REG_EPAREN", "parentheses () not balanced" -}, - -{ - REG_EBRACE, "REG_EBRACE", "braces {} not balanced" -}, - -{ - REG_BADBR, "REG_BADBR", "invalid repetition count(s)" -}, - -{ - REG_ERANGE, "REG_ERANGE", "invalid character range" -}, - -{ - REG_ESPACE, "REG_ESPACE", "out of memory" -}, - -{ - REG_BADRPT, "REG_BADRPT", "quantifier operand invalid" -}, - -{ - REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" -}, - -{ - REG_INVARG, "REG_INVARG", "invalid argument to regex function" -}, - -{ - REG_MIXED, "REG_MIXED", "character widths of regex and string differ" -}, - -{ - REG_BADOPT, "REG_BADOPT", "invalid embedded option" -}, +{ REG_OKAY, "REG_OKAY", "no errors detected" }, +{ REG_NOMATCH, "REG_NOMATCH", "failed to match" }, +{ REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.8)" }, +{ REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" }, +{ REG_ECTYPE, "REG_ECTYPE", "invalid character class" }, +{ REG_EESCAPE, "REG_EESCAPE", "invalid escape \\ sequence" }, +{ REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" }, +{ REG_EBRACK, "REG_EBRACK", "brackets [] not balanced" }, +{ REG_EPAREN, "REG_EPAREN", "parentheses () not balanced" }, +{ REG_EBRACE, "REG_EBRACE", "braces {} not balanced" }, +{ REG_BADBR, "REG_BADBR", "invalid repetition count(s)" }, +{ REG_ERANGE, "REG_ERANGE", "invalid character range" }, +{ REG_ESPACE, 
"REG_ESPACE", "out of memory" }, +{ REG_BADRPT, "REG_BADRPT", "quantifier operand invalid" }, +{ REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" }, +{ REG_INVARG, "REG_INVARG", "invalid argument to regex function" }, +{ REG_MIXED, "REG_MIXED", "character widths of regex and string differ" }, +{ REG_BADOPT, "REG_BADOPT", "invalid embedded option" }, diff --git a/src/regex/regex.h b/src/regex/regex.h index 304e15d95e..8289a500eb 100644 --- a/src/regex/regex.h +++ b/src/regex/regex.h @@ -1,16 +1,15 @@ #ifndef _REGEX_H_ #define _REGEX_H_ /* never again */ - /* * regular expressions * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and @@ -18,7 +17,7 @@ * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. - * + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -30,35 +29,98 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Id$ + * + * + * Prototypes etc. marked with "^" within comments get gathered up (and + * possibly edited) by the regfwd program and inserted near the bottom of + * this file. 
+ * + * We offer the option of declaring one wide-character version of the + * RE functions as well as the char versions. To do that, define + * __REG_WIDE_T to the type of wide characters (unfortunately, there + * is no consensus that wchar_t is suitable) and __REG_WIDE_COMPILE and + * __REG_WIDE_EXEC to the names to be used for the compile and execute + * functions (suggestion: re_Xcomp and re_Xexec, where X is a letter + * suggestive of the wide type, e.g. re_ucomp and re_uexec for Unicode). + * For cranky old compilers, it may be necessary to do something like: + * #define __REG_WIDE_COMPILE(a,b,c,d) re_Xcomp(a,b,c,d) + * #define __REG_WIDE_EXEC(a,b,c,d,e,f,g) re_Xexec(a,b,c,d,e,f,g) + * rather than just #defining the names as parameterless macros. + * + * For some specialized purposes, it may be desirable to suppress the + * declarations of the "front end" functions, regcomp() and regexec(), + * or of the char versions of the compile and execute functions. To + * suppress the front-end functions, define __REG_NOFRONT. To suppress + * the char versions, define __REG_NOCHAR. + * + * The right place to do those defines (and some others you may want, see + * below) would be <sys/types.h>. If you don't have control of that file, + * the right place to add your own defines to this file is marked below. + * This is normally done automatically, by the makefile and regmkhdr, based + * on the contents of regcustom.h. */ + + /* - * Add your own defines, if needed, here. + * voodoo for C++ */ #ifdef __cplusplus extern "C" { #endif -/***************************** - WXWINDOWS CUSTOM -*****************************/ -#ifndef _REGEX_CUSTOM_H_ -# define wx_wchar wxChar -/* FreeBSD, Watcom and DMars require this, CW doesn't have nor need it. */ -/* Others also don't seem to need it. 
If you have an error related to */ -/* (not) including please report details to */ -/* wx-dev@lists.wxwindows.org */ -# if defined(__UNIX__) || defined(__WATCOMC__) || defined(__DIGITALMARS__) -# include -# endif -#endif /* ndef _REGEX_CUSTOM_H_ */ -/***************************** - END WXWINDOWS CUSTOM -*****************************/ - -#include -#include + + +/* + * Add your own defines, if needed, here. + */ + + + +/* + * Location where a chunk of regcustom.h is automatically spliced into + * this file (working from its prototype, regproto.h). + */ +/* --- begin --- */ +/* ensure certain things don't sneak in from system headers */ +#ifdef __REG_WIDE_T +#undef __REG_WIDE_T +#endif +#ifdef __REG_WIDE_COMPILE +#undef __REG_WIDE_COMPILE +#endif +#ifdef __REG_WIDE_EXEC +#undef __REG_WIDE_EXEC +#endif +#ifdef __REG_REGOFF_T +#undef __REG_REGOFF_T +#endif +#ifdef __REG_VOID_T +#undef __REG_VOID_T +#endif +#ifdef __REG_CONST +#undef __REG_CONST +#endif +#ifdef __REG_NOFRONT +#undef __REG_NOFRONT +#endif +#ifdef __REG_NOCHAR +#undef __REG_NOCHAR +#endif +/* interface types */ +#define __REG_WIDE_T Tcl_UniChar +#define __REG_REGOFF_T long /* not really right, but good enough... */ +#define __REG_VOID_T VOID +#define __REG_CONST CONST +/* names and declarations */ +#define __REG_WIDE_COMPILE TclReComp +#define __REG_WIDE_EXEC TclReExec +#define __REG_NOFRONT /* don't want regcomp() and regexec() */ +#define __REG_NOCHAR /* or the char versions */ +#define regfree TclReFree +#define regerror TclReError +/* --- end --- */ + /* * interface types etc. @@ -66,136 +128,214 @@ extern "C" { /* * regoff_t has to be large enough to hold either off_t or ssize_t, - * and must be signed; it's only a guess that long is suitable. + * and must be signed; it's only a guess that long is suitable, so we + * offer an override. 
*/ +#ifdef __REG_REGOFF_T +typedef __REG_REGOFF_T regoff_t; +#else typedef long regoff_t; +#endif + +/* + * For benefit of old compilers, we offer the option of + * overriding the `void' type used to declare nonexistent return types. + */ +#ifdef __REG_VOID_T +typedef __REG_VOID_T re_void; +#else +typedef void re_void; +#endif + +/* + * Also for benefit of old compilers, can supply a macro + * which expands to a substitute for `const'. + */ +#ifndef __REG_CONST +#define __REG_CONST const +#endif + + /* * other interface types */ /* the biggie, a compiled RE (or rather, a front end to same) */ -typedef struct -{ - int re_magic; /* magic number */ - size_t re_nsub; /* number of subexpressions */ - long re_info; /* information about RE */ -#define REG_UBACKREF 000001 -#define REG_ULOOKAHEAD 000002 -#define REG_UBOUNDS 000004 -#define REG_UBRACES 000010 -#define REG_UBSALNUM 000020 -#define REG_UPBOTCH 000040 -#define REG_UBBS 000100 -#define REG_UNONPOSIX 000200 -#define REG_UUNSPEC 000400 -#define REG_UUNPORT 001000 -#define REG_ULOCALE 002000 -#define REG_UEMPTYMATCH 004000 -#define REG_UIMPOSSIBLE 010000 -#define REG_USHORTEST 020000 - int re_csize; /* sizeof(character) */ - char *re_endp; /* backward compatibility kludge */ +typedef struct { + int re_magic; /* magic number */ + size_t re_nsub; /* number of subexpressions */ + long re_info; /* information about RE */ +# define REG_UBACKREF 000001 +# define REG_ULOOKAHEAD 000002 +# define REG_UBOUNDS 000004 +# define REG_UBRACES 000010 +# define REG_UBSALNUM 000020 +# define REG_UPBOTCH 000040 +# define REG_UBBS 000100 +# define REG_UNONPOSIX 000200 +# define REG_UUNSPEC 000400 +# define REG_UUNPORT 001000 +# define REG_ULOCALE 002000 +# define REG_UEMPTYMATCH 004000 +# define REG_UIMPOSSIBLE 010000 +# define REG_USHORTEST 020000 + int re_csize; /* sizeof(character) */ + char *re_endp; /* backward compatibility kludge */ /* the rest is opaque pointers to hidden innards */ - char *re_guts; /* `char *' is more 
portable than `void *' */ - char *re_fns; + char *re_guts; /* `char *' is more portable than `void *' */ + char *re_fns; } regex_t; /* result reporting (may acquire more fields later) */ -typedef struct -{ - regoff_t rm_so; /* start of substring */ - regoff_t rm_eo; /* end of substring */ +typedef struct { + regoff_t rm_so; /* start of substring */ + regoff_t rm_eo; /* end of substring */ } regmatch_t; /* supplementary control and reporting */ -typedef struct -{ - regmatch_t rm_extend; /* see REG_EXPECT */ +typedef struct { + regmatch_t rm_extend; /* see REG_EXPECT */ } rm_detail_t; /* - * regex compilation flags + * compilation + ^ #ifndef __REG_NOCHAR + ^ int re_comp(regex_t *, __REG_CONST char *, size_t, int); + ^ #endif + ^ #ifndef __REG_NOFRONT + ^ int regcomp(regex_t *, __REG_CONST char *, int); + ^ #endif + ^ #ifdef __REG_WIDE_T + ^ int __REG_WIDE_COMPILE(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int); + ^ #endif + */ +#define REG_BASIC 000000 /* BREs (convenience) */ +#define REG_EXTENDED 000001 /* EREs */ +#define REG_ADVF 000002 /* advanced features in EREs */ +#define REG_ADVANCED 000003 /* AREs (which are also EREs) */ +#define REG_QUOTE 000004 /* no special characters, none */ +#define REG_NOSPEC REG_QUOTE /* historical synonym */ +#define REG_ICASE 000010 /* ignore case */ +#define REG_NOSUB 000020 /* don't care about subexpressions */ +#define REG_EXPANDED 000040 /* expanded format, white space & comments */ +#define REG_NLSTOP 000100 /* \n doesn't match . 
or [^ ] */ +#define REG_NLANCH 000200 /* ^ matches after \n, $ before */ +#define REG_NEWLINE 000300 /* newlines are line terminators */ +#define REG_PEND 000400 /* ugh -- backward-compatibility hack */ +#define REG_EXPECT 001000 /* report details on partial/limited matches */ +#define REG_BOSONLY 002000 /* temporary kludge for BOS-only matches */ +#define REG_DUMP 004000 /* none of your business :-) */ +#define REG_FAKE 010000 /* none of your business :-) */ +#define REG_PROGRESS 020000 /* none of your business :-) */ + + + +/* + * execution + ^ #ifndef __REG_NOCHAR + ^ int re_exec(regex_t *, __REG_CONST char *, size_t, + ^ rm_detail_t *, size_t, regmatch_t [], int); + ^ #endif + ^ #ifndef __REG_NOFRONT + ^ int regexec(regex_t *, __REG_CONST char *, size_t, regmatch_t [], int); + ^ #endif + ^ #ifdef __REG_WIDE_T + ^ int __REG_WIDE_EXEC(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, + ^ rm_detail_t *, size_t, regmatch_t [], int); + ^ #endif */ -#define REG_BASIC 000000 /* BREs (convenience) */ -#define REG_EXTENDED 000001 /* EREs */ -#define REG_ADVF 000002 /* advanced features in EREs */ -#define REG_ADVANCED 000003 /* AREs (which are also EREs) */ -#define REG_QUOTE 000004 /* no special characters, none */ -#define REG_NOSPEC REG_QUOTE /* historical synonym */ -#define REG_ICASE 000010 /* ignore case */ -#define REG_NOSUB 000020 /* don't care about subexpressions */ -#define REG_EXPANDED 000040 /* expanded format, white space & comments */ -#define REG_NLSTOP 000100 /* \n doesn't match . 
or [^ ] */ -#define REG_NLANCH 000200 /* ^ matches after \n, $ before */ -#define REG_NEWLINE 000300 /* newlines are line terminators */ -#define REG_PEND 000400 /* ugh -- backward-compatibility hack */ -#define REG_EXPECT 001000 /* report details on partial/limited - * matches */ -#define REG_BOSONLY 002000 /* temporary kludge for BOS-only matches */ -#define REG_DUMP 004000 /* none of your business :-) */ -#define REG_FAKE 010000 /* none of your business :-) */ -#define REG_PROGRESS 020000 /* none of your business :-) */ +#define REG_NOTBOL 0001 /* BOS is not BOL */ +#define REG_NOTEOL 0002 /* EOS is not EOL */ +#define REG_STARTEND 0004 /* backward compatibility kludge */ +#define REG_FTRACE 0010 /* none of your business */ +#define REG_MTRACE 0020 /* none of your business */ +#define REG_SMALL 0040 /* none of your business */ /* - * regex execution flags + * misc generics (may be more functions here eventually) + ^ re_void regfree(regex_t *); */ -#define REG_NOTBOL 0001 /* BOS is not BOL */ -#define REG_NOTEOL 0002 /* EOS is not EOL */ -#define REG_STARTEND 0004 /* backward compatibility kludge */ -#define REG_FTRACE 0010 /* none of your business */ -#define REG_MTRACE 0020 /* none of your business */ -#define REG_SMALL 0040 /* none of your business */ + /* * error reporting * Be careful if modifying the list of error codes -- the table used by * regerror() is generated automatically from this file! + * + * Note that there is no wide-char variant of regerror at this time; what + * kind of character is used for error reports is independent of what kind + * is used in matching. 
+ * + ^ extern size_t regerror(int, __REG_CONST regex_t *, char *, size_t); */ -#define REG_OKAY 0 /* no errors detected */ -#define REG_NOMATCH 1 /* failed to match */ -#define REG_BADPAT 2 /* invalid regexp */ -#define REG_ECOLLATE 3 /* invalid collating element */ -#define REG_ECTYPE 4 /* invalid character class */ -#define REG_EESCAPE 5 /* invalid escape \ sequence */ -#define REG_ESUBREG 6 /* invalid backreference number */ -#define REG_EBRACK 7 /* brackets [] not balanced */ -#define REG_EPAREN 8 /* parentheses () not balanced */ -#define REG_EBRACE 9 /* braces {} not balanced */ -#define REG_BADBR 10 /* invalid repetition count(s) */ -#define REG_ERANGE 11 /* invalid character range */ -#define REG_ESPACE 12 /* out of memory */ -#define REG_BADRPT 13 /* quantifier operand invalid */ -#define REG_ASSERT 15 /* "can't happen" -- you found a bug */ -#define REG_INVARG 16 /* invalid argument to regex function */ -#define REG_MIXED 17 /* character widths of regex and string - * differ */ -#define REG_BADOPT 18 /* invalid embedded option */ +#define REG_OKAY 0 /* no errors detected */ +#define REG_NOMATCH 1 /* failed to match */ +#define REG_BADPAT 2 /* invalid regexp */ +#define REG_ECOLLATE 3 /* invalid collating element */ +#define REG_ECTYPE 4 /* invalid character class */ +#define REG_EESCAPE 5 /* invalid escape \ sequence */ +#define REG_ESUBREG 6 /* invalid backreference number */ +#define REG_EBRACK 7 /* brackets [] not balanced */ +#define REG_EPAREN 8 /* parentheses () not balanced */ +#define REG_EBRACE 9 /* braces {} not balanced */ +#define REG_BADBR 10 /* invalid repetition count(s) */ +#define REG_ERANGE 11 /* invalid character range */ +#define REG_ESPACE 12 /* out of memory */ +#define REG_BADRPT 13 /* quantifier operand invalid */ +#define REG_ASSERT 15 /* "can't happen" -- you found a bug */ +#define REG_INVARG 16 /* invalid argument to regex function */ +#define REG_MIXED 17 /* character widths of regex and string differ */ +#define REG_BADOPT 
18 /* invalid embedded option */ /* two specials for debugging and testing */ -#define REG_ATOI 101 /* convert error-code name to number */ -#define REG_ITOA 102 /* convert error-code number to name */ +#define REG_ATOI 101 /* convert error-code name to number */ +#define REG_ITOA 102 /* convert error-code number to name */ /* - * the prototypes for exported functions + * the prototypes, as possibly munched by regfwd */ -extern int wx_regcomp(regex_t *, const wx_wchar *, size_t, int); -extern int regcomp(regex_t *, const wx_wchar *, int); -extern int wx_regexec(regex_t *, const wx_wchar *, size_t, rm_detail_t *, size_t, regmatch_t[], int); -extern int regexec(regex_t *, const wx_wchar *, size_t, regmatch_t[], int); -extern void regfree(regex_t *); -extern size_t regerror(int, const regex_t *, char *, size_t); -extern void wx_regfree(regex_t *); -extern size_t wx_regerror(int, const regex_t *, char *, size_t); +/* =====^!^===== begin forwards =====^!^===== */ +/* automatically gathered by fwd; do not hand-edit */ +/* === regproto.h === */ +#ifndef __REG_NOCHAR +int re_comp _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, int)); +#endif +#ifndef __REG_NOFRONT +int regcomp _ANSI_ARGS_((regex_t *, __REG_CONST char *, int)); +#endif +#ifdef __REG_WIDE_T +int __REG_WIDE_COMPILE _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int)); +#endif +#ifndef __REG_NOCHAR +int re_exec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, rm_detail_t *, size_t, regmatch_t [], int)); +#endif +#ifndef __REG_NOFRONT +int regexec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, regmatch_t [], int)); +#endif +#ifdef __REG_WIDE_T +int __REG_WIDE_EXEC _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, rm_detail_t *, size_t, regmatch_t [], int)); +#endif +re_void regfree _ANSI_ARGS_((regex_t *)); +extern size_t regerror _ANSI_ARGS_((int, __REG_CONST regex_t *, char *, size_t)); +/* automatically gathered by fwd; do not hand-edit */ +/* =====^!^===== end forwards 
=====^!^===== */ + + +/* + * more C++ voodoo + */ #ifdef __cplusplus } #endif -#endif /* _REGEX_H_ */ + + +#endif diff --git a/src/regex/regexec.c b/src/regex/regexec.c index dbae952f71..41d49bdab5 100644 --- a/src/regex/regexec.c +++ b/src/regex/regexec.c @@ -1,21 +1,21 @@ /* * re_*exec and friends - match REs * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * + * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. - * + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -27,8 +27,6 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Header: /projects/cvsroot/pgsql-server/src/backend/regex/regexec.c,v 1.23 2003/08/08 21:41:56 momjian Exp $ - * */ #include "regguts.h" @@ -36,164 +34,151 @@ /* lazy-DFA representation */ -struct arcp -{ /* "pointer" to an outarc */ +struct arcp { /* "pointer" to an outarc */ struct sset *ss; - color co; + color co; }; -struct sset -{ /* state set */ - unsigned *states; /* pointer to bitvector */ - unsigned hash; /* hash of bitvector */ -#define HASH(bv, nw) (((nw) == 1) ? 
*(bv) : hash(bv, nw)) -#define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \ +struct sset { /* state set */ + unsigned *states; /* pointer to bitvector */ + unsigned hash; /* hash of bitvector */ +# define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw)) +# define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \ memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0)) - int flags; -#define STARTER 01 /* the initial state set */ -#define POSTSTATE 02 /* includes the goal state */ -#define LOCKED 04 /* locked in cache */ -#define NOPROGRESS 010 /* zero-progress state set */ - struct arcp ins; /* chain of inarcs pointing here */ - chr *lastseen; /* last entered on arrival here */ - struct sset **outs; /* outarc vector indexed by color */ - struct arcp *inchain; /* chain-pointer vector for outarcs */ + int flags; +# define STARTER 01 /* the initial state set */ +# define POSTSTATE 02 /* includes the goal state */ +# define LOCKED 04 /* locked in cache */ +# define NOPROGRESS 010 /* zero-progress state set */ + struct arcp ins; /* chain of inarcs pointing here */ + chr *lastseen; /* last entered on arrival here */ + struct sset **outs; /* outarc vector indexed by color */ + struct arcp *inchain; /* chain-pointer vector for outarcs */ }; -struct dfa -{ - int nssets; /* size of cache */ - int nssused; /* how many entries occupied yet */ - int nstates; /* number of states */ - int ncolors; /* length of outarc and inchain vectors */ - int wordsper; /* length of state-set bitvectors */ - struct sset *ssets; /* state-set cache */ - unsigned *statesarea; /* bitvector storage */ - unsigned *work; /* pointer to work area within statesarea */ - struct sset **outsarea; /* outarc-vector storage */ - struct arcp *incarea; /* inchain storage */ +struct dfa { + int nssets; /* size of cache */ + int nssused; /* how many entries occupied yet */ + int nstates; /* number of states */ + int ncolors; /* length of outarc and inchain vectors */ + int wordsper; /* length of 
state-set bitvectors */ + struct sset *ssets; /* state-set cache */ + unsigned *statesarea; /* bitvector storage */ + unsigned *work; /* pointer to work area within statesarea */ + struct sset **outsarea; /* outarc-vector storage */ + struct arcp *incarea; /* inchain storage */ struct cnfa *cnfa; struct colormap *cm; - chr *lastpost; /* location of last cache-flushed success */ - chr *lastnopr; /* location of last cache-flushed - * NOPROGRESS */ - struct sset *search; /* replacement-search-pointer memory */ - int cptsmalloced; /* were the areas individually malloced? */ - char *mallocarea; /* self, or master malloced area, or NULL */ + chr *lastpost; /* location of last cache-flushed success */ + chr *lastnopr; /* location of last cache-flushed NOPROGRESS */ + struct sset *search; /* replacement-search-pointer memory */ + int cptsmalloced; /* were the areas individually malloced? */ + char *mallocarea; /* self, or master malloced area, or NULL */ }; -#define WORK 1 /* number of work bitvectors needed */ +#define WORK 1 /* number of work bitvectors needed */ /* setup for non-malloc allocation for small cases */ -#define FEWSTATES 20 /* must be less than UBITS */ -#define FEWCOLORS 15 -struct smalldfa -{ - struct dfa dfa; - struct sset ssets[FEWSTATES * 2]; - unsigned statesarea[FEWSTATES * 2 + WORK]; - struct sset *outsarea[FEWSTATES * 2 * FEWCOLORS]; - struct arcp incarea[FEWSTATES * 2 * FEWCOLORS]; +#define FEWSTATES 20 /* must be less than UBITS */ +#define FEWCOLORS 15 +struct smalldfa { + struct dfa dfa; + struct sset ssets[FEWSTATES*2]; + unsigned statesarea[FEWSTATES*2 + WORK]; + struct sset *outsarea[FEWSTATES*2 * FEWCOLORS]; + struct arcp incarea[FEWSTATES*2 * FEWCOLORS]; }; - -#define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */ +#define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */ /* internal variables, bundled for easy passing around */ -struct vars -{ - regex_t *re; +struct vars { + regex_t *re; struct guts *g; - int eflags; /* copies 
of arguments */ - size_t nmatch; + int eflags; /* copies of arguments */ + size_t nmatch; regmatch_t *pmatch; rm_detail_t *details; - chr *start; /* start of string */ - chr *stop; /* just past end of string */ - int err; /* error code if any (0 none) */ - regoff_t *mem; /* memory vector for backtracking */ + chr *start; /* start of string */ + chr *stop; /* just past end of string */ + int err; /* error code if any (0 none) */ + regoff_t *mem; /* memory vector for backtracking */ struct smalldfa dfa1; struct smalldfa dfa2; }; - -#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */ -#define ISERR() VISERR(v) -#define VERR(vv,e) (((vv)->err) ? (vv)->err : ((vv)->err = (e))) -#define ERR(e) VERR(v, e) /* record an error */ -#define NOERR() {if (ISERR()) return v->err;} /* if error seen, return - * it */ -#define OFF(p) ((p) - v->start) -#define LOFF(p) ((long)OFF(p)) +#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */ +#define ISERR() VISERR(v) +#define VERR(vv,e) (((vv)->err) ? 
(vv)->err : ((vv)->err = (e))) +#define ERR(e) VERR(v, e) /* record an error */ +#define NOERR() {if (ISERR()) return v->err;} /* if error seen, return it */ +#define OFF(p) ((p) - v->start) +#define LOFF(p) ((long)OFF(p)) /* * forward declarations */ +/* =====^!^===== begin forwards =====^!^===== */ +/* automatically gathered by fwd; do not hand-edit */ /* === regexec.c === */ -static int find(struct vars *, struct cnfa *, struct colormap *); -static int cfind(struct vars *, struct cnfa *, struct colormap *); -static int cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **); -static void zapsubs(regmatch_t *, size_t); -static void zapmem(struct vars *, struct subre *); -static void subset(struct vars *, struct subre *, chr *, chr *); -static int dissect(struct vars *, struct subre *, chr *, chr *); -static int condissect(struct vars *, struct subre *, chr *, chr *); -static int altdissect(struct vars *, struct subre *, chr *, chr *); -static int cdissect(struct vars *, struct subre *, chr *, chr *); -static int ccondissect(struct vars *, struct subre *, chr *, chr *); -static int crevdissect(struct vars *, struct subre *, chr *, chr *); -static int cbrdissect(struct vars *, struct subre *, chr *, chr *); -static int caltdissect(struct vars *, struct subre *, chr *, chr *); - +int exec _ANSI_ARGS_((regex_t *, CONST chr *, size_t, rm_detail_t *, size_t, regmatch_t [], int)); +static int find _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *)); +static int cfind _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *)); +static int cfindloop _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **)); +static VOID zapsubs _ANSI_ARGS_((regmatch_t *, size_t)); +static VOID zapmem _ANSI_ARGS_((struct vars *, struct subre *)); +static VOID subset _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); +static int dissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, 
chr *)); +static int condissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); +static int altdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); +static int cdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); +static int ccondissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); +static int crevdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); +static int cbrdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); +static int caltdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); /* === rege_dfa.c === */ -static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *); -static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *); -static chr *lastcold(struct vars *, struct dfa *); -static struct dfa *newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *); -static void freedfa(struct dfa *); -static unsigned hash(unsigned *, int); -static struct sset *initialize(struct vars *, struct dfa *, chr *); -static struct sset *miss(struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *); -static int lacon(struct vars *, struct cnfa *, chr *, pcolor); -static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *); -static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *); +static chr *longest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, int *)); +static chr *shortest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)); +static chr *lastcold _ANSI_ARGS_((struct vars *, struct dfa *)); +static struct dfa *newdfa _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)); +static VOID freedfa _ANSI_ARGS_((struct dfa *)); +static unsigned hash _ANSI_ARGS_((unsigned *, int)); +static struct sset *initialize _ANSI_ARGS_((struct vars *, struct dfa *, chr *)); +static struct sset *miss _ANSI_ARGS_((struct vars *, struct dfa *, struct 
sset *, pcolor, chr *, chr *)); +static int lacon _ANSI_ARGS_((struct vars *, struct cnfa *, chr *, pcolor)); +static struct sset *getvacant _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *)); +static struct sset *pickss _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *)); +/* automatically gathered by fwd; do not hand-edit */ +/* =====^!^===== end forwards =====^!^===== */ + /* - * regexec - match regular expression + - exec - match regular expression + ^ int exec(regex_t *, CONST chr *, size_t, rm_detail_t *, + ^ size_t, regmatch_t [], int); */ int -regexec(regex_t *re, - const chr *string, - size_t nmatch, - regmatch_t pmatch[], - int flags) -{ - rm_detail_t det; - return wx_regexec(re, string, wx_strlen(string), &det, nmatch, pmatch, flags); -} -int -wx_regexec(regex_t *re, - const chr *string, - size_t len, - rm_detail_t *details, - size_t nmatch, - regmatch_t pmatch[], - int flags) +exec(re, string, len, details, nmatch, pmatch, flags) +regex_t *re; +CONST chr *string; +size_t len; +rm_detail_t *details; +size_t nmatch; +regmatch_t pmatch[]; +int flags; { struct vars var; register struct vars *v = &var; - int st; - size_t n; - int backref; - -#define LOCALMAT 20 - regmatch_t mat[LOCALMAT]; - -#define LOCALMEM 40 - regoff_t mem[LOCALMEM]; + int st; + size_t n; + int backref; +# define LOCALMAT 20 + regmatch_t mat[LOCALMAT]; +# define LOCALMEM 40 + regoff_t mem[LOCALMEM]; /* sanity checks */ if (re == NULL || string == NULL || re->re_magic != REMAGIC) @@ -203,51 +188,46 @@ wx_regexec(regex_t *re, /* setup */ v->re = re; - v->g = (struct guts *) re->re_guts; - if ((v->g->cflags & REG_EXPECT) && details == NULL) + v->g = (struct guts *)re->re_guts; + if ((v->g->cflags&REG_EXPECT) && details == NULL) return REG_INVARG; - if (v->g->info & REG_UIMPOSSIBLE) + if (v->g->info&REG_UIMPOSSIBLE) return REG_NOMATCH; - backref = (v->g->info & REG_UBACKREF) ? 1 : 0; + backref = (v->g->info&REG_UBACKREF) ?
1 : 0; v->eflags = flags; - if (v->g->cflags & REG_NOSUB) - nmatch = 0; /* override client */ + if (v->g->cflags&REG_NOSUB) + nmatch = 0; /* override client */ v->nmatch = nmatch; - if (backref) - { + if (backref) { /* need work area */ if (v->g->nsub + 1 <= LOCALMAT) v->pmatch = mat; else - v->pmatch = (regmatch_t *) MALLOC((v->g->nsub + 1) * - sizeof(regmatch_t)); + v->pmatch = (regmatch_t *)MALLOC((v->g->nsub + 1) * + sizeof(regmatch_t)); if (v->pmatch == NULL) return REG_ESPACE; v->nmatch = v->g->nsub + 1; - } - else + } else v->pmatch = pmatch; v->details = details; - v->start = (chr *) string; - v->stop = (chr *) string + len; + v->start = (chr *)string; + v->stop = (chr *)string + len; v->err = 0; - if (backref) - { + if (backref) { /* need retry memory */ assert(v->g->ntree >= 0); - n = (size_t) v->g->ntree; + n = (size_t)v->g->ntree; if (n <= LOCALMEM) v->mem = mem; else - v->mem = (regoff_t *) MALLOC(n * sizeof(regoff_t)); - if (v->mem == NULL) - { + v->mem = (regoff_t *)MALLOC(n*sizeof(regoff_t)); + if (v->mem == NULL) { if (v->pmatch != pmatch && v->pmatch != mat) FREE(v->pmatch); return REG_ESPACE; } - } - else + } else v->mem = NULL; /* do it */ @@ -258,11 +238,10 @@ wx_regexec(regex_t *re, st = find(v, &v->g->tree->cnfa, &v->g->cmap); /* copy (portion of) match vector over if necessary */ - if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) - { + if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) { zapsubs(pmatch, nmatch); n = (nmatch < v->nmatch) ?
nmatch : v->nmatch; - memcpy(VS(pmatch), VS(v->pmatch), n * sizeof(regmatch_t)); + memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t)); } /* clean up */ @@ -274,23 +253,24 @@ wx_regexec(regex_t *re, } /* - * find - find a match for the main NFA (no-complications case) + - find - find a match for the main NFA (no-complications case) + ^ static int find(struct vars *, struct cnfa *, struct colormap *); */ static int -find(struct vars * v, - struct cnfa * cnfa, - struct colormap * cm) +find(v, cnfa, cm) +struct vars *v; +struct cnfa *cnfa; +struct colormap *cm; { struct dfa *s; struct dfa *d; - chr *begin; - chr *end = NULL; - chr *cold; - chr *open; /* open and close of range of possible - * starts */ - chr *close; - int hitend; - int shorter = (v->g->tree->flags & SHORTER) ? 1 : 0; + chr *begin; + chr *end = NULL; + chr *cold; + chr *open; /* open and close of range of possible starts */ + chr *close; + int hitend; + int shorter = (v->g->tree->flags&SHORTER) ? 1 : 0; /* first, a shot with the search RE */ s = newdfa(v, &v->g->search, cm, &v->dfa1); @@ -298,21 +278,20 @@ find(struct vars * v, NOERR(); MDEBUG(("\nsearch at %ld\n", LOFF(v->start))); cold = NULL; - close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *) NULL); + close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *)NULL); freedfa(s); NOERR(); - if (v->g->cflags & REG_EXPECT) - { + if (v->g->cflags&REG_EXPECT) { assert(v->details != NULL); if (cold != NULL) v->details->rm_extend.rm_so = OFF(cold); else v->details->rm_extend.rm_so = OFF(v->stop); - v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ + v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ } - if (close == NULL) /* not found */ + if (close == NULL) /* not found */ return REG_NOMATCH; - if (v->nmatch == 0) /* found, don't need exact location */ + if (v->nmatch == 0) /* found, don't need exact location */ return REG_OKAY; /* find starting point and match */ @@ -323,19 +302,18 @@ find(struct vars * v, d =
newdfa(v, cnfa, cm, &v->dfa1); assert(!(ISERR() && d != NULL)); NOERR(); - for (begin = open; begin <= close; begin++) - { + for (begin = open; begin <= close; begin++) { MDEBUG(("\nfind trying at %ld\n", LOFF(begin))); if (shorter) end = shortest(v, d, begin, begin, v->stop, - (chr **) NULL, &hitend); + (chr **)NULL, &hitend); else end = longest(v, d, begin, v->stop, &hitend); NOERR(); if (hitend && cold == NULL) cold = begin; if (end != NULL) - break; /* NOTE BREAK OUT */ + break; /* NOTE BREAK OUT */ } assert(end != NULL); /* search RE succeeded so loop should */ freedfa(d); @@ -344,15 +322,14 @@ find(struct vars * v, assert(v->nmatch > 0); v->pmatch[0].rm_so = OFF(begin); v->pmatch[0].rm_eo = OFF(end); - if (v->g->cflags & REG_EXPECT) - { + if (v->g->cflags&REG_EXPECT) { if (cold != NULL) v->details->rm_extend.rm_so = OFF(cold); else v->details->rm_extend.rm_so = OFF(v->stop); - v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ + v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ } - if (v->nmatch == 1) /* no need for submatches */ + if (v->nmatch == 1) /* no need for submatches */ return REG_OKAY; /* submatches */ @@ -361,23 +338,24 @@ find(struct vars * v, } /* - * cfind - find a match for the main NFA (with complications) + - cfind - find a match for the main NFA (with complications) + ^ static int cfind(struct vars *, struct cnfa *, struct colormap *); */ static int -cfind(struct vars * v, - struct cnfa * cnfa, - struct colormap * cm) +cfind(v, cnfa, cm) +struct vars *v; +struct cnfa *cnfa; +struct colormap *cm; { struct dfa *s; struct dfa *d; - chr *cold; - int ret; + chr *cold; + int ret; s = newdfa(v, &v->g->search, cm, &v->dfa1); NOERR(); d = newdfa(v, cnfa, cm, &v->dfa2); - if (ISERR()) - { + if (ISERR()) { assert(d == NULL); freedfa(s); return v->err; @@ -388,67 +366,65 @@ cfind(struct vars * v, freedfa(d); freedfa(s); NOERR(); - if (v->g->cflags & REG_EXPECT) - { + if (v->g->cflags&REG_EXPECT) { assert(v->details != NULL); if (cold != NULL)
v->details->rm_extend.rm_so = OFF(cold); else v->details->rm_extend.rm_so = OFF(v->stop); - v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ + v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ } return ret; } /* - * cfindloop - the heart of cfind + - cfindloop - the heart of cfind + ^ static int cfindloop(struct vars *, struct cnfa *, struct colormap *, + ^ struct dfa *, struct dfa *, chr **); */ static int -cfindloop(struct vars * v, - struct cnfa * cnfa, - struct colormap * cm, - struct dfa * d, - struct dfa * s, - chr **coldp) /* where to put coldstart pointer */ +cfindloop(v, cnfa, cm, d, s, coldp) +struct vars *v; +struct cnfa *cnfa; +struct colormap *cm; +struct dfa *d; +struct dfa *s; +chr **coldp; /* where to put coldstart pointer */ { - chr *begin; - chr *end; - chr *cold; - chr *open; /* open and close of range of possible - * starts */ - chr *close; - chr *estart; - chr *estop; - int er; - int shorter = v->g->tree->flags & SHORTER; - int hitend; + chr *begin; + chr *end; + chr *cold; + chr *open; /* open and close of range of possible starts */ + chr *close; + chr *estart; + chr *estop; + int er; + int shorter = v->g->tree->flags&SHORTER; + int hitend; assert(d != NULL && s != NULL); cold = NULL; close = v->start; - do - { + do { MDEBUG(("\ncsearch at %ld\n", LOFF(close))); - close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL); + close = shortest(v, s, close, close, v->stop, &cold, (int *)NULL); if (close == NULL) break; /* NOTE BREAK */ assert(cold != NULL); open = cold; cold = NULL; MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close))); - for (begin = open; begin <= close; begin++) - { + for (begin = open; begin <= close; begin++) { MDEBUG(("\ncfind trying at %ld\n", LOFF(begin))); estart = begin; estop = v->stop; - for (;;) - { + for (;;) { if (shorter) end = shortest(v, d, begin, estart, - estop, (chr **) NULL, &hitend); + estop, (chr **)NULL, &hitend); else end = longest(v, d, begin, estop, - &hitend); + 
&hitend); if (hitend && cold == NULL) cold = begin; if (end == NULL) @@ -457,23 +433,19 @@ cfindloop(struct vars * v, zapsubs(v->pmatch, v->nmatch); zapmem(v, v->g->tree); er = cdissect(v, v->g->tree, begin, end); - if (er == REG_OKAY) - { - if (v->nmatch > 0) - { + if (er == REG_OKAY) { + if (v->nmatch > 0) { v->pmatch[0].rm_so = OFF(begin); v->pmatch[0].rm_eo = OFF(end); } *coldp = cold; return REG_OKAY; } - if (er != REG_NOMATCH) - { + if (er != REG_NOMATCH) { ERR(er); return er; } - if ((shorter) ? end == estop : end == begin) - { + if ((shorter) ? end == estop : end == begin) { /* no point in trying again */ *coldp = cold; return REG_NOMATCH; @@ -492,35 +464,37 @@ cfindloop(struct vars * v, } /* - * zapsubs - initialize the subexpression matches to "no match" + - zapsubs - initialize the subexpression matches to "no match" + ^ static VOID zapsubs(regmatch_t *, size_t); */ -static void -zapsubs(regmatch_t *p, - size_t n) +static VOID +zapsubs(p, n) +regmatch_t *p; +size_t n; { - size_t i; + size_t i; - for (i = n - 1; i > 0; i--) - { + for (i = n-1; i > 0; i--) { p[i].rm_so = -1; p[i].rm_eo = -1; } } /* - * zapmem - initialize the retry memory of a subtree to zeros + - zapmem - initialize the retry memory of a subtree to zeros + ^ static VOID zapmem(struct vars *, struct subre *); */ -static void -zapmem(struct vars * v, - struct subre * t) +static VOID +zapmem(v, t) +struct vars *v; +struct subre *t; { if (t == NULL) return; assert(v->mem != NULL); v->mem[t->retry] = 0; - if (t->op == '(') - { + if (t->op == '(') { assert(t->subno > 0); v->pmatch[t->subno].rm_so = -1; v->pmatch[t->subno].rm_eo = -1; @@ -533,18 +507,20 @@ zapmem(struct vars * v, } /* - * subset - set any subexpression relevant to a successful subre + - subset - set any subexpression relevant to a successful subre + ^ static VOID subset(struct vars *, struct subre *, chr *, chr *); */ -static void -subset(struct vars * v, - struct subre * sub, - chr *begin, - chr *end) +static VOID +subset(v, 
sub, begin, end) +struct vars *v; +struct subre *sub; +chr *begin; +chr *end; { - int n = sub->subno; + int n = sub->subno; assert(n > 0); - if ((size_t) n >= v->nmatch) + if ((size_t)n >= v->nmatch) return; MDEBUG(("setting %d\n", n)); @@ -553,61 +529,64 @@ subset(struct vars * v, } /* - * dissect - determine subexpression matches (uncomplicated case) + - dissect - determine subexpression matches (uncomplicated case) + ^ static int dissect(struct vars *, struct subre *, chr *, chr *); */ -static int /* regexec return code */ -dissect(struct vars * v, - struct subre * t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ +static int /* regexec return code */ +dissect(v, t, begin, end) +struct vars *v; +struct subre *t; +chr *begin; /* beginning of relevant substring */ +chr *end; /* end of same */ { assert(t != NULL); MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end))); - switch (t->op) - { - case '=': /* terminal node */ - assert(t->left == NULL && t->right == NULL); - return REG_OKAY; /* no action, parent did the work */ - break; - case '|': /* alternation */ - assert(t->left != NULL); - return altdissect(v, t, begin, end); - break; - case 'b': /* back ref -- shouldn't be calling us! */ - return REG_ASSERT; - break; - case '.': /* concatenation */ - assert(t->left != NULL && t->right != NULL); - return condissect(v, t, begin, end); - break; - case '(': /* capturing */ - assert(t->left != NULL && t->right == NULL); - assert(t->subno > 0); - subset(v, t, begin, end); - return dissect(v, t->left, begin, end); - break; - default: - return REG_ASSERT; - break; + switch (t->op) { + case '=': /* terminal node */ + assert(t->left == NULL && t->right == NULL); + return REG_OKAY; /* no action, parent did the work */ + break; + case '|': /* alternation */ + assert(t->left != NULL); + return altdissect(v, t, begin, end); + break; + case 'b': /* back ref -- shouldn't be calling us! 
*/ + return REG_ASSERT; + break; + case '.': /* concatenation */ + assert(t->left != NULL && t->right != NULL); + return condissect(v, t, begin, end); + break; + case '(': /* capturing */ + assert(t->left != NULL && t->right == NULL); + assert(t->subno > 0); + subset(v, t, begin, end); + return dissect(v, t->left, begin, end); + break; + default: + return REG_ASSERT; + break; } } /* - * condissect - determine concatenation subexpression matches (uncomplicated) + - condissect - determine concatenation subexpression matches (uncomplicated) + ^ static int condissect(struct vars *, struct subre *, chr *, chr *); */ -static int /* regexec return code */ -condissect(struct vars * v, - struct subre * t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ +static int /* regexec return code */ +condissect(v, t, begin, end) +struct vars *v; +struct subre *t; +chr *begin; /* beginning of relevant substring */ +chr *end; /* end of same */ { struct dfa *d; struct dfa *d2; - chr *mid; - int i; - int shorter = (t->left->flags & SHORTER) ? 1 : 0; - chr *stop = (shorter) ? end : begin; + chr *mid; + int i; + int shorter = (t->left->flags&SHORTER) ? 1 : 0; + chr *stop = (shorter) ? 
end : begin; assert(t->op == '.'); assert(t->left != NULL && t->left->cnfa.nstates > 0); @@ -616,8 +595,7 @@ condissect(struct vars * v, d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); NOERR(); d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2); - if (ISERR()) - { + if (ISERR()) { assert(d2 == NULL); freedfa(d); return v->err; @@ -625,12 +603,11 @@ condissect(struct vars * v, /* pick a tentative midpoint */ if (shorter) - mid = shortest(v, d, begin, begin, end, (chr **) NULL, - (int *) NULL); + mid = shortest(v, d, begin, begin, end, (chr **)NULL, + (int *)NULL); else - mid = longest(v, d, begin, end, (int *) NULL); - if (mid == NULL) - { + mid = longest(v, d, begin, end, (int *)NULL); + if (mid == NULL) { freedfa(d); freedfa(d2); return REG_ASSERT; @@ -638,11 +615,9 @@ condissect(struct vars * v, MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); /* iterate until satisfaction or failure */ - while (longest(v, d2, mid, end, (int *) NULL) != end) - { + while (longest(v, d2, mid, end, (int *)NULL) != end) { /* that midpoint didn't work, find a new one */ - if (mid == stop) - { + if (mid == stop) { /* all possibilities exhausted! */ MDEBUG(("no midpoint!\n")); freedfa(d); @@ -650,12 +625,11 @@ condissect(struct vars * v, return REG_ASSERT; } if (shorter) - mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, - (int *) NULL); + mid = shortest(v, d, begin, mid+1, end, (chr **)NULL, + (int *)NULL); else - mid = longest(v, d, begin, mid - 1, (int *) NULL); - if (mid == NULL) - { + mid = longest(v, d, begin, mid-1, (int *)NULL); + if (mid == NULL) { /* failed to find a new one! 
*/ MDEBUG(("failed midpoint!\n")); freedfa(d); @@ -676,168 +650,162 @@ condissect(struct vars * v, } /* - * altdissect - determine alternative subexpression matches (uncomplicated) + - altdissect - determine alternative subexpression matches (uncomplicated) + ^ static int altdissect(struct vars *, struct subre *, chr *, chr *); */ -static int /* regexec return code */ -altdissect(struct vars * v, - struct subre * t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ +static int /* regexec return code */ +altdissect(v, t, begin, end) +struct vars *v; +struct subre *t; +chr *begin; /* beginning of relevant substring */ +chr *end; /* end of same */ { struct dfa *d; - int i; + int i; assert(t != NULL); assert(t->op == '|'); - for (i = 0; t != NULL; t = t->right, i++) - { + for (i = 0; t != NULL; t = t->right, i++) { MDEBUG(("trying %dth\n", i)); assert(t->left != NULL && t->left->cnfa.nstates > 0); d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); if (ISERR()) return v->err; - if (longest(v, d, begin, end, (int *) NULL) == end) - { + if (longest(v, d, begin, end, (int *)NULL) == end) { MDEBUG(("success\n")); freedfa(d); return dissect(v, t->left, begin, end); } freedfa(d); } - return REG_ASSERT; /* none of them matched?!? */ + return REG_ASSERT; /* none of them matched?!? */ } /* - * cdissect - determine subexpression matches (with complications) - * The retry memory stores the offset of the trial midpoint from begin, + - cdissect - determine subexpression matches (with complications) + * The retry memory stores the offset of the trial midpoint from begin, * plus 1 so that 0 uniquely means "clean slate". 
+ ^ static int cdissect(struct vars *, struct subre *, chr *, chr *); */ -static int /* regexec return code */ -cdissect(struct vars * v, - struct subre * t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ +static int /* regexec return code */ +cdissect(v, t, begin, end) +struct vars *v; +struct subre *t; +chr *begin; /* beginning of relevant substring */ +chr *end; /* end of same */ { - int er; + int er; assert(t != NULL); MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op)); - switch (t->op) - { - case '=': /* terminal node */ - assert(t->left == NULL && t->right == NULL); - return REG_OKAY; /* no action, parent did the work */ - break; - case '|': /* alternation */ - assert(t->left != NULL); - return caltdissect(v, t, begin, end); - break; - case 'b': /* back ref -- shouldn't be calling us! */ - assert(t->left == NULL && t->right == NULL); - return cbrdissect(v, t, begin, end); - break; - case '.': /* concatenation */ - assert(t->left != NULL && t->right != NULL); - return ccondissect(v, t, begin, end); - break; - case '(': /* capturing */ - assert(t->left != NULL && t->right == NULL); - assert(t->subno > 0); - er = cdissect(v, t->left, begin, end); - if (er == REG_OKAY) - subset(v, t, begin, end); - return er; - break; - default: - return REG_ASSERT; - break; + switch (t->op) { + case '=': /* terminal node */ + assert(t->left == NULL && t->right == NULL); + return REG_OKAY; /* no action, parent did the work */ + break; + case '|': /* alternation */ + assert(t->left != NULL); + return caltdissect(v, t, begin, end); + break; + case 'b': /* back ref -- shouldn't be calling us! 
*/ + assert(t->left == NULL && t->right == NULL); + return cbrdissect(v, t, begin, end); + break; + case '.': /* concatenation */ + assert(t->left != NULL && t->right != NULL); + return ccondissect(v, t, begin, end); + break; + case '(': /* capturing */ + assert(t->left != NULL && t->right == NULL); + assert(t->subno > 0); + er = cdissect(v, t->left, begin, end); + if (er == REG_OKAY) + subset(v, t, begin, end); + return er; + break; + default: + return REG_ASSERT; + break; } } /* - * ccondissect - concatenation subexpression matches (with complications) - * The retry memory stores the offset of the trial midpoint from begin, + - ccondissect - concatenation subexpression matches (with complications) + * The retry memory stores the offset of the trial midpoint from begin, * plus 1 so that 0 uniquely means "clean slate". + ^ static int ccondissect(struct vars *, struct subre *, chr *, chr *); */ -static int /* regexec return code */ -ccondissect(struct vars * v, - struct subre * t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ +static int /* regexec return code */ +ccondissect(v, t, begin, end) +struct vars *v; +struct subre *t; +chr *begin; /* beginning of relevant substring */ +chr *end; /* end of same */ { struct dfa *d; struct dfa *d2; - chr *mid; - int er; + chr *mid; + int er; assert(t->op == '.'); assert(t->left != NULL && t->left->cnfa.nstates > 0); assert(t->right != NULL && t->right->cnfa.nstates > 0); - if (t->left->flags & SHORTER) /* reverse scan */ + if (t->left->flags&SHORTER) /* reverse scan */ return crevdissect(v, t, begin, end); d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); if (ISERR()) return v->err; d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC); - if (ISERR()) - { + if (ISERR()) { freedfa(d); return v->err; } MDEBUG(("cconcat %d\n", t->retry)); /* pick a tentative midpoint */ - if (v->mem[t->retry] == 0) - { - mid = longest(v, d, begin, end, (int *) NULL); - if (mid == NULL) - { + if 
(v->mem[t->retry] == 0) { + mid = longest(v, d, begin, end, (int *)NULL); + if (mid == NULL) { freedfa(d); freedfa(d2); return REG_NOMATCH; } MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); v->mem[t->retry] = (mid - begin) + 1; - } - else - { + } else { mid = begin + (v->mem[t->retry] - 1); MDEBUG(("working midpoint %ld\n", LOFF(mid))); } /* iterate until satisfaction or failure */ - for (;;) - { + for (;;) { /* try this midpoint on for size */ er = cdissect(v, t->left, begin, mid); if (er == REG_OKAY && - longest(v, d2, mid, end, (int *) NULL) == end && - (er = cdissect(v, t->right, mid, end)) == - REG_OKAY) - break; /* NOTE BREAK OUT */ - if (er != REG_OKAY && er != REG_NOMATCH) - { + longest(v, d2, mid, end, (int *)NULL) == end && + (er = cdissect(v, t->right, mid, end)) == + REG_OKAY) + break; /* NOTE BREAK OUT */ + if (er != REG_OKAY && er != REG_NOMATCH) { freedfa(d); freedfa(d2); return er; } /* that midpoint didn't work, find a new one */ - if (mid == begin) - { + if (mid == begin) { /* all possibilities exhausted */ MDEBUG(("%d no midpoint\n", t->retry)); freedfa(d); freedfa(d2); return REG_NOMATCH; } - mid = longest(v, d, begin, mid - 1, (int *) NULL); - if (mid == NULL) - { + mid = longest(v, d, begin, mid-1, (int *)NULL); + if (mid == NULL) { /* failed to find a new one */ MDEBUG(("%d failed midpoint\n", t->retry)); freedfa(d); @@ -858,86 +826,79 @@ ccondissect(struct vars * v, } /* - * crevdissect - determine backref shortest-first subexpression matches - * The retry memory stores the offset of the trial midpoint from begin, + - crevdissect - determine backref shortest-first subexpression matches + * The retry memory stores the offset of the trial midpoint from begin, * plus 1 so that 0 uniquely means "clean slate". 
+ ^ static int crevdissect(struct vars *, struct subre *, chr *, chr *); */ -static int /* regexec return code */ -crevdissect(struct vars * v, - struct subre * t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ +static int /* regexec return code */ +crevdissect(v, t, begin, end) +struct vars *v; +struct subre *t; +chr *begin; /* beginning of relevant substring */ +chr *end; /* end of same */ { struct dfa *d; struct dfa *d2; - chr *mid; - int er; + chr *mid; + int er; assert(t->op == '.'); assert(t->left != NULL && t->left->cnfa.nstates > 0); assert(t->right != NULL && t->right->cnfa.nstates > 0); - assert(t->left->flags & SHORTER); + assert(t->left->flags&SHORTER); /* concatenation -- need to split the substring between parts */ d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); if (ISERR()) return v->err; d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC); - if (ISERR()) - { + if (ISERR()) { freedfa(d); return v->err; } MDEBUG(("crev %d\n", t->retry)); /* pick a tentative midpoint */ - if (v->mem[t->retry] == 0) - { - mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL); - if (mid == NULL) - { + if (v->mem[t->retry] == 0) { + mid = shortest(v, d, begin, begin, end, (chr **)NULL, (int *)NULL); + if (mid == NULL) { freedfa(d); freedfa(d2); return REG_NOMATCH; } MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); v->mem[t->retry] = (mid - begin) + 1; - } - else - { + } else { mid = begin + (v->mem[t->retry] - 1); MDEBUG(("working midpoint %ld\n", LOFF(mid))); } /* iterate until satisfaction or failure */ - for (;;) - { + for (;;) { /* try this midpoint on for size */ er = cdissect(v, t->left, begin, mid); if (er == REG_OKAY && - longest(v, d2, mid, end, (int *) NULL) == end && - (er = cdissect(v, t->right, mid, end)) == - REG_OKAY) - break; /* NOTE BREAK OUT */ - if (er != REG_OKAY && er != REG_NOMATCH) - { + longest(v, d2, mid, end, (int *)NULL) == end && + (er = cdissect(v, t->right, mid, end)) == + REG_OKAY) + 
break; /* NOTE BREAK OUT */ + if (er != REG_OKAY && er != REG_NOMATCH) { freedfa(d); freedfa(d2); return er; } /* that midpoint didn't work, find a new one */ - if (mid == end) - { + if (mid == end) { /* all possibilities exhausted */ MDEBUG(("%d no midpoint\n", t->retry)); freedfa(d); freedfa(d2); return REG_NOMATCH; } - mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL); - if (mid == NULL) - { + mid = shortest(v, d, begin, mid+1, end, (chr **)NULL, (int *)NULL); + if (mid == NULL) { /* failed to find a new one */ MDEBUG(("%d failed midpoint\n", t->retry)); freedfa(d); @@ -958,27 +919,29 @@ crevdissect(struct vars * v, } /* - * cbrdissect - determine backref subexpression matches + - cbrdissect - determine backref subexpression matches + ^ static int cbrdissect(struct vars *, struct subre *, chr *, chr *); */ -static int /* regexec return code */ -cbrdissect(struct vars * v, - struct subre * t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ +static int /* regexec return code */ +cbrdissect(v, t, begin, end) +struct vars *v; +struct subre *t; +chr *begin; /* beginning of relevant substring */ +chr *end; /* end of same */ { - int i; - int n = t->subno; - size_t len; - chr *paren; - chr *p; - chr *stop; - int min = t->min; - int max = t->max; + int i; + int n = t->subno; + size_t len; + chr *paren; + chr *p; + chr *stop; + int min = t->min; + int max = t->max; assert(t != NULL); assert(t->op == 'b'); assert(n >= 0); - assert((size_t) n < v->nmatch); + assert((size_t)n < v->nmatch); MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max)); @@ -993,8 +956,7 @@ cbrdissect(struct vars * v, v->mem[t->retry] = 1; /* special-case zero-length string */ - if (len == 0) - { + if (len == 0) { if (begin == end) return REG_OKAY; return REG_NOMATCH; @@ -1002,44 +964,43 @@ cbrdissect(struct vars * v, /* and too-short string */ assert(end >= begin); - if ((size_t) (end - begin) < len) + if ((size_t)(end - begin) < len) return 
REG_NOMATCH; stop = end - len; /* count occurrences */ i = 0; - for (p = begin; p <= stop && (i < max || max == INFINITY); p += len) - { - if ((*v->g->compare) (paren, p, len) != 0) - break; + for (p = begin; p <= stop && (i < max || max == INFINITY); p += len) { + if ((*v->g->compare)(paren, p, len) != 0) + break; i++; } MDEBUG(("cbackref found %d\n", i)); /* and sort it out */ - if (p != end) /* didn't consume all of it */ + if (p != end) /* didn't consume all of it */ return REG_NOMATCH; if (min <= i && (i <= max || max == INFINITY)) return REG_OKAY; - return REG_NOMATCH; /* out of range */ + return REG_NOMATCH; /* out of range */ } /* - * caltdissect - determine alternative subexpression matches (w. complications) + - caltdissect - determine alternative subexpression matches (w. complications) + ^ static int caltdissect(struct vars *, struct subre *, chr *, chr *); */ -static int /* regexec return code */ -caltdissect(struct vars * v, - struct subre * t, - chr *begin, /* beginning of relevant substring */ - chr *end) /* end of same */ +static int /* regexec return code */ +caltdissect(v, t, begin, end) +struct vars *v; +struct subre *t; +chr *begin; /* beginning of relevant substring */ +chr *end; /* end of same */ { struct dfa *d; - int er; - -#define UNTRIED 0 /* not yet tried at all */ -#define TRYING 1 /* top matched, trying submatches */ -#define TRIED 2 /* top didn't match or submatches - * exhausted */ + int er; +# define UNTRIED 0 /* not yet tried at all */ +# define TRYING 1 /* top matched, trying submatches */ +# define TRIED 2 /* top didn't match or submatches exhausted */ if (t == NULL) return REG_NOMATCH; @@ -1050,13 +1011,11 @@ caltdissect(struct vars * v, MDEBUG(("calt n%d\n", t->retry)); assert(t->left != NULL); - if (v->mem[t->retry] == UNTRIED) - { + if (v->mem[t->retry] == UNTRIED) { d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); if (ISERR()) return v->err; - if (longest(v, d, begin, end, (int *) NULL) != end) - { + if (longest(v, d, 
begin, end, (int *)NULL) != end) { freedfa(d); v->mem[t->retry] = TRIED; return caltdissect(v, t->right, begin, end); diff --git a/src/regex/regfree.c b/src/regex/regfree.c index 569bd204aa..17a73896f5 100644 --- a/src/regex/regfree.c +++ b/src/regex/regfree.c @@ -1,21 +1,21 @@ /* * regfree - free an RE * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * + * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. - * + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -27,7 +27,6 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Header: /projects/cvsroot/pgsql-server/src/backend/regex/regfree.c,v 1.17 2003/08/04 00:43:21 momjian Exp $ * * * You might think that this could be incorporated into regcomp.c, and @@ -39,19 +38,16 @@ #include "regguts.h" - /* - * pg_regfree - free an RE (generic function, punts to RE-specific function) + - regfree - free an RE (generic function, punts to RE-specific function) * * Ignoring invocation with NULL is a convenience. 
*/ -void -regfree(regex_t *re) -{ wx_regfree(re); } -void -wx_regfree(regex_t *re) +VOID +regfree(re) +regex_t *re; { if (re == NULL) return; - (*((struct fns *) re->re_fns)->free) (re); + (*((struct fns *)re->re_fns)->free)(re); } diff --git a/src/regex/regguts.h b/src/regex/regguts.h index db23fc4000..36e5092367 100644 --- a/src/regex/regguts.h +++ b/src/regex/regguts.h @@ -1,21 +1,21 @@ /* * Internal interface definitions, etc., for the reg package * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * + * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. - * + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -26,8 +26,6 @@ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $Id$ */ @@ -45,43 +43,63 @@ * Things that regcustom.h might override. 
*/ +/* standard header files (NULL is a reasonable indicator for them) */ +#ifndef NULL +#include <stdio.h> +#include <stdlib.h> +#include <ctype.h> +#include <limits.h> +#include <string.h> +#endif + /* assertions */ -#ifndef __WXWINCE__ #ifndef assert -#ifndef REG_DEBUG -# ifndef NDEBUG -# define NDEBUG /* no assertions */ +# ifndef REG_DEBUG +# define NDEBUG /* no assertions */ # endif -#endif #include <assert.h> #endif -#else -// To do: assertion on WinCE -#define assert(x) -#endif /* voids */ +#ifndef VOID +#define VOID void /* for function return values */ +#endif #ifndef DISCARD -#define DISCARD void /* for throwing values away */ +#define DISCARD VOID /* for throwing values away */ +#endif +#ifndef PVOID +#define PVOID VOID * /* generic pointer */ #endif #ifndef VS -#define VS(x) ((void *)(x)) /* cast something to generic ptr */ +#define VS(x) ((PVOID)(x)) /* cast something to generic ptr */ +#endif +#ifndef NOPARMS +#define NOPARMS VOID /* for empty parm lists */ +#endif + +/* const */ +#ifndef CONST +#define CONST const /* for old compilers, might be empty */ #endif /* function-pointer declarator */ #ifndef FUNCPTR -#define FUNCPTR(name, args) (*name) args +#if __STDC__ >= 1 +#define FUNCPTR(name, args) (*name)args +#else +#define FUNCPTR(name, args) (*name)() +#endif #endif /* memory allocation */ #ifndef MALLOC -#define MALLOC(n) malloc(n) +#define MALLOC(n) malloc(n) #endif #ifndef REALLOC -#define REALLOC(p, n) realloc(VS(p), n) +#define REALLOC(p, n) realloc(VS(p), n) #endif #ifndef FREE -#define FREE(p) free(VS(p)) +#define FREE(p) free(VS(p)) #endif /* want size of a char in bits, and max value in bounded quantifiers */ @@ -89,7 +107,7 @@ #include <limits.h> #endif #ifndef _POSIX2_RE_DUP_MAX -#define _POSIX2_RE_DUP_MAX 255 /* normally from <limits.h> */ +#define _POSIX2_RE_DUP_MAX 255 /* normally from <limits.h> */ #endif @@ -98,13 +116,13 @@ * misc */ -#define NOTREACHED 0 -#define xxx 1 +#define NOTREACHED 0 +#define xxx 1 -#define DUPMAX _POSIX2_RE_DUP_MAX -#define INFINITY (DUPMAX+1) +#define DUPMAX _POSIX2_RE_DUP_MAX +#define INFINITY 
(DUPMAX+1) -#define REMAGIC 0xfed7 /* magic number for main struct */ +#define REMAGIC 0xfed7 /* magic number for main struct */ @@ -113,12 +131,12 @@ */ #ifdef REG_DEBUG /* FDEBUG does finite-state tracing */ -#define FDEBUG(arglist) { if (v->eflags&REG_FTRACE) printf arglist; } +#define FDEBUG(arglist) { if (v->eflags&REG_FTRACE) printf arglist; } /* MDEBUG does higher-level tracing */ -#define MDEBUG(arglist) { if (v->eflags&REG_MTRACE) printf arglist; } +#define MDEBUG(arglist) { if (v->eflags&REG_MTRACE) printf arglist; } #else -#define FDEBUG(arglist) {} -#define MDEBUG(arglist) {} +#define FDEBUG(arglist) {} +#define MDEBUG(arglist) {} #endif @@ -126,25 +144,24 @@ /* * bitmap manipulation */ -#define UBITS (CHAR_BIT * sizeof(unsigned)) -#define BSET(uv, sn) ((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS)) -#define ISBSET(uv, sn) ((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS))) +#define UBITS (CHAR_BIT * sizeof(unsigned)) +#define BSET(uv, sn) ((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS)) +#define ISBSET(uv, sn) ((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS))) /* - * We dissect a chr into byts for colormap table indexing. Here we define - * a byt, which will be the same as a byte on most machines... The exact + * We dissect a chr into byts for colormap table indexing. Here we define + * a byt, which will be the same as a byte on most machines... The exact * size of a byt is not critical, but about 8 bits is good, and extraction * of 8-bit chunks is sometimes especially fast. 
*/ #ifndef BYTBITS -#define BYTBITS 8 /* bits in a byt */ +#define BYTBITS 8 /* bits in a byt */ #endif -#define BYTTAB (1<flags&FREECOL) - union tree *block; /* block of solid color, if any */ +struct colordesc { + uchr nchrs; /* number of chars of this color */ + color sub; /* open subcolor (if any); free chain ptr */ +# define NOSUB COLORLESS + struct arc *arcs; /* color chain */ + int flags; +# define FREECOL 01 /* currently free */ +# define PSEUDO 02 /* pseudocolor, no real chars */ +# define UNUSEDCOLOR(cd) ((cd)->flags&FREECOL) + union tree *block; /* block of solid color, if any */ }; /* the color map itself */ -struct colormap -{ - int magic; -#define CMMAGIC 0x876 - struct vars *v; /* for compile error reporting */ - size_t ncds; /* number of colordescs */ - size_t max; /* highest in use */ - color free; /* beginning of free chain (if non-0) */ +struct colormap { + int magic; +# define CMMAGIC 0x876 + struct vars *v; /* for compile error reporting */ + size_t ncds; /* number of colordescs */ + size_t max; /* highest in use */ + color free; /* beginning of free chain (if non-0) */ struct colordesc *cd; -#define CDEND(cm) (&(cm)->cd[(cm)->max + 1]) -#define NINLINECDS ((size_t)10) +# define CDEND(cm) (&(cm)->cd[(cm)->max + 1]) +# define NINLINECDS ((size_t)10) struct colordesc cdspace[NINLINECDS]; - union tree tree[NBYTS]; /* tree top, plus fill blocks */ + union tree tree[NBYTS]; /* tree top, plus fill blocks */ }; /* optimization magic to do fast chr->color mapping */ -#define B0(c) ((c) & BYTMASK) -#define B1(c) (((c)>>BYTBITS) & BYTMASK) -#define B2(c) (((c)>>(2*BYTBITS)) & BYTMASK) -#define B3(c) (((c)>>(3*BYTBITS)) & BYTMASK) +#define B0(c) ((c) & BYTMASK) +#define B1(c) (((c)>>BYTBITS) & BYTMASK) +#define B2(c) (((c)>>(2*BYTBITS)) & BYTMASK) +#define B3(c) (((c)>>(3*BYTBITS)) & BYTMASK) #if NBYTS == 1 -#define GETCOLOR(cm, c) ((cm)->tree->tcolor[B0(c)]) +#define GETCOLOR(cm, c) ((cm)->tree->tcolor[B0(c)]) #endif /* beware, for NBYTS>1, GETCOLOR() is 
unsafe -- 2nd arg used repeatedly */ #if NBYTS == 2 -#define GETCOLOR(cm, c) ((cm)->tree->tptr[B1(c)]->tcolor[B0(c)]) +#define GETCOLOR(cm, c) ((cm)->tree->tptr[B1(c)]->tcolor[B0(c)]) #endif #if NBYTS == 4 -#define GETCOLOR(cm, c) ((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)]) +#define GETCOLOR(cm, c) ((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)]) #endif @@ -241,23 +251,22 @@ struct colormap * Interface definitions for locale-interface functions in locale.c. * Multi-character collating elements (MCCEs) cause most of the trouble. */ -struct cvec -{ - int nchrs; /* number of chrs */ - int chrspace; /* number of chrs possible */ - chr *chrs; /* pointer to vector of chrs */ - int nranges; /* number of ranges (chr pairs) */ - int rangespace; /* number of chrs possible */ - chr *ranges; /* pointer to vector of chr pairs */ - int nmcces; /* number of MCCEs */ - int mccespace; /* number of MCCEs possible */ - int nmccechrs; /* number of chrs used for MCCEs */ - chr *mcces[1]; /* pointers to 0-terminated MCCEs */ - /* and both batches of chrs are on the end */ +struct cvec { + int nchrs; /* number of chrs */ + int chrspace; /* number of chrs possible */ + chr *chrs; /* pointer to vector of chrs */ + int nranges; /* number of ranges (chr pairs) */ + int rangespace; /* number of chrs possible */ + chr *ranges; /* pointer to vector of chr pairs */ + int nmcces; /* number of MCCEs */ + int mccespace; /* number of MCCEs possible */ + int nmccechrs; /* number of chrs used for MCCEs */ + chr *mcces[1]; /* pointers to 0-terminated MCCEs */ + /* and both batches of chrs are on the end */ }; /* caution: this value cannot be changed easily */ -#define MAXMCCE 2 /* length of longest MCCE */ +#define MAXMCCE 2 /* length of longest MCCE */ @@ -269,59 +278,54 @@ struct cvec */ struct state; -struct arc -{ - int type; -#define ARCFREE '\0' - color co; - struct state *from; /* where it's from (and contained within) */ - struct state *to; /* where it's 
to */ - struct arc *outchain; /* *from's outs chain or free chain */ -#define freechain outchain - struct arc *inchain; /* *to's ins chain */ - struct arc *colorchain; /* color's arc chain */ +struct arc { + int type; +# define ARCFREE '\0' + color co; + struct state *from; /* where it's from (and contained within) */ + struct state *to; /* where it's to */ + struct arc *outchain; /* *from's outs chain or free chain */ +# define freechain outchain + struct arc *inchain; /* *to's ins chain */ + struct arc *colorchain; /* color's arc chain */ }; -struct arcbatch -{ /* for bulk allocation of arcs */ +struct arcbatch { /* for bulk allocation of arcs */ struct arcbatch *next; -#define ABSIZE 10 - struct arc a[ABSIZE]; +# define ABSIZE 10 + struct arc a[ABSIZE]; }; -struct state -{ - int no; -#define FREESTATE (-1) - char flag; /* marks special states */ - int nins; /* number of inarcs */ - struct arc *ins; /* chain of inarcs */ - int nouts; /* number of outarcs */ - struct arc *outs; /* chain of outarcs */ - struct arc *free; /* chain of free arcs */ - struct state *tmp; /* temporary for traversal algorithms */ - struct state *next; /* chain for traversing all */ - struct state *prev; /* back chain */ - struct arcbatch oas; /* first arcbatch, avoid malloc in easy - * case */ - int noas; /* number of arcs used in first arcbatch */ +struct state { + int no; +# define FREESTATE (-1) + char flag; /* marks special states */ + int nins; /* number of inarcs */ + struct arc *ins; /* chain of inarcs */ + int nouts; /* number of outarcs */ + struct arc *outs; /* chain of outarcs */ + struct arc *free; /* chain of free arcs */ + struct state *tmp; /* temporary for traversal algorithms */ + struct state *next; /* chain for traversing all */ + struct state *prev; /* back chain */ + struct arcbatch oas; /* first arcbatch, avoid malloc in easy case */ + int noas; /* number of arcs used in first arcbatch */ }; -struct nfa -{ - struct state *pre; /* pre-initial state */ - struct state 
*init; /* initial state */ - struct state *final; /* final state */ - struct state *post; /* post-final state */ - int nstates; /* for numbering states */ - struct state *states; /* state-chain header */ - struct state *slast; /* tail of the chain */ - struct state *free; /* free list */ - struct colormap *cm; /* the color map */ - color bos[2]; /* colors, if any, assigned to BOS and BOL */ - color eos[2]; /* colors, if any, assigned to EOS and EOL */ - struct vars *v; /* simplifies compile error reporting */ - struct nfa *parent; /* parent NFA, if any */ +struct nfa { + struct state *pre; /* pre-initial state */ + struct state *init; /* initial state */ + struct state *final; /* final state */ + struct state *post; /* post-final state */ + int nstates; /* for numbering states */ + struct state *states; /* state-chain header */ + struct state *slast; /* tail of the chain */ + struct state *free; /* free list */ + struct colormap *cm; /* the color map */ + color bos[2]; /* colors, if any, assigned to BOS and BOL */ + color eos[2]; /* colors, if any, assigned to EOS and EOL */ + struct vars *v; /* simplifies compile error reporting */ + struct nfa *parent; /* parent NFA, if any */ }; @@ -329,64 +333,58 @@ struct nfa /* * definitions for compacted NFA */ -struct carc -{ - color co; /* COLORLESS is list terminator */ - int to; /* state number */ +struct carc { + color co; /* COLORLESS is list terminator */ + int to; /* state number */ }; -struct cnfa -{ - int nstates; /* number of states */ - int ncolors; /* number of colors */ - int flags; -#define HASLACONS 01 /* uses lookahead constraints */ - int pre; /* setup state number */ - int post; /* teardown state number */ - color bos[2]; /* colors, if any, assigned to BOS and BOL */ - color eos[2]; /* colors, if any, assigned to EOS and EOL */ - struct carc **states; /* vector of pointers to outarc lists */ - struct carc *arcs; /* the area for the lists */ +struct cnfa { + int nstates; /* number of states */ + int 
ncolors; /* number of colors */ + int flags; +# define HASLACONS 01 /* uses lookahead constraints */ + int pre; /* setup state number */ + int post; /* teardown state number */ + color bos[2]; /* colors, if any, assigned to BOS and BOL */ + color eos[2]; /* colors, if any, assigned to EOS and EOL */ + struct carc **states; /* vector of pointers to outarc lists */ + struct carc *arcs; /* the area for the lists */ }; - -#define ZAPCNFA(cnfa) ((cnfa).nstates = 0) -#define NULLCNFA(cnfa) ((cnfa).nstates == 0) +#define ZAPCNFA(cnfa) ((cnfa).nstates = 0) +#define NULLCNFA(cnfa) ((cnfa).nstates == 0) /* * subexpression tree */ -struct subre -{ - char op; /* '|', '.' (concat), 'b' (backref), '(', - * '=' */ - char flags; -#define LONGER 01 /* prefers longer match */ -#define SHORTER 02 /* prefers shorter match */ -#define MIXED 04 /* mixed preference below */ -#define CAP 010 /* capturing parens below */ -#define BACKR 020 /* back reference below */ -#define INUSE 0100 /* in use in final tree */ -#define LOCAL 03 /* bits which may not propagate up */ -#define LMIX(f) ((f)<<2) /* LONGER -> MIXED */ -#define SMIX(f) ((f)<<1) /* SHORTER -> MIXED */ -#define UP(f) (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED)) -#define MESSY(f) ((f)&(MIXED|CAP|BACKR)) -#define PREF(f) ((f)&LOCAL) -#define PREF2(f1, f2) ((PREF(f1) != 0) ? PREF(f1) : PREF(f2)) -#define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2)) - short retry; /* index into retry memory */ - int subno; /* subexpression number (for 'b' and '(') */ - short min; /* min repetitions, for backref only */ - short max; /* max repetitions, for backref only */ - struct subre *left; /* left child, if any (also freelist - * chain) */ - struct subre *right; /* right child, if any */ - struct state *begin; /* outarcs from here... */ - struct state *end; /* ...ending in inarcs here */ - struct cnfa cnfa; /* compacted NFA, if any */ - struct subre *chain; /* for bookkeeping and error cleanup */ +struct subre { + char op; /* '|', '.' 
(concat), 'b' (backref), '(', '=' */ + char flags; +# define LONGER 01 /* prefers longer match */ +# define SHORTER 02 /* prefers shorter match */ +# define MIXED 04 /* mixed preference below */ +# define CAP 010 /* capturing parens below */ +# define BACKR 020 /* back reference below */ +# define INUSE 0100 /* in use in final tree */ +# define LOCAL 03 /* bits which may not propagate up */ +# define LMIX(f) ((f)<<2) /* LONGER -> MIXED */ +# define SMIX(f) ((f)<<1) /* SHORTER -> MIXED */ +# define UP(f) (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED)) +# define MESSY(f) ((f)&(MIXED|CAP|BACKR)) +# define PREF(f) ((f)&LOCAL) +# define PREF2(f1, f2) ((PREF(f1) != 0) ? PREF(f1) : PREF(f2)) +# define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2)) + short retry; /* index into retry memory */ + int subno; /* subexpression number (for 'b' and '(') */ + short min; /* min repetitions, for backref only */ + short max; /* max repetitions, for backref only */ + struct subre *left; /* left child, if any (also freelist chain) */ + struct subre *right; /* right child, if any */ + struct state *begin; /* outarcs from here... */ + struct state *end; /* ...ending in inarcs here */ + struct cnfa cnfa; /* compacted NFA, if any */ + struct subre *chain; /* for bookkeeping and error cleanup */ }; @@ -395,9 +393,8 @@ struct subre * table of function pointers for generic manipulation functions * A regex_t's re_fns points to one of these. 
*/ -struct fns -{ - void FUNCPTR(free, (regex_t *)); +struct fns { + VOID FUNCPTR(free, (regex_t *)); }; @@ -405,18 +402,17 @@ struct fns /* * the insides of a regex_t, hidden behind a void * */ -struct guts -{ - int magic; -#define GUTSMAGIC 0xfed9 - int cflags; /* copy of compile flags */ - long info; /* copy of re_info */ - size_t nsub; /* copy of re_nsub */ +struct guts { + int magic; +# define GUTSMAGIC 0xfed9 + int cflags; /* copy of compile flags */ + long info; /* copy of re_info */ + size_t nsub; /* copy of re_nsub */ struct subre *tree; - struct cnfa search; /* for fast preliminary search */ - int ntree; + struct cnfa search; /* for fast preliminary search */ + int ntree; struct colormap cmap; - int FUNCPTR(compare, (const chr *, const chr *, size_t)); - struct subre *lacons; /* lookahead-constraint vector */ - int nlacons; /* size of lacons */ + int FUNCPTR(compare, (CONST chr *, CONST chr *, size_t)); + struct subre *lacons; /* lookahead-constraint vector */ + int nlacons; /* size of lacons */ };