]> git.saurik.com Git - wxWidgets.git/blobdiff - src/regex/regc_lex.c
Added a flag suppressing node content conversion when saving to XML.
[wxWidgets.git] / src / regex / regc_lex.c
index a24290d1a1d9e509c47f67e3df4fcea6c4cf6813..1acc3f4cae2d81444e499b558600a0a69ad82998 100644 (file)
@@ -2,21 +2,21 @@
  * lexical analyzer
  * This file is #included by regcomp.c.
  *
- * Copyright (c) 1998, 1999 Henry Spencer.     All rights reserved.
- *
+ * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
+ * 
  * Development of this software was funded, in part, by Cray Research Inc.,
  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
  * Corporation, none of whom are responsible for the results.  The author
- * thanks all of them.
- *
+ * thanks all of them. 
+ * 
  * Redistribution and use in source and binary forms -- with or without
  * modification -- are permitted for any purpose, provided that
  * redistributions in source form retain this entire copyright notice and
  * indicate the origin and nature of any modifications.
- *
+ * 
  * I'd appreciate being given credit for this package in the documentation
  * of software which uses it, but that is not a requirement.
- *
+ * 
  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- * $Header$
- *
  */
 
 /* scanning macros (know about v) */
-#define ATEOS()                (v->now >= v->stop)
-#define HAVE(n)                (v->stop - v->now >= (n))
-#define NEXT1(c)       (!ATEOS() && *v->now == CHR(c))
-#define NEXT2(a,b)     (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b))
-#define NEXT3(a,b,c)   (HAVE(3) && *v->now == CHR(a) && \
+#define        ATEOS()         (v->now >= v->stop)
+#define        HAVE(n)         (v->stop - v->now >= (n))
+#define        NEXT1(c)        (!ATEOS() && *v->now == CHR(c))
+#define        NEXT2(a,b)      (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b))
+#define        NEXT3(a,b,c)    (HAVE(3) && *v->now == CHR(a) && \
                                                *(v->now+1) == CHR(b) && \
                                                *(v->now+2) == CHR(c))
-#define SET(c)         (v->nexttype = (c))
-#define SETV(c, n)     (v->nexttype = (c), v->nextvalue = (n))
-#define RET(c)         return (SET(c), 1)
-#define RETV(c, n)     return (SETV(c, n), 1)
-#define FAILW(e)       return (ERR(e), 0)      /* ERR does SET(EOS) */
-#define LASTTYPE(t) (v->lasttype == (t))
+#define        SET(c)          (v->nexttype = (c))
+#define        SETV(c, n)      (v->nexttype = (c), v->nextvalue = (n))
+#define        RET(c)          return (SET(c), 1)
+#define        RETV(c, n)      return (SETV(c, n), 1)
+#define        FAILW(e)        return (ERR(e), 0)      /* ERR does SET(EOS) */
+#define        LASTTYPE(t)     (v->lasttype == (t))
 
 /* lexical contexts */
-#define L_ERE  1                               /* mainline ERE/ARE */
-#define L_BRE  2                               /* mainline BRE */
-#define L_Q 3                                  /* REG_QUOTE */
-#define L_EBND 4                               /* ERE/ARE bound */
-#define L_BBND 5                               /* BRE bound */
-#define L_BRACK 6                              /* brackets */
-#define L_CEL  7                               /* collating element */
-#define L_ECL  8                               /* equivalence class */
-#define L_CCL  9                               /* character class */
-#define INTOCON(c)     (v->lexcon = (c))
-#define INCON(con)     (v->lexcon == (con))
+#define        L_ERE   1       /* mainline ERE/ARE */
+#define        L_BRE   2       /* mainline BRE */
+#define        L_Q     3       /* REG_QUOTE */
+#define        L_EBND  4       /* ERE/ARE bound */
+#define        L_BBND  5       /* BRE bound */
+#define        L_BRACK 6       /* brackets */
+#define        L_CEL   7       /* collating element */
+#define        L_ECL   8       /* equivalence class */
+#define        L_CCL   9       /* character class */
+#define        INTOCON(c)      (v->lexcon = (c))
+#define        INCON(con)      (v->lexcon == (con))
 
 /* construct pointer past end of chr array */
-#define ENDOF(array)   ((array) + sizeof(array)/sizeof(chr))
+#define        ENDOF(array)    ((array) + sizeof(array)/sizeof(chr))
 
 /*
- * lexstart - set up lexical stuff, scan leading options
+ - lexstart - set up lexical stuff, scan leading options
+ ^ static VOID lexstart(struct vars *);
  */
-static void
-lexstart(struct vars * v)
+static VOID
+lexstart(v)
+struct vars *v;
 {
-       prefixes(v);                            /* may turn on new type bits etc. */
+       prefixes(v);                    /* may turn on new type bits etc. */
        NOERR();
 
-       if (v->cflags & REG_QUOTE)
-       {
-               assert(!(v->cflags & (REG_ADVANCED | REG_EXPANDED | REG_NEWLINE)));
+       if (v->cflags&REG_QUOTE) {
+               assert(!(v->cflags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE)));
                INTOCON(L_Q);
-       }
-       else if (v->cflags & REG_EXTENDED)
-       {
-               assert(!(v->cflags & REG_QUOTE));
+       } else if (v->cflags&REG_EXTENDED) {
+               assert(!(v->cflags&REG_QUOTE));
                INTOCON(L_ERE);
-       }
-       else
-       {
-               assert(!(v->cflags & (REG_QUOTE | REG_ADVF)));
+       } else {
+               assert(!(v->cflags&(REG_QUOTE|REG_ADVF)));
                INTOCON(L_BRE);
        }
 
        v->nexttype = EMPTY;            /* remember we were at the start */
-       next(v);                                        /* set up the first token */
+       next(v);                        /* set up the first token */
 }
 
 /*
- * prefixes - implement various special prefixes
+ - prefixes - implement various special prefixes
+ ^ static VOID prefixes(struct vars *);
  */
-static void
-prefixes(struct vars * v)
+static VOID
+prefixes(v)
+struct vars *v;
 {
        /* literal string doesn't get any of this stuff */
-       if (v->cflags & REG_QUOTE)
+       if (v->cflags&REG_QUOTE)
                return;
 
-       /* initial "***" gets special things */
+       /* initial "***" gets special things */ 
        if (HAVE(4) && NEXT3('*', '*', '*'))
-               switch (*(v->now + 3))
-               {
-                       case CHR('?'):          /* "***?" error, msg shows version */
-                               ERR(REG_BADPAT);
-                               return;                 /* proceed no further */
-                               break;
-                       case CHR('='):          /* "***=" shifts to literal string */
-                               NOTE(REG_UNONPOSIX);
-                               v->cflags |= REG_QUOTE;
-                               v->cflags &= ~(REG_ADVANCED | REG_EXPANDED | REG_NEWLINE);
-                               v->now += 4;
-                               return;                 /* and there can be no more prefixes */
-                               break;
-                       case CHR(':'):          /* "***:" shifts to AREs */
-                               NOTE(REG_UNONPOSIX);
-                               v->cflags |= REG_ADVANCED;
-                               v->now += 4;
-                               break;
-                       default:                        /* otherwise *** is just an error */
-                               ERR(REG_BADRPT);
-                               return;
-                               break;
+               switch (*(v->now + 3)) {
+               case CHR('?'):          /* "***?" error, msg shows version */
+                       ERR(REG_BADPAT);
+                       return;         /* proceed no further */
+                       break;
+               case CHR('='):          /* "***=" shifts to literal string */
+                       NOTE(REG_UNONPOSIX);
+                       v->cflags |= REG_QUOTE;
+                       v->cflags &= ~(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE);
+                       v->now += 4;
+                       return;         /* and there can be no more prefixes */
+                       break;
+               case CHR(':'):          /* "***:" shifts to AREs */
+                       NOTE(REG_UNONPOSIX);
+                       v->cflags |= REG_ADVANCED;
+                       v->now += 4;
+                       break;
+               default:                /* otherwise *** is just an error */
+                       ERR(REG_BADRPT);
+                       return;
+                       break;
                }
 
        /* BREs and EREs don't get embedded options */
-       if ((v->cflags & REG_ADVANCED) != REG_ADVANCED)
+       if ((v->cflags&REG_ADVANCED) != REG_ADVANCED)
                return;
 
        /* embedded options (AREs only) */
-       if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2)))
-       {
+       if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) {
                NOTE(REG_UNONPOSIX);
                v->now += 2;
                for (; !ATEOS() && iscalpha(*v->now); v->now++)
-                       switch (*v->now)
-                       {
-                               case CHR('b'):  /* BREs (but why???) */
-                                       v->cflags &= ~(REG_ADVANCED | REG_QUOTE);
-                                       break;
-                               case CHR('c'):  /* case sensitive */
-                                       v->cflags &= ~REG_ICASE;
-                                       break;
-                               case CHR('e'):  /* plain EREs */
-                                       v->cflags |= REG_EXTENDED;
-                                       v->cflags &= ~(REG_ADVF | REG_QUOTE);
-                                       break;
-                               case CHR('i'):  /* case insensitive */
-                                       v->cflags |= REG_ICASE;
-                                       break;
-                               case CHR('m'):  /* Perloid synonym for n */
-                               case CHR('n'):  /* \n affects ^ $ . [^ */
-                                       v->cflags |= REG_NEWLINE;
-                                       break;
-                               case CHR('p'):  /* ~Perl, \n affects . [^ */
-                                       v->cflags |= REG_NLSTOP;
-                                       v->cflags &= ~REG_NLANCH;
-                                       break;
-                               case CHR('q'):  /* literal string */
-                                       v->cflags |= REG_QUOTE;
-                                       v->cflags &= ~REG_ADVANCED;
-                                       break;
-                               case CHR('s'):  /* single line, \n ordinary */
-                                       v->cflags &= ~REG_NEWLINE;
-                                       break;
-                               case CHR('t'):  /* tight syntax */
-                                       v->cflags &= ~REG_EXPANDED;
-                                       break;
-                               case CHR('w'):  /* weird, \n affects ^ $ only */
-                                       v->cflags &= ~REG_NLSTOP;
-                                       v->cflags |= REG_NLANCH;
-                                       break;
-                               case CHR('x'):  /* expanded syntax */
-                                       v->cflags |= REG_EXPANDED;
-                                       break;
-                               default:
-                                       ERR(REG_BADOPT);
-                                       return;
+                       switch (*v->now) {
+                       case CHR('b'):          /* BREs (but why???) */
+                               v->cflags &= ~(REG_ADVANCED|REG_QUOTE);
+                               break;
+                       case CHR('c'):          /* case sensitive */
+                               v->cflags &= ~REG_ICASE;
+                               break;
+                       case CHR('e'):          /* plain EREs */
+                               v->cflags |= REG_EXTENDED;
+                               v->cflags &= ~(REG_ADVF|REG_QUOTE);
+                               break;
+                       case CHR('i'):          /* case insensitive */
+                               v->cflags |= REG_ICASE;
+                               break;
+                       case CHR('m'):          /* Perloid synonym for n */
+                       case CHR('n'):          /* \n affects ^ $ . [^ */
+                               v->cflags |= REG_NEWLINE;
+                               break;
+                       case CHR('p'):          /* ~Perl, \n affects . [^ */
+                               v->cflags |= REG_NLSTOP;
+                               v->cflags &= ~REG_NLANCH;
+                               break;
+                       case CHR('q'):          /* literal string */
+                               v->cflags |= REG_QUOTE;
+                               v->cflags &= ~REG_ADVANCED;
+                               break;
+                       case CHR('s'):          /* single line, \n ordinary */
+                               v->cflags &= ~REG_NEWLINE;
+                               break;
+                       case CHR('t'):          /* tight syntax */
+                               v->cflags &= ~REG_EXPANDED;
+                               break;
+                       case CHR('w'):          /* weird, \n affects ^ $ only */
+                               v->cflags &= ~REG_NLSTOP;
+                               v->cflags |= REG_NLANCH;
+                               break;
+                       case CHR('x'):          /* expanded syntax */
+                               v->cflags |= REG_EXPANDED;
+                               break;
+                       default:
+                               ERR(REG_BADOPT);
+                               return;
                        }
-               if (!NEXT1(')'))
-               {
+               if (!NEXT1(')')) {
                        ERR(REG_BADOPT);
                        return;
                }
                v->now++;
-               if (v->cflags & REG_QUOTE)
-                       v->cflags &= ~(REG_EXPANDED | REG_NEWLINE);
+               if (v->cflags&REG_QUOTE)
+                       v->cflags &= ~(REG_EXPANDED|REG_NEWLINE);
        }
 }
 
 /*
- * lexnest - "call a subroutine", interpolating string at the lexical level
- *
+ - lexnest - "call a subroutine", interpolating string at the lexical level
  * Note, this is not a very general facility.  There are a number of
  * implicit assumptions about what sorts of strings can be subroutines.
+ ^ static VOID lexnest(struct vars *, chr *, chr *);
  */
-static void
-lexnest(struct vars * v,
-               chr *beginp,                    /* start of interpolation */
-               chr *endp)                              /* one past end of interpolation */
+static VOID
+lexnest(v, beginp, endp)
+struct vars *v;
+chr *beginp;                           /* start of interpolation */
+chr *endp;                             /* one past end of interpolation */
 {
-       assert(v->savenow == NULL); /* only one level of nesting */
+       assert(v->savenow == NULL);     /* only one level of nesting */
        v->savenow = v->now;
        v->savestop = v->stop;
        v->now = beginp;
@@ -214,124 +208,123 @@ lexnest(struct vars * v,
 /*
  * string constants to interpolate as expansions of things like \d
  */
-static chr     backd[] = {                     /* \d */
+static chr backd[] = {         /* \d */
        CHR('['), CHR('['), CHR(':'),
        CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
        CHR(':'), CHR(']'), CHR(']')
 };
-static chr     backD[] = {                     /* \D */
+static chr backD[] = {         /* \D */
        CHR('['), CHR('^'), CHR('['), CHR(':'),
        CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
        CHR(':'), CHR(']'), CHR(']')
 };
-static chr     brbackd[] = {           /* \d within brackets */
+static chr brbackd[] = {       /* \d within brackets */
        CHR('['), CHR(':'),
        CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
        CHR(':'), CHR(']')
 };
-static chr     backs[] = {                     /* \s */
+static chr backs[] = {         /* \s */
        CHR('['), CHR('['), CHR(':'),
        CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
        CHR(':'), CHR(']'), CHR(']')
 };
-static chr     backS[] = {                     /* \S */
+static chr backS[] = {         /* \S */
        CHR('['), CHR('^'), CHR('['), CHR(':'),
        CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
        CHR(':'), CHR(']'), CHR(']')
 };
-static chr     brbacks[] = {           /* \s within brackets */
+static chr brbacks[] = {       /* \s within brackets */
        CHR('['), CHR(':'),
        CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
        CHR(':'), CHR(']')
 };
-static chr     backw[] = {                     /* \w */
+static chr backw[] = {         /* \w */
        CHR('['), CHR('['), CHR(':'),
        CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
        CHR(':'), CHR(']'), CHR('_'), CHR(']')
 };
-static chr     backW[] = {                     /* \W */
+static chr backW[] = {         /* \W */
        CHR('['), CHR('^'), CHR('['), CHR(':'),
        CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
        CHR(':'), CHR(']'), CHR('_'), CHR(']')
 };
-static chr     brbackw[] = {           /* \w within brackets */
+static chr brbackw[] = {       /* \w within brackets */
        CHR('['), CHR(':'),
        CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
        CHR(':'), CHR(']'), CHR('_')
 };
 
 /*
* lexword - interpolate a bracket expression for word characters
- lexword - interpolate a bracket expression for word characters
  * Possibly ought to inquire whether there is a "word" character class.
+ ^ static VOID lexword(struct vars *);
  */
-static void
-lexword(struct vars * v)
+static VOID
+lexword(v)
+struct vars *v;
 {
        lexnest(v, backw, ENDOF(backw));
 }
 
 /*
- * next - get next token
+ - next - get next token
+ ^ static int next(struct vars *);
  */
-static int                                             /* 1 normal, 0 failure */
-next(struct vars * v)
+static int                     /* 1 normal, 0 failure */
+next(v)
+struct vars *v;
 {
-       chr                     c;
+       chr c;
 
        /* errors yield an infinite sequence of failures */
        if (ISERR())
-               return 0;                               /* the error has set nexttype to EOS */
+               return 0;       /* the error has set nexttype to EOS */
 
        /* remember flavor of last token */
        v->lasttype = v->nexttype;
 
        /* REG_BOSONLY */
-       if (v->nexttype == EMPTY && (v->cflags & REG_BOSONLY))
-       {
+       if (v->nexttype == EMPTY && (v->cflags&REG_BOSONLY)) {
                /* at start of a REG_BOSONLY RE */
                RETV(SBEGIN, 0);                /* same as \A */
        }
 
        /* if we're nested and we've hit end, return to outer level */
-       if (v->savenow != NULL && ATEOS())
-       {
+       if (v->savenow != NULL && ATEOS()) {
                v->now = v->savenow;
                v->stop = v->savestop;
                v->savenow = v->savestop = NULL;
        }
 
        /* skip white space etc. if appropriate (not in literal or []) */
-       if (v->cflags & REG_EXPANDED)
-               switch (v->lexcon)
-               {
-                       case L_ERE:
-                       case L_BRE:
-                       case L_EBND:
-                       case L_BBND:
-                               skip(v);
-                               break;
+       if (v->cflags&REG_EXPANDED)
+               switch (v->lexcon) {
+               case L_ERE:
+               case L_BRE:
+               case L_EBND:
+               case L_BBND:
+                       skip(v);
+                       break;
                }
 
        /* handle EOS, depending on context */
-       if (ATEOS())
-       {
-               switch (v->lexcon)
-               {
-                       case L_ERE:
-                       case L_BRE:
-                       case L_Q:
-                               RET(EOS);
-                               break;
-                       case L_EBND:
-                       case L_BBND:
-                               FAILW(REG_EBRACE);
-                               break;
-                       case L_BRACK:
-                       case L_CEL:
-                       case L_ECL:
-                       case L_CCL:
-                               FAILW(REG_EBRACK);
-                               break;
+       if (ATEOS()) {
+               switch (v->lexcon) {
+               case L_ERE:
+               case L_BRE:
+               case L_Q:
+                       RET(EOS);
+                       break;
+               case L_EBND:
+               case L_BBND:
+                       FAILW(REG_EBRACE);
+                       break;
+               case L_BRACK:
+               case L_CEL:
+               case L_ECL:
+               case L_CCL:
+                       FAILW(REG_EBRACK);
+                       break;
                }
                assert(NOTREACHED);
        }
@@ -340,365 +333,314 @@ next(struct vars * v)
        c = *v->now++;
 
        /* deal with the easy contexts, punt EREs to code below */
-       switch (v->lexcon)
-       {
-               case L_BRE:                             /* punt BREs to separate function */
-                       return brenext(v, c);
+       switch (v->lexcon) {
+       case L_BRE:                     /* punt BREs to separate function */
+               return brenext(v, c);
+               break;
+       case L_ERE:                     /* see below */
+               break;
+       case L_Q:                       /* literal strings are easy */
+               RETV(PLAIN, c);
+               break;
+       case L_BBND:                    /* bounds are fairly simple */
+       case L_EBND:
+               switch (c) {
+               case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
+               case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
+               case CHR('8'): case CHR('9'):
+                       RETV(DIGIT, (chr)DIGITVAL(c));
                        break;
-               case L_ERE:                             /* see below */
+               case CHR(','):
+                       RET(',');
                        break;
-               case L_Q:                               /* literal strings are easy */
-                       RETV(PLAIN, c);
+               case CHR('}'):          /* ERE bound ends with } */
+                       if (INCON(L_EBND)) {
+                               INTOCON(L_ERE);
+                               if ((v->cflags&REG_ADVF) && NEXT1('?')) {
+                                       v->now++;
+                                       NOTE(REG_UNONPOSIX);
+                                       RETV('}', 0);
+                               }
+                               RETV('}', 1);
+                       } else
+                               FAILW(REG_BADBR);
                        break;
-               case L_BBND:                    /* bounds are fairly simple */
-               case L_EBND:
-                       switch (c)
-                       {
-                               case CHR('0'):
-                               case CHR('1'):
-                               case CHR('2'):
-                               case CHR('3'):
-                               case CHR('4'):
-                               case CHR('5'):
-                               case CHR('6'):
-                               case CHR('7'):
-                               case CHR('8'):
-                               case CHR('9'):
-                                       RETV(DIGIT, (chr) DIGITVAL(c));
-                                       break;
-                               case CHR(','):
-                                       RET(',');
-                                       break;
-                               case CHR('}'):  /* ERE bound ends with } */
-                                       if (INCON(L_EBND))
-                                       {
-                                               INTOCON(L_ERE);
-                                               if ((v->cflags & REG_ADVF) && NEXT1('?'))
-                                               {
-                                                       v->now++;
-                                                       NOTE(REG_UNONPOSIX);
-                                                       RETV('}', 0);
-                                               }
-                                               RETV('}', 1);
-                                       }
-                                       else
-                                               FAILW(REG_BADBR);
-                                       break;
-                               case CHR('\\'): /* BRE bound ends with \} */
-                                       if (INCON(L_BBND) && NEXT1('}'))
-                                       {
-                                               v->now++;
-                                               INTOCON(L_BRE);
-                                               RET('}');
-                                       }
-                                       else
-                                               FAILW(REG_BADBR);
-                                       break;
-                               default:
-                                       FAILW(REG_BADBR);
-                                       break;
+               case CHR('\\'):         /* BRE bound ends with \} */
+                       if (INCON(L_BBND) && NEXT1('}')) {
+                               v->now++;
+                               INTOCON(L_BRE);
+                               RET('}');
+                       } else
+                               FAILW(REG_BADBR);
+                       break;
+               default:
+                       FAILW(REG_BADBR);
+                       break;
+               }
+               assert(NOTREACHED);
+               break;
+       case L_BRACK:                   /* brackets are not too hard */
+               switch (c) {
+               case CHR(']'):
+                       if (LASTTYPE('['))
+                               RETV(PLAIN, c);
+                       else {
+                               INTOCON((v->cflags&REG_EXTENDED) ?
+                                                       L_ERE : L_BRE);
+                               RET(']');
                        }
-                       assert(NOTREACHED);
                        break;
-               case L_BRACK:                   /* brackets are not too hard */
-                       switch (c)
-                       {
-                               case CHR(']'):
-                                       if (LASTTYPE('['))
-                                               RETV(PLAIN, c);
-                                       else
-                                       {
-                                               INTOCON((v->cflags & REG_EXTENDED) ?
-                                                               L_ERE : L_BRE);
-                                               RET(']');
-                                       }
-                                       break;
-                               case CHR('\\'):
-                                       NOTE(REG_UBBS);
-                                       if (!(v->cflags & REG_ADVF))
-                                               RETV(PLAIN, c);
-                                       NOTE(REG_UNONPOSIX);
-                                       if (ATEOS())
-                                               FAILW(REG_EESCAPE);
-                                       (DISCARD) lexescape(v);
-                                       switch (v->nexttype)
-                                       {                       /* not all escapes okay here */
-                                               case PLAIN:
-                                                       return 1;
-                                                       break;
-                                               case CCLASS:
-                                                       switch (v->nextvalue)
-                                                       {
-                                                               case 'd':
-                                                                       lexnest(v, brbackd, ENDOF(brbackd));
-                                                                       break;
-                                                               case 's':
-                                                                       lexnest(v, brbacks, ENDOF(brbacks));
-                                                                       break;
-                                                               case 'w':
-                                                                       lexnest(v, brbackw, ENDOF(brbackw));
-                                                                       break;
-                                                               default:
-                                                                       FAILW(REG_EESCAPE);
-                                                                       break;
-                                                       }
-                                                       /* lexnest done, back up and try again */
-                                                       v->nexttype = v->lasttype;
-                                                       return next(v);
-                                                       break;
-                                       }
-                                       /* not one of the acceptable escapes */
-                                       FAILW(REG_EESCAPE);
+               case CHR('\\'):
+                       NOTE(REG_UBBS);
+                       if (!(v->cflags&REG_ADVF))
+                               RETV(PLAIN, c);
+                       NOTE(REG_UNONPOSIX);
+                       if (ATEOS())
+                               FAILW(REG_EESCAPE);
+                       (DISCARD)lexescape(v);
+                       switch (v->nexttype) {  /* not all escapes okay here */
+                       case PLAIN:
+                               return 1;
+                               break;
+                       case CCLASS:
+                               switch (v->nextvalue) {
+                               case 'd':
+                                       lexnest(v, brbackd, ENDOF(brbackd));
                                        break;
-                               case CHR('-'):
-                                       if (LASTTYPE('[') || NEXT1(']'))
-                                               RETV(PLAIN, c);
-                                       else
-                                               RETV(RANGE, c);
+                               case 's':
+                                       lexnest(v, brbacks, ENDOF(brbacks));
                                        break;
-                               case CHR('['):
-                                       if (ATEOS())
-                                               FAILW(REG_EBRACK);
-                                       switch (*v->now++)
-                                       {
-                                               case CHR('.'):
-                                                       INTOCON(L_CEL);
-                                                       /* might or might not be locale-specific */
-                                                       RET(COLLEL);
-                                                       break;
-                                               case CHR('='):
-                                                       INTOCON(L_ECL);
-                                                       NOTE(REG_ULOCALE);
-                                                       RET(ECLASS);
-                                                       break;
-                                               case CHR(':'):
-                                                       INTOCON(L_CCL);
-                                                       NOTE(REG_ULOCALE);
-                                                       RET(CCLASS);
-                                                       break;
-                                               default:                /* oops */
-                                                       v->now--;
-                                                       RETV(PLAIN, c);
-                                                       break;
-                                       }
-                                       assert(NOTREACHED);
+                               case 'w':
+                                       lexnest(v, brbackw, ENDOF(brbackw));
                                        break;
                                default:
-                                       RETV(PLAIN, c);
+                                       FAILW(REG_EESCAPE);
                                        break;
+                               }
+                               /* lexnest done, back up and try again */
+                               v->nexttype = v->lasttype;
+                               return next(v);
+                               break;
                        }
-                       assert(NOTREACHED);
+                       /* not one of the acceptable escapes */
+                       FAILW(REG_EESCAPE);
                        break;
-               case L_CEL:                             /* collating elements are easy */
-                       if (c == CHR('.') && NEXT1(']'))
-                       {
-                               v->now++;
-                               INTOCON(L_BRACK);
-                               RETV(END, '.');
-                       }
-                       else
+               case CHR('-'):
+                       if (LASTTYPE('[') || NEXT1(']'))
                                RETV(PLAIN, c);
-                       break;
-               case L_ECL:                             /* ditto equivalence classes */
-                       if (c == CHR('=') && NEXT1(']'))
-                       {
-                               v->now++;
-                               INTOCON(L_BRACK);
-                               RETV(END, '=');
-                       }
                        else
-                               RETV(PLAIN, c);
+                               RETV(RANGE, c);
                        break;
-               case L_CCL:                             /* ditto character classes */
-                       if (c == CHR(':') && NEXT1(']'))
-                       {
-                               v->now++;
-                               INTOCON(L_BRACK);
-                               RETV(END, ':');
-                       }
-                       else
+               case CHR('['):
+                       if (ATEOS())
+                               FAILW(REG_EBRACK);
+                       switch (*v->now++) {
+                       case CHR('.'):
+                               INTOCON(L_CEL);
+                               /* might or might not be locale-specific */
+                               RET(COLLEL);
+                               break;
+                       case CHR('='):
+                               INTOCON(L_ECL);
+                               NOTE(REG_ULOCALE);
+                               RET(ECLASS);
+                               break;
+                       case CHR(':'):
+                               INTOCON(L_CCL);
+                               NOTE(REG_ULOCALE);
+                               RET(CCLASS);
+                               break;
+                       default:                        /* oops */
+                               v->now--;
                                RETV(PLAIN, c);
+                               break;
+                       }
+                       assert(NOTREACHED);
                        break;
                default:
-                       assert(NOTREACHED);
+                       RETV(PLAIN, c);
                        break;
+               }
+               assert(NOTREACHED);
+               break;
+       case L_CEL:                     /* collating elements are easy */
+               if (c == CHR('.') && NEXT1(']')) {
+                       v->now++;
+                       INTOCON(L_BRACK);
+                       RETV(END, '.');
+               } else
+                       RETV(PLAIN, c);
+               break;
+       case L_ECL:                     /* ditto equivalence classes */
+               if (c == CHR('=') && NEXT1(']')) {
+                       v->now++;
+                       INTOCON(L_BRACK);
+                       RETV(END, '=');
+               } else
+                       RETV(PLAIN, c);
+               break;
+       case L_CCL:                     /* ditto character classes */
+               if (c == CHR(':') && NEXT1(']')) {
+                       v->now++;
+                       INTOCON(L_BRACK);
+                       RETV(END, ':');
+               } else
+                       RETV(PLAIN, c);
+               break;
+       default:
+               assert(NOTREACHED);
+               break;
        }
 
        /* that got rid of everything except EREs and AREs */
        assert(INCON(L_ERE));
 
        /* deal with EREs and AREs, except for backslashes */
-       switch (c)
-       {
-               case CHR('|'):
-                       RET('|');
-                       break;
-               case CHR('*'):
-                       if ((v->cflags & REG_ADVF) && NEXT1('?'))
-                       {
-                               v->now++;
-                               NOTE(REG_UNONPOSIX);
-                               RETV('*', 0);
-                       }
-                       RETV('*', 1);
-                       break;
-               case CHR('+'):
-                       if ((v->cflags & REG_ADVF) && NEXT1('?'))
-                       {
-                               v->now++;
-                               NOTE(REG_UNONPOSIX);
-                               RETV('+', 0);
-                       }
-                       RETV('+', 1);
-                       break;
-               case CHR('?'):
-                       if ((v->cflags & REG_ADVF) && NEXT1('?'))
-                       {
-                               v->now++;
-                               NOTE(REG_UNONPOSIX);
-                               RETV('?', 0);
-                       }
-                       RETV('?', 1);
-                       break;
-               case CHR('{'):                  /* bounds start or plain character */
-                       if (v->cflags & REG_EXPANDED)
-                               skip(v);
-                       if (ATEOS() || !iscdigit(*v->now))
-                       {
-                               NOTE(REG_UBRACES);
-                               NOTE(REG_UUNSPEC);
-                               RETV(PLAIN, c);
-                       }
-                       else
-                       {
-                               NOTE(REG_UBOUNDS);
-                               INTOCON(L_EBND);
-                               RET('{');
+       switch (c) {
+       case CHR('|'):
+               RET('|');
+               break;
+       case CHR('*'):
+               if ((v->cflags&REG_ADVF) && NEXT1('?')) {
+                       v->now++;
+                       NOTE(REG_UNONPOSIX);
+                       RETV('*', 0);
+               }
+               RETV('*', 1);
+               break;
+       case CHR('+'):
+               if ((v->cflags&REG_ADVF) && NEXT1('?')) {
+                       v->now++;
+                       NOTE(REG_UNONPOSIX);
+                       RETV('+', 0);
+               }
+               RETV('+', 1);
+               break;
+       case CHR('?'):
+               if ((v->cflags&REG_ADVF) && NEXT1('?')) {
+                       v->now++;
+                       NOTE(REG_UNONPOSIX);
+                       RETV('?', 0);
+               }
+               RETV('?', 1);
+               break;
+       case CHR('{'):          /* bounds start or plain character */
+               if (v->cflags&REG_EXPANDED)
+                       skip(v);
+               if (ATEOS() || !iscdigit(*v->now)) {
+                       NOTE(REG_UBRACES);
+                       NOTE(REG_UUNSPEC);
+                       RETV(PLAIN, c);
+               } else {
+                       NOTE(REG_UBOUNDS);
+                       INTOCON(L_EBND);
+                       RET('{');
+               }
+               assert(NOTREACHED);
+               break;
+       case CHR('('):          /* parenthesis, or advanced extension */
+               if ((v->cflags&REG_ADVF) && NEXT1('?')) {
+                       NOTE(REG_UNONPOSIX);
+                       v->now++;
+                       switch (*v->now++) {
+                       case CHR(':'):          /* non-capturing paren */
+                               RETV('(', 0);
+                               break;
+                       case CHR('#'):          /* comment */
+                               while (!ATEOS() && *v->now != CHR(')'))
+                                       v->now++;
+                               if (!ATEOS())
+                                       v->now++;
+                               assert(v->nexttype == v->lasttype);
+                               return next(v);
+                               break;
+                       case CHR('='):          /* positive lookahead */
+                               NOTE(REG_ULOOKAHEAD);
+                               RETV(LACON, 1);
+                               break;
+                       case CHR('!'):          /* negative lookahead */
+                               NOTE(REG_ULOOKAHEAD);
+                               RETV(LACON, 0);
+                               break;
+                       default:
+                               FAILW(REG_BADRPT);
+                               break;
                        }
                        assert(NOTREACHED);
-                       break;
-               case CHR('('):                  /* parenthesis, or advanced extension */
-                       if ((v->cflags & REG_ADVF) && NEXT1('?'))
-                       {
-                               NOTE(REG_UNONPOSIX);
-                               v->now++;
-                               switch (*v->now++)
-                               {
-                                       case CHR(':'):          /* non-capturing paren */
-                                               RETV('(', 0);
-                                               break;
-                                       case CHR('#'):          /* comment */
-                                               while (!ATEOS() && *v->now != CHR(')'))
-                                                       v->now++;
-                                               if (!ATEOS())
-                                                       v->now++;
-                                               assert(v->nexttype == v->lasttype);
-                                               return next(v);
-                                               break;
-                                       case CHR('='):          /* positive lookahead */
-                                               NOTE(REG_ULOOKAHEAD);
-                                               RETV(LACON, 1);
-                                               break;
-                                       case CHR('!'):          /* negative lookahead */
-                                               NOTE(REG_ULOOKAHEAD);
-                                               RETV(LACON, 0);
-                                               break;
-                                       default:
-                                               FAILW(REG_BADRPT);
-                                               break;
-                               }
-                               assert(NOTREACHED);
-                       }
-                       if (v->cflags & REG_NOSUB)
-                               RETV('(', 0);   /* all parens non-capturing */
-                       else
-                               RETV('(', 1);
-                       break;
-               case CHR(')'):
-                       if (LASTTYPE('('))
-                               NOTE(REG_UUNSPEC);
-                       RETV(')', c);
-                       break;
-               case CHR('['):                  /* easy except for [[:<:]] and [[:>:]] */
-                       if (HAVE(6) && *(v->now + 0) == CHR('[') &&
-                               *(v->now + 1) == CHR(':') &&
-                               (*(v->now + 2) == CHR('<') ||
-                                *(v->now + 2) == CHR('>')) &&
-                               *(v->now + 3) == CHR(':') &&
-                               *(v->now + 4) == CHR(']') &&
-                               *(v->now + 5) == CHR(']'))
-                       {
-                               c = *(v->now + 2);
-                               v->now += 6;
-                               NOTE(REG_UNONPOSIX);
-                               RET((c == CHR('<')) ? '<' : '>');
-                       }
-                       INTOCON(L_BRACK);
-                       if (NEXT1('^'))
-                       {
-                               v->now++;
-                               RETV('[', 0);
-                       }
-                       RETV('[', 1);
-                       break;
-               case CHR('.'):
-                       RET('.');
-                       break;
-               case CHR('^'):
-                       RET('^');
-                       break;
-               case CHR('$'):
-                       RET('$');
-                       break;
-               case CHR('\\'): /* mostly punt backslashes to code below */
-                       if (ATEOS())
-                               FAILW(REG_EESCAPE);
-                       break;
-               default:                                /* ordinary character */
-                       RETV(PLAIN, c);
-                       break;
+               }
+               if (v->cflags&REG_NOSUB)
+                       RETV('(', 0);           /* all parens non-capturing */
+               else
+                       RETV('(', 1);
+               break;
+       case CHR(')'):
+               if (LASTTYPE('(')) {
+                       NOTE(REG_UUNSPEC);
+               }
+               RETV(')', c);
+               break;
+       case CHR('['):          /* easy except for [[:<:]] and [[:>:]] */
+               if (HAVE(6) &&  *(v->now+0) == CHR('[') &&
+                               *(v->now+1) == CHR(':') &&
+                               (*(v->now+2) == CHR('<') ||
+                                               *(v->now+2) == CHR('>')) &&
+                               *(v->now+3) == CHR(':') &&
+                               *(v->now+4) == CHR(']') &&
+                               *(v->now+5) == CHR(']')) {
+                       c = *(v->now+2);
+                       v->now += 6;
+                       NOTE(REG_UNONPOSIX);
+                       RET((c == CHR('<')) ? '<' : '>');
+               }
+               INTOCON(L_BRACK);
+               if (NEXT1('^')) {
+                       v->now++;
+                       RETV('[', 0);
+               }
+               RETV('[', 1);
+               break;
+       case CHR('.'):
+               RET('.');
+               break;
+       case CHR('^'):
+               RET('^');
+               break;
+       case CHR('$'):
+               RET('$');
+               break;
+       case CHR('\\'):         /* mostly punt backslashes to code below */
+               if (ATEOS())
+                       FAILW(REG_EESCAPE);
+               break;
+       default:                /* ordinary character */
+               RETV(PLAIN, c);
+               break;
        }
 
        /* ERE/ARE backslash handling; backslash already eaten */
        assert(!ATEOS());
-       if (!(v->cflags & REG_ADVF))
-       {                                                       /* only AREs have non-trivial escapes */
-               if (iscalnum(*v->now))
-               {
+       if (!(v->cflags&REG_ADVF)) {    /* only AREs have non-trivial escapes */
+               if (iscalnum(*v->now)) {
                        NOTE(REG_UBSALNUM);
                        NOTE(REG_UUNSPEC);
                }
                RETV(PLAIN, *v->now++);
        }
-       (DISCARD) lexescape(v);
+       (DISCARD)lexescape(v);
        if (ISERR())
                FAILW(REG_EESCAPE);
-       if (v->nexttype == CCLASS)
-       {                                                       /* fudge at lexical level */
-               switch (v->nextvalue)
-               {
-                       case 'd':
-                               lexnest(v, backd, ENDOF(backd));
-                               break;
-                       case 'D':
-                               lexnest(v, backD, ENDOF(backD));
-                               break;
-                       case 's':
-                               lexnest(v, backs, ENDOF(backs));
-                               break;
-                       case 'S':
-                               lexnest(v, backS, ENDOF(backS));
-                               break;
-                       case 'w':
-                               lexnest(v, backw, ENDOF(backw));
-                               break;
-                       case 'W':
-                               lexnest(v, backW, ENDOF(backW));
-                               break;
-                       default:
-                               assert(NOTREACHED);
-                               FAILW(REG_ASSERT);
-                               break;
+       if (v->nexttype == CCLASS) {    /* fudge at lexical level */
+               switch (v->nextvalue) {
+               case 'd':       lexnest(v, backd, ENDOF(backd)); break;
+               case 'D':       lexnest(v, backD, ENDOF(backD)); break;
+               case 's':       lexnest(v, backs, ENDOF(backs)); break;
+               case 'S':       lexnest(v, backS, ENDOF(backS)); break;
+               case 'w':       lexnest(v, backw, ENDOF(backw)); break;
+               case 'W':       lexnest(v, backW, ENDOF(backW)); break;
+               default:
+                       assert(NOTREACHED);
+                       FAILW(REG_ASSERT);
+                       break;
                }
                /* lexnest done, back up and try again */
                v->nexttype = v->lasttype;
@@ -709,23 +651,24 @@ next(struct vars * v)
 }
 
 /*
* lexescape - parse an ARE backslash escape (backslash already eaten)
- lexescape - parse an ARE backslash escape (backslash already eaten)
  * Note slightly nonstandard use of the CCLASS type code.
+ ^ static int lexescape(struct vars *);
  */
-static int                                             /* not actually used, but convenient for
-                                                                * RETV */
-lexescape(struct vars * v)
+static int                     /* not actually used, but convenient for RETV */
+lexescape(v)
+struct vars *v;
 {
-       chr                     c;
-       static chr      alert[] = {
+       chr c;
+       static chr alert[] = {
                CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t')
        };
-       static chr      esc[] = {
+       static chr esc[] = {
                CHR('E'), CHR('S'), CHR('C')
        };
-       chr                *save;
+       chr *save;
 
-       assert(v->cflags & REG_ADVF);
+       assert(v->cflags&REG_ADVF);
 
        assert(!ATEOS());
        c = *v->now++;
@@ -733,295 +676,255 @@ lexescape(struct vars * v)
                RETV(PLAIN, c);
 
        NOTE(REG_UNONPOSIX);
-       switch (c)
-       {
-               case CHR('a'):
-                       RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007')));
-                       break;
-               case CHR('A'):
-                       RETV(SBEGIN, 0);
-                       break;
-               case CHR('b'):
-                       RETV(PLAIN, CHR('\b'));
-                       break;
-               case CHR('B'):
-                       RETV(PLAIN, CHR('\\'));
-                       break;
-               case CHR('c'):
-                       NOTE(REG_UUNPORT);
-                       if (ATEOS())
-                               FAILW(REG_EESCAPE);
-                       RETV(PLAIN, (chr) (*v->now++ & 037));
-                       break;
-               case CHR('d'):
-                       NOTE(REG_ULOCALE);
-                       RETV(CCLASS, 'd');
-                       break;
-               case CHR('D'):
-                       NOTE(REG_ULOCALE);
-                       RETV(CCLASS, 'D');
-                       break;
-               case CHR('e'):
-                       NOTE(REG_UUNPORT);
-                       RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033')));
-                       break;
-               case CHR('f'):
-                       RETV(PLAIN, CHR('\f'));
-                       break;
-               case CHR('m'):
-                       RET('<');
-                       break;
-               case CHR('M'):
-                       RET('>');
-                       break;
-               case CHR('n'):
-                       RETV(PLAIN, CHR('\n'));
-                       break;
-               case CHR('r'):
-                       RETV(PLAIN, CHR('\r'));
-                       break;
-               case CHR('s'):
-                       NOTE(REG_ULOCALE);
-                       RETV(CCLASS, 's');
-                       break;
-               case CHR('S'):
-                       NOTE(REG_ULOCALE);
-                       RETV(CCLASS, 'S');
-                       break;
-               case CHR('t'):
-                       RETV(PLAIN, CHR('\t'));
-                       break;
-               case CHR('u'):
-                       c = lexdigits(v, 16, 4, 4);
-                       if (ISERR())
-                               FAILW(REG_EESCAPE);
-                       RETV(PLAIN, c);
-                       break;
-               case CHR('U'):
-                       c = lexdigits(v, 16, 8, 8);
-                       if (ISERR())
-                               FAILW(REG_EESCAPE);
-                       RETV(PLAIN, c);
-                       break;
-               case CHR('v'):
-                       RETV(PLAIN, CHR('\v'));
-                       break;
-               case CHR('w'):
-                       NOTE(REG_ULOCALE);
-                       RETV(CCLASS, 'w');
-                       break;
-               case CHR('W'):
-                       NOTE(REG_ULOCALE);
-                       RETV(CCLASS, 'W');
-                       break;
-               case CHR('x'):
-                       NOTE(REG_UUNPORT);
-                       c = lexdigits(v, 16, 1, 255);           /* REs >255 long outside
-                                                                                                * spec */
-                       if (ISERR())
-                               FAILW(REG_EESCAPE);
-                       RETV(PLAIN, c);
-                       break;
-               case CHR('y'):
-                       NOTE(REG_ULOCALE);
-                       RETV(WBDRY, 0);
-                       break;
-               case CHR('Y'):
-                       NOTE(REG_ULOCALE);
-                       RETV(NWBDRY, 0);
-                       break;
-               case CHR('Z'):
-                       RETV(SEND, 0);
-                       break;
-               case CHR('1'):
-               case CHR('2'):
-               case CHR('3'):
-               case CHR('4'):
-               case CHR('5'):
-               case CHR('6'):
-               case CHR('7'):
-               case CHR('8'):
-               case CHR('9'):
-                       save = v->now;
-                       v->now--;                       /* put first digit back */
-                       c = lexdigits(v, 10, 1, 255);           /* REs >255 long outside
-                                                                                                * spec */
-                       if (ISERR())
-                               FAILW(REG_EESCAPE);
-                       /* ugly heuristic (first test is "exactly 1 digit?") */
-                       if (v->now - save == 0 || (int) c <= v->nsubexp)
-                       {
-                               NOTE(REG_UBACKREF);
-                               RETV(BACKREF, (chr) c);
-                       }
-                       /* oops, doesn't look like it's a backref after all... */
-                       v->now = save;
-                       /* and fall through into octal number */
-               case CHR('0'):
-                       NOTE(REG_UUNPORT);
-                       v->now--;                       /* put first digit back */
-                       c = lexdigits(v, 8, 1, 3);
-                       if (ISERR())
-                               FAILW(REG_EESCAPE);
-                       RETV(PLAIN, c);
-                       break;
-               default:
-                       assert(iscalpha(c));
-                       FAILW(REG_EESCAPE); /* unknown alphabetic escape */
-                       break;
+       switch (c) {
+       case CHR('a'):
+               RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007')));
+               break;
+       case CHR('A'):
+               RETV(SBEGIN, 0);
+               break;
+       case CHR('b'):
+               RETV(PLAIN, CHR('\b'));
+               break;
+       case CHR('B'):
+               RETV(PLAIN, CHR('\\'));
+               break;
+       case CHR('c'):
+               NOTE(REG_UUNPORT);
+               if (ATEOS())
+                       FAILW(REG_EESCAPE);
+               RETV(PLAIN, (chr)(*v->now++ & 037));
+               break;
+       case CHR('d'):
+               NOTE(REG_ULOCALE);
+               RETV(CCLASS, 'd');
+               break;
+       case CHR('D'):
+               NOTE(REG_ULOCALE);
+               RETV(CCLASS, 'D');
+               break;
+       case CHR('e'):
+               NOTE(REG_UUNPORT);
+               RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033')));
+               break;
+       case CHR('f'):
+               RETV(PLAIN, CHR('\f'));
+               break;
+       case CHR('m'):
+               RET('<');
+               break;
+       case CHR('M'):
+               RET('>');
+               break;
+       case CHR('n'):
+               RETV(PLAIN, CHR('\n'));
+               break;
+       case CHR('r'):
+               RETV(PLAIN, CHR('\r'));
+               break;
+       case CHR('s'):
+               NOTE(REG_ULOCALE);
+               RETV(CCLASS, 's');
+               break;
+       case CHR('S'):
+               NOTE(REG_ULOCALE);
+               RETV(CCLASS, 'S');
+               break;
+       case CHR('t'):
+               RETV(PLAIN, CHR('\t'));
+               break;
+       case CHR('u'):
+               c = lexdigits(v, 16, 4, 4);
+               if (ISERR())
+                       FAILW(REG_EESCAPE);
+               RETV(PLAIN, c);
+               break;
+       case CHR('U'):
+               c = lexdigits(v, 16, 8, 8);
+               if (ISERR())
+                       FAILW(REG_EESCAPE);
+               RETV(PLAIN, c);
+               break;
+       case CHR('v'):
+               RETV(PLAIN, CHR('\v'));
+               break;
+       case CHR('w'):
+               NOTE(REG_ULOCALE);
+               RETV(CCLASS, 'w');
+               break;
+       case CHR('W'):
+               NOTE(REG_ULOCALE);
+               RETV(CCLASS, 'W');
+               break;
+       case CHR('x'):
+               NOTE(REG_UUNPORT);
+               c = lexdigits(v, 16, 1, 255);   /* REs >255 long outside spec */
+               if (ISERR())
+                       FAILW(REG_EESCAPE);
+               RETV(PLAIN, c);
+               break;
+       case CHR('y'):
+               NOTE(REG_ULOCALE);
+               RETV(WBDRY, 0);
+               break;
+       case CHR('Y'):
+               NOTE(REG_ULOCALE);
+               RETV(NWBDRY, 0);
+               break;
+       case CHR('Z'):
+               RETV(SEND, 0);
+               break;
+       case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
+       case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
+       case CHR('9'):
+               save = v->now;
+               v->now--;       /* put first digit back */
+               c = lexdigits(v, 10, 1, 255);   /* REs >255 long outside spec */
+               if (ISERR())
+                       FAILW(REG_EESCAPE);
+               /* ugly heuristic (first test is "exactly 1 digit?") */
+               if (v->now - save == 0 || (int)c <= v->nsubexp) {
+                       NOTE(REG_UBACKREF);
+                       RETV(BACKREF, (chr)c);
+               }
+               /* oops, doesn't look like it's a backref after all... */
+               v->now = save;
+               /* and fall through into octal number */
+       case CHR('0'):
+               NOTE(REG_UUNPORT);
+               v->now--;       /* put first digit back */
+               c = lexdigits(v, 8, 1, 3);
+               if (ISERR())
+                       FAILW(REG_EESCAPE);
+               RETV(PLAIN, c);
+               break;
+       default:
+               assert(iscalpha(c));
+               FAILW(REG_EESCAPE);     /* unknown alphabetic escape */
+               break;
        }
        assert(NOTREACHED);
 }
 
 /*
- * lexdigits - slurp up digits and return chr value
+ - lexdigits - slurp up digits and return chr value
+ ^ static chr lexdigits(struct vars *, int, int, int);
  */
-static chr                                             /* chr value; errors signalled via ERR */
-lexdigits(struct vars * v,
-                 int base,
-                 int minlen,
-                 int maxlen)
+static chr                     /* chr value; errors signalled via ERR */
+lexdigits(v, base, minlen, maxlen)
+struct vars *v;
+int base;
+int minlen;
+int maxlen;
 {
-       uchr            n;                              /* unsigned to avoid overflow misbehavior */
-       int                     len;
-       chr                     c;
-       int                     d;
-       const uchr      ub = (uchr) base;
+       uchr n;                 /* unsigned to avoid overflow misbehavior */
+       int len;
+       chr c;
+       int d;
+       CONST uchr ub = (uchr) base;
 
        n = 0;
-       for (len = 0; len < maxlen && !ATEOS(); len++)
-       {
+       for (len = 0; len < maxlen && !ATEOS(); len++) {
                c = *v->now++;
-               switch (c)
-               {
-                       case CHR('0'):
-                       case CHR('1'):
-                       case CHR('2'):
-                       case CHR('3'):
-                       case CHR('4'):
-                       case CHR('5'):
-                       case CHR('6'):
-                       case CHR('7'):
-                       case CHR('8'):
-                       case CHR('9'):
-                               d = DIGITVAL(c);
-                               break;
-                       case CHR('a'):
-                       case CHR('A'):
-                               d = 10;
-                               break;
-                       case CHR('b'):
-                       case CHR('B'):
-                               d = 11;
-                               break;
-                       case CHR('c'):
-                       case CHR('C'):
-                               d = 12;
-                               break;
-                       case CHR('d'):
-                       case CHR('D'):
-                               d = 13;
-                               break;
-                       case CHR('e'):
-                       case CHR('E'):
-                               d = 14;
-                               break;
-                       case CHR('f'):
-                       case CHR('F'):
-                               d = 15;
-                               break;
-                       default:
-                               v->now--;               /* oops, not a digit at all */
-                               d = -1;
-                               break;
+               switch (c) {
+               case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
+               case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
+               case CHR('8'): case CHR('9'):
+                       d = DIGITVAL(c);
+                       break;
+               case CHR('a'): case CHR('A'): d = 10; break;
+               case CHR('b'): case CHR('B'): d = 11; break;
+               case CHR('c'): case CHR('C'): d = 12; break;
+               case CHR('d'): case CHR('D'): d = 13; break;
+               case CHR('e'): case CHR('E'): d = 14; break;
+               case CHR('f'): case CHR('F'): d = 15; break;
+               default:
+                       v->now--;       /* oops, not a digit at all */
+                       d = -1;
+                       break;
                }
 
-               if (d >= base)
-               {                                               /* not a plausible digit */
+               if (d >= base) {        /* not a plausible digit */
                        v->now--;
                        d = -1;
                }
                if (d < 0)
-                       break;                          /* NOTE BREAK OUT */
-               n = n * ub + (uchr) d;
+                       break;          /* NOTE BREAK OUT */
+               n = n*ub + (uchr)d;
        }
        if (len < minlen)
                ERR(REG_EESCAPE);
 
-       return (chr) n;
+       return (chr)n;
 }
 
 /*
- * brenext - get next BRE token
- *
+ - brenext - get next BRE token
  * This is much like EREs except for all the stupid backslashes and the
  * context-dependency of some things.
+ ^ static int brenext(struct vars *, pchr);
  */
-static int                                             /* 1 normal, 0 failure */
-brenext(struct vars * v,
-               chr pc)
+static int                     /* 1 normal, 0 failure */
+brenext(v, pc)
+struct vars *v;
+pchr pc;
 {
-       chr                     c = (chr) pc;
+       chr c = (chr)pc;
 
-       switch (c)
-       {
-               case CHR('*'):
-                       if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^'))
-                               RETV(PLAIN, c);
-                       RET('*');
-                       break;
-               case CHR('['):
-                       if (HAVE(6) && *(v->now + 0) == CHR('[') &&
-                               *(v->now + 1) == CHR(':') &&
-                               (*(v->now + 2) == CHR('<') ||
-                                *(v->now + 2) == CHR('>')) &&
-                               *(v->now + 3) == CHR(':') &&
-                               *(v->now + 4) == CHR(']') &&
-                               *(v->now + 5) == CHR(']'))
-                       {
-                               c = *(v->now + 2);
-                               v->now += 6;
-                               NOTE(REG_UNONPOSIX);
-                               RET((c == CHR('<')) ? '<' : '>');
-                       }
-                       INTOCON(L_BRACK);
-                       if (NEXT1('^'))
-                       {
-                               v->now++;
-                               RETV('[', 0);
-                       }
-                       RETV('[', 1);
-                       break;
-               case CHR('.'):
-                       RET('.');
-                       break;
-               case CHR('^'):
-                       if (LASTTYPE(EMPTY))
-                               RET('^');
-                       if (LASTTYPE('('))
-                       {
-                               NOTE(REG_UUNSPEC);
-                               RET('^');
-                       }
-                       RETV(PLAIN, c);
-                       break;
-               case CHR('$'):
-                       if (v->cflags & REG_EXPANDED)
-                               skip(v);
-                       if (ATEOS())
-                               RET('$');
-                       if (NEXT2('\\', ')'))
-                       {
-                               NOTE(REG_UUNSPEC);
-                               RET('$');
-                       }
+       switch (c) {
+       case CHR('*'):
+               if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^'))
                        RETV(PLAIN, c);
-                       break;
-               case CHR('\\'):
-                       break;                          /* see below */
-               default:
-                       RETV(PLAIN, c);
-                       break;
+               RET('*');
+               break;
+       case CHR('['):
+               if (HAVE(6) &&  *(v->now+0) == CHR('[') &&
+                               *(v->now+1) == CHR(':') &&
+                               (*(v->now+2) == CHR('<') ||
+                                               *(v->now+2) == CHR('>')) &&
+                               *(v->now+3) == CHR(':') &&
+                               *(v->now+4) == CHR(']') &&
+                               *(v->now+5) == CHR(']')) {
+                       c = *(v->now+2);
+                       v->now += 6;
+                       NOTE(REG_UNONPOSIX);
+                       RET((c == CHR('<')) ? '<' : '>');
+               }
+               INTOCON(L_BRACK);
+               if (NEXT1('^')) {
+                       v->now++;
+                       RETV('[', 0);
+               }
+               RETV('[', 1);
+               break;
+       case CHR('.'):
+               RET('.');
+               break;
+       case CHR('^'):
+               if (LASTTYPE(EMPTY))
+                       RET('^');
+               if (LASTTYPE('(')) {
+                       NOTE(REG_UUNSPEC);
+                       RET('^');
+               }
+               RETV(PLAIN, c);
+               break;
+       case CHR('$'):
+               if (v->cflags&REG_EXPANDED)
+                       skip(v);
+               if (ATEOS())
+                       RET('$');
+               if (NEXT2('\\', ')')) {
+                       NOTE(REG_UUNSPEC);
+                       RET('$');
+               }
+               RETV(PLAIN, c);
+               break;
+       case CHR('\\'):
+               break;          /* see below */
+       default:
+               RETV(PLAIN, c);
+               break;
        }
 
        assert(c == CHR('\\'));
@@ -1030,64 +933,57 @@ brenext(struct vars * v,
                FAILW(REG_EESCAPE);
 
        c = *v->now++;
-       switch (c)
-       {
-               case CHR('{'):
-                       INTOCON(L_BBND);
-                       NOTE(REG_UBOUNDS);
-                       RET('{');
-                       break;
-               case CHR('('):
-                       RETV('(', 1);
-                       break;
-               case CHR(')'):
-                       RETV(')', c);
-                       break;
-               case CHR('<'):
-                       NOTE(REG_UNONPOSIX);
-                       RET('<');
-                       break;
-               case CHR('>'):
-                       NOTE(REG_UNONPOSIX);
-                       RET('>');
-                       break;
-               case CHR('1'):
-               case CHR('2'):
-               case CHR('3'):
-               case CHR('4'):
-               case CHR('5'):
-               case CHR('6'):
-               case CHR('7'):
-               case CHR('8'):
-               case CHR('9'):
-                       NOTE(REG_UBACKREF);
-                       RETV(BACKREF, (chr) DIGITVAL(c));
-                       break;
-               default:
-                       if (iscalnum(c))
-                       {
-                               NOTE(REG_UBSALNUM);
-                               NOTE(REG_UUNSPEC);
-                       }
-                       RETV(PLAIN, c);
-                       break;
+       switch (c) {
+       case CHR('{'):
+               INTOCON(L_BBND);
+               NOTE(REG_UBOUNDS);
+               RET('{');
+               break;
+       case CHR('('):
+               RETV('(', 1);
+               break;
+       case CHR(')'):
+               RETV(')', c);
+               break;
+       case CHR('<'):
+               NOTE(REG_UNONPOSIX);
+               RET('<');
+               break;
+       case CHR('>'):
+               NOTE(REG_UNONPOSIX);
+               RET('>');
+               break;
+       case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
+       case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
+       case CHR('9'):
+               NOTE(REG_UBACKREF);
+               RETV(BACKREF, (chr)DIGITVAL(c));
+               break;
+       default:
+               if (iscalnum(c)) {
+                       NOTE(REG_UBSALNUM);
+                       NOTE(REG_UUNSPEC);
+               }
+               RETV(PLAIN, c);
+               break;
        }
 
        assert(NOTREACHED);
 }
 
 /*
- * skip - skip white space and comments in expanded form
+ - skip - skip white space and comments in expanded form
+ ^ static VOID skip(struct vars *);
  */
-static void
-skip(struct vars * v)
+static VOID
+skip(v)
+struct vars *v;
 {
-       chr                *start = v->now;
+       chr *start = v->now;
 
-       assert(v->cflags & REG_EXPANDED);
+       assert(v->cflags&REG_EXPANDED);
 
-       for (;;)
-       {
+       for (;;) {
                while (!ATEOS() && iscspace(*v->now))
                        v->now++;
                if (ATEOS() || *v->now != CHR('#'))
@@ -1103,31 +999,50 @@ skip(struct vars * v)
 }
 
 /*
- * newline - return the chr for a newline
- *
+ - newline - return the chr for a newline
  * This helps confine use of CHR to this source file.
+ ^ static chr newline(NOPARMS);
  */
 static chr
-newline(void)
+newline()
 {
        return CHR('\n');
 }
 
 /*
- * chrnamed - return the chr known by a given (chr string) name
- *
+ - ch - return the chr sequence for regc_locale.c's fake collating element ch
+ * This helps confine use of CHR to this source file.  Beware that the caller
+ * knows how long the sequence is.
+ ^ #ifdef REG_DEBUG
+ ^ static chr *ch(NOPARMS);
+ ^ #endif
+ */
+#ifdef REG_DEBUG
+static chr *
+ch()
+{
+       static chr chstr[] = { CHR('c'), CHR('h'), CHR('\0') };
+
+       return chstr;
+}
+#endif
+
+/*
+ - chrnamed - return the chr known by a given (chr string) name
  * The code is a bit clumsy, but this routine gets only such specialized
  * use that it hardly matters.
+ ^ static chr chrnamed(struct vars *, chr *, chr *, pchr);
  */
 static chr
-chrnamed(struct vars * v,
-                chr *startp,                   /* start of name */
-                chr *endp,                             /* just past end of name */
-                chr lastresort)                /* what to return if name lookup fails */
+chrnamed(v, startp, endp, lastresort)
+struct vars *v;
+chr *startp;                   /* start of name */
+chr *endp;                     /* just past end of name */
+pchr lastresort;               /* what to return if name lookup fails */
 {
-       celt            c;
-       int                     errsave;
-       int                     e;
+       celt c;
+       int errsave;
+       int e;
        struct cvec *cv;
 
        errsave = v->err;
@@ -1137,10 +1052,10 @@ chrnamed(struct vars * v,
        v->err = errsave;
 
        if (e != 0)
-               return (chr) lastresort;
+               return (chr)lastresort;
 
        cv = range(v, c, c, 0);
        if (cv->nchrs == 0)
-               return (chr) lastresort;
+               return (chr)lastresort;
        return cv->chrs[0];
 }