regex/FreeBSD/regcomp.c

   1 /*-
   2  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
   3  * Copyright (c) 1992, 1993, 1994
   4  *      The Regents of the University of California.  All rights reserved.
   5  *
   6  * This code is derived from software contributed to Berkeley by
   7  * Henry Spencer.
   8  *
   9  * Redistribution and use in source and binary forms, with or without
  10  * modification, are permitted provided that the following conditions
  11  * are met:
  12  * 1. Redistributions of source code must retain the above copyright
  13  *    notice, this list of conditions and the following disclaimer.
  14  * 2. Redistributions in binary form must reproduce the above copyright
  15  *    notice, this list of conditions and the following disclaimer in the
  16  *    documentation and/or other materials provided with the distribution.
  17  * 3. All advertising materials mentioning features or use of this software
  18  *    must display the following acknowledgement:
  19  *      This product includes software developed by the University of
  20  *      California, Berkeley and its contributors.
  21  * 4. Neither the name of the University nor the names of its contributors
  22  *    may be used to endorse or promote products derived from this software
  23  *    without specific prior written permission.
  24  *
  25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  35  * SUCH DAMAGE.
  36  *
  37  *      @(#)regcomp.c   8.5 (Berkeley) 3/20/94
  38  */
  39
  40 #if defined(LIBC_SCCS) && !defined(lint)
  41 static char sccsid[] = "@(#)regcomp.c   8.5 (Berkeley) 3/20/94";
  42 #endif /* LIBC_SCCS and not lint */
  43 #include <sys/cdefs.h>
  44 __FBSDID("$FreeBSD: src/lib/libc/regex/regcomp.c,v 1.30 2003/02/16 17:29:10 nectar Exp $");
  45
  46 #include <sys/types.h>
  47 #include <stdio.h>
  48 #include <string.h>
  49 #include <ctype.h>
  50 #include <limits.h>
  51 #include <stdlib.h>
  52 #include <regex.h>
  53
  54 #include "collate.h"
  55
  56 #include "utils.h"
  57 #include "regex2.h"
  58
  59 #include "cclass.h"
  60 #include "cname.h"
  61
  62 /*
  63  * parse structure, passed up and down to avoid global variables and
  64  * other clumsinesses
  65  */
  66 struct parse {
  67         char *next;             /* next character in RE */
  68         char *end;              /* end of string (-> NUL normally) */
  69         int error;              /* has an error been seen? */
  70         sop *strip;             /* malloced strip */
  71         sopno ssize;            /* malloced strip size (allocated) */
  72         sopno slen;             /* malloced strip length (used) */
  73         int ncsalloc;           /* number of csets allocated */
  74         struct re_guts *g;
  75 #       define  NPAREN  10      /* we need to remember () 1-9 for back refs */
  76         sopno pbegin[NPAREN];   /* -> ( ([0] unused) */
  77         sopno pend[NPAREN];     /* -> ) ([0] unused) */
  78 };
  79
  80 /* ========= begin header generated by ./mkh ========= */
  81 #ifdef __cplusplus
  82 extern "C" {
  83 #endif
  84
  85 /* === regcomp.c === */
  86 static void p_ere(struct parse *p, int stop);
  87 static void p_ere_exp(struct parse *p);
  88 static void p_str(struct parse *p);
  89 static void p_bre(struct parse *p, int end1, int end2);
  90 static int p_simp_re(struct parse *p, int starordinary);
  91 static int p_count(struct parse *p);
  92 static void p_bracket(struct parse *p);
  93 static void p_b_term(struct parse *p, cset *cs);
  94 static void p_b_cclass(struct parse *p, cset *cs);
  95 static void p_b_eclass(struct parse *p, cset *cs);
  96 static char p_b_symbol(struct parse *p);
  97 static char p_b_coll_elem(struct parse *p, int endc);
  98 static char othercase(int ch);
  99 static void bothcases(struct parse *p, int ch);
 100 static void ordinary(struct parse *p, int ch);
 101 static void nonnewline(struct parse *p);
 102 static void repeat(struct parse *p, sopno start, int from, int to);
 103 static int seterr(struct parse *p, int e);
 104 static cset *allocset(struct parse *p);
 105 static void freeset(struct parse *p, cset *cs);
 106 static int freezeset(struct parse *p, cset *cs);
 107 static int firstch(struct parse *p, cset *cs);
 108 static int nch(struct parse *p, cset *cs);
 109 static void mcadd(struct parse *p, cset *cs, char *cp) __unused;
 110 #if used
 111 static void mcsub(cset *cs, char *cp);
 112 static int mcin(cset *cs, char *cp);
 113 static char *mcfind(cset *cs, char *cp);
 114 #endif
 115 static void mcinvert(struct parse *p, cset *cs);
 116 static void mccase(struct parse *p, cset *cs);
 117 static int isinsets(struct re_guts *g, int c);
 118 static int samesets(struct re_guts *g, int c1, int c2);
 119 static void categorize(struct parse *p, struct re_guts *g);
 120 static sopno dupl(struct parse *p, sopno start, sopno finish);
 121 static void doemit(struct parse *p, sop op, size_t opnd);
 122 static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
 123 static void dofwd(struct parse *p, sopno pos, sop value);
 124 static void enlarge(struct parse *p, sopno size);
 125 static void stripsnug(struct parse *p, struct re_guts *g);
 126 static void findmust(struct parse *p, struct re_guts *g);
 127 static int altoffset(sop *scan, int offset, int mccs);
 128 static void computejumps(struct parse *p, struct re_guts *g);
 129 static void computematchjumps(struct parse *p, struct re_guts *g);
 130 static sopno pluscount(struct parse *p, struct re_guts *g);
 131
 132 #ifdef __cplusplus
 133 }
 134 #endif
 135 /* ========= end header generated by ./mkh ========= */
 136
 137 static char nuls[10];           /* place to point scanner in event of error */
 138
 139 /*
 140  * macros for use with parse structure
 141  * BEWARE:  these know that the parse structure is named `p' !!!
 142  */
 143 #define PEEK()  (*p->next)
 144 #define PEEK2() (*(p->next+1))
 145 #define MORE()  (p->next < p->end)
 146 #define MORE2() (p->next+1 < p->end)
 147 #define SEE(c)  (MORE() && PEEK() == (c))
 148 #define SEETWO(a, b)    (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
 149 #define EAT(c)  ((SEE(c)) ? (NEXT(), 1) : 0)
 150 #define EATTWO(a, b)    ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
 151 #define NEXT()  (p->next++)
 152 #define NEXT2() (p->next += 2)
 153 #define NEXTn(n)        (p->next += (n))
 154 #define GETNEXT()       (*p->next++)
 155 #define SETERROR(e)     seterr(p, (e))
 156 #define REQUIRE(co, e)  ((co) || SETERROR(e))
 157 #define MUSTSEE(c, e)   (REQUIRE(MORE() && PEEK() == (c), e))
 158 #define MUSTEAT(c, e)   (REQUIRE(MORE() && GETNEXT() == (c), e))
 159 #define MUSTNOTSEE(c, e)        (REQUIRE(!MORE() || PEEK() != (c), e))
 160 #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
 161 #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
 162 #define AHEAD(pos)              dofwd(p, pos, HERE()-(pos))
 163 #define ASTERN(sop, pos)        EMIT(sop, HERE()-pos)
 164 #define HERE()          (p->slen)
 165 #define THERE()         (p->slen - 1)
 166 #define THERETHERE()    (p->slen - 2)
 167 #define DROP(n) (p->slen -= (n))
 168
 169 #ifndef NDEBUG
 170 static int never = 0;           /* for use in asserts; shuts lint up */
 171 #else
 172 #define never   0               /* some <assert.h>s have bugs too */
 173 #endif
 174
 175 /* Macro used by computejump()/computematchjump() */
 176 #define MIN(a,b)        ((a)<(b)?(a):(b))
 177
 178 /*
 179  - regcomp - interface for parser and compilation
 180  = extern int regcomp(regex_t *, const char *, int);
 181  = #define      REG_BASIC       0000
 182  = #define      REG_EXTENDED    0001
 183  = #define      REG_ICASE       0002
 184  = #define      REG_NOSUB       0004
 185  = #define      REG_NEWLINE     0010
 186  = #define      REG_NOSPEC      0020
 187  = #define      REG_PEND        0040
 188  = #define      REG_DUMP        0200
 189  */
 190 int                             /* 0 success, otherwise REG_something */
 191 regcomp(preg, pattern, cflags)
 192 regex_t * __restrict preg;
 193 const char * __restrict pattern;
 194 int cflags;
 195 {
 196         struct parse pa;
 197         struct re_guts *g;
 198         struct parse *p = &pa;
 199         int i;
 200         size_t len;
 201 #ifdef REDEBUG
 202 #       define  GOODFLAGS(f)    (f)
 203 #else
 204 #       define  GOODFLAGS(f)    ((f)&~REG_DUMP)
 205 #endif
 206
 207         cflags = GOODFLAGS(cflags);
 208         if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
 209                 return(REG_INVARG);
 210
 211         if (cflags&REG_PEND) {
 212                 if (preg->re_endp < pattern)
 213                         return(REG_INVARG);
 214                 len = preg->re_endp - pattern;
 215         } else
 216                 len = strlen((char *)pattern);
 217
 218         /* do the mallocs early so failure handling is easy */
 219         g = (struct re_guts *)malloc(sizeof(struct re_guts) +
 220                                                         (NC-1)*sizeof(cat_t));
 221         if (g == NULL)
 222                 return(REG_ESPACE);
 223         p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
 224         p->strip = (sop *)malloc(p->ssize * sizeof(sop));
 225         p->slen = 0;
 226         if (p->strip == NULL) {
 227                 free((char *)g);
 228                 return(REG_ESPACE);
 229         }
 230
 231         /* set things up */
 232         p->g = g;
 233         p->next = (char *)pattern;      /* convenience; we do not modify it */
 234         p->end = p->next + len;
 235         p->error = 0;
 236         p->ncsalloc = 0;
 237         for (i = 0; i < NPAREN; i++) {
 238                 p->pbegin[i] = 0;
 239                 p->pend[i] = 0;
 240         }
 241         g->csetsize = NC;
 242         g->sets = NULL;
 243         g->setbits = NULL;
 244         g->ncsets = 0;
 245         g->cflags = cflags;
 246         g->iflags = 0;
 247         g->nbol = 0;
 248         g->neol = 0;
 249         g->must = NULL;
 250         g->moffset = -1;
 251         g->charjump = NULL;
 252         g->matchjump = NULL;
 253         g->mlen = 0;
 254         g->nsub = 0;
 255         g->ncategories = 1;     /* category 0 is "everything else" */
 256         g->categories = &g->catspace[-(CHAR_MIN)];
 257         (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
 258         g->backrefs = 0;
 259
 260         /* do it */
 261         EMIT(OEND, 0);
 262         g->firststate = THERE();
 263         if (cflags&REG_EXTENDED)
 264                 p_ere(p, OUT);
 265         else if (cflags&REG_NOSPEC)
 266                 p_str(p);
 267         else
 268                 p_bre(p, OUT, OUT);
 269         EMIT(OEND, 0);
 270         g->laststate = THERE();
 271
 272         /* tidy up loose ends and fill things in */
 273         categorize(p, g);
 274         stripsnug(p, g);
 275         findmust(p, g);
 276         /* only use Boyer-Moore algorithm if the pattern is bigger
 277          * than three characters
 278          */
 279         if(g->mlen > 3) {
 280                 computejumps(p, g);
 281                 computematchjumps(p, g);
 282                 if(g->matchjump == NULL && g->charjump != NULL) {
 283                         free(g->charjump);
 284                         g->charjump = NULL;
 285                 }
 286         }
 287         g->nplus = pluscount(p, g);
 288         g->magic = MAGIC2;
 289         preg->re_nsub = g->nsub;
 290         preg->re_g = g;
 291         preg->re_magic = MAGIC1;
 292 #ifndef REDEBUG
 293         /* not debugging, so can't rely on the assert() in regexec() */
 294         if (g->iflags&BAD)
 295                 SETERROR(REG_ASSERT);
 296 #endif
 297
 298         /* win or lose, we're done */
 299         if (p->error != 0)      /* lose */
 300                 regfree(preg);
 301         return(p->error);
 302 }
 303
 304 /*
 305  - p_ere - ERE parser top level, concatenation and alternation
 306  == static void p_ere(struct parse *p, int stop);
 307  */
 308 static void
 309 p_ere(p, stop)
 310 struct parse *p;
 311 int stop;                       /* character this ERE should end at */
 312 {
 313         char c;
 314         sopno prevback;
 315         sopno prevfwd;
 316         sopno conc;
 317         int first = 1;          /* is this the first alternative? */
 318
 319         for (;;) {
 320                 /* do a bunch of concatenated expressions */
 321                 conc = HERE();
 322                 while (MORE() && (c = PEEK()) != '|' && c != stop)
 323                         p_ere_exp(p);
 324                 (void)REQUIRE(HERE() != conc, REG_EMPTY);       /* require nonempty */
 325
 326                 if (!EAT('|'))
 327                         break;          /* NOTE BREAK OUT */
 328
 329                 if (first) {
 330                         INSERT(OCH_, conc);     /* offset is wrong */
 331                         prevfwd = conc;
 332                         prevback = conc;
 333                         first = 0;
 334                 }
 335                 ASTERN(OOR1, prevback);
 336                 prevback = THERE();
 337                 AHEAD(prevfwd);                 /* fix previous offset */
 338                 prevfwd = HERE();
 339                 EMIT(OOR2, 0);                  /* offset is very wrong */
 340         }
 341
 342         if (!first) {           /* tail-end fixups */
 343                 AHEAD(prevfwd);
 344                 ASTERN(O_CH, prevback);
 345         }
 346
 347         assert(!MORE() || SEE(stop));
 348 }
 349
 350 /*
 351  - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
 352  == static void p_ere_exp(struct parse *p);
 353  */
 354 static void
 355 p_ere_exp(p)
 356 struct parse *p;
 357 {
 358         char c;
 359         sopno pos;
 360         int count;
 361         int count2;
 362         sopno subno;
 363         int wascaret = 0;
 364
 365         assert(MORE());         /* caller should have ensured this */
 366         c = GETNEXT();
 367
 368         pos = HERE();
 369         switch (c) {
 370         case '(':
 371                 (void)REQUIRE(MORE(), REG_EPAREN);
 372                 p->g->nsub++;
 373                 subno = p->g->nsub;
 374                 if (subno < NPAREN)
 375                         p->pbegin[subno] = HERE();
 376                 EMIT(OLPAREN, subno);
 377                 if (!SEE(')'))
 378                         p_ere(p, ')');
 379                 if (subno < NPAREN) {
 380                         p->pend[subno] = HERE();
 381                         assert(p->pend[subno] != 0);
 382                 }
 383                 EMIT(ORPAREN, subno);
 384                 (void)MUSTEAT(')', REG_EPAREN);
 385                 break;
 386 #ifndef POSIX_MISTAKE
 387         case ')':               /* happens only if no current unmatched ( */
 388                 /*
 389                  * You may ask, why the ifndef?  Because I didn't notice
 390                  * this until slightly too late for 1003.2, and none of the
 391                  * other 1003.2 regular-expression reviewers noticed it at
 392                  * all.  So an unmatched ) is legal POSIX, at least until
 393                  * we can get it fixed.
 394                  */
 395                 SETERROR(REG_EPAREN);
 396                 break;
 397 #endif
 398         case '^':
 399                 EMIT(OBOL, 0);
 400                 p->g->iflags |= USEBOL;
 401                 p->g->nbol++;
 402                 wascaret = 1;
 403                 break;
 404         case '$':
 405                 EMIT(OEOL, 0);
 406                 p->g->iflags |= USEEOL;
 407                 p->g->neol++;
 408                 break;
 409         case '|':
 410                 SETERROR(REG_EMPTY);
 411                 break;
 412         case '*':
 413         case '+':
 414         case '?':
 415                 SETERROR(REG_BADRPT);
 416                 break;
 417         case '.':
 418                 if (p->g->cflags&REG_NEWLINE)
 419                         nonnewline(p);
 420                 else
 421                         EMIT(OANY, 0);
 422                 break;
 423         case '[':
 424                 p_bracket(p);
 425                 break;
 426         case '\\':
 427                 (void)REQUIRE(MORE(), REG_EESCAPE);
 428                 c = GETNEXT();
 429                 ordinary(p, c);
 430                 break;
 431         case '{':               /* okay as ordinary except if digit follows */
 432                 (void)REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
 433                 /* FALLTHROUGH */
 434         default:
 435                 ordinary(p, c);
 436                 break;
 437         }
 438
 439         if (!MORE())
 440                 return;
 441         c = PEEK();
 442         /* we call { a repetition if followed by a digit */
 443         if (!( c == '*' || c == '+' || c == '?' ||
 444                                 (c == '{' && MORE2() && isdigit((uch)PEEK2())) ))
 445                 return;         /* no repetition, we're done */
 446         NEXT();
 447
 448         (void)REQUIRE(!wascaret, REG_BADRPT);
 449         switch (c) {
 450         case '*':       /* implemented as +? */
 451                 /* this case does not require the (y|) trick, noKLUDGE */
 452                 INSERT(OPLUS_, pos);
 453                 ASTERN(O_PLUS, pos);
 454                 INSERT(OQUEST_, pos);
 455                 ASTERN(O_QUEST, pos);
 456                 break;
 457         case '+':
 458                 INSERT(OPLUS_, pos);
 459                 ASTERN(O_PLUS, pos);
 460                 break;
 461         case '?':
 462                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
 463                 INSERT(OCH_, pos);              /* offset slightly wrong */
 464                 ASTERN(OOR1, pos);              /* this one's right */
 465                 AHEAD(pos);                     /* fix the OCH_ */
 466                 EMIT(OOR2, 0);                  /* offset very wrong... */
 467                 AHEAD(THERE());                 /* ...so fix it */
 468                 ASTERN(O_CH, THERETHERE());
 469                 break;
 470         case '{':
 471                 count = p_count(p);
 472                 if (EAT(',')) {
 473                         if (isdigit((uch)PEEK())) {
 474                                 count2 = p_count(p);
 475                                 (void)REQUIRE(count <= count2, REG_BADBR);
 476                         } else          /* single number with comma */
 477                                 count2 = INFINITY;
 478                 } else          /* just a single number */
 479                         count2 = count;
 480                 repeat(p, pos, count, count2);
 481                 if (!EAT('}')) {        /* error heuristics */
 482                         while (MORE() && PEEK() != '}')
 483                                 NEXT();
 484                         (void)REQUIRE(MORE(), REG_EBRACE);
 485                         SETERROR(REG_BADBR);
 486                 }
 487                 break;
 488         }
 489
 490         if (!MORE())
 491                 return;
 492         c = PEEK();
 493         if (!( c == '*' || c == '+' || c == '?' ||
 494                                 (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) )
 495                 return;
 496         SETERROR(REG_BADRPT);
 497 }
 498
 499 /*
 500  - p_str - string (no metacharacters) "parser"
 501  == static void p_str(struct parse *p);
 502  */
 503 static void
 504 p_str(p)
 505 struct parse *p;
 506 {
 507         (void)REQUIRE(MORE(), REG_EMPTY);
 508         while (MORE())
 509                 ordinary(p, GETNEXT());
 510 }
 511
 512 /*
 513  - p_bre - BRE parser top level, anchoring and concatenation
 514  == static void p_bre(struct parse *p, int end1, \
 515  ==     int end2);
 516  * Giving end1 as OUT essentially eliminates the end1/end2 check.
 517  *
 518  * This implementation is a bit of a kludge, in that a trailing $ is first
 519  * taken as an ordinary character and then revised to be an anchor.  The
 520  * only undesirable side effect is that '$' gets included as a character
 521  * category in such cases.  This is fairly harmless; not worth fixing.
 522  * The amount of lookahead needed to avoid this kludge is excessive.
 523  */
 524 static void
 525 p_bre(p, end1, end2)
 526 struct parse *p;
 527 int end1;                       /* first terminating character */
 528 int end2;                       /* second terminating character */
 529 {
 530         sopno start = HERE();
 531         int first = 1;                  /* first subexpression? */
 532         int wasdollar = 0;
 533
 534         if (EAT('^')) {
 535                 EMIT(OBOL, 0);
 536                 p->g->iflags |= USEBOL;
 537                 p->g->nbol++;
 538         }
 539         while (MORE() && !SEETWO(end1, end2)) {
 540                 wasdollar = p_simp_re(p, first);
 541                 first = 0;
 542         }
 543         if (wasdollar) {        /* oops, that was a trailing anchor */
 544                 DROP(1);
 545                 EMIT(OEOL, 0);
 546                 p->g->iflags |= USEEOL;
 547                 p->g->neol++;
 548         }
 549
 550         (void)REQUIRE(HERE() != start, REG_EMPTY);      /* require nonempty */
 551 }
 552
 553 /*
 554  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
 555  == static int p_simp_re(struct parse *p, int starordinary);
 556  */
 557 static int                      /* was the simple RE an unbackslashed $? */
 558 p_simp_re(p, starordinary)
 559 struct parse *p;
 560 int starordinary;               /* is a leading * an ordinary character? */
 561 {
 562         int c;
 563         int count;
 564         int count2;
 565         sopno pos;
 566         int i;
 567         sopno subno;
 568 #       define  BACKSL  (1<<CHAR_BIT)
 569
 570         pos = HERE();           /* repetion op, if any, covers from here */
 571
 572         assert(MORE());         /* caller should have ensured this */
 573         c = GETNEXT();
 574         if (c == '\\') {
 575                 (void)REQUIRE(MORE(), REG_EESCAPE);
 576                 c = BACKSL | GETNEXT();
 577         }
 578         switch (c) {
 579         case '.':
 580                 if (p->g->cflags&REG_NEWLINE)
 581                         nonnewline(p);
 582                 else
 583                         EMIT(OANY, 0);
 584                 break;
 585         case '[':
 586                 p_bracket(p);
 587                 break;
 588         case BACKSL|'{':
 589                 SETERROR(REG_BADRPT);
 590                 break;
 591         case BACKSL|'(':
 592                 p->g->nsub++;
 593                 subno = p->g->nsub;
 594                 if (subno < NPAREN)
 595                         p->pbegin[subno] = HERE();
 596                 EMIT(OLPAREN, subno);
 597                 /* the MORE here is an error heuristic */
 598                 if (MORE() && !SEETWO('\\', ')'))
 599                         p_bre(p, '\\', ')');
 600                 if (subno < NPAREN) {
 601                         p->pend[subno] = HERE();
 602                         assert(p->pend[subno] != 0);
 603                 }
 604                 EMIT(ORPAREN, subno);
 605                 (void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
 606                 break;
 607         case BACKSL|')':        /* should not get here -- must be user */
 608         case BACKSL|'}':
 609                 SETERROR(REG_EPAREN);
 610                 break;
 611         case BACKSL|'1':
 612         case BACKSL|'2':
 613         case BACKSL|'3':
 614         case BACKSL|'4':
 615         case BACKSL|'5':
 616         case BACKSL|'6':
 617         case BACKSL|'7':
 618         case BACKSL|'8':
 619         case BACKSL|'9':
 620                 i = (c&~BACKSL) - '0';
 621                 assert(i < NPAREN);
 622                 if (p->pend[i] != 0) {
 623                         assert(i <= p->g->nsub);
 624                         EMIT(OBACK_, i);
 625                         assert(p->pbegin[i] != 0);
 626                         assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
 627                         assert(OP(p->strip[p->pend[i]]) == ORPAREN);
 628                         (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
 629                         EMIT(O_BACK, i);
 630                 } else
 631                         SETERROR(REG_ESUBREG);
 632                 p->g->backrefs = 1;
 633                 break;
 634         case '*':
 635                 (void)REQUIRE(starordinary, REG_BADRPT);
 636                 /* FALLTHROUGH */
 637         default:
 638                 ordinary(p, (char)c);
 639                 break;
 640         }
 641
 642         if (EAT('*')) {         /* implemented as +? */
 643                 /* this case does not require the (y|) trick, noKLUDGE */
 644                 INSERT(OPLUS_, pos);
 645                 ASTERN(O_PLUS, pos);
 646                 INSERT(OQUEST_, pos);
 647                 ASTERN(O_QUEST, pos);
 648         } else if (EATTWO('\\', '{')) {
 649                 count = p_count(p);
 650                 if (EAT(',')) {
 651                         if (MORE() && isdigit((uch)PEEK())) {
 652                                 count2 = p_count(p);
 653                                 (void)REQUIRE(count <= count2, REG_BADBR);
 654                         } else          /* single number with comma */
 655                                 count2 = INFINITY;
 656                 } else          /* just a single number */
 657                         count2 = count;
 658                 repeat(p, pos, count, count2);
 659                 if (!EATTWO('\\', '}')) {       /* error heuristics */
 660                         while (MORE() && !SEETWO('\\', '}'))
 661                                 NEXT();
 662                         (void)REQUIRE(MORE(), REG_EBRACE);
 663                         SETERROR(REG_BADBR);
 664                 }
 665         } else if (c == '$')     /* $ (but not \$) ends it */
 666                 return(1);
 667
 668         return(0);
 669 }
 670
 671 /*
 672  - p_count - parse a repetition count
 673  == static int p_count(struct parse *p);
 674  */
 675 static int                      /* the value */
 676 p_count(p)
 677 struct parse *p;
 678 {
 679         int count = 0;
 680         int ndigits = 0;
 681
 682         while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
 683                 count = count*10 + (GETNEXT() - '0');
 684                 ndigits++;
 685         }
 686
 687         (void)REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
 688         return(count);
 689 }
 690
 691 /*
 692  - p_bracket - parse a bracketed character list
 693  == static void p_bracket(struct parse *p);
 694  *
 695  * Note a significant property of this code:  if the allocset() did SETERROR,
 696  * no set operations are done.
 697  */
 698 static void
 699 p_bracket(p)
 700 struct parse *p;
 701 {
 702         cset *cs = allocset(p);
 703         int invert = 0;
 704
 705         /* Dept of Truly Sickening Special-Case Kludges */
 706         if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
 707                 EMIT(OBOW, 0);
 708                 NEXTn(6);
 709                 return;
 710         }
 711         if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
 712                 EMIT(OEOW, 0);
 713                 NEXTn(6);
 714                 return;
 715         }
 716
 717         if (EAT('^'))
 718                 invert++;       /* make note to invert set at end */
 719         if (EAT(']'))
 720                 CHadd(cs, ']');
 721         else if (EAT('-'))
 722                 CHadd(cs, '-');
 723         while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
 724                 p_b_term(p, cs);
 725         if (EAT('-'))
 726                 CHadd(cs, '-');
 727         (void)MUSTEAT(']', REG_EBRACK);
 728
 729         if (p->error != 0)      /* don't mess things up further */
 730                 return;
 731
 732         if (p->g->cflags&REG_ICASE) {
 733                 int i;
 734                 int ci;
 735
 736                 for (i = p->g->csetsize - 1; i >= 0; i--)
 737                         if (CHIN(cs, i) && isalpha(i)) {
 738                                 ci = othercase(i);
 739                                 if (ci != i)
 740                                         CHadd(cs, ci);
 741                         }
 742                 if (cs->multis != NULL)
 743                         mccase(p, cs);
 744         }
 745         if (invert) {
 746                 int i;
 747
 748                 for (i = p->g->csetsize - 1; i >= 0; i--)
 749                         if (CHIN(cs, i))
 750                                 CHsub(cs, i);
 751                         else
 752                                 CHadd(cs, i);
 753                 if (p->g->cflags&REG_NEWLINE)
 754                         CHsub(cs, '\n');
 755                 if (cs->multis != NULL)
 756                         mcinvert(p, cs);
 757         }
 758
 759         assert(cs->multis == NULL);             /* xxx */
 760
 761         if (nch(p, cs) == 1) {          /* optimize singleton sets */
 762                 ordinary(p, firstch(p, cs));
 763                 freeset(p, cs);
 764         } else
 765                 EMIT(OANYOF, freezeset(p, cs));
 766 }
 767
 768 /*
 769  - p_b_term - parse one term of a bracketed character list
 770  == static void p_b_term(struct parse *p, cset *cs);
 771  */
 772 static void
 773 p_b_term(p, cs)
 774 struct parse *p;
 775 cset *cs;
 776 {
 777         char c;
 778         char start, finish;
 779         int i;
 780
 781         /* classify what we've got */
 782         switch ((MORE()) ? PEEK() : '\0') {
 783         case '[':
 784                 c = (MORE2()) ? PEEK2() : '\0';
 785                 break;
 786         case '-':
 787                 SETERROR(REG_ERANGE);
 788                 return;                 /* NOTE RETURN */
 789                 break;
 790         default:
 791                 c = '\0';
 792                 break;
 793         }
 794
 795         switch (c) {
 796         case ':':               /* character class */
 797                 NEXT2();
 798                 (void)REQUIRE(MORE(), REG_EBRACK);
 799                 c = PEEK();
 800                 (void)REQUIRE(c != '-' && c != ']', REG_ECTYPE);
 801                 p_b_cclass(p, cs);
 802                 (void)REQUIRE(MORE(), REG_EBRACK);
 803                 (void)REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
 804                 break;
 805         case '=':               /* equivalence class */
 806                 NEXT2();
 807                 (void)REQUIRE(MORE(), REG_EBRACK);
 808                 c = PEEK();
 809                 (void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
 810                 p_b_eclass(p, cs);
 811                 (void)REQUIRE(MORE(), REG_EBRACK);
 812                 (void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
 813                 break;
 814         default:                /* symbol, ordinary character, or range */
 815 /* xxx revision needed for multichar stuff */
 816                 start = p_b_symbol(p);
 817                 if (SEE('-') && MORE2() && PEEK2() != ']') {
 818                         /* range */
 819                         NEXT();
 820                         if (EAT('-'))
 821                                 finish = '-';
 822                         else
 823                                 finish = p_b_symbol(p);
 824                 } else
 825                         finish = start;
 826                 if (start == finish)
 827                         CHadd(cs, start);
 828                 else {
 829                         if (__collate_load_error) {
 830                                 (void)REQUIRE((uch)start <= (uch)finish, REG_ERANGE);
 831                                 for (i = (uch)start; i <= (uch)finish; i++)
 832                                         CHadd(cs, i);
 833                         } else {
 834                                 (void)REQUIRE(__collate_range_cmp(start, finish) <= 0, REG_ERANGE);
 835                                 for (i = CHAR_MIN; i <= CHAR_MAX; i++) {
 836                                         if (   __collate_range_cmp(start, i) <= 0
 837                                             && __collate_range_cmp(i, finish) <= 0
 838                                            )
 839                                                 CHadd(cs, i);
 840                                 }
 841                         }
 842                 }
 843                 break;
 844         }
 845 }
 846
 847 /*
 848  - p_b_cclass - parse a character-class name and deal with it
 849  == static void p_b_cclass(struct parse *p, cset *cs);
 850  */
 851 static void
 852 p_b_cclass(p, cs)
 853 struct parse *p;
 854 cset *cs;
 855 {
 856         int c;
 857         char *sp = p->next;
 858         struct cclass *cp;
 859         size_t len;
 860
 861         while (MORE() && isalpha((uch)PEEK()))
 862                 NEXT();
 863         len = p->next - sp;
 864         for (cp = cclasses; cp->name != NULL; cp++)
 865                 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
 866                         break;
 867         if (cp->name == NULL) {
 868                 /* oops, didn't find it */
 869                 SETERROR(REG_ECTYPE);
 870                 return;
 871         }
 872
 873         switch (cp->fidx) {
 874         case CALNUM:
 875                 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
 876                         if (isalnum((uch)c))
 877                                 CHadd(cs, c);
 878                 break;
 879         case CALPHA:
 880                 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
 881                         if (isalpha((uch)c))
 882                                 CHadd(cs, c);
 883                 break;
 884         case CBLANK:
 885                 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
 886                         if (isblank((uch)c))
 887                                 CHadd(cs, c);
 888                 break;
 889         case CCNTRL:
 890                 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
 891                         if (iscntrl((uch)c))
 892                                 CHadd(cs, c);
 893                 break;
 894         case CDIGIT:
 895                 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
 896                         if (isdigit((uch)c))
 897                                 CHadd(cs, c);
 898                 break;
 899         case CGRAPH:
 900                 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
 901                         if (isgraph((uch)c))
 902                                 CHadd(cs, c);
 903                 break;
 904         case CLOWER:
 905                 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
 906                         if (islower((uch)c))
 907                                 CHadd(cs, c);
 908                 break;
 909         case CPRINT:
 910                 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
 911                         if (isprint((uch)c))
 912                                 CHadd(cs, c);
 913                 break;
 914         case CPUNCT:
 915                 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
 916                         if (ispunct((uch)c))
 917                                 CHadd(cs, c);
 918                 break;
 919         case CSPACE:
 920                 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
 921                         if (isspace((uch)c))
 922                                 CHadd(cs, c);
 923                 break;
 924         case CUPPER:
 925                 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
 926                         if (isupper((uch)c))
 927                                 CHadd(cs, c);
 928                 break;
 929         case CXDIGIT:
 930                 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
 931                         if (isxdigit((uch)c))
 932                                 CHadd(cs, c);
 933                 break;
 934         }
 935 #if 0
 936         for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
 937                 MCadd(p, cs, u);
 938 #endif
 939 }
 940
 941 /*
 942  - p_b_eclass - parse an equivalence-class name and deal with it
 943  == static void p_b_eclass(struct parse *p, cset *cs);
 944  *
 945  * This implementation is incomplete. xxx
 946  */
 947 static void
 948 p_b_eclass(p, cs)
 949 struct parse *p;
 950 cset *cs;
 951 {
 952         char c;
 953
 954         c = p_b_coll_elem(p, '=');
 955         CHadd(cs, c);
 956 }
 957
 958 /*
 959  - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
 960  == static char p_b_symbol(struct parse *p);
 961  */
 962 static char                     /* value of symbol */
 963 p_b_symbol(p)
 964 struct parse *p;
 965 {
 966         char value;
 967
 968         (void)REQUIRE(MORE(), REG_EBRACK);
 969         if (!EATTWO('[', '.'))
 970                 return(GETNEXT());
 971
 972         /* collating symbol */
 973         value = p_b_coll_elem(p, '.');
 974         (void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
 975         return(value);
 976 }
 977
 978 /*
 979  - p_b_coll_elem - parse a collating-element name and look it up
 980  == static char p_b_coll_elem(struct parse *p, int endc);
 981  */
 982 static char                     /* value of collating element */
 983 p_b_coll_elem(p, endc)
 984 struct parse *p;
 985 int endc;                       /* name ended by endc,']' */
 986 {
 987         char *sp = p->next;
 988         struct cname *cp;
 989         int len;
 990
 991         while (MORE() && !SEETWO(endc, ']'))
 992                 NEXT();
 993         if (!MORE()) {
 994                 SETERROR(REG_EBRACK);
 995                 return(0);
 996         }
 997         len = p->next - sp;
 998         for (cp = cnames; cp->name != NULL; cp++)
 999                 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
1000                         return(cp->code);       /* known name */
1001         if (len == 1)
1002                 return(*sp);    /* single character */
1003         SETERROR(REG_ECOLLATE);                 /* neither */
1004         return(0);
1005 }
1006
1007 /*
1008  - othercase - return the case counterpart of an alphabetic
1009  == static char othercase(int ch);
1010  */
1011 static char                     /* if no counterpart, return ch */
1012 othercase(ch)
1013 int ch;
1014 {
1015         ch = (uch)ch;
1016         assert(isalpha(ch));
1017         if (isupper(ch))
1018                 return(tolower(ch));
1019         else if (islower(ch))
1020                 return(toupper(ch));
1021         else                    /* peculiar, but could happen */
1022                 return(ch);
1023 }
1024
1025 /*
1026  - bothcases - emit a dualcase version of a two-case character
1027  == static void bothcases(struct parse *p, int ch);
1028  *
1029  * Boy, is this implementation ever a kludge...
1030  */
1031 static void
1032 bothcases(p, ch)
1033 struct parse *p;
1034 int ch;
1035 {
1036         char *oldnext = p->next;
1037         char *oldend = p->end;
1038         char bracket[3];
1039
1040         ch = (uch)ch;
1041         assert(othercase(ch) != ch);    /* p_bracket() would recurse */
1042         p->next = bracket;
1043         p->end = bracket+2;
1044         bracket[0] = ch;
1045         bracket[1] = ']';
1046         bracket[2] = '\0';
1047         p_bracket(p);
1048         assert(p->next == bracket+2);
1049         p->next = oldnext;
1050         p->end = oldend;
1051 }
1052
1053 /*
1054  - ordinary - emit an ordinary character
1055  == static void ordinary(struct parse *p, int ch);
1056  */
1057 static void
1058 ordinary(p, ch)
1059 struct parse *p;
1060 int ch;
1061 {
1062         cat_t *cap = p->g->categories;
1063
1064         if ((p->g->cflags&REG_ICASE) && isalpha((uch)ch) && othercase(ch) != ch)
1065                 bothcases(p, ch);
1066         else {
1067                 EMIT(OCHAR, (uch)ch);
1068                 if (cap[ch] == 0)
1069                         cap[ch] = p->g->ncategories++;
1070         }
1071 }
1072
1073 /*
1074  - nonnewline - emit REG_NEWLINE version of OANY
1075  == static void nonnewline(struct parse *p);
1076  *
1077  * Boy, is this implementation ever a kludge...
1078  */
1079 static void
1080 nonnewline(p)
1081 struct parse *p;
1082 {
1083         char *oldnext = p->next;
1084         char *oldend = p->end;
1085         char bracket[4];
1086
1087         p->next = bracket;
1088         p->end = bracket+3;
1089         bracket[0] = '^';
1090         bracket[1] = '\n';
1091         bracket[2] = ']';
1092         bracket[3] = '\0';
1093         p_bracket(p);
1094         assert(p->next == bracket+3);
1095         p->next = oldnext;
1096         p->end = oldend;
1097 }
1098
1099 /*
1100  - repeat - generate code for a bounded repetition, recursively if needed
1101  == static void repeat(struct parse *p, sopno start, int from, int to);
1102  */
1103 static void
1104 repeat(p, start, from, to)
1105 struct parse *p;
1106 sopno start;                    /* operand from here to end of strip */
1107 int from;                       /* repeated from this number */
1108 int to;                         /* to this number of times (maybe INFINITY) */
1109 {
1110         sopno finish = HERE();
1111 #       define  N       2
1112 #       define  INF     3
1113 #       define  REP(f, t)       ((f)*8 + (t))
1114 #       define  MAP(n)  (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
1115         sopno copy;
1116
1117         if (p->error != 0)      /* head off possible runaway recursion */
1118                 return;
1119
1120         assert(from <= to);
1121
1122         switch (REP(MAP(from), MAP(to))) {
1123         case REP(0, 0):                 /* must be user doing this */
1124                 DROP(finish-start);     /* drop the operand */
1125                 break;
1126         case REP(0, 1):                 /* as x{1,1}? */
1127         case REP(0, N):                 /* as x{1,n}? */
1128         case REP(0, INF):               /* as x{1,}? */
1129                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1130                 INSERT(OCH_, start);            /* offset is wrong... */
1131                 repeat(p, start+1, 1, to);
1132                 ASTERN(OOR1, start);
1133                 AHEAD(start);                   /* ... fix it */
1134                 EMIT(OOR2, 0);
1135                 AHEAD(THERE());
1136                 ASTERN(O_CH, THERETHERE());
1137                 break;
1138         case REP(1, 1):                 /* trivial case */
1139                 /* done */
1140                 break;
1141         case REP(1, N):                 /* as x?x{1,n-1} */
1142                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1143                 INSERT(OCH_, start);
1144                 ASTERN(OOR1, start);
1145                 AHEAD(start);
1146                 EMIT(OOR2, 0);                  /* offset very wrong... */
1147                 AHEAD(THERE());                 /* ...so fix it */
1148                 ASTERN(O_CH, THERETHERE());
1149                 copy = dupl(p, start+1, finish+1);
1150                 assert(copy == finish+4);
1151                 repeat(p, copy, 1, to-1);
1152                 break;
1153         case REP(1, INF):               /* as x+ */
1154                 INSERT(OPLUS_, start);
1155                 ASTERN(O_PLUS, start);
1156                 break;
1157         case REP(N, N):                 /* as xx{m-1,n-1} */
1158                 copy = dupl(p, start, finish);
1159                 repeat(p, copy, from-1, to-1);
1160                 break;
1161         case REP(N, INF):               /* as xx{n-1,INF} */
1162                 copy = dupl(p, start, finish);
1163                 repeat(p, copy, from-1, to);
1164                 break;
1165         default:                        /* "can't happen" */
1166                 SETERROR(REG_ASSERT);   /* just in case */
1167                 break;
1168         }
1169 }
1170
1171 /*
1172  - seterr - set an error condition
1173  == static int seterr(struct parse *p, int e);
1174  */
1175 static int                      /* useless but makes type checking happy */
1176 seterr(p, e)
1177 struct parse *p;
1178 int e;
1179 {
1180         if (p->error == 0)      /* keep earliest error condition */
1181                 p->error = e;
1182         p->next = nuls;         /* try to bring things to a halt */
1183         p->end = nuls;
1184         return(0);              /* make the return value well-defined */
1185 }
1186
1187 /*
1188  - allocset - allocate a set of characters for []
1189  == static cset *allocset(struct parse *p);
1190  */
1191 static cset *
1192 allocset(p)
1193 struct parse *p;
1194 {
1195         int no = p->g->ncsets++;
1196         size_t nc;
1197         size_t nbytes;
1198         cset *cs;
1199         size_t css = (size_t)p->g->csetsize;
1200         int i;
1201
1202         if (no >= p->ncsalloc) {        /* need another column of space */
1203                 p->ncsalloc += CHAR_BIT;
1204                 nc = p->ncsalloc;
1205                 assert(nc % CHAR_BIT == 0);
1206                 nbytes = nc / CHAR_BIT * css;
1207                 if (p->g->sets == NULL)
1208                         p->g->sets = (cset *)malloc(nc * sizeof(cset));
1209                 else
1210                         p->g->sets = (cset *)reallocf((char *)p->g->sets,
1211                                                         nc * sizeof(cset));
1212                 if (p->g->setbits == NULL)
1213                         p->g->setbits = (uch *)malloc(nbytes);
1214                 else {
1215                         p->g->setbits = (uch *)reallocf((char *)p->g->setbits,
1216                                                                 nbytes);
1217                         /* xxx this isn't right if setbits is now NULL */
1218                         for (i = 0; i < no; i++)
1219                                 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
1220                 }
1221                 if (p->g->sets != NULL && p->g->setbits != NULL)
1222                         (void) memset((char *)p->g->setbits + (nbytes - css),
1223                                                                 0, css);
1224                 else {
1225                         no = 0;
1226                         SETERROR(REG_ESPACE);
1227                         /* caller's responsibility not to do set ops */
1228                 }
1229         }
1230
1231         assert(p->g->sets != NULL);     /* xxx */
1232         cs = &p->g->sets[no];
1233         cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
1234         cs->mask = 1 << ((no) % CHAR_BIT);
1235         cs->hash = 0;
1236         cs->smultis = 0;
1237         cs->multis = NULL;
1238
1239         return(cs);
1240 }
1241
1242 /*
1243  - freeset - free a now-unused set
1244  == static void freeset(struct parse *p, cset *cs);
1245  */
1246 static void
1247 freeset(p, cs)
1248 struct parse *p;
1249 cset *cs;
1250 {
1251         int i;
1252         cset *top = &p->g->sets[p->g->ncsets];
1253         size_t css = (size_t)p->g->csetsize;
1254
1255         for (i = 0; i < css; i++)
1256                 CHsub(cs, i);
1257         if (cs == top-1)        /* recover only the easy case */
1258                 p->g->ncsets--;
1259 }
1260
1261 /*
1262  - freezeset - final processing on a set of characters
1263  == static int freezeset(struct parse *p, cset *cs);
1264  *
1265  * The main task here is merging identical sets.  This is usually a waste
1266  * of time (although the hash code minimizes the overhead), but can win
1267  * big if REG_ICASE is being used.  REG_ICASE, by the way, is why the hash
1268  * is done using addition rather than xor -- all ASCII [aA] sets xor to
1269  * the same value!
1270  */
1271 static int                      /* set number */
1272 freezeset(p, cs)
1273 struct parse *p;
1274 cset *cs;
1275 {
1276         short h = cs->hash;
1277         int i;
1278         cset *top = &p->g->sets[p->g->ncsets];
1279         cset *cs2;
1280         size_t css = (size_t)p->g->csetsize;
1281
1282         /* look for an earlier one which is the same */
1283         for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
1284                 if (cs2->hash == h && cs2 != cs) {
1285                         /* maybe */
1286                         for (i = 0; i < css; i++)
1287                                 if (!!CHIN(cs2, i) != !!CHIN(cs, i))
1288                                         break;          /* no */
1289                         if (i == css)
1290                                 break;                  /* yes */
1291                 }
1292
1293         if (cs2 < top) {        /* found one */
1294                 freeset(p, cs);
1295                 cs = cs2;
1296         }
1297
1298         return((int)(cs - p->g->sets));
1299 }
1300
1301 /*
1302  - firstch - return first character in a set (which must have at least one)
1303  == static int firstch(struct parse *p, cset *cs);
1304  */
1305 static int                      /* character; there is no "none" value */
1306 firstch(p, cs)
1307 struct parse *p;
1308 cset *cs;
1309 {
1310         int i;
1311         size_t css = (size_t)p->g->csetsize;
1312
1313         for (i = 0; i < css; i++)
1314                 if (CHIN(cs, i))
1315                         return((char)i);
1316         assert(never);
1317         return(0);              /* arbitrary */
1318 }
1319
1320 /*
1321  - nch - number of characters in a set
1322  == static int nch(struct parse *p, cset *cs);
1323  */
1324 static int
1325 nch(p, cs)
1326 struct parse *p;
1327 cset *cs;
1328 {
1329         int i;
1330         size_t css = (size_t)p->g->csetsize;
1331         int n = 0;
1332
1333         for (i = 0; i < css; i++)
1334                 if (CHIN(cs, i))
1335                         n++;
1336         return(n);
1337 }
1338
1339 /*
1340  - mcadd - add a collating element to a cset
1341  == static void mcadd(struct parse *p, cset *cs, \
1342  ==     char *cp);
1343  */
1344 static void
1345 mcadd(p, cs, cp)
1346 struct parse *p;
1347 cset *cs;
1348 char *cp;
1349 {
1350         size_t oldend = cs->smultis;
1351
1352         cs->smultis += strlen(cp) + 1;
1353         if (cs->multis == NULL)
1354                 cs->multis = malloc(cs->smultis);
1355         else
1356                 cs->multis = reallocf(cs->multis, cs->smultis);
1357         if (cs->multis == NULL) {
1358                 SETERROR(REG_ESPACE);
1359                 return;
1360         }
1361
1362         (void) strcpy(cs->multis + oldend - 1, cp);
1363         cs->multis[cs->smultis - 1] = '\0';
1364 }
1365
1366 #if used
1367 /*
1368  - mcsub - subtract a collating element from a cset
1369  == static void mcsub(cset *cs, char *cp);
1370  */
1371 static void
1372 mcsub(cs, cp)
1373 cset *cs;
1374 char *cp;
1375 {
1376         char *fp = mcfind(cs, cp);
1377         size_t len = strlen(fp);
1378
1379         assert(fp != NULL);
1380         (void) memmove(fp, fp + len + 1,
1381                                 cs->smultis - (fp + len + 1 - cs->multis));
1382         cs->smultis -= len;
1383
1384         if (cs->smultis == 0) {
1385                 free(cs->multis);
1386                 cs->multis = NULL;
1387                 return;
1388         }
1389
1390         cs->multis = reallocf(cs->multis, cs->smultis);
1391         assert(cs->multis != NULL);
1392 }
1393
1394 /*
1395  - mcin - is a collating element in a cset?
1396  == static int mcin(cset *cs, char *cp);
1397  */
1398 static int
1399 mcin(cs, cp)
1400 cset *cs;
1401 char *cp;
1402 {
1403         return(mcfind(cs, cp) != NULL);
1404 }
1405
1406 /*
1407  - mcfind - find a collating element in a cset
1408  == static char *mcfind(cset *cs, char *cp);
1409  */
1410 static char *
1411 mcfind(cs, cp)
1412 cset *cs;
1413 char *cp;
1414 {
1415         char *p;
1416
1417         if (cs->multis == NULL)
1418                 return(NULL);
1419         for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
1420                 if (strcmp(cp, p) == 0)
1421                         return(p);
1422         return(NULL);
1423 }
1424 #endif
1425
1426 /*
1427  - mcinvert - invert the list of collating elements in a cset
1428  == static void mcinvert(struct parse *p, cset *cs);
1429  *
1430  * This would have to know the set of possibilities.  Implementation
1431  * is deferred.
1432  */
1433 static void
1434 mcinvert(p, cs)
1435 struct parse *p;
1436 cset *cs;
1437 {
1438         assert(cs->multis == NULL);     /* xxx */
1439 }
1440
1441 /*
1442  - mccase - add case counterparts of the list of collating elements in a cset
1443  == static void mccase(struct parse *p, cset *cs);
1444  *
1445  * This would have to know the set of possibilities.  Implementation
1446  * is deferred.
1447  */
1448 static void
1449 mccase(p, cs)
1450 struct parse *p;
1451 cset *cs;
1452 {
1453         assert(cs->multis == NULL);     /* xxx */
1454 }
1455
1456 /*
1457  - isinsets - is this character in any sets?
1458  == static int isinsets(struct re_guts *g, int c);
1459  */
1460 static int                      /* predicate */
1461 isinsets(g, c)
1462 struct re_guts *g;
1463 int c;
1464 {
1465         uch *col;
1466         int i;
1467         int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1468         unsigned uc = (uch)c;
1469
1470         for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1471                 if (col[uc] != 0)
1472                         return(1);
1473         return(0);
1474 }
1475
1476 /*
1477  - samesets - are these two characters in exactly the same sets?
1478  == static int samesets(struct re_guts *g, int c1, int c2);
1479  */
1480 static int                      /* predicate */
1481 samesets(g, c1, c2)
1482 struct re_guts *g;
1483 int c1;
1484 int c2;
1485 {
1486         uch *col;
1487         int i;
1488         int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1489         unsigned uc1 = (uch)c1;
1490         unsigned uc2 = (uch)c2;
1491
1492         for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1493                 if (col[uc1] != col[uc2])
1494                         return(0);
1495         return(1);
1496 }
1497
1498 /*
1499  - categorize - sort out character categories
1500  == static void categorize(struct parse *p, struct re_guts *g);
1501  */
1502 static void
1503 categorize(p, g)
1504 struct parse *p;
1505 struct re_guts *g;
1506 {
1507         cat_t *cats = g->categories;
1508         int c;
1509         int c2;
1510         cat_t cat;
1511
1512         /* avoid making error situations worse */
1513         if (p->error != 0)
1514                 return;
1515
1516         for (c = CHAR_MIN; c <= CHAR_MAX; c++)
1517                 if (cats[c] == 0 && isinsets(g, c)) {
1518                         cat = g->ncategories++;
1519                         cats[c] = cat;
1520                         for (c2 = c+1; c2 <= CHAR_MAX; c2++)
1521                                 if (cats[c2] == 0 && samesets(g, c, c2))
1522                                         cats[c2] = cat;
1523                 }
1524 }
1525
1526 /*
1527  - dupl - emit a duplicate of a bunch of sops
1528  == static sopno dupl(struct parse *p, sopno start, sopno finish);
1529  */
1530 static sopno                    /* start of duplicate */
1531 dupl(p, start, finish)
1532 struct parse *p;
1533 sopno start;                    /* from here */
1534 sopno finish;                   /* to this less one */
1535 {
1536         sopno ret = HERE();
1537         sopno len = finish - start;
1538
1539         assert(finish >= start);
1540         if (len == 0)
1541                 return(ret);
1542         enlarge(p, p->ssize + len);     /* this many unexpected additions */
1543         assert(p->ssize >= p->slen + len);
1544         (void) memcpy((char *)(p->strip + p->slen),
1545                 (char *)(p->strip + start), (size_t)len*sizeof(sop));
1546         p->slen += len;
1547         return(ret);
1548 }
1549
1550 /*
1551  - doemit - emit a strip operator
1552  == static void doemit(struct parse *p, sop op, size_t opnd);
1553  *
1554  * It might seem better to implement this as a macro with a function as
1555  * hard-case backup, but it's just too big and messy unless there are
1556  * some changes to the data structures.  Maybe later.
1557  */
1558 static void
1559 doemit(p, op, opnd)
1560 struct parse *p;
1561 sop op;
1562 size_t opnd;
1563 {
1564         /* avoid making error situations worse */
1565         if (p->error != 0)
1566                 return;
1567
1568         /* deal with oversize operands ("can't happen", more or less) */
1569         assert(opnd < 1<<OPSHIFT);
1570
1571         /* deal with undersized strip */
1572         if (p->slen >= p->ssize)
1573                 enlarge(p, (p->ssize+1) / 2 * 3);       /* +50% */
1574         assert(p->slen < p->ssize);
1575
1576         /* finally, it's all reduced to the easy case */
1577         p->strip[p->slen++] = SOP(op, opnd);
1578 }
1579
1580 /*
1581  - doinsert - insert a sop into the strip
1582  == static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
1583  */
1584 static void
1585 doinsert(p, op, opnd, pos)
1586 struct parse *p;
1587 sop op;
1588 size_t opnd;
1589 sopno pos;
1590 {
1591         sopno sn;
1592         sop s;
1593         int i;
1594
1595         /* avoid making error situations worse */
1596         if (p->error != 0)
1597                 return;
1598
1599         sn = HERE();
1600         EMIT(op, opnd);         /* do checks, ensure space */
1601         assert(HERE() == sn+1);
1602         s = p->strip[sn];
1603
1604         /* adjust paren pointers */
1605         assert(pos > 0);
1606         for (i = 1; i < NPAREN; i++) {
1607                 if (p->pbegin[i] >= pos) {
1608                         p->pbegin[i]++;
1609                 }
1610                 if (p->pend[i] >= pos) {
1611                         p->pend[i]++;
1612                 }
1613         }
1614
1615         memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
1616                                                 (HERE()-pos-1)*sizeof(sop));
1617         p->strip[pos] = s;
1618 }
1619
1620 /*
1621  - dofwd - complete a forward reference
1622  == static void dofwd(struct parse *p, sopno pos, sop value);
1623  */
1624 static void
1625 dofwd(p, pos, value)
1626 struct parse *p;
1627 sopno pos;
1628 sop value;
1629 {
1630         /* avoid making error situations worse */
1631         if (p->error != 0)
1632                 return;
1633
1634         assert(value < 1<<OPSHIFT);
1635         p->strip[pos] = OP(p->strip[pos]) | value;
1636 }
1637
1638 /*
1639  - enlarge - enlarge the strip
1640  == static void enlarge(struct parse *p, sopno size);
1641  */
1642 static void
1643 enlarge(p, size)
1644 struct parse *p;
1645 sopno size;
1646 {
1647         sop *sp;
1648
1649         if (p->ssize >= size)
1650                 return;
1651
1652         sp = (sop *)realloc(p->strip, size*sizeof(sop));
1653         if (sp == NULL) {
1654                 SETERROR(REG_ESPACE);
1655                 return;
1656         }
1657         p->strip = sp;
1658         p->ssize = size;
1659 }
1660
1661 /*
1662  - stripsnug - compact the strip
1663  == static void stripsnug(struct parse *p, struct re_guts *g);
1664  */
1665 static void
1666 stripsnug(p, g)
1667 struct parse *p;
1668 struct re_guts *g;
1669 {
1670         g->nstates = p->slen;
1671         g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
1672         if (g->strip == NULL) {
1673                 SETERROR(REG_ESPACE);
1674                 g->strip = p->strip;
1675         }
1676 }
1677
1678 /*
1679  - findmust - fill in must and mlen with longest mandatory literal string
1680  == static void findmust(struct parse *p, struct re_guts *g);
1681  *
1682  * This algorithm could do fancy things like analyzing the operands of |
1683  * for common subsequences.  Someday.  This code is simple and finds most
1684  * of the interesting cases.
1685  *
1686  * Note that must and mlen got initialized during setup.
1687  */
1688 static void
1689 findmust(p, g)
1690 struct parse *p;
1691 struct re_guts *g;
1692 {
1693         sop *scan;
1694         sop *start;
1695         sop *newstart;
1696         sopno newlen;
1697         sop s;
1698         char *cp;
1699         sopno i;
1700         int offset;
1701         int cs, mccs;
1702
1703         /* avoid making error situations worse */
1704         if (p->error != 0)
1705                 return;
1706
1707         /* Find out if we can handle OANYOF or not */
1708         mccs = 0;
1709         for (cs = 0; cs < g->ncsets; cs++)
1710                 if (g->sets[cs].multis != NULL)
1711                         mccs = 1;
1712
1713         /* find the longest OCHAR sequence in strip */
1714         newlen = 0;
1715         offset = 0;
1716         g->moffset = 0;
1717         scan = g->strip + 1;
1718         do {
1719                 s = *scan++;
1720                 switch (OP(s)) {
1721                 case OCHAR:             /* sequence member */
1722                         if (newlen == 0)                /* new sequence */
1723                                 newstart = scan - 1;
1724                         newlen++;
1725                         break;
1726                 case OPLUS_:            /* things that don't break one */
1727                 case OLPAREN:
1728                 case ORPAREN:
1729                         break;
1730                 case OQUEST_:           /* things that must be skipped */
1731                 case OCH_:
1732                         offset = altoffset(scan, offset, mccs);
1733                         scan--;
1734                         do {
1735                                 scan += OPND(s);
1736                                 s = *scan;
1737                                 /* assert() interferes w debug printouts */
1738                                 if (OP(s) != O_QUEST && OP(s) != O_CH &&
1739                                                         OP(s) != OOR2) {
1740                                         g->iflags |= BAD;
1741                                         return;
1742                                 }
1743                         } while (OP(s) != O_QUEST && OP(s) != O_CH);
1744                         /* FALLTHROUGH */
1745                 case OBOW:              /* things that break a sequence */
1746                 case OEOW:
1747                 case OBOL:
1748                 case OEOL:
1749                 case O_QUEST:
1750                 case O_CH:
1751                 case OEND:
1752                         if (newlen > g->mlen) {         /* ends one */
1753                                 start = newstart;
1754                                 g->mlen = newlen;
1755                                 if (offset > -1) {
1756                                         g->moffset += offset;
1757                                         offset = newlen;
1758                                 } else
1759                                         g->moffset = offset;
1760                         } else {
1761                                 if (offset > -1)
1762                                         offset += newlen;
1763                         }
1764                         newlen = 0;
1765                         break;
1766                 case OANY:
1767                         if (newlen > g->mlen) {         /* ends one */
1768                                 start = newstart;
1769                                 g->mlen = newlen;
1770                                 if (offset > -1) {
1771                                         g->moffset += offset;
1772                                         offset = newlen;
1773                                 } else
1774                                         g->moffset = offset;
1775                         } else {
1776                                 if (offset > -1)
1777                                         offset += newlen;
1778                         }
1779                         if (offset > -1)
1780                                 offset++;
1781                         newlen = 0;
1782                         break;
1783                 case OANYOF:            /* may or may not invalidate offset */
1784                         /* First, everything as OANY */
1785                         if (newlen > g->mlen) {         /* ends one */
1786                                 start = newstart;
1787                                 g->mlen = newlen;
1788                                 if (offset > -1) {
1789                                         g->moffset += offset;
1790                                         offset = newlen;
1791                                 } else
1792                                         g->moffset = offset;
1793                         } else {
1794                                 if (offset > -1)
1795                                         offset += newlen;
1796                         }
1797                         if (offset > -1)
1798                                 offset++;
1799                         newlen = 0;
1800                         /* And, now, if we found out we can't deal with
1801                          * it, make offset = -1.
1802                          */
1803                         if (mccs)
1804                                 offset = -1;
1805                         break;
1806                 default:
1807                         /* Anything here makes it impossible or too hard
1808                          * to calculate the offset -- so we give up;
1809                          * save the last known good offset, in case the
1810                          * must sequence doesn't occur later.
1811                          */
1812                         if (newlen > g->mlen) {         /* ends one */
1813                                 start = newstart;
1814                                 g->mlen = newlen;
1815                                 if (offset > -1)
1816                                         g->moffset += offset;
1817                                 else
1818                                         g->moffset = offset;
1819                         }
1820                         offset = -1;
1821                         newlen = 0;
1822                         break;
1823                 }
1824         } while (OP(s) != OEND);
1825
1826         if (g->mlen == 0) {             /* there isn't one */
1827                 g->moffset = -1;
1828                 return;
1829         }
1830
1831         /* turn it into a character string */
1832         g->must = malloc((size_t)g->mlen + 1);
1833         if (g->must == NULL) {          /* argh; just forget it */
1834                 g->mlen = 0;
1835                 g->moffset = -1;
1836                 return;
1837         }
1838         cp = g->must;
1839         scan = start;
1840         for (i = g->mlen; i > 0; i--) {
1841                 while (OP(s = *scan++) != OCHAR)
1842                         continue;
1843                 assert(cp < g->must + g->mlen);
1844                 *cp++ = (char)OPND(s);
1845         }
1846         assert(cp == g->must + g->mlen);
1847         *cp++ = '\0';           /* just on general principles */
1848 }
1849
1850 /*
1851  - altoffset - choose biggest offset among multiple choices
1852  == static int altoffset(sop *scan, int offset, int mccs);
1853  *
1854  * Compute, recursively if necessary, the largest offset among multiple
1855  * re paths.
1856  */
1857 static int
1858 altoffset(scan, offset, mccs)
1859 sop *scan;
1860 int offset;
1861 int mccs;
1862 {
1863         int largest;
1864         int try;
1865         sop s;
1866
1867         /* If we gave up already on offsets, return */
1868         if (offset == -1)
1869                 return -1;
1870
1871         largest = 0;
1872         try = 0;
1873         s = *scan++;
1874         while (OP(s) != O_QUEST && OP(s) != O_CH) {
1875                 switch (OP(s)) {
1876                 case OOR1:
1877                         if (try > largest)
1878                                 largest = try;
1879                         try = 0;
1880                         break;
1881                 case OQUEST_:
1882                 case OCH_:
1883                         try = altoffset(scan, try, mccs);
1884                         if (try == -1)
1885                                 return -1;
1886                         scan--;
1887                         do {
1888                                 scan += OPND(s);
1889                                 s = *scan;
1890                                 if (OP(s) != O_QUEST && OP(s) != O_CH &&
1891                                                         OP(s) != OOR2)
1892                                         return -1;
1893                         } while (OP(s) != O_QUEST && OP(s) != O_CH);
1894                         /* We must skip to the next position, or we'll
1895                          * leave altoffset() too early.
1896                          */
1897                         scan++;
1898                         break;
1899                 case OANYOF:
1900                         if (mccs)
1901                                 return -1;
1902                 case OCHAR:
1903                 case OANY:
1904                         try++;
1905                 case OBOW:
1906                 case OEOW:
1907                 case OLPAREN:
1908                 case ORPAREN:
1909                 case OOR2:
1910                         break;
1911                 default:
1912                         try = -1;
1913                         break;
1914                 }
1915                 if (try == -1)
1916                         return -1;
1917                 s = *scan++;
1918         }
1919
1920         if (try > largest)
1921                 largest = try;
1922
1923         return largest+offset;
1924 }
1925
1926 /*
1927  - computejumps - compute char jumps for BM scan
1928  == static void computejumps(struct parse *p, struct re_guts *g);
1929  *
1930  * This algorithm assumes g->must exists and is has size greater than
1931  * zero. It's based on the algorithm found on Computer Algorithms by
1932  * Sara Baase.
1933  *
1934  * A char jump is the number of characters one needs to jump based on
1935  * the value of the character from the text that was mismatched.
1936  */
1937 static void
1938 computejumps(p, g)
1939 struct parse *p;
1940 struct re_guts *g;
1941 {
1942         int ch;
1943         int mindex;
1944
1945         /* Avoid making errors worse */
1946         if (p->error != 0)
1947                 return;
1948
1949         g->charjump = (int*) malloc((NC + 1) * sizeof(int));
1950         if (g->charjump == NULL)        /* Not a fatal error */
1951                 return;
1952         /* Adjust for signed chars, if necessary */
1953         g->charjump = &g->charjump[-(CHAR_MIN)];
1954
1955         /* If the character does not exist in the pattern, the jump
1956          * is equal to the number of characters in the pattern.
1957          */
1958         for (ch = CHAR_MIN; ch < (CHAR_MAX + 1); ch++)
1959                 g->charjump[ch] = g->mlen;
1960
1961         /* If the character does exist, compute the jump that would
1962          * take us to the last character in the pattern equal to it
1963          * (notice that we match right to left, so that last character
1964          * is the first one that would be matched).
1965          */
1966         for (mindex = 0; mindex < g->mlen; mindex++)
1967                 g->charjump[(int)g->must[mindex]] = g->mlen - mindex - 1;
1968 }
1969
1970 /*
1971  - computematchjumps - compute match jumps for BM scan
1972  == static void computematchjumps(struct parse *p, struct re_guts *g);
1973  *
1974  * This algorithm assumes g->must exists and is has size greater than
1975  * zero. It's based on the algorithm found on Computer Algorithms by
1976  * Sara Baase.
1977  *
1978  * A match jump is the number of characters one needs to advance based
1979  * on the already-matched suffix.
1980  * Notice that all values here are minus (g->mlen-1), because of the way
1981  * the search algorithm works.
1982  */
1983 static void
1984 computematchjumps(p, g)
1985 struct parse *p;
1986 struct re_guts *g;
1987 {
1988         int mindex;             /* General "must" iterator */
1989         int suffix;             /* Keeps track of matching suffix */
1990         int ssuffix;            /* Keeps track of suffixes' suffix */
1991         int* pmatches;          /* pmatches[k] points to the next i
1992                                  * such that i+1...mlen is a substring
1993                                  * of k+1...k+mlen-i-1
1994                                  */
1995
1996         /* Avoid making errors worse */
1997         if (p->error != 0)
1998                 return;
1999
2000         pmatches = (int*) malloc(g->mlen * sizeof(unsigned int));
2001         if (pmatches == NULL) {
2002                 g->matchjump = NULL;
2003                 return;
2004         }
2005
2006         g->matchjump = (int*) malloc(g->mlen * sizeof(unsigned int));
2007         if (g->matchjump == NULL)       /* Not a fatal error */
2008                 return;
2009
2010         /* Set maximum possible jump for each character in the pattern */
2011         for (mindex = 0; mindex < g->mlen; mindex++)
2012                 g->matchjump[mindex] = 2*g->mlen - mindex - 1;
2013
2014         /* Compute pmatches[] */
2015         for (mindex = g->mlen - 1, suffix = g->mlen; mindex >= 0;
2016             mindex--, suffix--) {
2017                 pmatches[mindex] = suffix;
2018
2019                 /* If a mismatch is found, interrupting the substring,
2020                  * compute the matchjump for that position. If no
2021                  * mismatch is found, then a text substring mismatched
2022                  * against the suffix will also mismatch against the
2023                  * substring.
2024                  */
2025                 while (suffix < g->mlen
2026                     && g->must[mindex] != g->must[suffix]) {
2027                         g->matchjump[suffix] = MIN(g->matchjump[suffix],
2028                             g->mlen - mindex - 1);
2029                         suffix = pmatches[suffix];
2030                 }
2031         }
2032
2033         /* Compute the matchjump up to the last substring found to jump
2034          * to the beginning of the largest must pattern prefix matching
2035          * it's own suffix.
2036          */
2037         for (mindex = 0; mindex <= suffix; mindex++)
2038                 g->matchjump[mindex] = MIN(g->matchjump[mindex],
2039                     g->mlen + suffix - mindex);
2040
2041         ssuffix = pmatches[suffix];
2042         while (suffix < g->mlen) {
2043                 while (suffix <= ssuffix && suffix < g->mlen) {
2044                         g->matchjump[suffix] = MIN(g->matchjump[suffix],
2045                             g->mlen + ssuffix - suffix);
2046                         suffix++;
2047                 }
2048                 if (suffix < g->mlen)
2049                         ssuffix = pmatches[ssuffix];
2050         }
2051
2052         free(pmatches);
2053 }
2054
2055 /*
2056  - pluscount - count + nesting
2057  == static sopno pluscount(struct parse *p, struct re_guts *g);
2058  */
2059 static sopno                    /* nesting depth */
2060 pluscount(p, g)
2061 struct parse *p;
2062 struct re_guts *g;
2063 {
2064         sop *scan;
2065         sop s;
2066         sopno plusnest = 0;
2067         sopno maxnest = 0;
2068
2069         if (p->error != 0)
2070                 return(0);      /* there may not be an OEND */
2071
2072         scan = g->strip + 1;
2073         do {
2074                 s = *scan++;
2075                 switch (OP(s)) {
2076                 case OPLUS_:
2077                         plusnest++;
2078                         break;
2079                 case O_PLUS:
2080                         if (plusnest > maxnest)
2081                                 maxnest = plusnest;
2082                         plusnest--;
2083                         break;
2084                 }
2085         } while (OP(s) != OEND);
2086         if (plusnest != 0)
2087                 g->iflags |= BAD;
2088         return(maxnest);
2089 }