]>
git.saurik.com Git - apple/libc.git/blob - regex/FreeBSD/regcomp.c
   2  * Copyright (c) 1992, 1993, 1994 Henry Spencer. 
   3  * Copyright (c) 1992, 1993, 1994 
   4  *      The Regents of the University of California.  All rights reserved. 
   6  * This code is derived from software contributed to Berkeley by 
   9  * Redistribution and use in source and binary forms, with or without 
  10  * modification, are permitted provided that the following conditions 
  12  * 1. Redistributions of source code must retain the above copyright 
  13  *    notice, this list of conditions and the following disclaimer. 
  14  * 2. Redistributions in binary form must reproduce the above copyright 
  15  *    notice, this list of conditions and the following disclaimer in the 
  16  *    documentation and/or other materials provided with the distribution. 
  17  * 3. All advertising materials mentioning features or use of this software 
  18  *    must display the following acknowledgement: 
  19  *      This product includes software developed by the University of 
  20  *      California, Berkeley and its contributors. 
  21  * 4. Neither the name of the University nor the names of its contributors 
  22  *    may be used to endorse or promote products derived from this software 
  23  *    without specific prior written permission. 
  25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 
  26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 
  29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
  30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 
  31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
  32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
  33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 
  34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
  37  *      @(#)regcomp.c   8.5 (Berkeley) 3/20/94 
  40 #if defined(LIBC_SCCS) && !defined(lint) 
  41 static char sccsid
[] = "@(#)regcomp.c   8.5 (Berkeley) 3/20/94"; 
  42 #endif /* LIBC_SCCS and not lint */ 
  43 #include <sys/cdefs.h> 
  44 __FBSDID("$FreeBSD: src/lib/libc/regex/regcomp.c,v 1.30 2003/02/16 17:29:10 nectar Exp $"); 
  46 #include <sys/types.h> 
  63  * parse structure, passed up and down to avoid global variables and 
  67         char *next
;             /* next character in RE */ 
  68         char *end
;              /* end of string (-> NUL normally) */ 
  69         int error
;              /* has an error been seen? */ 
  70         sop 
*strip
;             /* malloced strip */ 
  71         sopno ssize
;            /* malloced strip size (allocated) */ 
  72         sopno slen
;             /* malloced strip length (used) */ 
  73         int ncsalloc
;           /* number of csets allocated */ 
  75 #       define  NPAREN  10      /* we need to remember () 1-9 for back refs */ 
  76         sopno pbegin
[NPAREN
];   /* -> ( ([0] unused) */ 
  77         sopno pend
[NPAREN
];     /* -> ) ([0] unused) */ 
  80 /* ========= begin header generated by ./mkh ========= */ 
  85 /* === regcomp.c === */ 
  86 static void p_ere(struct parse 
*p
, int stop
); 
  87 static void p_ere_exp(struct parse 
*p
); 
  88 static void p_str(struct parse 
*p
); 
  89 static void p_bre(struct parse 
*p
, int end1
, int end2
); 
  90 static int p_simp_re(struct parse 
*p
, int starordinary
); 
  91 static int p_count(struct parse 
*p
); 
  92 static void p_bracket(struct parse 
*p
); 
  93 static void p_b_term(struct parse 
*p
, cset 
*cs
); 
  94 static void p_b_cclass(struct parse 
*p
, cset 
*cs
); 
  95 static void p_b_eclass(struct parse 
*p
, cset 
*cs
); 
  96 static char p_b_symbol(struct parse 
*p
); 
  97 static char p_b_coll_elem(struct parse 
*p
, int endc
); 
  98 static char othercase(int ch
); 
  99 static void bothcases(struct parse 
*p
, int ch
); 
 100 static void ordinary(struct parse 
*p
, int ch
); 
 101 static void nonnewline(struct parse 
*p
); 
 102 static void repeat(struct parse 
*p
, sopno start
, int from
, int to
); 
 103 static int seterr(struct parse 
*p
, int e
); 
 104 static cset 
*allocset(struct parse 
*p
); 
 105 static void freeset(struct parse 
*p
, cset 
*cs
); 
 106 static int freezeset(struct parse 
*p
, cset 
*cs
); 
 107 static int firstch(struct parse 
*p
, cset 
*cs
); 
 108 static int nch(struct parse 
*p
, cset 
*cs
); 
 109 static void mcadd(struct parse 
*p
, cset 
*cs
, char *cp
) __unused
; 
 111 static void mcsub(cset 
*cs
, char *cp
); 
 112 static int mcin(cset 
*cs
, char *cp
); 
 113 static char *mcfind(cset 
*cs
, char *cp
); 
 115 static void mcinvert(struct parse 
*p
, cset 
*cs
); 
 116 static void mccase(struct parse 
*p
, cset 
*cs
); 
 117 static int isinsets(struct re_guts 
*g
, int c
); 
 118 static int samesets(struct re_guts 
*g
, int c1
, int c2
); 
 119 static void categorize(struct parse 
*p
, struct re_guts 
*g
); 
 120 static sopno 
dupl(struct parse 
*p
, sopno start
, sopno finish
); 
 121 static void doemit(struct parse 
*p
, sop op
, size_t opnd
); 
 122 static void doinsert(struct parse 
*p
, sop op
, size_t opnd
, sopno pos
); 
 123 static void dofwd(struct parse 
*p
, sopno pos
, sop value
); 
 124 static void enlarge(struct parse 
*p
, sopno size
); 
 125 static void stripsnug(struct parse 
*p
, struct re_guts 
*g
); 
 126 static void findmust(struct parse 
*p
, struct re_guts 
*g
); 
 127 static int altoffset(sop 
*scan
, int offset
, int mccs
); 
 128 static void computejumps(struct parse 
*p
, struct re_guts 
*g
); 
 129 static void computematchjumps(struct parse 
*p
, struct re_guts 
*g
); 
 130 static sopno 
pluscount(struct parse 
*p
, struct re_guts 
*g
); 
 135 /* ========= end header generated by ./mkh ========= */ 
 137 static char nuls
[10];           /* place to point scanner in event of error */ 
 140  * macros for use with parse structure 
 141  * BEWARE:  these know that the parse structure is named `p' !!! 
 143 #define PEEK()  (*p->next) 
 144 #define PEEK2() (*(p->next+1)) 
 145 #define MORE()  (p->next < p->end) 
 146 #define MORE2() (p->next+1 < p->end) 
 147 #define SEE(c)  (MORE() && PEEK() == (c)) 
 148 #define SEETWO(a, b)    (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) 
 149 #define EAT(c)  ((SEE(c)) ? (NEXT(), 1) : 0) 
 150 #define EATTWO(a, b)    ((SEETWO(a, b)) ? (NEXT2(), 1) : 0) 
 151 #define NEXT()  (p->next++) 
 152 #define NEXT2() (p->next += 2) 
 153 #define NEXTn(n)        (p->next += (n)) 
 154 #define GETNEXT()       (*p->next++) 
 155 #define SETERROR(e)     seterr(p, (e)) 
 156 #define REQUIRE(co, e)  ((co) || SETERROR(e)) 
 157 #define MUSTSEE(c, e)   (REQUIRE(MORE() && PEEK() == (c), e)) 
 158 #define MUSTEAT(c, e)   (REQUIRE(MORE() && GETNEXT() == (c), e)) 
 159 #define MUSTNOTSEE(c, e)        (REQUIRE(!MORE() || PEEK() != (c), e)) 
 160 #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) 
 161 #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) 
 162 #define AHEAD(pos)              dofwd(p, pos, HERE()-(pos)) 
 163 #define ASTERN(sop, pos)        EMIT(sop, HERE()-pos) 
 164 #define HERE()          (p->slen) 
 165 #define THERE()         (p->slen - 1) 
 166 #define THERETHERE()    (p->slen - 2) 
 167 #define DROP(n) (p->slen -= (n)) 
 170 static int never 
= 0;           /* for use in asserts; shuts lint up */ 
 172 #define never   0               /* some <assert.h>s have bugs too */ 
 175 /* Macro used by computejump()/computematchjump() */ 
 176 #define MIN(a,b)        ((a)<(b)?(a):(b)) 
 179  - regcomp - interface for parser and compilation 
 180  = extern int regcomp(regex_t *, const char *, int); 
 181  = #define      REG_BASIC       0000 
 182  = #define      REG_EXTENDED    0001 
 183  = #define      REG_ICASE       0002 
 184  = #define      REG_NOSUB       0004 
 185  = #define      REG_NEWLINE     0010 
 186  = #define      REG_NOSPEC      0020 
 187  = #define      REG_PEND        0040 
 188  = #define      REG_DUMP        0200 
 190 int                             /* 0 success, otherwise REG_something */ 
 191 regcomp(preg
, pattern
, cflags
) 
 192 regex_t 
* __restrict preg
; 
 193 const char * __restrict pattern
; 
 198         struct parse 
*p 
= &pa
; 
 202 #       define  GOODFLAGS(f)    (f) 
 204 #       define  GOODFLAGS(f)    ((f)&~REG_DUMP) 
 207         cflags 
= GOODFLAGS(cflags
); 
 208         if ((cflags
®_EXTENDED
) && (cflags
®_NOSPEC
)) 
 211         if (cflags
®_PEND
) { 
 212                 if (preg
->re_endp 
< pattern
) 
 214                 len 
= preg
->re_endp 
- pattern
; 
 216                 len 
= strlen((char *)pattern
); 
 218         /* do the mallocs early so failure handling is easy */ 
 219         g 
= (struct re_guts 
*)malloc(sizeof(struct re_guts
) + 
 220                                                         (NC
-1)*sizeof(cat_t
)); 
 223         p
->ssize 
= len
/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ 
 224         p
->strip 
= (sop 
*)malloc(p
->ssize 
* sizeof(sop
)); 
 226         if (p
->strip 
== NULL
) { 
 233         p
->next 
= (char *)pattern
;      /* convenience; we do not modify it */ 
 234         p
->end 
= p
->next 
+ len
; 
 237         for (i 
= 0; i 
< NPAREN
; i
++) { 
 255         g
->ncategories 
= 1;     /* category 0 is "everything else" */ 
 256         g
->categories 
= &g
->catspace
[-(CHAR_MIN
)]; 
 257         (void) memset((char *)g
->catspace
, 0, NC
*sizeof(cat_t
)); 
 262         g
->firststate 
= THERE(); 
 263         if (cflags
®_EXTENDED
) 
 265         else if (cflags
®_NOSPEC
) 
 270         g
->laststate 
= THERE(); 
 272         /* tidy up loose ends and fill things in */ 
 276         /* only use Boyer-Moore algorithm if the pattern is bigger 
 277          * than three characters 
 281                 computematchjumps(p
, g
); 
 282                 if(g
->matchjump 
== NULL 
&& g
->charjump 
!= NULL
) { 
 287         g
->nplus 
= pluscount(p
, g
); 
 289         preg
->re_nsub 
= g
->nsub
; 
 291         preg
->re_magic 
= MAGIC1
; 
 293         /* not debugging, so can't rely on the assert() in regexec() */ 
 295                 SETERROR(REG_ASSERT
); 
 298         /* win or lose, we're done */ 
 299         if (p
->error 
!= 0)      /* lose */ 
 305  - p_ere - ERE parser top level, concatenation and alternation 
 306  == static void p_ere(struct parse *p, int stop); 
 311 int stop
;                       /* character this ERE should end at */ 
 317         int first 
= 1;          /* is this the first alternative? */ 
 320                 /* do a bunch of concatenated expressions */ 
 322                 while (MORE() && (c 
= PEEK()) != '|' && c 
!= stop
) 
 324                 (void)REQUIRE(HERE() != conc
, REG_EMPTY
);       /* require nonempty */ 
 327                         break;          /* NOTE BREAK OUT */ 
 330                         INSERT(OCH_
, conc
);     /* offset is wrong */ 
 335                 ASTERN(OOR1
, prevback
); 
 337                 AHEAD(prevfwd
);                 /* fix previous offset */ 
 339                 EMIT(OOR2
, 0);                  /* offset is very wrong */ 
 342         if (!first
) {           /* tail-end fixups */ 
 344                 ASTERN(O_CH
, prevback
); 
 347         assert(!MORE() || SEE(stop
)); 
 351  - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op 
 352  == static void p_ere_exp(struct parse *p); 
 365         assert(MORE());         /* caller should have ensured this */ 
 371                 (void)REQUIRE(MORE(), REG_EPAREN
); 
 375                         p
->pbegin
[subno
] = HERE(); 
 376                 EMIT(OLPAREN
, subno
); 
 379                 if (subno 
< NPAREN
) { 
 380                         p
->pend
[subno
] = HERE(); 
 381                         assert(p
->pend
[subno
] != 0); 
 383                 EMIT(ORPAREN
, subno
); 
 384                 (void)MUSTEAT(')', REG_EPAREN
); 
 386 #ifndef POSIX_MISTAKE 
 387         case ')':               /* happens only if no current unmatched ( */ 
 389                  * You may ask, why the ifndef?  Because I didn't notice 
 390                  * this until slightly too late for 1003.2, and none of the 
 391                  * other 1003.2 regular-expression reviewers noticed it at 
 392                  * all.  So an unmatched ) is legal POSIX, at least until 
 393                  * we can get it fixed. 
 395                 SETERROR(REG_EPAREN
); 
 400                 p
->g
->iflags 
|= USEBOL
; 
 406                 p
->g
->iflags 
|= USEEOL
; 
 415                 SETERROR(REG_BADRPT
); 
 418                 if (p
->g
->cflags
®_NEWLINE
) 
 427                 (void)REQUIRE(MORE(), REG_EESCAPE
); 
 431         case '{':               /* okay as ordinary except if digit follows */ 
 432                 (void)REQUIRE(!MORE() || !isdigit((uch
)PEEK()), REG_BADRPT
); 
 442         /* we call { a repetition if followed by a digit */ 
 443         if (!( c 
== '*' || c 
== '+' || c 
== '?' || 
 444                                 (c 
== '{' && MORE2() && isdigit((uch
)PEEK2())) )) 
 445                 return;         /* no repetition, we're done */ 
 448         (void)REQUIRE(!wascaret
, REG_BADRPT
); 
 450         case '*':       /* implemented as +? */ 
 451                 /* this case does not require the (y|) trick, noKLUDGE */ 
 454                 INSERT(OQUEST_
, pos
); 
 455                 ASTERN(O_QUEST
, pos
); 
 462                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 
 463                 INSERT(OCH_
, pos
);              /* offset slightly wrong */ 
 464                 ASTERN(OOR1
, pos
);              /* this one's right */ 
 465                 AHEAD(pos
);                     /* fix the OCH_ */ 
 466                 EMIT(OOR2
, 0);                  /* offset very wrong... */ 
 467                 AHEAD(THERE());                 /* ...so fix it */ 
 468                 ASTERN(O_CH
, THERETHERE()); 
 473                         if (isdigit((uch
)PEEK())) { 
 475                                 (void)REQUIRE(count 
<= count2
, REG_BADBR
); 
 476                         } else          /* single number with comma */ 
 478                 } else          /* just a single number */ 
 480                 repeat(p
, pos
, count
, count2
); 
 481                 if (!EAT('}')) {        /* error heuristics */ 
 482                         while (MORE() && PEEK() != '}') 
 484                         (void)REQUIRE(MORE(), REG_EBRACE
); 
 493         if (!( c 
== '*' || c 
== '+' || c 
== '?' || 
 494                                 (c 
== '{' && MORE2() && isdigit((uch
)PEEK2())) ) ) 
 496         SETERROR(REG_BADRPT
); 
 500  - p_str - string (no metacharacters) "parser" 
 501  == static void p_str(struct parse *p); 
 507         (void)REQUIRE(MORE(), REG_EMPTY
); 
 509                 ordinary(p
, GETNEXT()); 
 513  - p_bre - BRE parser top level, anchoring and concatenation 
 514  == static void p_bre(struct parse *p, int end1, \ 
 516  * Giving end1 as OUT essentially eliminates the end1/end2 check. 
 518  * This implementation is a bit of a kludge, in that a trailing $ is first 
 519  * taken as an ordinary character and then revised to be an anchor.  The 
 520  * only undesirable side effect is that '$' gets included as a character 
 521  * category in such cases.  This is fairly harmless; not worth fixing. 
 522  * The amount of lookahead needed to avoid this kludge is excessive. 
 527 int end1
;                       /* first terminating character */ 
 528 int end2
;                       /* second terminating character */ 
 530         sopno start 
= HERE(); 
 531         int first 
= 1;                  /* first subexpression? */ 
 536                 p
->g
->iflags 
|= USEBOL
; 
 539         while (MORE() && !SEETWO(end1
, end2
)) { 
 540                 wasdollar 
= p_simp_re(p
, first
); 
 543         if (wasdollar
) {        /* oops, that was a trailing anchor */ 
 546                 p
->g
->iflags 
|= USEEOL
; 
 550         (void)REQUIRE(HERE() != start
, REG_EMPTY
);      /* require nonempty */ 
 554  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition 
 555  == static int p_simp_re(struct parse *p, int starordinary); 
 557 static int                      /* was the simple RE an unbackslashed $? */ 
 558 p_simp_re(p
, starordinary
) 
 560 int starordinary
;               /* is a leading * an ordinary character? */ 
 568 #       define  BACKSL  (1<<CHAR_BIT) 
 570         pos 
= HERE();           /* repetion op, if any, covers from here */ 
 572         assert(MORE());         /* caller should have ensured this */ 
 575                 (void)REQUIRE(MORE(), REG_EESCAPE
); 
 576                 c 
= BACKSL 
| GETNEXT(); 
 580                 if (p
->g
->cflags
®_NEWLINE
) 
 589                 SETERROR(REG_BADRPT
); 
 595                         p
->pbegin
[subno
] = HERE(); 
 596                 EMIT(OLPAREN
, subno
); 
 597                 /* the MORE here is an error heuristic */ 
 598                 if (MORE() && !SEETWO('\\', ')')) 
 600                 if (subno 
< NPAREN
) { 
 601                         p
->pend
[subno
] = HERE(); 
 602                         assert(p
->pend
[subno
] != 0); 
 604                 EMIT(ORPAREN
, subno
); 
 605                 (void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN
); 
 607         case BACKSL
|')':        /* should not get here -- must be user */ 
 609                 SETERROR(REG_EPAREN
); 
 620                 i 
= (c
&~BACKSL
) - '0'; 
 622                 if (p
->pend
[i
] != 0) { 
 623                         assert(i 
<= p
->g
->nsub
); 
 625                         assert(p
->pbegin
[i
] != 0); 
 626                         assert(OP(p
->strip
[p
->pbegin
[i
]]) == OLPAREN
); 
 627                         assert(OP(p
->strip
[p
->pend
[i
]]) == ORPAREN
); 
 628                         (void) dupl(p
, p
->pbegin
[i
]+1, p
->pend
[i
]); 
 631                         SETERROR(REG_ESUBREG
); 
 635                 (void)REQUIRE(starordinary
, REG_BADRPT
); 
 638                 ordinary(p
, (char)c
); 
 642         if (EAT('*')) {         /* implemented as +? */ 
 643                 /* this case does not require the (y|) trick, noKLUDGE */ 
 646                 INSERT(OQUEST_
, pos
); 
 647                 ASTERN(O_QUEST
, pos
); 
 648         } else if (EATTWO('\\', '{')) { 
 651                         if (MORE() && isdigit((uch
)PEEK())) { 
 653                                 (void)REQUIRE(count 
<= count2
, REG_BADBR
); 
 654                         } else          /* single number with comma */ 
 656                 } else          /* just a single number */ 
 658                 repeat(p
, pos
, count
, count2
); 
 659                 if (!EATTWO('\\', '}')) {       /* error heuristics */ 
 660                         while (MORE() && !SEETWO('\\', '}')) 
 662                         (void)REQUIRE(MORE(), REG_EBRACE
); 
 665         } else if (c 
== '$')     /* $ (but not \$) ends it */ 
 672  - p_count - parse a repetition count 
 673  == static int p_count(struct parse *p); 
 675 static int                      /* the value */ 
 682         while (MORE() && isdigit((uch
)PEEK()) && count 
<= DUPMAX
) { 
 683                 count 
= count
*10 + (GETNEXT() - '0'); 
 687         (void)REQUIRE(ndigits 
> 0 && count 
<= DUPMAX
, REG_BADBR
); 
 692  - p_bracket - parse a bracketed character list 
 693  == static void p_bracket(struct parse *p); 
 695  * Note a significant property of this code:  if the allocset() did SETERROR, 
 696  * no set operations are done. 
 702         cset 
*cs 
= allocset(p
); 
 705         /* Dept of Truly Sickening Special-Case Kludges */ 
 706         if (p
->next 
+ 5 < p
->end 
&& strncmp(p
->next
, "[:<:]]", 6) == 0) { 
 711         if (p
->next 
+ 5 < p
->end 
&& strncmp(p
->next
, "[:>:]]", 6) == 0) { 
 718                 invert
++;       /* make note to invert set at end */ 
 723         while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) 
 727         (void)MUSTEAT(']', REG_EBRACK
); 
 729         if (p
->error 
!= 0)      /* don't mess things up further */ 
 732         if (p
->g
->cflags
®_ICASE
) { 
 736                 for (i 
= p
->g
->csetsize 
- 1; i 
>= 0; i
--) 
 737                         if (CHIN(cs
, i
) && isalpha(i
)) { 
 742                 if (cs
->multis 
!= NULL
) 
 748                 for (i 
= p
->g
->csetsize 
- 1; i 
>= 0; i
--) 
 753                 if (p
->g
->cflags
®_NEWLINE
) 
 755                 if (cs
->multis 
!= NULL
) 
 759         assert(cs
->multis 
== NULL
);             /* xxx */ 
 761         if (nch(p
, cs
) == 1) {          /* optimize singleton sets */ 
 762                 ordinary(p
, firstch(p
, cs
)); 
 765                 EMIT(OANYOF
, freezeset(p
, cs
)); 
 769  - p_b_term - parse one term of a bracketed character list 
 770  == static void p_b_term(struct parse *p, cset *cs); 
 781         /* classify what we've got */ 
 782         switch ((MORE()) ? PEEK() : '\0') { 
 784                 c 
= (MORE2()) ? PEEK2() : '\0'; 
 787                 SETERROR(REG_ERANGE
); 
 788                 return;                 /* NOTE RETURN */ 
 796         case ':':               /* character class */ 
 798                 (void)REQUIRE(MORE(), REG_EBRACK
); 
 800                 (void)REQUIRE(c 
!= '-' && c 
!= ']', REG_ECTYPE
); 
 802                 (void)REQUIRE(MORE(), REG_EBRACK
); 
 803                 (void)REQUIRE(EATTWO(':', ']'), REG_ECTYPE
); 
 805         case '=':               /* equivalence class */ 
 807                 (void)REQUIRE(MORE(), REG_EBRACK
); 
 809                 (void)REQUIRE(c 
!= '-' && c 
!= ']', REG_ECOLLATE
); 
 811                 (void)REQUIRE(MORE(), REG_EBRACK
); 
 812                 (void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE
); 
 814         default:                /* symbol, ordinary character, or range */ 
 815 /* xxx revision needed for multichar stuff */ 
 816                 start 
= p_b_symbol(p
); 
 817                 if (SEE('-') && MORE2() && PEEK2() != ']') { 
 823                                 finish 
= p_b_symbol(p
); 
 829                         if (__collate_load_error
) { 
 830                                 (void)REQUIRE((uch
)start 
<= (uch
)finish
, REG_ERANGE
); 
 831                                 for (i 
= (uch
)start
; i 
<= (uch
)finish
; i
++) 
 834                                 (void)REQUIRE(__collate_range_cmp(start
, finish
) <= 0, REG_ERANGE
); 
 835                                 for (i 
= CHAR_MIN
; i 
<= CHAR_MAX
; i
++) { 
 836                                         if (   __collate_range_cmp(start
, i
) <= 0 
 837                                             && __collate_range_cmp(i
, finish
) <= 0 
 848  - p_b_cclass - parse a character-class name and deal with it 
 849  == static void p_b_cclass(struct parse *p, cset *cs); 
 861         while (MORE() && isalpha((uch
)PEEK())) 
 864         for (cp 
= cclasses
; cp
->name 
!= NULL
; cp
++) 
 865                 if (strncmp(cp
->name
, sp
, len
) == 0 && cp
->name
[len
] == '\0') 
 867         if (cp
->name 
== NULL
) { 
 868                 /* oops, didn't find it */ 
 869                 SETERROR(REG_ECTYPE
); 
 875                 for (c 
= CHAR_MIN
; c 
<= CHAR_MAX
; c
++) 
 880                 for (c 
= CHAR_MIN
; c 
<= CHAR_MAX
; c
++) 
 885                 for (c 
= CHAR_MIN
; c 
<= CHAR_MAX
; c
++) 
 890                 for (c 
= CHAR_MIN
; c 
<= CHAR_MAX
; c
++) 
 895                 for (c 
= CHAR_MIN
; c 
<= CHAR_MAX
; c
++) 
 900                 for (c 
= CHAR_MIN
; c 
<= CHAR_MAX
; c
++) 
 905                 for (c 
= CHAR_MIN
; c 
<= CHAR_MAX
; c
++) 
 910                 for (c 
= CHAR_MIN
; c 
<= CHAR_MAX
; c
++) 
 915                 for (c 
= CHAR_MIN
; c 
<= CHAR_MAX
; c
++) 
 920                 for (c 
= CHAR_MIN
; c 
<= CHAR_MAX
; c
++) 
 925                 for (c 
= CHAR_MIN
; c 
<= CHAR_MAX
; c
++) 
 930                 for (c 
= CHAR_MIN
; c 
<= CHAR_MAX
; c
++) 
 931                         if (isxdigit((uch
)c
)) 
 936         for (u 
= cp
->multis
; *u 
!= '\0'; u 
+= strlen(u
) + 1) 
 942  - p_b_eclass - parse an equivalence-class name and deal with it 
 943  == static void p_b_eclass(struct parse *p, cset *cs); 
 945  * This implementation is incomplete. xxx 
 954         c 
= p_b_coll_elem(p
, '='); 
 959  - p_b_symbol - parse a character or [..]ed multicharacter collating symbol 
 960  == static char p_b_symbol(struct parse *p); 
 962 static char                     /* value of symbol */ 
 968         (void)REQUIRE(MORE(), REG_EBRACK
); 
 969         if (!EATTWO('[', '.')) 
 972         /* collating symbol */ 
 973         value 
= p_b_coll_elem(p
, '.'); 
 974         (void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE
); 
 979  - p_b_coll_elem - parse a collating-element name and look it up 
 980  == static char p_b_coll_elem(struct parse *p, int endc); 
 982 static char                     /* value of collating element */ 
 983 p_b_coll_elem(p
, endc
) 
 985 int endc
;                       /* name ended by endc,']' */ 
 991         while (MORE() && !SEETWO(endc
, ']')) 
 994                 SETERROR(REG_EBRACK
); 
 998         for (cp 
= cnames
; cp
->name 
!= NULL
; cp
++) 
 999                 if (strncmp(cp
->name
, sp
, len
) == 0 && cp
->name
[len
] == '\0') 
1000                         return(cp
->code
);       /* known name */ 
1002                 return(*sp
);    /* single character */ 
1003         SETERROR(REG_ECOLLATE
);                 /* neither */ 
1008  - othercase - return the case counterpart of an alphabetic 
1009  == static char othercase(int ch); 
1011 static char                     /* if no counterpart, return ch */ 
1016         assert(isalpha(ch
)); 
1018                 return(tolower(ch
)); 
1019         else if (islower(ch
)) 
1020                 return(toupper(ch
)); 
1021         else                    /* peculiar, but could happen */ 
1026  - bothcases - emit a dualcase version of a two-case character 
1027  == static void bothcases(struct parse *p, int ch); 
1029  * Boy, is this implementation ever a kludge... 
1036         char *oldnext 
= p
->next
; 
1037         char *oldend 
= p
->end
; 
1041         assert(othercase(ch
) != ch
);    /* p_bracket() would recurse */ 
1048         assert(p
->next 
== bracket
+2); 
1054  - ordinary - emit an ordinary character 
1055  == static void ordinary(struct parse *p, int ch); 
1062         cat_t 
*cap 
= p
->g
->categories
; 
1064         if ((p
->g
->cflags
®_ICASE
) && isalpha((uch
)ch
) && othercase(ch
) != ch
) 
1067                 EMIT(OCHAR
, (uch
)ch
); 
1069                         cap
[ch
] = p
->g
->ncategories
++; 
1074  - nonnewline - emit REG_NEWLINE version of OANY 
1075  == static void nonnewline(struct parse *p); 
1077  * Boy, is this implementation ever a kludge... 
1083         char *oldnext 
= p
->next
; 
1084         char *oldend 
= p
->end
; 
1094         assert(p
->next 
== bracket
+3); 
1100  - repeat - generate code for a bounded repetition, recursively if needed 
1101  == static void repeat(struct parse *p, sopno start, int from, int to); 
1104 repeat(p
, start
, from
, to
) 
1106 sopno start
;                    /* operand from here to end of strip */ 
1107 int from
;                       /* repeated from this number */ 
1108 int to
;                         /* to this number of times (maybe INFINITY) */ 
1110         sopno finish 
= HERE(); 
1113 #       define  REP(f, t)       ((f)*8 + (t)) 
1114 #       define  MAP(n)  (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) 
1117         if (p
->error 
!= 0)      /* head off possible runaway recursion */ 
1122         switch (REP(MAP(from
), MAP(to
))) { 
1123         case REP(0, 0):                 /* must be user doing this */ 
1124                 DROP(finish
-start
);     /* drop the operand */ 
1126         case REP(0, 1):                 /* as x{1,1}? */ 
1127         case REP(0, N
):                 /* as x{1,n}? */ 
1128         case REP(0, INF
):               /* as x{1,}? */ 
1129                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 
1130                 INSERT(OCH_
, start
);            /* offset is wrong... */ 
1131                 repeat(p
, start
+1, 1, to
); 
1132                 ASTERN(OOR1
, start
); 
1133                 AHEAD(start
);                   /* ... fix it */ 
1136                 ASTERN(O_CH
, THERETHERE()); 
1138         case REP(1, 1):                 /* trivial case */ 
1141         case REP(1, N
):                 /* as x?x{1,n-1} */ 
1142                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 
1143                 INSERT(OCH_
, start
); 
1144                 ASTERN(OOR1
, start
); 
1146                 EMIT(OOR2
, 0);                  /* offset very wrong... */ 
1147                 AHEAD(THERE());                 /* ...so fix it */ 
1148                 ASTERN(O_CH
, THERETHERE()); 
1149                 copy 
= dupl(p
, start
+1, finish
+1); 
1150                 assert(copy 
== finish
+4); 
1151                 repeat(p
, copy
, 1, to
-1); 
1153         case REP(1, INF
):               /* as x+ */ 
1154                 INSERT(OPLUS_
, start
); 
1155                 ASTERN(O_PLUS
, start
); 
1157         case REP(N
, N
):                 /* as xx{m-1,n-1} */ 
1158                 copy 
= dupl(p
, start
, finish
); 
1159                 repeat(p
, copy
, from
-1, to
-1); 
1161         case REP(N
, INF
):               /* as xx{n-1,INF} */ 
1162                 copy 
= dupl(p
, start
, finish
); 
1163                 repeat(p
, copy
, from
-1, to
); 
1165         default:                        /* "can't happen" */ 
1166                 SETERROR(REG_ASSERT
);   /* just in case */ 
1172  - seterr - set an error condition 
1173  == static int seterr(struct parse *p, int e); 
1175 static int                      /* useless but makes type checking happy */ 
1180         if (p
->error 
== 0)      /* keep earliest error condition */ 
1182         p
->next 
= nuls
;         /* try to bring things to a halt */ 
1184         return(0);              /* make the return value well-defined */ 
1188  - allocset - allocate a set of characters for [] 
1189  == static cset *allocset(struct parse *p); 
1195         int no 
= p
->g
->ncsets
++; 
1199         size_t css 
= (size_t)p
->g
->csetsize
; 
1202         if (no 
>= p
->ncsalloc
) {        /* need another column of space */ 
1203                 p
->ncsalloc 
+= CHAR_BIT
; 
1205                 assert(nc 
% CHAR_BIT 
== 0); 
1206                 nbytes 
= nc 
/ CHAR_BIT 
* css
; 
1207                 if (p
->g
->sets 
== NULL
) 
1208                         p
->g
->sets 
= (cset 
*)malloc(nc 
* sizeof(cset
)); 
1210                         p
->g
->sets 
= (cset 
*)reallocf((char *)p
->g
->sets
, 
1212                 if (p
->g
->setbits 
== NULL
) 
1213                         p
->g
->setbits 
= (uch 
*)malloc(nbytes
); 
1215                         p
->g
->setbits 
= (uch 
*)reallocf((char *)p
->g
->setbits
, 
1217                         /* xxx this isn't right if setbits is now NULL */ 
1218                         for (i 
= 0; i 
< no
; i
++) 
1219                                 p
->g
->sets
[i
].ptr 
= p
->g
->setbits 
+ css
*(i
/CHAR_BIT
); 
1221                 if (p
->g
->sets 
!= NULL 
&& p
->g
->setbits 
!= NULL
) 
1222                         (void) memset((char *)p
->g
->setbits 
+ (nbytes 
- css
), 
1226                         SETERROR(REG_ESPACE
); 
1227                         /* caller's responsibility not to do set ops */ 
1231         assert(p
->g
->sets 
!= NULL
);     /* xxx */ 
1232         cs 
= &p
->g
->sets
[no
]; 
1233         cs
->ptr 
= p
->g
->setbits 
+ css
*((no
)/CHAR_BIT
); 
1234         cs
->mask 
= 1 << ((no
) % CHAR_BIT
); 
1243  - freeset - free a now-unused set 
1244  == static void freeset(struct parse *p, cset *cs); 
1252         cset 
*top 
= &p
->g
->sets
[p
->g
->ncsets
]; 
1253         size_t css 
= (size_t)p
->g
->csetsize
; 
1255         for (i 
= 0; i 
< css
; i
++) 
1257         if (cs 
== top
-1)        /* recover only the easy case */ 
1262  - freezeset - final processing on a set of characters 
1263  == static int freezeset(struct parse *p, cset *cs); 
1265  * The main task here is merging identical sets.  This is usually a waste 
1266  * of time (although the hash code minimizes the overhead), but can win 
1267  * big if REG_ICASE is being used.  REG_ICASE, by the way, is why the hash 
1268  * is done using addition rather than xor -- all ASCII [aA] sets xor to 
1271 static int                      /* set number */ 
1278         cset 
*top 
= &p
->g
->sets
[p
->g
->ncsets
]; 
1280         size_t css 
= (size_t)p
->g
->csetsize
; 
1282         /* look for an earlier one which is the same */ 
1283         for (cs2 
= &p
->g
->sets
[0]; cs2 
< top
; cs2
++) 
1284                 if (cs2
->hash 
== h 
&& cs2 
!= cs
) { 
1286                         for (i 
= 0; i 
< css
; i
++) 
1287                                 if (!!CHIN(cs2
, i
) != !!CHIN(cs
, i
)) 
1293         if (cs2 
< top
) {        /* found one */ 
1298         return((int)(cs 
- p
->g
->sets
)); 
1302  - firstch - return first character in a set (which must have at least one) 
1303  == static int firstch(struct parse *p, cset *cs); 
1305 static int                      /* character; there is no "none" value */ 
1311         size_t css 
= (size_t)p
->g
->csetsize
; 
1313         for (i 
= 0; i 
< css
; i
++) 
1317         return(0);              /* arbitrary */ 
1321  - nch - number of characters in a set 
1322  == static int nch(struct parse *p, cset *cs); 
1330         size_t css 
= (size_t)p
->g
->csetsize
; 
1333         for (i 
= 0; i 
< css
; i
++) 
1340  - mcadd - add a collating element to a cset 
1341  == static void mcadd(struct parse *p, cset *cs, \ 
1350         size_t oldend 
= cs
->smultis
; 
1352         cs
->smultis 
+= strlen(cp
) + 1; 
1353         if (cs
->multis 
== NULL
) 
1354                 cs
->multis 
= malloc(cs
->smultis
); 
1356                 cs
->multis 
= reallocf(cs
->multis
, cs
->smultis
); 
1357         if (cs
->multis 
== NULL
) { 
1358                 SETERROR(REG_ESPACE
); 
1362         (void) strcpy(cs
->multis 
+ oldend 
- 1, cp
); 
1363         cs
->multis
[cs
->smultis 
- 1] = '\0'; 
1368  - mcsub - subtract a collating element from a cset 
1369  == static void mcsub(cset *cs, char *cp); 
1376         char *fp 
= mcfind(cs
, cp
); 
1377         size_t len 
= strlen(fp
); 
1380         (void) memmove(fp
, fp 
+ len 
+ 1, 
1381                                 cs
->smultis 
- (fp 
+ len 
+ 1 - cs
->multis
)); 
1384         if (cs
->smultis 
== 0) { 
1390         cs
->multis 
= reallocf(cs
->multis
, cs
->smultis
); 
1391         assert(cs
->multis 
!= NULL
); 
1395  - mcin - is a collating element in a cset? 
1396  == static int mcin(cset *cs, char *cp); 
1403         return(mcfind(cs
, cp
) != NULL
); 
1407  - mcfind - find a collating element in a cset 
1408  == static char *mcfind(cset *cs, char *cp); 
1417         if (cs
->multis 
== NULL
) 
1419         for (p 
= cs
->multis
; *p 
!= '\0'; p 
+= strlen(p
) + 1) 
1420                 if (strcmp(cp
, p
) == 0) 
1427  - mcinvert - invert the list of collating elements in a cset 
1428  == static void mcinvert(struct parse *p, cset *cs); 
1430  * This would have to know the set of possibilities.  Implementation 
1438         assert(cs
->multis 
== NULL
);     /* xxx */ 
1442  - mccase - add case counterparts of the list of collating elements in a cset 
1443  == static void mccase(struct parse *p, cset *cs); 
1445  * This would have to know the set of possibilities.  Implementation 
1453         assert(cs
->multis 
== NULL
);     /* xxx */ 
1457  - isinsets - is this character in any sets? 
1458  == static int isinsets(struct re_guts *g, int c); 
1460 static int                      /* predicate */ 
1467         int ncols 
= (g
->ncsets
+(CHAR_BIT
-1)) / CHAR_BIT
; 
1468         unsigned uc 
= (uch
)c
; 
1470         for (i 
= 0, col 
= g
->setbits
; i 
< ncols
; i
++, col 
+= g
->csetsize
) 
1477  - samesets - are these two characters in exactly the same sets? 
1478  == static int samesets(struct re_guts *g, int c1, int c2); 
1480 static int                      /* predicate */ 
1488         int ncols 
= (g
->ncsets
+(CHAR_BIT
-1)) / CHAR_BIT
; 
1489         unsigned uc1 
= (uch
)c1
; 
1490         unsigned uc2 
= (uch
)c2
; 
1492         for (i 
= 0, col 
= g
->setbits
; i 
< ncols
; i
++, col 
+= g
->csetsize
) 
1493                 if (col
[uc1
] != col
[uc2
]) 
1499  - categorize - sort out character categories 
1500  == static void categorize(struct parse *p, struct re_guts *g); 
1507         cat_t 
*cats 
= g
->categories
; 
1512         /* avoid making error situations worse */ 
1516         for (c 
= CHAR_MIN
; c 
<= CHAR_MAX
; c
++) 
1517                 if (cats
[c
] == 0 && isinsets(g
, c
)) { 
1518                         cat 
= g
->ncategories
++; 
1520                         for (c2 
= c
+1; c2 
<= CHAR_MAX
; c2
++) 
1521                                 if (cats
[c2
] == 0 && samesets(g
, c
, c2
)) 
1527  - dupl - emit a duplicate of a bunch of sops 
1528  == static sopno dupl(struct parse *p, sopno start, sopno finish); 
1530 static sopno                    
/* start of duplicate */ 
1531 dupl(p
, start
, finish
) 
1533 sopno start
;                    /* from here */ 
1534 sopno finish
;                   /* to this less one */ 
1537         sopno len 
= finish 
- start
; 
1539         assert(finish 
>= start
); 
1542         enlarge(p
, p
->ssize 
+ len
);     /* this many unexpected additions */ 
1543         assert(p
->ssize 
>= p
->slen 
+ len
); 
1544         (void) memcpy((char *)(p
->strip 
+ p
->slen
), 
1545                 (char *)(p
->strip 
+ start
), (size_t)len
*sizeof(sop
)); 
1551  - doemit - emit a strip operator 
1552  == static void doemit(struct parse *p, sop op, size_t opnd); 
1554  * It might seem better to implement this as a macro with a function as 
1555  * hard-case backup, but it's just too big and messy unless there are 
1556  * some changes to the data structures.  Maybe later. 
1564         /* avoid making error situations worse */ 
1568         /* deal with oversize operands ("can't happen", more or less) */ 
1569         assert(opnd 
< 1<<OPSHIFT
); 
1571         /* deal with undersized strip */ 
1572         if (p
->slen 
>= p
->ssize
) 
1573                 enlarge(p
, (p
->ssize
+1) / 2 * 3);       /* +50% */ 
1574         assert(p
->slen 
< p
->ssize
); 
1576         /* finally, it's all reduced to the easy case */ 
1577         p
->strip
[p
->slen
++] = SOP(op
, opnd
); 
1581  - doinsert - insert a sop into the strip 
1582  == static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos); 
1585 doinsert(p
, op
, opnd
, pos
) 
1595         /* avoid making error situations worse */ 
1600         EMIT(op
, opnd
);         /* do checks, ensure space */ 
1601         assert(HERE() == sn
+1); 
1604         /* adjust paren pointers */ 
1606         for (i 
= 1; i 
< NPAREN
; i
++) { 
1607                 if (p
->pbegin
[i
] >= pos
) { 
1610                 if (p
->pend
[i
] >= pos
) { 
1615         memmove((char *)&p
->strip
[pos
+1], (char *)&p
->strip
[pos
], 
1616                                                 (HERE()-pos
-1)*sizeof(sop
)); 
1621  - dofwd - complete a forward reference 
1622  == static void dofwd(struct parse *p, sopno pos, sop value); 
1625 dofwd(p
, pos
, value
) 
1630         /* avoid making error situations worse */ 
1634         assert(value 
< 1<<OPSHIFT
); 
1635         p
->strip
[pos
] = OP(p
->strip
[pos
]) | value
; 
1639  - enlarge - enlarge the strip 
1640  == static void enlarge(struct parse *p, sopno size); 
1649         if (p
->ssize 
>= size
) 
1652         sp 
= (sop 
*)realloc(p
->strip
, size
*sizeof(sop
)); 
1654                 SETERROR(REG_ESPACE
); 
1662  - stripsnug - compact the strip 
1663  == static void stripsnug(struct parse *p, struct re_guts *g); 
1670         g
->nstates 
= p
->slen
; 
1671         g
->strip 
= (sop 
*)realloc((char *)p
->strip
, p
->slen 
* sizeof(sop
)); 
1672         if (g
->strip 
== NULL
) { 
1673                 SETERROR(REG_ESPACE
); 
1674                 g
->strip 
= p
->strip
; 
1679  - findmust - fill in must and mlen with longest mandatory literal string 
1680  == static void findmust(struct parse *p, struct re_guts *g); 
1682  * This algorithm could do fancy things like analyzing the operands of | 
1683  * for common subsequences.  Someday.  This code is simple and finds most 
1684  * of the interesting cases. 
1686  * Note that must and mlen got initialized during setup. 
1703         /* avoid making error situations worse */ 
1707         /* Find out if we can handle OANYOF or not */ 
1709         for (cs 
= 0; cs 
< g
->ncsets
; cs
++) 
1710                 if (g
->sets
[cs
].multis 
!= NULL
) 
1713         /* find the longest OCHAR sequence in strip */ 
1717         scan 
= g
->strip 
+ 1; 
1721                 case OCHAR
:             /* sequence member */ 
1722                         if (newlen 
== 0)                /* new sequence */ 
1723                                 newstart 
= scan 
- 1; 
1726                 case OPLUS_
:            /* things that don't break one */ 
1730                 case OQUEST_
:           /* things that must be skipped */ 
1732                         offset 
= altoffset(scan
, offset
, mccs
); 
1737                                 /* assert() interferes w debug printouts */ 
1738                                 if (OP(s
) != O_QUEST 
&& OP(s
) != O_CH 
&& 
1743                         } while (OP(s
) != O_QUEST 
&& OP(s
) != O_CH
); 
1745                 case OBOW
:              /* things that break a sequence */ 
1752                         if (newlen 
> g
->mlen
) {         /* ends one */ 
1756                                         g
->moffset 
+= offset
; 
1759                                         g
->moffset 
= offset
; 
1767                         if (newlen 
> g
->mlen
) {         /* ends one */ 
1771                                         g
->moffset 
+= offset
; 
1774                                         g
->moffset 
= offset
; 
1783                 case OANYOF
:            /* may or may not invalidate offset */ 
1784                         /* First, everything as OANY */ 
1785                         if (newlen 
> g
->mlen
) {         /* ends one */ 
1789                                         g
->moffset 
+= offset
; 
1792                                         g
->moffset 
= offset
; 
1800                         /* And, now, if we found out we can't deal with 
1801                          * it, make offset = -1. 
1807                         /* Anything here makes it impossible or too hard 
1808                          * to calculate the offset -- so we give up; 
1809                          * save the last known good offset, in case the 
1810                          * must sequence doesn't occur later. 
1812                         if (newlen 
> g
->mlen
) {         /* ends one */ 
1816                                         g
->moffset 
+= offset
; 
1818                                         g
->moffset 
= offset
; 
1824         } while (OP(s
) != OEND
); 
1826         if (g
->mlen 
== 0) {             /* there isn't one */ 
1831         /* turn it into a character string */ 
1832         g
->must 
= malloc((size_t)g
->mlen 
+ 1); 
1833         if (g
->must 
== NULL
) {          /* argh; just forget it */ 
1840         for (i 
= g
->mlen
; i 
> 0; i
--) { 
1841                 while (OP(s 
= *scan
++) != OCHAR
) 
1843                 assert(cp 
< g
->must 
+ g
->mlen
); 
1844                 *cp
++ = (char)OPND(s
); 
1846         assert(cp 
== g
->must 
+ g
->mlen
); 
1847         *cp
++ = '\0';           /* just on general principles */ 
1851  - altoffset - choose biggest offset among multiple choices 
1852  == static int altoffset(sop *scan, int offset, int mccs); 
1854  * Compute, recursively if necessary, the largest offset among multiple 
1858 altoffset(scan
, offset
, mccs
) 
1867         /* If we gave up already on offsets, return */ 
1874         while (OP(s
) != O_QUEST 
&& OP(s
) != O_CH
) { 
1883                         try = altoffset(scan
, try, mccs
); 
1890                                 if (OP(s
) != O_QUEST 
&& OP(s
) != O_CH 
&& 
1893                         } while (OP(s
) != O_QUEST 
&& OP(s
) != O_CH
); 
1894                         /* We must skip to the next position, or we'll 
1895                          * leave altoffset() too early. 
1923         return largest
+offset
; 
1927  - computejumps - compute char jumps for BM scan 
1928  == static void computejumps(struct parse *p, struct re_guts *g); 
1930  * This algorithm assumes g->must exists and is has size greater than 
1931  * zero. It's based on the algorithm found on Computer Algorithms by 
1934  * A char jump is the number of characters one needs to jump based on 
1935  * the value of the character from the text that was mismatched. 
1945         /* Avoid making errors worse */ 
1949         g
->charjump 
= (int*) malloc((NC 
+ 1) * sizeof(int)); 
1950         if (g
->charjump 
== NULL
)        /* Not a fatal error */ 
1952         /* Adjust for signed chars, if necessary */ 
1953         g
->charjump 
= &g
->charjump
[-(CHAR_MIN
)]; 
1955         /* If the character does not exist in the pattern, the jump 
1956          * is equal to the number of characters in the pattern. 
1958         for (ch 
= CHAR_MIN
; ch 
< (CHAR_MAX 
+ 1); ch
++) 
1959                 g
->charjump
[ch
] = g
->mlen
; 
1961         /* If the character does exist, compute the jump that would 
1962          * take us to the last character in the pattern equal to it 
1963          * (notice that we match right to left, so that last character 
1964          * is the first one that would be matched). 
1966         for (mindex 
= 0; mindex 
< g
->mlen
; mindex
++) 
1967                 g
->charjump
[(int)g
->must
[mindex
]] = g
->mlen 
- mindex 
- 1; 
1971  - computematchjumps - compute match jumps for BM scan 
1972  == static void computematchjumps(struct parse *p, struct re_guts *g); 
1974  * This algorithm assumes g->must exists and is has size greater than 
1975  * zero. It's based on the algorithm found on Computer Algorithms by 
1978  * A match jump is the number of characters one needs to advance based 
1979  * on the already-matched suffix. 
1980  * Notice that all values here are minus (g->mlen-1), because of the way 
1981  * the search algorithm works. 
1984 computematchjumps(p
, g
) 
1988         int mindex
;             /* General "must" iterator */ 
1989         int suffix
;             /* Keeps track of matching suffix */ 
1990         int ssuffix
;            /* Keeps track of suffixes' suffix */ 
1991         int* pmatches
;          /* pmatches[k] points to the next i 
1992                                  * such that i+1...mlen is a substring 
1993                                  * of k+1...k+mlen-i-1 
1996         /* Avoid making errors worse */ 
2000         pmatches 
= (int*) malloc(g
->mlen 
* sizeof(unsigned int)); 
2001         if (pmatches 
== NULL
) { 
2002                 g
->matchjump 
= NULL
; 
2006         g
->matchjump 
= (int*) malloc(g
->mlen 
* sizeof(unsigned int)); 
2007         if (g
->matchjump 
== NULL
)       /* Not a fatal error */ 
2010         /* Set maximum possible jump for each character in the pattern */ 
2011         for (mindex 
= 0; mindex 
< g
->mlen
; mindex
++) 
2012                 g
->matchjump
[mindex
] = 2*g
->mlen 
- mindex 
- 1; 
2014         /* Compute pmatches[] */ 
2015         for (mindex 
= g
->mlen 
- 1, suffix 
= g
->mlen
; mindex 
>= 0; 
2016             mindex
--, suffix
--) { 
2017                 pmatches
[mindex
] = suffix
; 
2019                 /* If a mismatch is found, interrupting the substring, 
2020                  * compute the matchjump for that position. If no 
2021                  * mismatch is found, then a text substring mismatched 
2022                  * against the suffix will also mismatch against the 
2025                 while (suffix 
< g
->mlen
 
2026                     && g
->must
[mindex
] != g
->must
[suffix
]) { 
2027                         g
->matchjump
[suffix
] = MIN(g
->matchjump
[suffix
], 
2028                             g
->mlen 
- mindex 
- 1); 
2029                         suffix 
= pmatches
[suffix
]; 
2033         /* Compute the matchjump up to the last substring found to jump 
2034          * to the beginning of the largest must pattern prefix matching 
2037         for (mindex 
= 0; mindex 
<= suffix
; mindex
++) 
2038                 g
->matchjump
[mindex
] = MIN(g
->matchjump
[mindex
], 
2039                     g
->mlen 
+ suffix 
- mindex
); 
2041         ssuffix 
= pmatches
[suffix
]; 
2042         while (suffix 
< g
->mlen
) { 
2043                 while (suffix 
<= ssuffix 
&& suffix 
< g
->mlen
) { 
2044                         g
->matchjump
[suffix
] = MIN(g
->matchjump
[suffix
], 
2045                             g
->mlen 
+ ssuffix 
- suffix
); 
2048                 if (suffix 
< g
->mlen
) 
2049                         ssuffix 
= pmatches
[ssuffix
]; 
2056  - pluscount - count + nesting 
2057  == static sopno pluscount(struct parse *p, struct re_guts *g); 
2059 static sopno                    
/* nesting depth */ 
2070                 return(0);      /* there may not be an OEND */ 
2072         scan 
= g
->strip 
+ 1; 
2080                         if (plusnest 
> maxnest
) 
2085         } while (OP(s
) != OEND
);