]>
git.saurik.com Git - wxWidgets.git/blob - src/regex/regcomp.c
   1 #if defined(__MWERKS__) && !defined(__MACH__) 
  20  * parse structure, passed up and down to avoid global variables and 
  24         char *next
;             /* next character in RE */ 
  25         char *end
;              /* end of string (-> NUL normally) */ 
  26         int error
;              /* has an error been seen? */ 
  27         sop 
*strip
;             /* malloced strip */ 
  28         sopno ssize
;            /* malloced strip size (allocated) */ 
  29         sopno slen
;             /* malloced strip length (used) */ 
  30         int ncsalloc
;           /* number of csets allocated */ 
  32 #       define  NPAREN  10      /* we need to remember () 1-9 for back refs */ 
  33         sopno pbegin
[NPAREN
];   /* -> ( ([0] unused) */ 
  34         sopno pend
[NPAREN
];     /* -> ) ([0] unused) */ 
  39 static char nuls
[10];           /* place to point scanner in event of error */ 
  42  * macros for use with parse structure 
  43  * BEWARE:  these know that the parse structure is named `p' !!! 
  45 #define PEEK()  (*p->next) 
  46 #define PEEK2() (*(p->next+1)) 
  47 #define MORE()  (p->next < p->end) 
  48 #define MORE2() (p->next+1 < p->end) 
  49 #define SEE(c)  (MORE() && PEEK() == (c)) 
  50 #define SEETWO(a, b)    (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) 
  51 #define EAT(c)  ((SEE(c)) ? (NEXT(), 1) : 0) 
  52 #define EATTWO(a, b)    ((SEETWO(a, b)) ? (NEXT2(), 1) : 0) 
  53 #define NEXT()  (p->next++) 
  54 #define NEXT2() (p->next += 2) 
  55 #define NEXTn(n)        (p->next += (n)) 
  56 #define GETNEXT()       (*p->next++) 
  57 #define SETERROR(e)     seterr(p, (e)) 
  58 #define REQUIRE(co, e)  ((void)((co) || SETERROR(e))) 
  59 #define MUSTSEE(c, e)   (REQUIRE(MORE() && PEEK() == (c), e)) 
  60 #define MUSTEAT(c, e)   (REQUIRE(MORE() && GETNEXT() == (c), e)) 
  61 #define MUSTNOTSEE(c, e)        (REQUIRE(!MORE() || PEEK() != (c), e)) 
  62 #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) 
  63 #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) 
  64 #define AHEAD(pos)              dofwd(p, pos, HERE()-(pos)) 
  65 #define ASTERN(sop, pos)        EMIT(sop, HERE()-pos) 
  66 #define HERE()          (p->slen) 
  67 #define THERE()         (p->slen - 1) 
  68 #define THERETHERE()    (p->slen - 2) 
  69 #define DROP(n) (p->slen -= (n)) 
  72 static int never 
= 0;           /* for use in asserts; shuts lint up */ 
  74 #define never   0               /* some <assert.h>s have bugs too */ 
  78  - regcomp - interface for parser and compilation 
  79  = extern int regcomp(regex_t *, const char *, int); 
  80  = #define      REG_BASIC       0000 
  81  = #define      REG_EXTENDED    0001 
  82  = #define      REG_ICASE       0002 
  83  = #define      REG_NOSUB       0004 
  84  = #define      REG_NEWLINE     0010 
  85  = #define      REG_NOSPEC      0020 
  86  = #define      REG_PEND        0040 
  87  = #define      REG_DUMP        0200 
  89 int                             /* 0 success, otherwise REG_something */ 
  90 regcomp(preg
, pattern
, cflags
) 
  96         register struct re_guts 
*g
; 
  97         register struct parse 
*p 
= &pa
; 
 101 #       define  GOODFLAGS(f)    (f) 
 103 #       define  GOODFLAGS(f)    ((f)&~REG_DUMP) 
 106         cflags 
= GOODFLAGS(cflags
); 
 107         if ((cflags
®_EXTENDED
) && (cflags
®_NOSPEC
)) 
 110         if (cflags
®_PEND
) { 
 111                 if (preg
->re_endp 
< pattern
) 
 113                 len 
= preg
->re_endp 
- pattern
; 
 115                 len 
= strlen((char *)pattern
); 
 117         /* do the mallocs early so failure handling is easy */ 
 118         g 
= (struct re_guts 
*)malloc(sizeof(struct re_guts
) + 
 119                                                         (NC
-1)*sizeof(cat_t
)); 
 122         p
->ssize 
= len
/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ 
 123         p
->strip 
= (sop 
*)malloc(p
->ssize 
* sizeof(sop
)); 
 125         if (p
->strip 
== NULL
) { 
 132         p
->next 
= (char *)pattern
;      /* convenience; we do not modify it */ 
 133         p
->end 
= p
->next 
+ len
; 
 136         for (i 
= 0; i 
< NPAREN
; i
++) { 
 151         g
->ncategories 
= 1;     /* category 0 is "everything else" */ 
 152         g
->categories 
= &g
->catspace
[-(CHAR_MIN
)]; 
 153         (void) memset((char *)g
->catspace
, 0, NC
*sizeof(cat_t
)); 
 158         g
->firststate 
= THERE(); 
 159         if (cflags
®_EXTENDED
) 
 161         else if (cflags
®_NOSPEC
) 
 166         g
->laststate 
= THERE(); 
 168         /* tidy up loose ends and fill things in */ 
 172         g
->nplus 
= pluscount(p
, g
); 
 174         preg
->re_nsub 
= g
->nsub
; 
 176         preg
->re_magic 
= MAGIC1
; 
 178         /* not debugging, so can't rely on the assert() in regexec() */ 
 180                 SETERROR(REG_ASSERT
); 
 183         /* win or lose, we're done */ 
 184         if (p
->error 
!= 0)      /* lose */ 
 190  - p_ere - ERE parser top level, concatenation and alternation 
 191  == static void p_ere(register struct parse *p, int stop); 
 195 register struct parse 
*p
; 
 196 int stop
;                       /* character this ERE should end at */ 
 199         register sopno prevback
; 
 200         register sopno prevfwd
; 
 202         register int first 
= 1;         /* is this the first alternative? */ 
 205                 /* do a bunch of concatenated expressions */ 
 207                 while (MORE() && (c 
= PEEK()) != '|' && c 
!= stop
) 
 209                 REQUIRE(HERE() != conc
, REG_EMPTY
);     /* require nonempty */ 
 212                         break;          /* NOTE BREAK OUT */ 
 215                         INSERT(OCH_
, conc
);     /* offset is wrong */ 
 220                 ASTERN(OOR1
, prevback
); 
 222                 AHEAD(prevfwd
);                 /* fix previous offset */ 
 224                 EMIT(OOR2
, 0);                  /* offset is very wrong */ 
 227         if (!first
) {           /* tail-end fixups */ 
 229                 ASTERN(O_CH
, prevback
); 
 232         assert(!MORE() || SEE(stop
)); 
 236  - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op 
 237  == static void p_ere_exp(register struct parse *p); 
 241 register struct parse 
*p
; 
 247         register sopno subno
; 
 250         assert(MORE());         /* caller should have ensured this */ 
 256                 REQUIRE(MORE(), REG_EPAREN
); 
 260                         p
->pbegin
[subno
] = HERE(); 
 261                 EMIT(OLPAREN
, subno
); 
 264                 if (subno 
< NPAREN
) { 
 265                         p
->pend
[subno
] = HERE(); 
 266                         assert(p
->pend
[subno
] != 0); 
 268                 EMIT(ORPAREN
, subno
); 
 269                 MUSTEAT(')', REG_EPAREN
); 
 271 #ifndef POSIX_MISTAKE 
 272         case ')':               /* happens only if no current unmatched ( */ 
 274                  * You may ask, why the ifndef?  Because I didn't notice 
 275                  * this until slightly too late for 1003.2, and none of the 
 276                  * other 1003.2 regular-expression reviewers noticed it at 
 277                  * all.  So an unmatched ) is legal POSIX, at least until 
 278                  * we can get it fixed. 
 280                 SETERROR(REG_EPAREN
); 
 285                 p
->g
->iflags 
|= USEBOL
; 
 291                 p
->g
->iflags 
|= USEEOL
; 
 300                 SETERROR(REG_BADRPT
); 
 303                 if (p
->g
->cflags
®_NEWLINE
) 
 312                 REQUIRE(MORE(), REG_EESCAPE
); 
 316         case '{':               /* okay as ordinary except if digit follows */ 
 317                 REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT
); 
 327         /* we call { a repetition if followed by a digit */ 
 328         if (!( c 
== '*' || c 
== '+' || c 
== '?' || 
 329                                 (c 
== '{' && MORE2() && isdigit(PEEK2())) )) 
 330                 return;         /* no repetition, we're done */ 
 333         REQUIRE(!wascaret
, REG_BADRPT
); 
 335         case '*':       /* implemented as +? */ 
 336                 /* this case does not require the (y|) trick, noKLUDGE */ 
 339                 INSERT(OQUEST_
, pos
); 
 340                 ASTERN(O_QUEST
, pos
); 
 347                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 
 348                 INSERT(OCH_
, pos
);              /* offset slightly wrong */ 
 349                 ASTERN(OOR1
, pos
);              /* this one's right */ 
 350                 AHEAD(pos
);                     /* fix the OCH_ */ 
 351                 EMIT(OOR2
, 0);                  /* offset very wrong... */ 
 352                 AHEAD(THERE());                 /* ...so fix it */ 
 353                 ASTERN(O_CH
, THERETHERE()); 
 358                         if (isdigit(PEEK())) { 
 360                                 REQUIRE(count 
<= count2
, REG_BADBR
); 
 361                         } else          /* single number with comma */ 
 363                 } else          /* just a single number */ 
 365                 repeat(p
, pos
, count
, count2
); 
 366                 if (!EAT('}')) {        /* error heuristics */ 
 367                         while (MORE() && PEEK() != '}') 
 369                         REQUIRE(MORE(), REG_EBRACE
); 
 378         if (!( c 
== '*' || c 
== '+' || c 
== '?' || 
 379                                 (c 
== '{' && MORE2() && isdigit(PEEK2())) ) ) 
 381         SETERROR(REG_BADRPT
); 
 385  - p_str - string (no metacharacters) "parser" 
 386  == static void p_str(register struct parse *p); 
 390 register struct parse 
*p
; 
 392         REQUIRE(MORE(), REG_EMPTY
); 
 394                 ordinary(p
, GETNEXT()); 
 398  - p_bre - BRE parser top level, anchoring and concatenation 
 399  == static void p_bre(register struct parse *p, register int end1, \ 
 400  ==     register int end2); 
 401  * Giving end1 as OUT essentially eliminates the end1/end2 check. 
 403  * This implementation is a bit of a kludge, in that a trailing $ is first 
 404  * taken as an ordinary character and then revised to be an anchor.  The 
 405  * only undesirable side effect is that '$' gets included as a character 
 406  * category in such cases.  This is fairly harmless; not worth fixing. 
 407  * The amount of lookahead needed to avoid this kludge is excessive. 
 411 register struct parse 
*p
; 
 412 register int end1
;              /* first terminating character */ 
 413 register int end2
;              /* second terminating character */ 
 415         register sopno start 
= HERE(); 
 416         register int first 
= 1;                 /* first subexpression? */ 
 417         register int wasdollar 
= 0; 
 421                 p
->g
->iflags 
|= USEBOL
; 
 424         while (MORE() && !SEETWO(end1
, end2
)) { 
 425                 wasdollar 
= p_simp_re(p
, first
); 
 428         if (wasdollar
) {        /* oops, that was a trailing anchor */ 
 431                 p
->g
->iflags 
|= USEEOL
; 
 435         REQUIRE(HERE() != start
, REG_EMPTY
);    /* require nonempty */ 
 439  - p_simp_re - parse a simple RE, an atom possibly followed by a repetition 
 440  == static int p_simp_re(register struct parse *p, int starordinary); 
 442 static int                      /* was the simple RE an unbackslashed $? */ 
 443 p_simp_re(p
, starordinary
) 
 444 register struct parse 
*p
; 
 445 int starordinary
;               /* is a leading * an ordinary character? */ 
 452         register sopno subno
; 
 453 #       define  BACKSL  (1<<CHAR_BIT) 
 455         pos 
= HERE();           /* repetion op, if any, covers from here */ 
 457         assert(MORE());         /* caller should have ensured this */ 
 460                 REQUIRE(MORE(), REG_EESCAPE
); 
 461                 c 
= BACKSL 
| (unsigned char)GETNEXT(); 
 465                 if (p
->g
->cflags
®_NEWLINE
) 
 474                 SETERROR(REG_BADRPT
); 
 480                         p
->pbegin
[subno
] = HERE(); 
 481                 EMIT(OLPAREN
, subno
); 
 482                 /* the MORE here is an error heuristic */ 
 483                 if (MORE() && !SEETWO('\\', ')')) 
 485                 if (subno 
< NPAREN
) { 
 486                         p
->pend
[subno
] = HERE(); 
 487                         assert(p
->pend
[subno
] != 0); 
 489                 EMIT(ORPAREN
, subno
); 
 490                 REQUIRE(EATTWO('\\', ')'), REG_EPAREN
); 
 492         case BACKSL
|')':        /* should not get here -- must be user */ 
 494                 SETERROR(REG_EPAREN
); 
 505                 i 
= (c
&~BACKSL
) - '0'; 
 507                 if (p
->pend
[i
] != 0) { 
 508                         assert(i 
<= p
->g
->nsub
); 
 510                         assert(p
->pbegin
[i
] != 0); 
 511                         assert(OP(p
->strip
[p
->pbegin
[i
]]) == OLPAREN
); 
 512                         assert(OP(p
->strip
[p
->pend
[i
]]) == ORPAREN
); 
 513                         (void) dupl(p
, p
->pbegin
[i
]+1, p
->pend
[i
]); 
 516                         SETERROR(REG_ESUBREG
); 
 520                 REQUIRE(starordinary
, REG_BADRPT
); 
 523                 ordinary(p
, (char)c
);   /* takes off BACKSL, if any */ 
 527         if (EAT('*')) {         /* implemented as +? */ 
 528                 /* this case does not require the (y|) trick, noKLUDGE */ 
 531                 INSERT(OQUEST_
, pos
); 
 532                 ASTERN(O_QUEST
, pos
); 
 533         } else if (EATTWO('\\', '{')) { 
 536                         if (MORE() && isdigit(PEEK())) { 
 538                                 REQUIRE(count 
<= count2
, REG_BADBR
); 
 539                         } else          /* single number with comma */ 
 541                 } else          /* just a single number */ 
 543                 repeat(p
, pos
, count
, count2
); 
 544                 if (!EATTWO('\\', '}')) {       /* error heuristics */ 
 545                         while (MORE() && !SEETWO('\\', '}')) 
 547                         REQUIRE(MORE(), REG_EBRACE
); 
 550         } else if (c 
== (unsigned char)'$')     /* $ (but not \$) ends it */ 
 557  - p_count - parse a repetition count 
 558  == static int p_count(register struct parse *p); 
 560 static int                      /* the value */ 
 562 register struct parse 
*p
; 
 564         register int count 
= 0; 
 565         register int ndigits 
= 0; 
 567         while (MORE() && isdigit(PEEK()) && count 
<= DUPMAX
) { 
 568                 count 
= count
*10 + (GETNEXT() - '0'); 
 572         REQUIRE(ndigits 
> 0 && count 
<= DUPMAX
, REG_BADBR
); 
 577  - p_bracket - parse a bracketed character list 
 578  == static void p_bracket(register struct parse *p); 
 580  * Note a significant property of this code:  if the allocset() did SETERROR, 
 581  * no set operations are done. 
 585 register struct parse 
*p
; 
 587         register cset 
*cs 
= allocset(p
); 
 588         register int invert 
= 0; 
 590         /* Dept of Truly Sickening Special-Case Kludges */ 
 591         if (p
->next 
+ 5 < p
->end 
&& strncmp(p
->next
, "[:<:]]", 6) == 0) { 
 596         if (p
->next 
+ 5 < p
->end 
&& strncmp(p
->next
, "[:>:]]", 6) == 0) { 
 603                 invert
++;       /* make note to invert set at end */ 
 608         while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) 
 612         MUSTEAT(']', REG_EBRACK
); 
 614         if (p
->error 
!= 0)      /* don't mess things up further */ 
 617         if (p
->g
->cflags
®_ICASE
) { 
 621                 for (i 
= p
->g
->csetsize 
- 1; i 
>= 0; i
--) 
 622                         if (CHIN(cs
, i
) && isalpha(i
)) { 
 627                 if (cs
->multis 
!= NULL
) 
 633                 for (i 
= p
->g
->csetsize 
- 1; i 
>= 0; i
--) 
 638                 if (p
->g
->cflags
®_NEWLINE
) 
 640                 if (cs
->multis 
!= NULL
) 
 644         assert(cs
->multis 
== NULL
);             /* xxx */ 
 646         if (nch(p
, cs
) == 1) {          /* optimize singleton sets */ 
 647                 ordinary(p
, firstch(p
, cs
)); 
 650                 EMIT(OANYOF
, freezeset(p
, cs
)); 
 654  - p_b_term - parse one term of a bracketed character list 
 655  == static void p_b_term(register struct parse *p, register cset *cs); 
 659 register struct parse 
*p
; 
 663         register char start
, finish
; 
 666         /* classify what we've got */ 
 667         switch ((MORE()) ? PEEK() : '\0') { 
 669                 c 
= (MORE2()) ? PEEK2() : '\0'; 
 672                 SETERROR(REG_ERANGE
); 
 673                 return;                 /* NOTE RETURN */ 
 681         case ':':               /* character class */ 
 683                 REQUIRE(MORE(), REG_EBRACK
); 
 685                 REQUIRE(c 
!= '-' && c 
!= ']', REG_ECTYPE
); 
 687                 REQUIRE(MORE(), REG_EBRACK
); 
 688                 REQUIRE(EATTWO(':', ']'), REG_ECTYPE
); 
 690         case '=':               /* equivalence class */ 
 692                 REQUIRE(MORE(), REG_EBRACK
); 
 694                 REQUIRE(c 
!= '-' && c 
!= ']', REG_ECOLLATE
); 
 696                 REQUIRE(MORE(), REG_EBRACK
); 
 697                 REQUIRE(EATTWO('=', ']'), REG_ECOLLATE
); 
 699         default:                /* symbol, ordinary character, or range */ 
 700 /* xxx revision needed for multichar stuff */ 
 701                 start 
= p_b_symbol(p
); 
 702                 if (SEE('-') && MORE2() && PEEK2() != ']') { 
 708                                 finish 
= p_b_symbol(p
); 
 711 /* xxx what about signed chars here... */ 
 712                 REQUIRE(start 
<= finish
, REG_ERANGE
); 
 713                 for (i 
= start
; i 
<= finish
; i
++) 
 720  - p_b_cclass - parse a character-class name and deal with it 
 721  == static void p_b_cclass(register struct parse *p, register cset *cs); 
 725 register struct parse 
*p
; 
 728         register char *sp 
= p
->next
; 
 729         register struct cclass 
*cp
; 
 734         while (MORE() && isalpha(PEEK())) 
 737         for (cp 
= cclasses
; cp
->name 
!= NULL
; cp
++) 
 738                 if (strncmp(cp
->name
, sp
, len
) == 0 && cp
->name
[len
] == '\0') 
 740         if (cp
->name 
== NULL
) { 
 741                 /* oops, didn't find it */ 
 742                 SETERROR(REG_ECTYPE
); 
 747         while ((c 
= *u
++) != '\0') 
 749         for (u 
= cp
->multis
; *u 
!= '\0'; u 
+= strlen(u
) + 1) 
 754  - p_b_eclass - parse an equivalence-class name and deal with it 
 755  == static void p_b_eclass(register struct parse *p, register cset *cs); 
 757  * This implementation is incomplete. xxx 
 761 register struct parse 
*p
; 
 766         c 
= p_b_coll_elem(p
, '='); 
 771  - p_b_symbol - parse a character or [..]ed multicharacter collating symbol 
 772  == static char p_b_symbol(register struct parse *p); 
 774 static char                     /* value of symbol */ 
 776 register struct parse 
*p
; 
 780         REQUIRE(MORE(), REG_EBRACK
); 
 781         if (!EATTWO('[', '.')) 
 784         /* collating symbol */ 
 785         value 
= p_b_coll_elem(p
, '.'); 
 786         REQUIRE(EATTWO('.', ']'), REG_ECOLLATE
); 
 791  - p_b_coll_elem - parse a collating-element name and look it up 
 792  == static char p_b_coll_elem(register struct parse *p, int endc); 
 794 static char                     /* value of collating element */ 
 795 p_b_coll_elem(p
, endc
) 
 796 register struct parse 
*p
; 
 797 int endc
;                       /* name ended by endc,']' */ 
 799         register char *sp 
= p
->next
; 
 800         register struct cname 
*cp
; 
 803         while (MORE() && !SEETWO(endc
, ']')) 
 806                 SETERROR(REG_EBRACK
); 
 810         for (cp 
= cnames
; cp
->name 
!= NULL
; cp
++) 
 811                 if (strncmp(cp
->name
, sp
, len
) == 0 && cp
->name
[len
] == '\0') 
 812                         return(cp
->code
);       /* known name */ 
 814                 return(*sp
);    /* single character */ 
 815         SETERROR(REG_ECOLLATE
);                 /* neither */ 
 820  - othercase - return the case counterpart of an alphabetic 
 821  == static char othercase(int ch); 
 823 static char                     /* if no counterpart, return ch */ 
 830         else if (islower(ch
)) 
 832         else                    /* peculiar, but could happen */ 
 837  - bothcases - emit a dualcase version of a two-case character 
 838  == static void bothcases(register struct parse *p, int ch); 
 840  * Boy, is this implementation ever a kludge... 
 844 register struct parse 
*p
; 
 847         register char *oldnext 
= p
->next
; 
 848         register char *oldend 
= p
->end
; 
 851         assert(othercase(ch
) != ch
);    /* p_bracket() would recurse */ 
 858         assert(p
->next 
== bracket
+2); 
 864  - ordinary - emit an ordinary character 
 865  == static void ordinary(register struct parse *p, register int ch); 
 869 register struct parse 
*p
; 
 872         register cat_t 
*cap 
= p
->g
->categories
; 
 874         if ((p
->g
->cflags
®_ICASE
) && isalpha(ch
) && othercase(ch
) != ch
) 
 877                 EMIT(OCHAR
, (unsigned char)ch
); 
 879                         cap
[ch
] = p
->g
->ncategories
++; 
 884  - nonnewline - emit REG_NEWLINE version of OANY 
 885  == static void nonnewline(register struct parse *p); 
 887  * Boy, is this implementation ever a kludge... 
 891 register struct parse 
*p
; 
 893         register char *oldnext 
= p
->next
; 
 894         register char *oldend 
= p
->end
; 
 904         assert(p
->next 
== bracket
+3); 
 910  - repeat - generate code for a bounded repetition, recursively if needed 
 911  == static void repeat(register struct parse *p, sopno start, int from, int to); 
 914 repeat(p
, start
, from
, to
) 
 915 register struct parse 
*p
; 
 916 sopno start
;                    /* operand from here to end of strip */ 
 917 int from
;                       /* repeated from this number */ 
 918 int to
;                         /* to this number of times (maybe INFINITY) */ 
 920         register sopno finish 
= HERE(); 
 923 #       define  REP(f, t)       ((f)*8 + (t)) 
 924 #       define  MAP(n)  (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) 
 927         if (p
->error 
!= 0)      /* head off possible runaway recursion */ 
 932         switch (REP(MAP(from
), MAP(to
))) { 
 933         case REP(0, 0):                 /* must be user doing this */ 
 934                 DROP(finish
-start
);     /* drop the operand */ 
 936         case REP(0, 1):                 /* as x{1,1}? */ 
 937         case REP(0, N
):                 /* as x{1,n}? */ 
 938         case REP(0, INF
):               /* as x{1,}? */ 
 939                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 
 940                 INSERT(OCH_
, start
);            /* offset is wrong... */ 
 941                 repeat(p
, start
+1, 1, to
); 
 943                 AHEAD(start
);                   /* ... fix it */ 
 946                 ASTERN(O_CH
, THERETHERE()); 
 948         case REP(1, 1):                 /* trivial case */ 
 951         case REP(1, N
):                 /* as x?x{1,n-1} */ 
 952                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 
 956                 EMIT(OOR2
, 0);                  /* offset very wrong... */ 
 957                 AHEAD(THERE());                 /* ...so fix it */ 
 958                 ASTERN(O_CH
, THERETHERE()); 
 959                 copy 
= dupl(p
, start
+1, finish
+1); 
 960                 assert(copy 
== finish
+4); 
 961                 repeat(p
, copy
, 1, to
-1); 
 963         case REP(1, INF
):               /* as x+ */ 
 964                 INSERT(OPLUS_
, start
); 
 965                 ASTERN(O_PLUS
, start
); 
 967         case REP(N
, N
):                 /* as xx{m-1,n-1} */ 
 968                 copy 
= dupl(p
, start
, finish
); 
 969                 repeat(p
, copy
, from
-1, to
-1); 
 971         case REP(N
, INF
):               /* as xx{n-1,INF} */ 
 972                 copy 
= dupl(p
, start
, finish
); 
 973                 repeat(p
, copy
, from
-1, to
); 
 975         default:                        /* "can't happen" */ 
 976                 SETERROR(REG_ASSERT
);   /* just in case */ 
 982  - seterr - set an error condition 
 983  == static int seterr(register struct parse *p, int e); 
 985 static int                      /* useless but makes type checking happy */ 
 987 register struct parse 
*p
; 
 990         if (p
->error 
== 0)      /* keep earliest error condition */ 
 992         p
->next 
= nuls
;         /* try to bring things to a halt */ 
 994         return(0);              /* make the return value well-defined */ 
 998  - allocset - allocate a set of characters for [] 
 999  == static cset *allocset(register struct parse *p); 
1003 register struct parse 
*p
; 
1005         register int no 
= p
->g
->ncsets
++; 
1007         register size_t nbytes
; 
1009         register size_t css 
= (size_t)p
->g
->csetsize
; 
1012         if (no 
>= p
->ncsalloc
) {        /* need another column of space */ 
1013                 p
->ncsalloc 
+= CHAR_BIT
; 
1015                 assert(nc 
% CHAR_BIT 
== 0); 
1016                 nbytes 
= nc 
/ CHAR_BIT 
* css
; 
1017                 if (p
->g
->sets 
== NULL
) 
1018                         p
->g
->sets 
= (cset 
*)malloc(nc 
* sizeof(cset
)); 
1020                         p
->g
->sets 
= (cset 
*)realloc((char *)p
->g
->sets
, 
1022                 if (p
->g
->setbits 
== NULL
) 
1023                         p
->g
->setbits 
= (uch 
*)malloc(nbytes
); 
1025                         p
->g
->setbits 
= (uch 
*)realloc((char *)p
->g
->setbits
, 
1027                         /* xxx this isn't right if setbits is now NULL */ 
1028                         for (i 
= 0; i 
< no
; i
++) 
1029                                 p
->g
->sets
[i
].ptr 
= p
->g
->setbits 
+ css
*(i
/CHAR_BIT
); 
1031                 if (p
->g
->sets 
!= NULL 
&& p
->g
->setbits 
!= NULL
) 
1032                         (void) memset((char *)p
->g
->setbits 
+ (nbytes 
- css
), 
1036                         SETERROR(REG_ESPACE
); 
1037                         /* caller's responsibility not to do set ops */ 
1041         assert(p
->g
->sets 
!= NULL
);     /* xxx */ 
1042         cs 
= &p
->g
->sets
[no
]; 
1043         cs
->ptr 
= p
->g
->setbits 
+ css
*((no
)/CHAR_BIT
); 
1044         cs
->mask 
= 1 << ((no
) % CHAR_BIT
); 
1053  - freeset - free a now-unused set 
1054  == static void freeset(register struct parse *p, register cset *cs); 
1058 register struct parse 
*p
; 
1062         register cset 
*top 
= &p
->g
->sets
[p
->g
->ncsets
]; 
1063         register size_t css 
= (size_t)p
->g
->csetsize
; 
1065         for (i 
= 0; i 
< css
; i
++) 
1067         if (cs 
== top
-1)        /* recover only the easy case */ 
1072  - freezeset - final processing on a set of characters 
1073  == static int freezeset(register struct parse *p, register cset *cs); 
1075  * The main task here is merging identical sets.  This is usually a waste 
1076  * of time (although the hash code minimizes the overhead), but can win 
1077  * big if REG_ICASE is being used.  REG_ICASE, by the way, is why the hash 
1078  * is done using addition rather than xor -- all ASCII [aA] sets xor to 
1081 static int                      /* set number */ 
1083 register struct parse 
*p
; 
1086         register uch h 
= cs
->hash
; 
1088         register cset 
*top 
= &p
->g
->sets
[p
->g
->ncsets
]; 
1090         register size_t css 
= (size_t)p
->g
->csetsize
; 
1092         /* look for an earlier one which is the same */ 
1093         for (cs2 
= &p
->g
->sets
[0]; cs2 
< top
; cs2
++) 
1094                 if (cs2
->hash 
== h 
&& cs2 
!= cs
) { 
1096                         for (i 
= 0; i 
< css
; i
++) 
1097                                 if (!!CHIN(cs2
, i
) != !!CHIN(cs
, i
)) 
1103         if (cs2 
< top
) {        /* found one */ 
1108         return((int)(cs 
- p
->g
->sets
)); 
1112  - firstch - return first character in a set (which must have at least one) 
1113  == static int firstch(register struct parse *p, register cset *cs); 
1115 static int                      /* character; there is no "none" value */ 
1117 register struct parse 
*p
; 
1121         register size_t css 
= (size_t)p
->g
->csetsize
; 
1123         for (i 
= 0; i 
< css
; i
++) 
1127         return(0);              /* arbitrary */ 
1131  - nch - number of characters in a set 
1132  == static int nch(register struct parse *p, register cset *cs); 
1136 register struct parse 
*p
; 
1140         register size_t css 
= (size_t)p
->g
->csetsize
; 
1143         for (i 
= 0; i 
< css
; i
++) 
1150  - mcadd - add a collating element to a cset 
1151  == static void mcadd(register struct parse *p, register cset *cs, \ 
1152  ==     register char *cp); 
1156 register struct parse 
*p
; 
1160         register size_t oldend 
= cs
->smultis
; 
1162         cs
->smultis 
+= strlen(cp
) + 1; 
1163         if (cs
->multis 
== NULL
) 
1164                 cs
->multis 
= malloc(cs
->smultis
); 
1166                 cs
->multis 
= realloc(cs
->multis
, cs
->smultis
); 
1167         if (cs
->multis 
== NULL
) { 
1168                 SETERROR(REG_ESPACE
); 
1172         (void) strcpy(cs
->multis 
+ oldend 
- 1, cp
); 
1173         cs
->multis
[cs
->smultis 
- 1] = '\0'; 
1176 /* these functions don't seem to be used (yet?), suppress warnings */ 
1179  - mcsub - subtract a collating element from a cset 
1180  == static void mcsub(register cset *cs, register char *cp); 
1187         register char *fp 
= mcfind(cs
, cp
); 
1188         register size_t len 
= strlen(fp
); 
1191         (void) memmove(fp
, fp 
+ len 
+ 1, 
1192                                 cs
->smultis 
- (fp 
+ len 
+ 1 - cs
->multis
)); 
1195         if (cs
->smultis 
== 0) { 
1201         cs
->multis 
= realloc(cs
->multis
, cs
->smultis
); 
1202         assert(cs
->multis 
!= NULL
); 
1206  - mcin - is a collating element in a cset? 
1207  == static int mcin(register cset *cs, register char *cp); 
1214         return(mcfind(cs
, cp
) != NULL
); 
1218  - mcfind - find a collating element in a cset 
1219  == static char *mcfind(register cset *cs, register char *cp); 
1228         if (cs
->multis 
== NULL
) 
1230         for (p 
= cs
->multis
; *p 
!= '\0'; p 
+= strlen(p
) + 1) 
1231                 if (strcmp(cp
, p
) == 0) 
1238  - mcinvert - invert the list of collating elements in a cset 
1239  == static void mcinvert(register struct parse *p, register cset *cs); 
1241  * This would have to know the set of possibilities.  Implementation 
1246 register struct parse 
*p
; 
1249         assert(cs
->multis 
== NULL
);     /* xxx */ 
1253  - mccase - add case counterparts of the list of collating elements in a cset 
1254  == static void mccase(register struct parse *p, register cset *cs); 
1256  * This would have to know the set of possibilities.  Implementation 
1261 register struct parse 
*p
; 
1264         assert(cs
->multis 
== NULL
);     /* xxx */ 
1268  - isinsets - is this character in any sets? 
1269  == static int isinsets(register struct re_guts *g, int c); 
1271 static int                      /* predicate */ 
1273 register struct re_guts 
*g
; 
1278         register int ncols 
= (g
->ncsets
+(CHAR_BIT
-1)) / CHAR_BIT
; 
1279         register unsigned uc 
= (unsigned char)c
; 
1281         for (i 
= 0, col 
= g
->setbits
; i 
< ncols
; i
++, col 
+= g
->csetsize
) 
1288  - samesets - are these two characters in exactly the same sets? 
1289  == static int samesets(register struct re_guts *g, int c1, int c2); 
1291 static int                      /* predicate */ 
1293 register struct re_guts 
*g
; 
1299         register int ncols 
= (g
->ncsets
+(CHAR_BIT
-1)) / CHAR_BIT
; 
1300         register unsigned uc1 
= (unsigned char)c1
; 
1301         register unsigned uc2 
= (unsigned char)c2
; 
1303         for (i 
= 0, col 
= g
->setbits
; i 
< ncols
; i
++, col 
+= g
->csetsize
) 
1304                 if (col
[uc1
] != col
[uc2
]) 
1310  - categorize - sort out character categories 
1311  == static void categorize(struct parse *p, register struct re_guts *g); 
1316 register struct re_guts 
*g
; 
1318         register cat_t 
*cats 
= g
->categories
; 
1323         /* avoid making error situations worse */ 
1327         for (c 
= CHAR_MIN
; c 
<= CHAR_MAX
; c
++) 
1328                 if (cats
[c
] == 0 && isinsets(g
, c
)) { 
1329                         cat 
= g
->ncategories
++; 
1331                         for (c2 
= c
+1; c2 
<= CHAR_MAX
; c2
++) 
1332                                 if (cats
[c2
] == 0 && samesets(g
, c
, c2
)) 
1338  - dupl - emit a duplicate of a bunch of sops 
1339  == static sopno dupl(register struct parse *p, sopno start, sopno finish); 
1341 static sopno                    
/* start of duplicate */ 
1342 dupl(p
, start
, finish
) 
1343 register struct parse 
*p
; 
1344 sopno start
;                    /* from here */ 
1345 sopno finish
;                   /* to this less one */ 
1347         register sopno ret 
= HERE(); 
1348         register sopno len 
= finish 
- start
; 
1350         assert(finish 
>= start
); 
1353         enlarge(p
, p
->ssize 
+ len
);     /* this many unexpected additions */ 
1354         assert(p
->ssize 
>= p
->slen 
+ len
); 
1355         (void) memcpy((char *)(p
->strip 
+ p
->slen
), 
1356                 (char *)(p
->strip 
+ start
), (size_t)len
*sizeof(sop
)); 
1362  - doemit - emit a strip operator 
1363  == static void doemit(register struct parse *p, sop op, size_t opnd); 
1365  * It might seem better to implement this as a macro with a function as 
1366  * hard-case backup, but it's just too big and messy unless there are 
1367  * some changes to the data structures.  Maybe later. 
1371 register struct parse 
*p
; 
1375         /* avoid making error situations worse */ 
1379         /* deal with oversize operands ("can't happen", more or less) */ 
1380         assert(opnd 
< 1<<OPSHIFT
); 
1382         /* deal with undersized strip */ 
1383         if (p
->slen 
>= p
->ssize
) 
1384                 enlarge(p
, (p
->ssize
+1) / 2 * 3);       /* +50% */ 
1385         assert(p
->slen 
< p
->ssize
); 
1387         /* finally, it's all reduced to the easy case */ 
1388         p
->strip
[p
->slen
++] = SOP(op
, opnd
); 
1392  - doinsert - insert a sop into the strip 
1393  == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos); 
1396 doinsert(p
, op
, opnd
, pos
) 
1397 register struct parse 
*p
; 
1406         /* avoid making error situations worse */ 
1411         EMIT(op
, opnd
);         /* do checks, ensure space */ 
1412         assert(HERE() == sn
+1); 
1415         /* adjust paren pointers */ 
1417         for (i 
= 1; i 
< NPAREN
; i
++) { 
1418                 if (p
->pbegin
[i
] >= pos
) { 
1421                 if (p
->pend
[i
] >= pos
) { 
1426         memmove((char *)&p
->strip
[pos
+1], (char *)&p
->strip
[pos
], 
1427                                                 (HERE()-pos
-1)*sizeof(sop
)); 
1432  - dofwd - complete a forward reference 
1433  == static void dofwd(register struct parse *p, sopno pos, sop value); 
1436 dofwd(p
, pos
, value
) 
1437 register struct parse 
*p
; 
1441         /* avoid making error situations worse */ 
1445         assert(value 
< 1<<OPSHIFT
); 
1446         p
->strip
[pos
] = OP(p
->strip
[pos
]) | value
; 
1450  - enlarge - enlarge the strip 
1451  == static void enlarge(register struct parse *p, sopno size); 
1455 register struct parse 
*p
; 
1456 register sopno size
; 
1460         if (p
->ssize 
>= size
) 
1463         sp 
= (sop 
*)realloc(p
->strip
, size
*sizeof(sop
)); 
1465                 SETERROR(REG_ESPACE
); 
1473  - stripsnug - compact the strip 
1474  == static void stripsnug(register struct parse *p, register struct re_guts *g); 
1478 register struct parse 
*p
; 
1479 register struct re_guts 
*g
; 
1481         g
->nstates 
= p
->slen
; 
1482         g
->strip 
= (sop 
*)realloc((char *)p
->strip
, p
->slen 
* sizeof(sop
)); 
1483         if (g
->strip 
== NULL
) { 
1484                 SETERROR(REG_ESPACE
); 
1485                 g
->strip 
= p
->strip
; 
1490  - findmust - fill in must and mlen with longest mandatory literal string 
1491  == static void findmust(register struct parse *p, register struct re_guts *g); 
1493  * This algorithm could do fancy things like analyzing the operands of | 
1494  * for common subsequences.  Someday.  This code is simple and finds most 
1495  * of the interesting cases. 
1497  * Note that must and mlen got initialized during setup. 
1502 register struct re_guts 
*g
; 
1506         register sop 
*newstart
; 
1507         register sopno newlen
; 
1512         /* avoid making error situations worse */ 
1516         /* find the longest OCHAR sequence in strip */ 
1518         scan 
= g
->strip 
+ 1; 
1522                 case OCHAR
:             /* sequence member */ 
1523                         if (newlen 
== 0)                /* new sequence */ 
1524                                 newstart 
= scan 
- 1; 
1527                 case OPLUS_
:            /* things that don't break one */ 
1531                 case OQUEST_
:           /* things that must be skipped */ 
1537                                 /* assert() interferes w debug printouts */ 
1538                                 if (OP(s
) != O_QUEST 
&& OP(s
) != O_CH 
&& 
1543                         } while (OP(s
) != O_QUEST 
&& OP(s
) != O_CH
); 
1545                 default:                /* things that break a sequence */ 
1546                         if (newlen 
> g
->mlen
) {         /* ends one */ 
1553         } while (OP(s
) != OEND
); 
1555         if (g
->mlen 
== 0)               /* there isn't one */ 
1558         /* turn it into a character string */ 
1559         g
->must 
= malloc((size_t)g
->mlen 
+ 1); 
1560         if (g
->must 
== NULL
) {          /* argh; just forget it */ 
1566         for (i 
= g
->mlen
; i 
> 0; i
--) { 
1567                 while (OP(s 
= *scan
++) != OCHAR
) 
1569                 assert(cp 
< g
->must 
+ g
->mlen
); 
1570                 *cp
++ = (char)OPND(s
); 
1572         assert(cp 
== g
->must 
+ g
->mlen
); 
1573         *cp
++ = '\0';           /* just on general principles */ 
1577  - pluscount - count + nesting 
1578  == static sopno pluscount(register struct parse *p, register struct re_guts *g); 
1580 static sopno                    
/* nesting depth */ 
1583 register struct re_guts 
*g
; 
1587         register sopno plusnest 
= 0; 
1588         register sopno maxnest 
= 0; 
1591                 return(0);      /* there may not be an OEND */ 
1593         scan 
= g
->strip 
+ 1; 
1601                         if (plusnest 
> maxnest
) 
1606         } while (OP(s
) != OEND
);