]>
git.saurik.com Git - wxWidgets.git/blob - src/regex/regcomp.c
   2  * re_*comp and friends - compile REs 
   3  * This file #includes several others (see the bottom). 
   5  * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved. 
   7  * Development of this software was funded, in part, by Cray Research Inc., 
   8  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics 
   9  * Corporation, none of whom are responsible for the results.  The author 
  12  * Redistribution and use in source and binary forms -- with or without 
  13  * modification -- are permitted for any purpose, provided that 
  14  * redistributions in source form retain this entire copyright notice and 
  15  * indicate the origin and nature of any modifications. 
  17  * I'd appreciate being given credit for this package in the documentation 
  18  * of software which uses it, but that is not a requirement. 
  20  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, 
  21  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 
  22  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL 
  23  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
  24  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
  25  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
  26  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
  27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 
  28  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
  29  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
  36  * forward declarations, up here so forward datatypes etc. are defined early 
  38 /* =====^!^===== begin forwards =====^!^===== */ 
  39 /* automatically gathered by fwd; do not hand-edit */ 
  40 /* === regcomp.c === */ 
  41 int compile 
_ANSI_ARGS_((regex_t 
*, CONST chr 
*, size_t, int)); 
  42 static VOID moresubs 
_ANSI_ARGS_((struct vars 
*, int)); 
  43 static int freev 
_ANSI_ARGS_((struct vars 
*, int)); 
  44 static VOID makesearch 
_ANSI_ARGS_((struct vars 
*, struct nfa 
*)); 
  45 static struct subre 
*parse 
_ANSI_ARGS_((struct vars 
*, int, int, struct state 
*, struct state 
*)); 
  46 static struct subre 
*parsebranch 
_ANSI_ARGS_((struct vars 
*, int, int, struct state 
*, struct state 
*, int)); 
  47 static VOID parseqatom 
_ANSI_ARGS_((struct vars 
*, int, int, struct state 
*, struct state 
*, struct subre 
*)); 
  48 static VOID nonword 
_ANSI_ARGS_((struct vars 
*, int, struct state 
*, struct state 
*)); 
  49 static VOID word 
_ANSI_ARGS_((struct vars 
*, int, struct state 
*, struct state 
*)); 
  50 static int scannum 
_ANSI_ARGS_((struct vars 
*)); 
  51 static VOID repeat 
_ANSI_ARGS_((struct vars 
*, struct state 
*, struct state 
*, int, int)); 
  52 static VOID bracket 
_ANSI_ARGS_((struct vars 
*, struct state 
*, struct state 
*)); 
  53 static VOID cbracket 
_ANSI_ARGS_((struct vars 
*, struct state 
*, struct state 
*)); 
  54 static VOID brackpart 
_ANSI_ARGS_((struct vars 
*, struct state 
*, struct state 
*)); 
  55 static chr 
*scanplain 
_ANSI_ARGS_((struct vars 
*)); 
  56 static VOID leaders 
_ANSI_ARGS_((struct vars 
*, struct cvec 
*)); 
  57 static VOID onechr 
_ANSI_ARGS_((struct vars 
*, pchr
, struct state 
*, struct state 
*)); 
  58 static VOID dovec 
_ANSI_ARGS_((struct vars 
*, struct cvec 
*, struct state 
*, struct state 
*)); 
  59 static celt nextleader 
_ANSI_ARGS_((struct vars 
*, pchr
, pchr
)); 
  60 static VOID wordchrs 
_ANSI_ARGS_((struct vars 
*)); 
  61 static struct subre 
*subre 
_ANSI_ARGS_((struct vars 
*, int, int, struct state 
*, struct state 
*)); 
  62 static VOID freesubre 
_ANSI_ARGS_((struct vars 
*, struct subre 
*)); 
  63 static VOID freesrnode 
_ANSI_ARGS_((struct vars 
*, struct subre 
*)); 
  64 static VOID optst 
_ANSI_ARGS_((struct vars 
*, struct subre 
*)); 
  65 static int numst 
_ANSI_ARGS_((struct subre 
*, int)); 
  66 static VOID markst 
_ANSI_ARGS_((struct subre 
*)); 
  67 static VOID cleanst 
_ANSI_ARGS_((struct vars 
*)); 
  68 static long nfatree 
_ANSI_ARGS_((struct vars 
*, struct subre 
*, FILE *)); 
  69 static long nfanode 
_ANSI_ARGS_((struct vars 
*, struct subre 
*, FILE *)); 
  70 static int newlacon 
_ANSI_ARGS_((struct vars 
*, struct state 
*, struct state 
*, int)); 
  71 static VOID freelacons 
_ANSI_ARGS_((struct subre 
*, int)); 
  72 static VOID rfree 
_ANSI_ARGS_((regex_t 
*)); 
  73 static VOID dump 
_ANSI_ARGS_((regex_t 
*, FILE *)); 
  74 static VOID dumpst 
_ANSI_ARGS_((struct subre 
*, FILE *, int)); 
  75 static VOID stdump 
_ANSI_ARGS_((struct subre 
*, FILE *, int)); 
  76 static char *stid 
_ANSI_ARGS_((struct subre 
*, char *, size_t)); 
  77 /* === regc_lex.c === */ 
  78 static VOID lexstart 
_ANSI_ARGS_((struct vars 
*)); 
  79 static VOID prefixes 
_ANSI_ARGS_((struct vars 
*)); 
  80 static VOID lexnest 
_ANSI_ARGS_((struct vars 
*, chr 
*, chr 
*)); 
  81 static VOID lexword 
_ANSI_ARGS_((struct vars 
*)); 
  82 static int next 
_ANSI_ARGS_((struct vars 
*)); 
  83 static int lexescape 
_ANSI_ARGS_((struct vars 
*)); 
  84 static chr lexdigits 
_ANSI_ARGS_((struct vars 
*, int, int, int)); 
  85 static int brenext 
_ANSI_ARGS_((struct vars 
*, pchr
)); 
  86 static VOID skip 
_ANSI_ARGS_((struct vars 
*)); 
  87 static chr newline 
_ANSI_ARGS_((NOPARMS
)); 
  89 static chr 
*ch 
_ANSI_ARGS_((NOPARMS
)); 
  91 static chr chrnamed 
_ANSI_ARGS_((struct vars 
*, chr 
*, chr 
*, pchr
)); 
  92 /* === regc_color.c === */ 
  93 static VOID initcm 
_ANSI_ARGS_((struct vars 
*, struct colormap 
*)); 
  94 static VOID freecm 
_ANSI_ARGS_((struct colormap 
*)); 
  95 static VOID cmtreefree 
_ANSI_ARGS_((struct colormap 
*, union tree 
*, int)); 
  96 static color setcolor 
_ANSI_ARGS_((struct colormap 
*, pchr
, pcolor
)); 
  97 static color maxcolor 
_ANSI_ARGS_((struct colormap 
*)); 
  98 static color newcolor 
_ANSI_ARGS_((struct colormap 
*)); 
  99 static VOID freecolor 
_ANSI_ARGS_((struct colormap 
*, pcolor
)); 
 100 static color pseudocolor 
_ANSI_ARGS_((struct colormap 
*)); 
 101 static color subcolor 
_ANSI_ARGS_((struct colormap 
*, pchr c
)); 
 102 static color newsub 
_ANSI_ARGS_((struct colormap 
*, pcolor
)); 
 103 static VOID subrange 
_ANSI_ARGS_((struct vars 
*, pchr
, pchr
, struct state 
*, struct state 
*)); 
 104 static VOID subblock 
_ANSI_ARGS_((struct vars 
*, pchr
, struct state 
*, struct state 
*)); 
 105 static VOID okcolors 
_ANSI_ARGS_((struct nfa 
*, struct colormap 
*)); 
 106 static VOID colorchain 
_ANSI_ARGS_((struct colormap 
*, struct arc 
*)); 
 107 static VOID uncolorchain 
_ANSI_ARGS_((struct colormap 
*, struct arc 
*)); 
 108 static int singleton 
_ANSI_ARGS_((struct colormap 
*, pchr c
)); 
 109 static VOID rainbow 
_ANSI_ARGS_((struct nfa 
*, struct colormap 
*, int, pcolor
, struct state 
*, struct state 
*)); 
 110 static VOID colorcomplement 
_ANSI_ARGS_((struct nfa 
*, struct colormap 
*, int, struct state 
*, struct state 
*, struct state 
*)); 
 112 static VOID dumpcolors 
_ANSI_ARGS_((struct colormap 
*, FILE *)); 
 113 static VOID fillcheck 
_ANSI_ARGS_((struct colormap 
*, union tree 
*, int, FILE *)); 
 114 static VOID dumpchr 
_ANSI_ARGS_((pchr
, FILE *)); 
 116 /* === regc_nfa.c === */ 
 117 static struct nfa 
*newnfa 
_ANSI_ARGS_((struct vars 
*, struct colormap 
*, struct nfa 
*)); 
 118 static VOID freenfa 
_ANSI_ARGS_((struct nfa 
*)); 
 119 static struct state 
*newstate 
_ANSI_ARGS_((struct nfa 
*)); 
 120 static struct state 
*newfstate 
_ANSI_ARGS_((struct nfa 
*, int flag
)); 
 121 static VOID dropstate 
_ANSI_ARGS_((struct nfa 
*, struct state 
*)); 
 122 static VOID freestate 
_ANSI_ARGS_((struct nfa 
*, struct state 
*)); 
 123 static VOID destroystate 
_ANSI_ARGS_((struct nfa 
*, struct state 
*)); 
 124 static VOID newarc 
_ANSI_ARGS_((struct nfa 
*, int, pcolor
, struct state 
*, struct state 
*)); 
 125 static struct arc 
*allocarc 
_ANSI_ARGS_((struct nfa 
*, struct state 
*)); 
 126 static VOID freearc 
_ANSI_ARGS_((struct nfa 
*, struct arc 
*)); 
 127 static struct arc 
*findarc 
_ANSI_ARGS_((struct state 
*, int, pcolor
)); 
 128 static VOID cparc 
_ANSI_ARGS_((struct nfa 
*, struct arc 
*, struct state 
*, struct state 
*)); 
 129 static VOID moveins 
_ANSI_ARGS_((struct nfa 
*, struct state 
*, struct state 
*)); 
 130 static VOID copyins 
_ANSI_ARGS_((struct nfa 
*, struct state 
*, struct state 
*)); 
 131 static VOID moveouts 
_ANSI_ARGS_((struct nfa 
*, struct state 
*, struct state 
*)); 
 132 static VOID copyouts 
_ANSI_ARGS_((struct nfa 
*, struct state 
*, struct state 
*)); 
 133 static VOID cloneouts 
_ANSI_ARGS_((struct nfa 
*, struct state 
*, struct state 
*, struct state 
*, int)); 
 134 static VOID delsub 
_ANSI_ARGS_((struct nfa 
*, struct state 
*, struct state 
*)); 
 135 static VOID deltraverse 
_ANSI_ARGS_((struct nfa 
*, struct state 
*, struct state 
*)); 
 136 static VOID dupnfa 
_ANSI_ARGS_((struct nfa 
*, struct state 
*, struct state 
*, struct state 
*, struct state 
*)); 
 137 static VOID duptraverse 
_ANSI_ARGS_((struct nfa 
*, struct state 
*, struct state 
*)); 
 138 static VOID cleartraverse 
_ANSI_ARGS_((struct nfa 
*, struct state 
*)); 
 139 static VOID specialcolors 
_ANSI_ARGS_((struct nfa 
*)); 
 140 static long optimize 
_ANSI_ARGS_((struct nfa 
*, FILE *)); 
 141 static VOID pullback 
_ANSI_ARGS_((struct nfa 
*, FILE *)); 
 142 static int pull 
_ANSI_ARGS_((struct nfa 
*, struct arc 
*)); 
 143 static VOID pushfwd 
_ANSI_ARGS_((struct nfa 
*, FILE *)); 
 144 static int push 
_ANSI_ARGS_((struct nfa 
*, struct arc 
*)); 
 145 #define INCOMPATIBLE    1       /* destroys arc */ 
 146 #define SATISFIED       2       /* constraint satisfied */ 
 147 #define COMPATIBLE      3       /* compatible but not satisfied yet */ 
 148 static int combine 
_ANSI_ARGS_((struct arc 
*, struct arc 
*)); 
 149 static VOID fixempties 
_ANSI_ARGS_((struct nfa 
*, FILE *)); 
 150 static int unempty 
_ANSI_ARGS_((struct nfa 
*, struct arc 
*)); 
 151 static VOID cleanup 
_ANSI_ARGS_((struct nfa 
*)); 
 152 static VOID markreachable 
_ANSI_ARGS_((struct nfa 
*, struct state 
*, struct state 
*, struct state 
*)); 
 153 static VOID markcanreach 
_ANSI_ARGS_((struct nfa 
*, struct state 
*, struct state 
*, struct state 
*)); 
 154 static long analyze 
_ANSI_ARGS_((struct nfa 
*)); 
 155 static VOID compact 
_ANSI_ARGS_((struct nfa 
*, struct cnfa 
*)); 
 156 static VOID carcsort 
_ANSI_ARGS_((struct carc 
*, struct carc 
*)); 
 157 static VOID freecnfa 
_ANSI_ARGS_((struct cnfa 
*)); 
 158 static VOID dumpnfa 
_ANSI_ARGS_((struct nfa 
*, FILE *)); 
 160 static VOID dumpstate 
_ANSI_ARGS_((struct state 
*, FILE *)); 
 161 static VOID dumparcs 
_ANSI_ARGS_((struct state 
*, FILE *)); 
 162 static int dumprarcs 
_ANSI_ARGS_((struct arc 
*, struct state 
*, FILE *, int)); 
 163 static VOID dumparc 
_ANSI_ARGS_((struct arc 
*, struct state 
*, FILE *)); 
 165 static VOID dumpcnfa 
_ANSI_ARGS_((struct cnfa 
*, FILE *)); 
 167 static VOID dumpcstate 
_ANSI_ARGS_((int, struct carc 
*, struct cnfa 
*, FILE *)); 
 169 /* === regc_cvec.c === */ 
 170 static struct cvec 
*newcvec 
_ANSI_ARGS_((int, int, int)); 
 171 static struct cvec 
*clearcvec 
_ANSI_ARGS_((struct cvec 
*)); 
 172 static VOID addchr 
_ANSI_ARGS_((struct cvec 
*, pchr
)); 
 173 static VOID addrange 
_ANSI_ARGS_((struct cvec 
*, pchr
, pchr
)); 
 174 static VOID addmcce 
_ANSI_ARGS_((struct cvec 
*, chr 
*, chr 
*)); 
 175 static int haschr 
_ANSI_ARGS_((struct cvec 
*, pchr
)); 
 176 static struct cvec 
*getcvec 
_ANSI_ARGS_((struct vars 
*, int, int, int)); 
 177 static VOID freecvec 
_ANSI_ARGS_((struct cvec 
*)); 
 178 /* === regc_locale.c === */ 
 179 static int nmcces 
_ANSI_ARGS_((struct vars 
*)); 
 180 static int nleaders 
_ANSI_ARGS_((struct vars 
*)); 
 181 static struct cvec 
*allmcces 
_ANSI_ARGS_((struct vars 
*, struct cvec 
*)); 
 182 static celt element 
_ANSI_ARGS_((struct vars 
*, chr 
*, chr 
*)); 
 183 static struct cvec 
*range 
_ANSI_ARGS_((struct vars 
*, celt
, celt
, int)); 
 184 static int before 
_ANSI_ARGS_((celt
, celt
)); 
 185 static struct cvec 
*eclass 
_ANSI_ARGS_((struct vars 
*, celt
, int)); 
 186 static struct cvec 
*cclass 
_ANSI_ARGS_((struct vars 
*, chr 
*, chr 
*, int)); 
 187 static struct cvec 
*allcases 
_ANSI_ARGS_((struct vars 
*, pchr
)); 
 188 static int cmp 
_ANSI_ARGS_((CONST chr 
*, CONST chr 
*, size_t)); 
 189 static int casecmp 
_ANSI_ARGS_((CONST chr 
*, CONST chr 
*, size_t)); 
 190 /* automatically gathered by fwd; do not hand-edit */ 
 191 /* =====^!^===== end forwards =====^!^===== */ 
 195 /* internal variables, bundled for easy passing around */ 
 198         chr 
*now
;               /* scan pointer into string */ 
 199         chr 
*stop
;              /* end of string */ 
 200         chr 
*savenow
;           /* saved now and stop for "subroutine call" */ 
 202         int err
;                /* error code (0 if none) */ 
 203         int cflags
;             /* copy of compile flags */ 
 204         int lasttype
;           /* type of previous token */ 
 205         int nexttype
;           /* type of next token */ 
 206         chr nextvalue
;          /* value (if any) of next token */ 
 207         int lexcon
;             /* lexical context type (see lex.c) */ 
 208         int nsubexp
;            /* subexpression count */ 
 209         struct subre 
**subs
;    /* subRE pointer vector */ 
 210         size_t nsubs
;           /* length of vector */ 
 211         struct subre 
*sub10
[10];        /* initial vector, enough for most */ 
 212         struct nfa 
*nfa
;        /* the NFA */ 
 213         struct colormap 
*cm
;    /* character color map */ 
 214         color nlcolor
;          /* color of newline */ 
 215         struct state 
*wordchrs
; /* state in nfa holding word-char outarcs */ 
 216         struct subre 
*tree
;     /* subexpression tree */ 
 217         struct subre 
*treechain
;        /* all tree nodes allocated */ 
 218         struct subre 
*treefree
;         /* any free tree nodes */ 
 219         int ntree
;              /* number of tree nodes */ 
 220         struct cvec 
*cv
;        /* interface cvec */ 
 221         struct cvec 
*cv2
;       /* utility cvec */ 
 222         struct cvec 
*mcces
;     /* collating-element information */ 
 223 #               define  ISCELEADER(v,c) (v->mcces != NULL && haschr(v->mcces, (c))) 
 224         struct state 
*mccepbegin
;       /* in nfa, start of MCCE prototypes */ 
 225         struct state 
*mccepend
; /* in nfa, end of MCCE prototypes */ 
 226         struct subre 
*lacons
;   /* lookahead-constraint vector */ 
 227         int nlacons
;            /* size of lacons */ 
 230 /* parsing macros; most know that `v' is the struct vars pointer */ 
 231 #define NEXT()  (next(v))               /* advance by one token */ 
 232 #define SEE(t)  (v->nexttype == (t))    /* is next token this? */ 
 233 #define EAT(t)  (SEE(t) && next(v))     /* if next is this, swallow it */ 
 234 #define VISERR(vv)      ((vv)->err != 0)        /* have we seen an error yet? */ 
 235 #define ISERR() VISERR(v) 
 236 #define VERR(vv,e)      ((vv)->nexttype = EOS, ((vv)->err) ? (vv)->err :\ 
 238 #define ERR(e)  (void)VERR(v, e)                /* record an error */ 
 239 #define NOERR() {if (ISERR()) return;}  /* if error seen, return */ 
 240 #define NOERRN()        {if (ISERR()) return NULL;}     /* NOERR with retval */ 
 241 #define NOERRZ()        {if (ISERR()) return 0;}        /* NOERR with retval */ 
 242 #define INSIST(c, e)    (void)((c) ? (void)0 : ERR(e))  /* if condition false, error */ 
 243 #define NOTE(b) (v->re->re_info |= (b))         /* note visible condition */ 
 244 #define EMPTYARC(x, y)  newarc(v->nfa, EMPTY, 0, x, y) 
 246 /* token type codes, some also used as NFA arc types */ 
 247 #define EMPTY   'n'             /* no token present */ 
 248 #define EOS     'e'             /* end of string */ 
 249 #define PLAIN   'p'             /* ordinary character */ 
 250 #define DIGIT   'd'             /* digit (in bound) */ 
 251 #define BACKREF 'b'             /* back reference */ 
 252 #define COLLEL  'I'             /* start of [. */ 
 253 #define ECLASS  'E'             /* start of [= */ 
 254 #define CCLASS  'C'             /* start of [: */ 
 255 #define END     'X'             /* end of [. [= [: */ 
 256 #define RANGE   'R'             /* - within [] which might be range delim. */ 
 257 #define LACON   'L'             /* lookahead constraint subRE */ 
 258 #define AHEAD   'a'             /* color-lookahead arc */ 
 259 #define BEHIND  'r'             /* color-lookbehind arc */ 
 260 #define WBDRY   'w'             /* word boundary constraint */ 
 261 #define NWBDRY  'W'             /* non-word-boundary constraint */ 
 262 #define SBEGIN  'A'             /* beginning of string (even if not BOL) */ 
 263 #define SEND    'Z'             /* end of string (even if not EOL) */ 
 264 #define PREFER  'P'             /* length preference */ 
 266 /* is an arc colored, and hence on a color chain? */ 
 267 #define COLORED(a)      ((a)->type == PLAIN || (a)->type == AHEAD || \ 
 272 /* static function list */ 
 273 static struct fns functions 
= { 
 274         rfree
,                  /* regfree insides */ 
 280  - compile - compile regular expression 
 281  ^ int compile(regex_t *, CONST chr *, size_t, int); 
 284 compile(re
, string
, len
, flags
) 
 291         struct vars 
*v 
= &var
; 
 295         FILE *debug 
= (flags
®_PROGRESS
) ? stdout 
: (FILE *)NULL
; 
 296 #       define  CNOERR()        { if (ISERR()) return freev(v, v->err); } 
 300         if (re 
== NULL 
|| string 
== NULL
) 
 302         if ((flags
®_QUOTE
) && 
 303                         (flags
&(REG_ADVANCED
|REG_EXPANDED
|REG_NEWLINE
))) 
 305         if (!(flags
®_EXTENDED
) && (flags
®_ADVF
)) 
 308         /* initial setup (after which freev() is callable) */ 
 310         v
->now 
= (chr 
*)string
; 
 311         v
->stop 
= v
->now 
+ len
; 
 312         v
->savenow 
= v
->savestop 
= NULL
; 
 318         for (j 
= 0; j 
< v
->nsubs
; j
++) 
 322         v
->nlcolor 
= COLORLESS
; 
 332         re
->re_magic 
= REMAGIC
; 
 333         re
->re_info 
= 0;                /* bits get set during parse */ 
 334         re
->re_csize 
= sizeof(chr
); 
 336         re
->re_fns 
= VS(&functions
); 
 338         /* more complex setup, malloced things */ 
 339         re
->re_guts 
= VS(MALLOC(sizeof(struct guts
))); 
 340         if (re
->re_guts 
== NULL
) 
 341                 return freev(v
, REG_ESPACE
); 
 342         g 
= (struct guts 
*)re
->re_guts
; 
 349         v
->nfa 
= newnfa(v
, v
->cm
, (struct nfa 
*)NULL
); 
 351         v
->cv 
= newcvec(100, 20, 10); 
 353                 return freev(v
, REG_ESPACE
); 
 356                 v
->mcces 
= newcvec(nleaders(v
), 0, i
); 
 358                 v
->mcces 
= allmcces(v
, v
->mcces
); 
 359                 leaders(v
, v
->mcces
); 
 360                 addmcce(v
->mcces
, (chr 
*)NULL
, (chr 
*)NULL
);    /* dummy */ 
 365         lexstart(v
);                    /* also handles prefixes */ 
 366         if ((v
->cflags
®_NLSTOP
) || (v
->cflags
®_NLANCH
)) { 
 367                 /* assign newline a unique color */ 
 368                 v
->nlcolor 
= subcolor(v
->cm
, newline()); 
 369                 okcolors(v
->nfa
, v
->cm
); 
 372         v
->tree 
= parse(v
, EOS
, PLAIN
, v
->nfa
->init
, v
->nfa
->final
); 
 373         assert(SEE(EOS
));               /* even if error; ISERR() => SEE(EOS) */ 
 375         assert(v
->tree 
!= NULL
); 
 377         /* finish setup of nfa and its subre tree */ 
 378         specialcolors(v
->nfa
); 
 381                 fprintf(debug
, "\n\n\n========= RAW ==========\n"); 
 382                 dumpnfa(v
->nfa
, debug
); 
 383                 dumpst(v
->tree
, debug
, 1); 
 386         v
->ntree 
= numst(v
->tree
, 1); 
 390                 fprintf(debug
, "\n\n\n========= TREE FIXED ==========\n"); 
 391                 dumpst(v
->tree
, debug
, 1); 
 394         /* build compacted NFAs for tree and lacons */ 
 395         re
->re_info 
|= nfatree(v
, v
->tree
, debug
); 
 397         assert(v
->nlacons 
== 0 || v
->lacons 
!= NULL
); 
 398         for (i 
= 1; i 
< v
->nlacons
; i
++) { 
 400                         fprintf(debug
, "\n\n\n========= LA%d ==========\n", i
); 
 401                 nfanode(v
, &v
->lacons
[i
], debug
); 
 404         if (v
->tree
->flags
&SHORTER
) 
 407         /* build compacted NFAs for tree, lacons, fast search */ 
 409                 fprintf(debug
, "\n\n\n========= SEARCH ==========\n"); 
 410         /* can sacrifice main NFA now, so use it as work area */ 
 411         (DISCARD
)optimize(v
->nfa
, debug
); 
 413         makesearch(v
, v
->nfa
); 
 415         compact(v
->nfa
, &g
->search
); 
 418         /* looks okay, package it up */ 
 419         re
->re_nsub 
= v
->nsubexp
; 
 420         v
->re 
= NULL
;                   /* freev no longer frees re */ 
 421         g
->magic 
= GUTSMAGIC
; 
 422         g
->cflags 
= v
->cflags
; 
 423         g
->info 
= re
->re_info
; 
 424         g
->nsub 
= re
->re_nsub
; 
 428         g
->compare 
= (v
->cflags
®_ICASE
) ? casecmp 
: cmp
; 
 429         g
->lacons 
= v
->lacons
; 
 431         g
->nlacons 
= v
->nlacons
; 
 441  - moresubs - enlarge subRE vector 
 442  ^ static VOID moresubs(struct vars *, int); 
 447 int wanted
;                     /* want enough room for this one */ 
 452         assert(wanted 
> 0 && (size_t)wanted 
>= v
->nsubs
); 
 453         n 
= (size_t)wanted 
* 3 / 2 + 1; 
 454         if (v
->subs 
== v
->sub10
) { 
 455                 p 
= (struct subre 
**)MALLOC(n 
* sizeof(struct subre 
*)); 
 457                         memcpy(VS(p
), VS(v
->subs
), 
 458                                         v
->nsubs 
* sizeof(struct subre 
*)); 
 460                 p 
= (struct subre 
**)REALLOC(v
->subs
, n
*sizeof(struct subre 
*)); 
 466         for (p 
= &v
->subs
[v
->nsubs
]; v
->nsubs 
< n
; p
++, v
->nsubs
++) 
 468         assert(v
->nsubs 
== n
); 
 469         assert((size_t)wanted 
< v
->nsubs
); 
 473  - freev - free vars struct's substructures where necessary 
 474  * Optionally does error-number setting, and always returns error code 
 475  * (if any), to make error-handling code terser. 
 476  ^ static int freev(struct vars *, int); 
 485         if (v
->subs 
!= v
->sub10
) 
 490                 freesubre(v
, v
->tree
); 
 491         if (v
->treechain 
!= NULL
) 
 497         if (v
->mcces 
!= NULL
) 
 499         if (v
->lacons 
!= NULL
) 
 500                 freelacons(v
->lacons
, v
->nlacons
); 
 501         ERR(err
);                       /* nop if err==0 */ 
 507  - makesearch - turn an NFA into a search NFA (implicit prepend of .*?) 
 508  * NFA must have been optimize()d already. 
 509  ^ static VOID makesearch(struct vars *, struct nfa *); 
 518         struct state 
*pre 
= nfa
->pre
; 
 523         /* no loops are needed if it's anchored */ 
 524         for (a 
= pre
->outs
; a 
!= NULL
; a 
= a
->outchain
) { 
 525                 assert(a
->type 
== PLAIN
); 
 526                 if (a
->co 
!= nfa
->bos
[0] && a
->co 
!= nfa
->bos
[1]) 
 530                 /* add implicit .* in front */ 
 531                 rainbow(nfa
, v
->cm
, PLAIN
, COLORLESS
, pre
, pre
); 
 533                 /* and ^* and \A* too -- not always necessary, but harmless */ 
 534                 newarc(nfa
, PLAIN
, nfa
->bos
[0], pre
, pre
); 
 535                 newarc(nfa
, PLAIN
, nfa
->bos
[1], pre
, pre
); 
 539          * Now here's the subtle part.  Because many REs have no lookback 
 540          * constraints, often knowing when you were in the pre state tells 
 541          * you little; it's the next state(s) that are informative.  But 
 542          * some of them may have other inarcs, i.e. it may be possible to 
 543          * make actual progress and then return to one of them.  We must 
 544          * de-optimize such cases, splitting each such state into progress 
 545          * and no-progress states. 
 548         /* first, make a list of the states */ 
 550         for (a 
= pre
->outs
; a 
!= NULL
; a 
= a
->outchain
) { 
 552                 for (b 
= s
->ins
; b 
!= NULL
; b 
= b
->inchain
) 
 555                 if (b 
!= NULL
) {                /* must be split */ 
 556                         if (s
->tmp 
== NULL
) {  /* if not already in the list */ 
 557                                                /* (fixes bugs 505048, 230589, */ 
 558                                                /* 840258, 504785) */ 
 566         for (s 
= slist
; s 
!= NULL
; s 
= s2
) { 
 568                 copyouts(nfa
, s
, s2
); 
 569                 for (a 
= s
->ins
; a 
!= NULL
; a 
= b
) { 
 571                         if (a
->from 
!= pre
) { 
 572                                 cparc(nfa
, a
, a
->from
, s2
); 
 577                 s
->tmp 
= NULL
;          /* clean up while we're at it */ 
 582  - parse - parse an RE 
 583  * This is actually just the top level, which parses a bunch of branches 
 584  * tied together with '|'.  They appear in the tree as the left children 
 585  * of a chain of '|' subres. 
 586  ^ static struct subre *parse(struct vars *, int, int, struct state *, 
 589 static struct subre 
* 
 590 parse(v
, stopper
, type
, init
, final
) 
 592 int stopper
;                    /* EOS or ')' */ 
 593 int type
;                       /* LACON (lookahead subRE) or PLAIN */ 
 594 struct state 
*init
;             /* initial state */ 
 595 struct state 
*final
;            /* final state */ 
 597         struct state 
*left
;     /* scaffolding for branch */ 
 599         struct subre 
*branches
; /* top level */ 
 600         struct subre 
*branch
;   /* current branch */ 
 601         struct subre 
*t
;        /* temporary */ 
 602         int firstbranch
;        /* is this the first branch? */ 
 604         assert(stopper 
== ')' || stopper 
== EOS
); 
 606         branches 
= subre(v
, '|', LONGER
, init
, final
); 
 612                         /* need a place to hang it */ 
 613                         branch
->right 
= subre(v
, '|', LONGER
, init
, final
); 
 615                         branch 
= branch
->right
; 
 618                 left 
= newstate(v
->nfa
); 
 619                 right 
= newstate(v
->nfa
); 
 621                 EMPTYARC(init
, left
); 
 622                 EMPTYARC(right
, final
); 
 624                 branch
->left 
= parsebranch(v
, stopper
, type
, left
, right
, 0); 
 626                 branch
->flags 
|= UP(branch
->flags 
| branch
->left
->flags
); 
 627                 if ((branch
->flags 
&~ branches
->flags
) != 0)    /* new flags */ 
 628                         for (t 
= branches
; t 
!= branch
; t 
= t
->right
) 
 629                                 t
->flags 
|= branch
->flags
; 
 631         assert(SEE(stopper
) || SEE(EOS
)); 
 634                 assert(stopper 
== ')' && SEE(EOS
)); 
 638         /* optimize out simple cases */ 
 639         if (branch 
== branches
) {       /* only one branch */ 
 640                 assert(branch
->right 
== NULL
); 
 643                 freesubre(v
, branches
); 
 645         } else if (!MESSY(branches
->flags
)) {   /* no interesting innards */ 
 646                 freesubre(v
, branches
->left
); 
 647                 branches
->left 
= NULL
; 
 648                 freesubre(v
, branches
->right
); 
 649                 branches
->right 
= NULL
; 
 657  - parsebranch - parse one branch of an RE 
 658  * This mostly manages concatenation, working closely with parseqatom(). 
 659  * Concatenated things are bundled up as much as possible, with separate 
 660  * ',' nodes introduced only when necessary due to substructure. 
 661  ^ static struct subre *parsebranch(struct vars *, int, int, struct state *, 
 662  ^      struct state *, int); 
 664 static struct subre 
* 
 665 parsebranch(v
, stopper
, type
, left
, right
, partial
) 
 667 int stopper
;                    /* EOS or ')' */ 
 668 int type
;                       /* LACON (lookahead subRE) or PLAIN */ 
 669 struct state 
*left
;             /* leftmost state */ 
 670 struct state 
*right
;            /* rightmost state */ 
 671 int partial
;                    /* is this only part of a branch? */ 
 673         struct state 
*lp
;       /* left end of current construct */ 
 674         int seencontent
;        /* is there anything in this branch yet? */ 
 679         t 
= subre(v
, '=', 0, left
, right
);      /* op '=' is tentative */ 
 681         while (!SEE('|') && !SEE(stopper
) && !SEE(EOS
)) { 
 682                 if (seencontent
) {      /* implicit concat operator */ 
 683                         lp 
= newstate(v
->nfa
); 
 685                         moveins(v
->nfa
, right
, lp
); 
 689                 /* NB, recursion in parseqatom() may swallow rest of branch */ 
 690                 parseqatom(v
, stopper
, type
, lp
, right
, t
); 
 693         if (!seencontent
) {             /* empty branch */ 
 697                 EMPTYARC(left
, right
); 
 704  - parseqatom - parse one quantified atom or constraint of an RE 
 705  * The bookkeeping near the end cooperates very closely with parsebranch(); 
 706  * in particular, it contains a recursion that can involve parsing the rest 
 707  * of the branch, making this function's name somewhat inaccurate. 
 708  ^ static VOID parseqatom(struct vars *, int, int, struct state *, 
 709  ^      struct state *, struct subre *); 
 712 parseqatom(v
, stopper
, type
, lp
, rp
, top
) 
 714 int stopper
;                    /* EOS or ')' */ 
 715 int type
;                       /* LACON (lookahead subRE) or PLAIN */ 
 716 struct state 
*lp
;               /* left state to hang it on */ 
 717 struct state 
*rp
;               /* right state to hang it on */ 
 718 struct subre 
*top
;              /* subtree top */ 
 720         struct state 
*s
;        /* temporaries for new states */ 
 722 #       define  ARCV(t, val)    newarc(v->nfa, t, val, lp, rp) 
 724         struct subre 
*atom
;     /* atom's subtree */ 
 726         int cap
;                /* capturing parens? */ 
 727         int pos
;                /* positive lookahead? */ 
 728         int subno
;              /* capturing-parens or backref number */ 
 730         int qprefer
;            /* quantifier short/long preference */ 
 732         struct subre 
**atomp
;   /* where the pointer to atom is */ 
 734         /* initial bookkeeping */ 
 736         assert(lp
->nouts 
== 0); /* must string new code */ 
 737         assert(rp
->nins 
== 0);  /*  between lp and rp */ 
 738         subno 
= 0;              /* just to shut lint up */ 
 740         /* an atom or constraint... */ 
 741         atomtype 
= v
->nexttype
; 
 743         /* first, constraints, which end by returning */ 
 746                 if (v
->cflags
®_NLANCH
) 
 747                         ARCV(BEHIND
, v
->nlcolor
); 
 753                 if (v
->cflags
®_NLANCH
) 
 754                         ARCV(AHEAD
, v
->nlcolor
); 
 759                 ARCV('^', 1);   /* BOL */ 
 760                 ARCV('^', 0);   /* or BOS */ 
 765                 ARCV('$', 1);   /* EOL */ 
 766                 ARCV('$', 0);   /* or EOS */ 
 771                 wordchrs(v
);    /* does NEXT() */ 
 772                 s 
= newstate(v
->nfa
); 
 774                 nonword(v
, BEHIND
, lp
, s
); 
 775                 word(v
, AHEAD
, s
, rp
); 
 779                 wordchrs(v
);    /* does NEXT() */ 
 780                 s 
= newstate(v
->nfa
); 
 782                 word(v
, BEHIND
, lp
, s
); 
 783                 nonword(v
, AHEAD
, s
, rp
); 
 787                 wordchrs(v
);    /* does NEXT() */ 
 788                 s 
= newstate(v
->nfa
); 
 790                 nonword(v
, BEHIND
, lp
, s
); 
 791                 word(v
, AHEAD
, s
, rp
); 
 792                 s 
= newstate(v
->nfa
); 
 794                 word(v
, BEHIND
, lp
, s
); 
 795                 nonword(v
, AHEAD
, s
, rp
); 
 799                 wordchrs(v
);    /* does NEXT() */ 
 800                 s 
= newstate(v
->nfa
); 
 802                 word(v
, BEHIND
, lp
, s
); 
 803                 word(v
, AHEAD
, s
, rp
); 
 804                 s 
= newstate(v
->nfa
); 
 806                 nonword(v
, BEHIND
, lp
, s
); 
 807                 nonword(v
, AHEAD
, s
, rp
); 
 810         case LACON
:     /* lookahead constraint */ 
 813                 s 
= newstate(v
->nfa
); 
 814                 s2 
= newstate(v
->nfa
); 
 816                 t 
= parse(v
, ')', LACON
, s
, s2
); 
 817                 freesubre(v
, t
);        /* internal structure irrelevant */ 
 818                 assert(SEE(')') || ISERR()); 
 820                 n 
= newlacon(v
, s
, s2
, pos
); 
 825         /* then errors, to get them out of the way */ 
 837         /* then plain characters, and minor variants on that theme */ 
 838         case ')':               /* unbalanced paren */ 
 839                 if ((v
->cflags
®_ADVANCED
) != REG_EXTENDED
) { 
 843                 /* legal in EREs due to specification botch */ 
 845                 /* fallthrough into case PLAIN */ 
 847                 onechr(v
, v
->nextvalue
, lp
, rp
); 
 848                 okcolors(v
->nfa
, v
->cm
); 
 853                 if (v
->nextvalue 
== 1) 
 857                 assert(SEE(']') || ISERR()); 
 861                 rainbow(v
->nfa
, v
->cm
, PLAIN
, 
 862                                 (v
->cflags
®_NLSTOP
) ? v
->nlcolor 
: COLORLESS
, 
 866         /* and finally the ugly stuff */ 
 867         case '(':       /* value flags as capturing or non */ 
 868                 cap 
= (type 
== LACON
) ? 0 : v
->nextvalue
; 
 872                         if ((size_t)subno 
>= v
->nsubs
) 
 874                         assert((size_t)subno 
< v
->nsubs
); 
 876                         atomtype 
= PLAIN
;       /* something that's not '(' */ 
 878                 /* need new endpoints because tree will contain pointers */ 
 879                 s 
= newstate(v
->nfa
); 
 880                 s2 
= newstate(v
->nfa
); 
 885                 atom 
= parse(v
, ')', PLAIN
, s
, s2
); 
 886                 assert(SEE(')') || ISERR()); 
 890                         v
->subs
[subno
] = atom
; 
 891                         t 
= subre(v
, '(', atom
->flags
|CAP
, lp
, rp
); 
 897                 /* postpone everything else pending possible {0} */ 
 899         case BACKREF
:   /* the Feature From The Black Lagoon */ 
 900                 INSIST(type 
!= LACON
, REG_ESUBREG
); 
 901                 INSIST(v
->nextvalue 
< v
->nsubs
, REG_ESUBREG
); 
 902                 INSIST(v
->subs
[(int)v
->nextvalue
] != NULL
, REG_ESUBREG
); 
 904                 assert(v
->nextvalue 
> 0); 
 905                 atom 
= subre(v
, 'b', BACKR
, lp
, rp
); 
 906                 subno 
= v
->nextvalue
; 
 908                 EMPTYARC(lp
, rp
);       /* temporarily, so there's something */ 
 913         /* ...and an atom may be followed by a quantifier */ 
 914         switch (v
->nexttype
) { 
 918                 qprefer 
= (v
->nextvalue
) ? LONGER 
: SHORTER
; 
 924                 qprefer 
= (v
->nextvalue
) ? LONGER 
: SHORTER
; 
 930                 qprefer 
= (v
->nextvalue
) ? LONGER 
: SHORTER
; 
 945                         /* {m,n} exercises preference, even if it's {m,m} */ 
 946                         qprefer 
= (v
->nextvalue
) ? LONGER 
: SHORTER
; 
 949                         /* {m} passes operand's preference through */ 
 952                 if (!SEE('}')) {        /* catches errors too */ 
 958         default:                /* no quantifier */ 
 964         /* annoying special case:  {0} or {0,0} cancels everything */ 
 965         if (m 
== 0 && n 
== 0) { 
 969                         v
->subs
[subno
] = NULL
; 
 970                 delsub(v
->nfa
, lp
, rp
); 
 975         /* if not a messy case, avoid hard part */ 
 976         assert(!MESSY(top
->flags
)); 
 977         f 
= top
->flags 
| qprefer 
| ((atom 
!= NULL
) ? atom
->flags 
: 0); 
 978         if (atomtype 
!= '(' && atomtype 
!= BACKREF 
&& !MESSY(UP(f
))) { 
 979                 if (!(m 
== 1 && n 
== 1)) 
 980                         repeat(v
, lp
, rp
, m
, n
); 
 988          * hard part:  something messy 
 989          * That is, capturing parens, back reference, short/long clash, or 
 990          * an atom with substructure containing one of those. 
 993         /* now we'll need a subre for the contents even if they're boring */ 
 995                 atom 
= subre(v
, '=', 0, lp
, rp
); 
1000          * prepare a general-purpose state skeleton 
1002          *    ---> [s] ---prefix---> [begin] ---atom---> [end] ----rest---> [rp] 
1004          * [lp] ----> [s2] ----bypass--------------------- 
1006          * where bypass is an empty, and prefix is some repetitions of atom 
1008         s 
= newstate(v
->nfa
);           /* first, new endpoints for the atom */ 
1009         s2 
= newstate(v
->nfa
); 
1011         moveouts(v
->nfa
, lp
, s
); 
1012         moveins(v
->nfa
, rp
, s2
); 
1016         s 
= newstate(v
->nfa
);           /* and spots for prefix and bypass */ 
1017         s2 
= newstate(v
->nfa
); 
1023         /* break remaining subRE into x{...} and what follows */ 
1024         t 
= subre(v
, '.', COMBINE(qprefer
, atom
->flags
), lp
, rp
); 
1027         /* here we should recurse... but we must postpone that to the end */ 
1029         /* split top into prefix and remaining */ 
1030         assert(top
->op 
== '=' && top
->left 
== NULL 
&& top
->right 
== NULL
); 
1031         top
->left 
= subre(v
, '=', top
->flags
, top
->begin
, lp
); 
1035         /* if it's a backref, now is the time to replicate the subNFA */ 
1036         if (atomtype 
== BACKREF
) { 
1037                 assert(atom
->begin
->nouts 
== 1);        /* just the EMPTY */ 
1038                 delsub(v
->nfa
, atom
->begin
, atom
->end
); 
1039                 assert(v
->subs
[subno
] != NULL
); 
1040                 /* and here's why the recursion got postponed:  it must */ 
1041                 /* wait until the skeleton is filled in, because it may */ 
1042                 /* hit a backref that wants to copy the filled-in skeleton */ 
1043                 dupnfa(v
->nfa
, v
->subs
[subno
]->begin
, v
->subs
[subno
]->end
, 
1044                                                 atom
->begin
, atom
->end
); 
1048         /* it's quantifier time; first, turn x{0,...} into x{1,...}|empty */ 
1050                 EMPTYARC(s2
, atom
->end
);                /* the bypass */ 
1051                 assert(PREF(qprefer
) != 0); 
1052                 f 
= COMBINE(qprefer
, atom
->flags
); 
1053                 t 
= subre(v
, '|', f
, lp
, atom
->end
); 
1056                 t
->right 
= subre(v
, '|', PREF(f
), s2
, atom
->end
); 
1058                 t
->right
->left 
= subre(v
, '=', 0, s2
, atom
->end
); 
1065         /* deal with the rest of the quantifier */ 
1066         if (atomtype 
== BACKREF
) { 
1067                 /* special case:  backrefs have internal quantifiers */ 
1068                 EMPTYARC(s
, atom
->begin
);       /* empty prefix */ 
1069                 /* just stuff everything into atom */ 
1070                 repeat(v
, atom
->begin
, atom
->end
, m
, n
); 
1071                 atom
->min 
= (short)m
; 
1072                 atom
->max 
= (short)n
; 
1073                 atom
->flags 
|= COMBINE(qprefer
, atom
->flags
); 
1074         } else if (m 
== 1 && n 
== 1) { 
1075                 /* no/vacuous quantifier:  done */ 
1076                 EMPTYARC(s
, atom
->begin
);       /* empty prefix */ 
1078                 /* turn x{m,n} into x{m-1,n-1}x, with capturing */ 
1079                 /*  parens in only second x */ 
1080                 dupnfa(v
->nfa
, atom
->begin
, atom
->end
, s
, atom
->begin
); 
1081                 assert(m 
>= 1 && m 
!= INFINITY 
&& n 
>= 1); 
1082                 repeat(v
, s
, atom
->begin
, m
-1, (n 
== INFINITY
) ? n 
: n
-1); 
1083                 f 
= COMBINE(qprefer
, atom
->flags
); 
1084                 t 
= subre(v
, '.', f
, s
, atom
->end
);     /* prefix and atom */ 
1086                 t
->left 
= subre(v
, '=', PREF(f
), s
, atom
->begin
); 
1092         /* and finally, look after that postponed recursion */ 
1094         if (!(SEE('|') || SEE(stopper
) || SEE(EOS
))) 
1095                 t
->right 
= parsebranch(v
, stopper
, type
, atom
->end
, rp
, 1); 
1097                 EMPTYARC(atom
->end
, rp
); 
1098                 t
->right 
= subre(v
, '=', 0, atom
->end
, rp
); 
1100         assert(SEE('|') || SEE(stopper
) || SEE(EOS
)); 
1101         t
->flags 
|= COMBINE(t
->flags
, t
->right
->flags
); 
1102         top
->flags 
|= COMBINE(top
->flags
, t
->flags
); 
1106  - nonword - generate arcs for non-word-character ahead or behind 
1107  ^ static VOID nonword(struct vars *, int, struct state *, struct state *); 
1110 nonword(v
, dir
, lp
, rp
) 
1112 int dir
;                        /* AHEAD or BEHIND */ 
1116         int anchor 
= (dir 
== AHEAD
) ? '$' : '^'; 
1118         assert(dir 
== AHEAD 
|| dir 
== BEHIND
); 
1119         newarc(v
->nfa
, anchor
, 1, lp
, rp
); 
1120         newarc(v
->nfa
, anchor
, 0, lp
, rp
); 
1121         colorcomplement(v
->nfa
, v
->cm
, dir
, v
->wordchrs
, lp
, rp
); 
1122         /* (no need for special attention to \n) */ 
1126  - word - generate arcs for word character ahead or behind 
1127  ^ static VOID word(struct vars *, int, struct state *, struct state *); 
1130 word(v
, dir
, lp
, rp
) 
1132 int dir
;                        /* AHEAD or BEHIND */ 
1136         assert(dir 
== AHEAD 
|| dir 
== BEHIND
); 
1137         cloneouts(v
->nfa
, v
->wordchrs
, lp
, rp
, dir
); 
1138         /* (no need for special attention to \n) */ 
1142  - scannum - scan a number 
1143  ^ static int scannum(struct vars *); 
1145 static int                      /* value, <= DUPMAX */ 
1151         while (SEE(DIGIT
) && n 
< DUPMAX
) { 
1152                 n 
= n
*10 + v
->nextvalue
; 
1155         if (SEE(DIGIT
) || n 
> DUPMAX
) { 
1163  - repeat - replicate subNFA for quantifiers 
1164  * The duplication sequences used here are chosen carefully so that any 
1165  * pointers starting out pointing into the subexpression end up pointing into 
1166  * the last occurrence.  (Note that it may not be strung between the same 
1167  * left and right end states, however!)  This used to be important for the 
1168  * subRE tree, although the important bits are now handled by the in-line 
1169  * code in parse(), and when this is called, it doesn't matter any more. 
1170  ^ static VOID repeat(struct vars *, struct state *, struct state *, int, int); 
1173 repeat(v
, lp
, rp
, m
, n
) 
1182 #       define  PAIR(x, y)      ((x)*4 + (y)) 
1183 #       define  REDUCE(x)       ( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) ) 
1184         CONST 
int rm 
= REDUCE(m
); 
1185         CONST 
int rn 
= REDUCE(n
); 
1189         switch (PAIR(rm
, rn
)) { 
1190         case PAIR(0, 0):                /* empty string */ 
1191                 delsub(v
->nfa
, lp
, rp
); 
1194         case PAIR(0, 1):                /* do as x| */ 
1197         case PAIR(0, SOME
):             /* do as x{1,n}| */ 
1198                 repeat(v
, lp
, rp
, 1, n
); 
1202         case PAIR(0, INF
):              /* loop x around */ 
1203                 s 
= newstate(v
->nfa
); 
1205                 moveouts(v
->nfa
, lp
, s
); 
1206                 moveins(v
->nfa
, rp
, s
); 
1210         case PAIR(1, 1):                /* no action required */ 
1212         case PAIR(1, SOME
):             /* do as x{0,n-1}x = (x{1,n-1}|)x */ 
1213                 s 
= newstate(v
->nfa
); 
1215                 moveouts(v
->nfa
, lp
, s
); 
1216                 dupnfa(v
->nfa
, s
, rp
, lp
, s
); 
1218                 repeat(v
, lp
, s
, 1, n
-1); 
1222         case PAIR(1, INF
):              /* add loopback arc */ 
1223                 s 
= newstate(v
->nfa
); 
1224                 s2 
= newstate(v
->nfa
); 
1226                 moveouts(v
->nfa
, lp
, s
); 
1227                 moveins(v
->nfa
, rp
, s2
); 
1232         case PAIR(SOME
, SOME
):          /* do as x{m-1,n-1}x */ 
1233                 s 
= newstate(v
->nfa
); 
1235                 moveouts(v
->nfa
, lp
, s
); 
1236                 dupnfa(v
->nfa
, s
, rp
, lp
, s
); 
1238                 repeat(v
, lp
, s
, m
-1, n
-1); 
1240         case PAIR(SOME
, INF
):           /* do as x{m-1,}x */ 
1241                 s 
= newstate(v
->nfa
); 
1243                 moveouts(v
->nfa
, lp
, s
); 
1244                 dupnfa(v
->nfa
, s
, rp
, lp
, s
); 
1246                 repeat(v
, lp
, s
, m
-1, n
); 
1255  - bracket - handle non-complemented bracket expression 
1256  * Also called from cbracket for complemented bracket expressions. 
1257  ^ static VOID bracket(struct vars *, struct state *, struct state *); 
1267         while (!SEE(']') && !SEE(EOS
)) 
1268                 brackpart(v
, lp
, rp
); 
1269         assert(SEE(']') || ISERR()); 
1270         okcolors(v
->nfa
, v
->cm
); 
1274  - cbracket - handle complemented bracket expression 
1275  * We do it by calling bracket() with dummy endpoints, and then complementing 
1276  * the result.  The alternative would be to invoke rainbow(), and then delete 
1277  * arcs as the b.e. is seen... but that gets messy. 
1278  ^ static VOID cbracket(struct vars *, struct state *, struct state *); 
1286         struct state 
*left 
= newstate(v
->nfa
); 
1287         struct state 
*right 
= newstate(v
->nfa
); 
1289         struct arc 
*a
;                  /* arc from lp */ 
1290         struct arc 
*ba
;                 /* arc from left, from bracket() */ 
1291         struct arc 
*pa
;                 /* MCCE-prototype arc */ 
1297         bracket(v
, left
, right
); 
1298         if (v
->cflags
®_NLSTOP
) 
1299                 newarc(v
->nfa
, PLAIN
, v
->nlcolor
, left
, right
); 
1302         assert(lp
->nouts 
== 0);         /* all outarcs will be ours */ 
1304         /* easy part of complementing */ 
1305         colorcomplement(v
->nfa
, v
->cm
, PLAIN
, left
, lp
, rp
); 
1307         if (v
->mcces 
== NULL
) {         /* no MCCEs -- we're done */ 
1308                 dropstate(v
->nfa
, left
); 
1309                 assert(right
->nins 
== 0); 
1310                 freestate(v
->nfa
, right
); 
1314         /* but complementing gets messy in the presence of MCCEs... */ 
1316         for (p 
= v
->mcces
->chrs
, i 
= v
->mcces
->nchrs
; i 
> 0; p
++, i
--) { 
1317                 co 
= GETCOLOR(v
->cm
, *p
); 
1318                 a 
= findarc(lp
, PLAIN
, co
); 
1319                 ba 
= findarc(left
, PLAIN
, co
); 
1326                 s 
= newstate(v
->nfa
); 
1328                 newarc(v
->nfa
, PLAIN
, co
, lp
, s
); 
1330                 pa 
= findarc(v
->mccepbegin
, PLAIN
, co
); 
1332                 if (ba 
== NULL
) {       /* easy case, need all of them */ 
1333                         cloneouts(v
->nfa
, pa
->to
, s
, rp
, PLAIN
); 
1334                         newarc(v
->nfa
, '$', 1, s
, rp
); 
1335                         newarc(v
->nfa
, '$', 0, s
, rp
); 
1336                         colorcomplement(v
->nfa
, v
->cm
, AHEAD
, pa
->to
, s
, rp
); 
1337                 } else {                /* must be selective */ 
1338                         if (findarc(ba
->to
, '$', 1) == NULL
) { 
1339                                 newarc(v
->nfa
, '$', 1, s
, rp
); 
1340                                 newarc(v
->nfa
, '$', 0, s
, rp
); 
1341                                 colorcomplement(v
->nfa
, v
->cm
, AHEAD
, pa
->to
, 
1344                         for (pa 
= pa
->to
->outs
; pa 
!= NULL
; pa 
= pa
->outchain
) 
1345                                 if (findarc(ba
->to
, PLAIN
, pa
->co
) == NULL
) 
1346                                         newarc(v
->nfa
, PLAIN
, pa
->co
, s
, rp
); 
1347                         if (s
->nouts 
== 0)      /* limit of selectivity: none */ 
1348                                 dropstate(v
->nfa
, s
);   /* frees arc too */ 
1353         delsub(v
->nfa
, left
, right
); 
1354         assert(left
->nouts 
== 0); 
1355         freestate(v
->nfa
, left
); 
1356         assert(right
->nins 
== 0); 
1357         freestate(v
->nfa
, right
); 
1361  - brackpart - handle one item (or range) within a bracket expression 
1362  ^ static VOID brackpart(struct vars *, struct state *, struct state *); 
1365 brackpart(v
, lp
, rp
) 
1377         /* parse something, get rid of special cases, take shortcuts */ 
1378         switch (v
->nexttype
) { 
1379         case RANGE
:                     /* a-b-c or other botch */ 
1384                 c
[0] = v
->nextvalue
; 
1386                 /* shortcut for ordinary chr (not range, not MCCE leader) */ 
1387                 if (!SEE(RANGE
) && !ISCELEADER(v
, c
[0])) { 
1388                         onechr(v
, c
[0], lp
, rp
); 
1391                 startc 
= element(v
, c
, c
+1); 
1396                 endp 
= scanplain(v
); 
1397                 INSIST(startp 
< endp
, REG_ECOLLATE
); 
1399                 startc 
= element(v
, startp
, endp
); 
1404                 endp 
= scanplain(v
); 
1405                 INSIST(startp 
< endp
, REG_ECOLLATE
); 
1407                 startc 
= element(v
, startp
, endp
); 
1409                 cv 
= eclass(v
, startc
, (v
->cflags
®_ICASE
)); 
1411                 dovec(v
, cv
, lp
, rp
); 
1416                 endp 
= scanplain(v
); 
1417                 INSIST(startp 
< endp
, REG_ECTYPE
); 
1419                 cv 
= cclass(v
, startp
, endp
, (v
->cflags
®_ICASE
)); 
1421                 dovec(v
, cv
, lp
, rp
); 
1432                 switch (v
->nexttype
) { 
1435                         c
[0] = v
->nextvalue
; 
1437                         endc 
= element(v
, c
, c
+1); 
1442                         endp 
= scanplain(v
); 
1443                         INSIST(startp 
< endp
, REG_ECOLLATE
); 
1445                         endc 
= element(v
, startp
, endp
); 
1457          * Ranges are unportable.  Actually, standard C does 
1458          * guarantee that digits are contiguous, but making 
1459          * that an exception is just too complicated. 
1463         cv 
= range(v
, startc
, endc
, (v
->cflags
®_ICASE
)); 
1465         dovec(v
, cv
, lp
, rp
); 
1469  - scanplain - scan PLAIN contents of [. etc. 
1470  * Certain bits of trickery in lex.c know that this code does not try 
1471  * to look past the final bracket of the [. etc. 
1472  ^ static chr *scanplain(struct vars *); 
1474 static chr 
*                    /* just after end of sequence */ 
1480         assert(SEE(COLLEL
) || SEE(ECLASS
) || SEE(CCLASS
)); 
1484         while (SEE(PLAIN
)) { 
1489         assert(SEE(END
) || ISERR()); 
1496  - leaders - process a cvec of collating elements to also include leaders 
1497  * Also gives all characters involved their own colors, which is almost 
1498  * certainly necessary, and sets up little disconnected subNFA. 
1499  ^ static VOID leaders(struct vars *, struct cvec *); 
1512         v
->mccepbegin 
= newstate(v
->nfa
); 
1513         v
->mccepend 
= newstate(v
->nfa
); 
1516         for (mcce 
= 0; mcce 
< cv
->nmcces
; mcce
++) { 
1517                 p 
= cv
->mcces
[mcce
]; 
1519                 if (!haschr(cv
, leader
)) { 
1521                         s 
= newstate(v
->nfa
); 
1522                         newarc(v
->nfa
, PLAIN
, subcolor(v
->cm
, leader
), 
1524                         okcolors(v
->nfa
, v
->cm
); 
1526                         a 
= findarc(v
->mccepbegin
, PLAIN
, 
1527                                                 GETCOLOR(v
->cm
, leader
)); 
1530                         assert(s 
!= v
->mccepend
); 
1533                 assert(*p 
!= 0 && *(p
+1) == 0); /* only 2-char MCCEs for now */ 
1534                 newarc(v
->nfa
, PLAIN
, subcolor(v
->cm
, *p
), s
, v
->mccepend
); 
1535                 okcolors(v
->nfa
, v
->cm
); 
1540  - onechr - fill in arcs for a plain character, and possible case complements 
1541  * This is mostly a shortcut for efficient handling of the common case. 
1542  ^ static VOID onechr(struct vars *, pchr, struct state *, struct state *); 
1545 onechr(v
, c
, lp
, rp
) 
1551         if (!(v
->cflags
®_ICASE
)) { 
1552                 newarc(v
->nfa
, PLAIN
, subcolor(v
->cm
, c
), lp
, rp
); 
1556         /* rats, need general case anyway... */ 
1557         dovec(v
, allcases(v
, c
), lp
, rp
); 
1561  - dovec - fill in arcs for each element of a cvec 
1562  * This one has to handle the messy cases, like MCCEs and MCCE leaders. 
1563  ^ static VOID dovec(struct vars *, struct cvec *, struct state *, 
1567 dovec(v
, cv
, lp
, rp
) 
1580         struct arc 
*pa
;         /* arc in prototype */ 
1582         struct state 
*ps
;       /* state in prototype */ 
1584         /* need a place to store leaders, if any */ 
1585         if (nmcces(v
) > 0) { 
1586                 assert(v
->mcces 
!= NULL
); 
1587                 if (v
->cv2 
== NULL 
|| v
->cv2
->nchrs 
< v
->mcces
->nchrs
) { 
1590                         v
->cv2 
= newcvec(v
->mcces
->nchrs
, 0, v
->mcces
->nmcces
); 
1594                         leads 
= clearcvec(v
->cv2
); 
1598         /* first, get the ordinary characters out of the way */ 
1599         for (p 
= cv
->chrs
, i 
= cv
->nchrs
; i 
> 0; p
++, i
--) { 
1601                 if (!ISCELEADER(v
, ch
)) 
1602                         newarc(v
->nfa
, PLAIN
, subcolor(v
->cm
, ch
), lp
, rp
); 
1604                         assert(singleton(v
->cm
, ch
)); 
1605                         assert(leads 
!= NULL
); 
1606                         if (!haschr(leads
, ch
)) 
1611         /* and the ranges */ 
1612         for (p 
= cv
->ranges
, i 
= cv
->nranges
; i 
> 0; p 
+= 2, i
--) { 
1615                 while (from 
<= to 
&& (ce 
= nextleader(v
, from
, to
)) != NOCELT
) { 
1617                                 subrange(v
, from
, ce 
- 1, lp
, rp
); 
1618                         assert(singleton(v
->cm
, ce
)); 
1619                         assert(leads 
!= NULL
); 
1620                         if (!haschr(leads
, ce
)) 
1625                         subrange(v
, from
, to
, lp
, rp
); 
1628         if ((leads 
== NULL 
|| leads
->nchrs 
== 0) && cv
->nmcces 
== 0) 
1631         /* deal with the MCCE leaders */ 
1633         for (p 
= leads
->chrs
, i 
= leads
->nchrs
; i 
> 0; p
++, i
--) { 
1634                 co 
= GETCOLOR(v
->cm
, *p
); 
1635                 a 
= findarc(lp
, PLAIN
, co
); 
1639                         s 
= newstate(v
->nfa
); 
1641                         newarc(v
->nfa
, PLAIN
, co
, lp
, s
); 
1644                 pa 
= findarc(v
->mccepbegin
, PLAIN
, co
); 
1647                 newarc(v
->nfa
, '$', 1, s
, rp
); 
1648                 newarc(v
->nfa
, '$', 0, s
, rp
); 
1649                 colorcomplement(v
->nfa
, v
->cm
, AHEAD
, ps
, s
, rp
); 
1654         for (i 
= 0; i 
< cv
->nmcces
; i
++) { 
1656                 assert(singleton(v
->cm
, *p
)); 
1657                 if (!singleton(v
->cm
, *p
)) { 
1662                 co 
= GETCOLOR(v
->cm
, ch
); 
1663                 a 
= findarc(lp
, PLAIN
, co
); 
1667                         s 
= newstate(v
->nfa
); 
1669                         newarc(v
->nfa
, PLAIN
, co
, lp
, s
); 
1672                 assert(*p 
!= 0);        /* at least two chars */ 
1673                 assert(singleton(v
->cm
, *p
)); 
1675                 co 
= GETCOLOR(v
->cm
, ch
); 
1676                 assert(*p 
== 0);        /* and only two, for now */ 
1677                 newarc(v
->nfa
, PLAIN
, co
, s
, rp
); 
1683  - nextleader - find next MCCE leader within range 
1684  ^ static celt nextleader(struct vars *, pchr, pchr); 
1686 static celt                     
/* NOCELT means none */ 
1687 nextleader(v
, from
, to
) 
1697         if (v
->mcces 
== NULL
) 
1700         for (i 
= v
->mcces
->nchrs
, p 
= v
->mcces
->chrs
; i 
> 0; i
--, p
++) { 
1702                 if (from 
<= ch 
&& ch 
<= to
) 
1703                         if (it 
== NOCELT 
|| ch 
< it
) 
1710  - wordchrs - set up word-chr list for word-boundary stuff, if needed 
1711  * The list is kept as a bunch of arcs between two dummy states; it's 
1712  * disposed of by the unreachable-states sweep in NFA optimization. 
1713  * Does NEXT().  Must not be called from any unusual lexical context. 
1714  * This should be reconciled with the \w etc. handling in lex.c, and 
1715  * should be cleaned up to reduce dependencies on input scanning. 
1716  ^ static VOID wordchrs(struct vars *); 
1723         struct state 
*right
; 
1725         if (v
->wordchrs 
!= NULL
) { 
1726                 NEXT();         /* for consistency */ 
1730         left 
= newstate(v
->nfa
); 
1731         right 
= newstate(v
->nfa
); 
1733         /* fine point:  implemented with [::], and lexer will set REG_ULOCALE */ 
1736         assert(v
->savenow 
!= NULL 
&& SEE('[')); 
1737         bracket(v
, left
, right
); 
1738         assert((v
->savenow 
!= NULL 
&& SEE(']')) || ISERR()); 
1745  - subre - allocate a subre 
1746  ^ static struct subre *subre(struct vars *, int, int, struct state *, 
1749 static struct subre 
* 
1750 subre(v
, op
, flags
, begin
, end
) 
1754 struct state 
*begin
; 
1761                 v
->treefree 
= ret
->left
; 
1763                 ret 
= (struct subre 
*)MALLOC(sizeof(struct subre
)); 
1768                 ret
->chain 
= v
->treechain
; 
1772         assert(strchr("|.b(=", op
) != NULL
); 
1778         ret
->min 
= ret
->max 
= 1; 
1789  - freesubre - free a subRE subtree 
1790  ^ static VOID freesubre(struct vars *, struct subre *); 
1794 struct vars 
*v
;                 /* might be NULL */ 
1800         if (sr
->left 
!= NULL
) 
1801                 freesubre(v
, sr
->left
); 
1802         if (sr
->right 
!= NULL
) 
1803                 freesubre(v
, sr
->right
); 
1809  - freesrnode - free one node in a subRE subtree 
1810  ^ static VOID freesrnode(struct vars *, struct subre *); 
1814 struct vars 
*v
;                 /* might be NULL */ 
1820         if (!NULLCNFA(sr
->cnfa
)) 
1821                 freecnfa(&sr
->cnfa
); 
1825                 sr
->left 
= v
->treefree
; 
1832  - optst - optimize a subRE subtree 
1833  ^ static VOID optst(struct vars *, struct subre *); 
1843         /* recurse through children */ 
1844         if (t
->left 
!= NULL
) 
1846         if (t
->right 
!= NULL
) 
1851  - numst - number tree nodes (assigning retry indexes) 
1852  ^ static int numst(struct subre *, int); 
1854 static int                      /* next number */ 
1857 int start
;                      /* starting point for subtree numbers */ 
1864         t
->retry 
= (short)i
++; 
1865         if (t
->left 
!= NULL
) 
1866                 i 
= numst(t
->left
, i
); 
1867         if (t
->right 
!= NULL
) 
1868                 i 
= numst(t
->right
, i
); 
1873  - markst - mark tree nodes as INUSE 
1874  ^ static VOID markst(struct subre *); 
1883         if (t
->left 
!= NULL
) 
1885         if (t
->right 
!= NULL
) 
1890  - cleanst - free any tree nodes not marked INUSE 
1891  ^ static VOID cleanst(struct vars *); 
1900         for (t 
= v
->treechain
; t 
!= NULL
; t 
= next
) { 
1902                 if (!(t
->flags
&INUSE
)) 
1905         v
->treechain 
= NULL
; 
1906         v
->treefree 
= NULL
;             /* just on general principles */ 
1910  - nfatree - turn a subRE subtree into a tree of compacted NFAs 
1911  ^ static long nfatree(struct vars *, struct subre *, FILE *); 
1913 static long                     /* optimize results from top node */ 
1917 FILE *f
;                        /* for debug output */ 
1919         assert(t 
!= NULL 
&& t
->begin 
!= NULL
); 
1921         if (t
->left 
!= NULL
) 
1922                 (DISCARD
)nfatree(v
, t
->left
, f
); 
1923         if (t
->right 
!= NULL
) 
1924                 (DISCARD
)nfatree(v
, t
->right
, f
); 
1926         return nfanode(v
, t
, f
); 
1930  - nfanode - do one NFA for nfatree 
1931  ^ static long nfanode(struct vars *, struct subre *, FILE *); 
1933 static long                     /* optimize results */ 
1937 FILE *f
;                        /* for debug output */ 
1943         assert(t
->begin 
!= NULL
); 
1946                 fprintf(f
, "\n\n\n========= TREE NODE %s ==========\n", 
1947                                                 stid(t
, idbuf
, sizeof(idbuf
))); 
1948         nfa 
= newnfa(v
, v
->cm
, v
->nfa
); 
1950         dupnfa(nfa
, t
->begin
, t
->end
, nfa
->init
, nfa
->final
); 
1953                 ret 
= optimize(nfa
, f
); 
1956                 compact(nfa
, &t
->cnfa
); 
1963  - newlacon - allocate a lookahead-constraint subRE 
1964  ^ static int newlacon(struct vars *, struct state *, struct state *, int); 
1966 static int                      /* lacon number */ 
1967 newlacon(v
, begin
, end
, pos
) 
1969 struct state 
*begin
; 
1976         if (v
->nlacons 
== 0) { 
1977                 v
->lacons 
= (struct subre 
*)MALLOC(2 * sizeof(struct subre
)); 
1978                 n 
= 1;          /* skip 0th */ 
1981                 v
->lacons 
= (struct subre 
*)REALLOC(v
->lacons
, 
1982                                         (v
->nlacons
+1)*sizeof(struct subre
)); 
1985         if (v
->lacons 
== NULL
) { 
1989         sub 
= &v
->lacons
[n
]; 
1998  - freelacons - free lookahead-constraint subRE vector 
1999  ^ static VOID freelacons(struct subre *, int); 
2010         for (sub 
= subs 
+ 1, i 
= n 
- 1; i 
> 0; sub
++, i
--)      /* no 0th */ 
2011                 if (!NULLCNFA(sub
->cnfa
)) 
2012                         freecnfa(&sub
->cnfa
); 
2017  - rfree - free a whole RE (insides of regfree) 
2018  ^ static VOID rfree(regex_t *); 
2026         if (re 
== NULL 
|| re
->re_magic 
!= REMAGIC
) 
2029         re
->re_magic 
= 0;       /* invalidate RE */ 
2030         g 
= (struct guts 
*)re
->re_guts
; 
2035         if (g
->tree 
!= NULL
) 
2036                 freesubre((struct vars 
*)NULL
, g
->tree
); 
2037         if (g
->lacons 
!= NULL
) 
2038                 freelacons(g
->lacons
, g
->nlacons
); 
2039         if (!NULLCNFA(g
->search
)) 
2040                 freecnfa(&g
->search
); 
2045  - dump - dump an RE in human-readable form 
2046  ^ static VOID dump(regex_t *, FILE *); 
2057         if (re
->re_magic 
!= REMAGIC
) 
2058                 fprintf(f
, "bad magic number (0x%x not 0x%x)\n", re
->re_magic
, 
2060         if (re
->re_guts 
== NULL
) { 
2061                 fprintf(f
, "NULL guts!!!\n"); 
2064         g 
= (struct guts 
*)re
->re_guts
; 
2065         if (g
->magic 
!= GUTSMAGIC
) 
2066                 fprintf(f
, "bad guts magic number (0x%x not 0x%x)\n", g
->magic
, 
2069         fprintf(f
, "\n\n\n========= DUMP ==========\n"); 
2070         fprintf(f
, "nsub %d, info 0%lo, csize %d, ntree %d\n",  
2071                 re
->re_nsub
, re
->re_info
, re
->re_csize
, g
->ntree
); 
2073         dumpcolors(&g
->cmap
, f
); 
2074         if (!NULLCNFA(g
->search
)) { 
2075                 printf("\nsearch:\n"); 
2076                 dumpcnfa(&g
->search
, f
); 
2078         for (i 
= 1; i 
< g
->nlacons
; i
++) { 
2079                 fprintf(f
, "\nla%d (%s):\n", i
, 
2080                                 (g
->lacons
[i
].subno
) ? "positive" : "negative"); 
2081                 dumpcnfa(&g
->lacons
[i
].cnfa
, f
); 
2084         dumpst(g
->tree
, f
, 0); 
2089  - dumpst - dump a subRE tree 
2090  ^ static VOID dumpst(struct subre *, FILE *, int); 
2093 dumpst(t
, f
, nfapresent
) 
2096 int nfapresent
;                 /* is the original NFA still around? */ 
2099                 fprintf(f
, "null tree\n"); 
2101                 stdump(t
, f
, nfapresent
); 
2106  - stdump - recursive guts of dumpst 
2107  ^ static VOID stdump(struct subre *, FILE *, int); 
2110 stdump(t
, f
, nfapresent
) 
2113 int nfapresent
;                 /* is the original NFA still around? */ 
2117         fprintf(f
, "%s. `%c'", stid(t
, idbuf
, sizeof(idbuf
)), t
->op
); 
2118         if (t
->flags
&LONGER
) 
2119                 fprintf(f
, " longest"); 
2120         if (t
->flags
&SHORTER
) 
2121                 fprintf(f
, " shortest"); 
2123                 fprintf(f
, " hasmixed"); 
2125                 fprintf(f
, " hascapture"); 
2127                 fprintf(f
, " hasbackref"); 
2128         if (!(t
->flags
&INUSE
)) 
2129                 fprintf(f
, " UNUSED"); 
2131                 fprintf(f
, " (#%d)", t
->subno
); 
2132         if (t
->min 
!= 1 || t
->max 
!= 1) { 
2133                 fprintf(f
, " {%d,", t
->min
); 
2134                 if (t
->max 
!= INFINITY
) 
2135                         fprintf(f
, "%d", t
->max
); 
2139                 fprintf(f
, " %ld-%ld", (long)t
->begin
->no
, (long)t
->end
->no
); 
2140         if (t
->left 
!= NULL
) 
2141                 fprintf(f
, " L:%s", stid(t
->left
, idbuf
, sizeof(idbuf
))); 
2142         if (t
->right 
!= NULL
) 
2143                 fprintf(f
, " R:%s", stid(t
->right
, idbuf
, sizeof(idbuf
))); 
2144         if (!NULLCNFA(t
->cnfa
)) { 
2146                 dumpcnfa(&t
->cnfa
, f
); 
2149         if (t
->left 
!= NULL
) 
2150                 stdump(t
->left
, f
, nfapresent
); 
2151         if (t
->right 
!= NULL
) 
2152                 stdump(t
->right
, f
, nfapresent
); 
2156  - stid - identify a subtree node for dumping 
2157  ^ static char *stid(struct subre *, char *, size_t); 
2159 static char *                   /* points to buf or constant string */ 
2160 stid(t
, buf
, bufsize
) 
2165         /* big enough for hex int or decimal t->retry? */ 
2166         if (bufsize 
< sizeof(int)*2 + 3 || bufsize 
< sizeof(t
->retry
)*3 + 1) 
2169                 sprintf(buf
, "%d", t
->retry
); 
2171                 sprintf(buf
, "0x%x", (int)(wxUIntPtr
)(t
));      /* may lose bits, that's okay */ 
2175 #include "regc_lex.c" 
2176 #include "regc_color.c" 
2177 #include "regc_nfa.c" 
2178 #include "regc_cvec.c" 
2179 #include "regc_locale.c"