]>
git.saurik.com Git - wxWidgets.git/blob - src/regex/regguts.h
   2  * Internal interface definitions, etc., for the reg package 
   4  * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved. 
   6  * Development of this software was funded, in part, by Cray Research Inc., 
   7  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics 
   8  * Corporation, none of whom are responsible for the results.  The author 
  11  * Redistribution and use in source and binary forms -- with or without 
  12  * modification -- are permitted for any purpose, provided that 
  13  * redistributions in source form retain this entire copyright notice and 
  14  * indicate the origin and nature of any modifications. 
  16  * I'd appreciate being given credit for this package in the documentation 
  17  * of software which uses it, but that is not a requirement. 
  19  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, 
  20  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 
  21  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL 
  22  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
  23  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
  24  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
  25  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
  26  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 
  27  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
  28  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
  34  * Environmental customization.  It should not (I hope) be necessary to 
  35  * alter the file you are now reading -- regcustom.h should handle it all, 
  36  * given care here and elsewhere. 
  38 #include "regcustom.h" 
  41  * Things that regcustom.h might override. 
  44 /* standard header files (NULL is a reasonable indicator for them) */ 
  56 #       define  NDEBUG          /* no assertions */ 
  63 #define VOID    void                    /* for function return values */ 
  66 #define DISCARD VOID                    /* for throwing values away */ 
  69 #define PVOID   VOID *                  /* generic pointer */ 
  72 #define VS(x)   ((PVOID)(x))            /* cast something to generic ptr */ 
  75 #define NOPARMS VOID                    /* for empty parm lists */ 
  80 #define CONST   const                   /* for old compilers, might be empty */ 
  83 /* function-pointer declarator */ 
  86 #define FUNCPTR(name, args)     (*name)args 
  88 #define FUNCPTR(name, args)     (*name)() 
  92 /* memory allocation */ 
  94 #define MALLOC(n)       malloc(n) 
  97 #define REALLOC(p, n)   realloc(VS(p), n) 
 100 #define FREE(p)         free(VS(p)) 
 103 /* want size of a char in bits, and max value in bounded quantifiers */ 
 107 #ifndef _POSIX2_RE_DUP_MAX 
 108 #define _POSIX2_RE_DUP_MAX      255     /* normally from <limits.h> */ 
 120 #define DUPMAX  _POSIX2_RE_DUP_MAX 
 121 #define INFINITY        (DUPMAX+1) 
 123 #define REMAGIC 0xfed7          /* magic number for main struct */ 
 128  * debugging facilities 
 131 /* FDEBUG does finite-state tracing */ 
 132 #define FDEBUG(arglist) { if (v->eflags®_FTRACE) printf arglist; } 
 133 /* MDEBUG does higher-level tracing */ 
 134 #define MDEBUG(arglist) { if (v->eflags®_MTRACE) printf arglist; } 
 136 #define FDEBUG(arglist) {} 
 137 #define MDEBUG(arglist) {} 
 143  * bitmap manipulation 
 145 #define UBITS   (CHAR_BIT * sizeof(unsigned)) 
 146 #define BSET(uv, sn)    ((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS)) 
 147 #define ISBSET(uv, sn)  ((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS))) 
 152  * We dissect a chr into byts for colormap table indexing.  Here we define 
 153  * a byt, which will be the same as a byte on most machines...  The exact 
 154  * size of a byt is not critical, but about 8 bits is good, and extraction 
 155  * of 8-bit chunks is sometimes especially fast. 
 158 #define BYTBITS 8               /* bits in a byt */ 
 160 #define BYTTAB  (1<<BYTBITS)    /* size of table with one entry per byt value */ 
 161 #define BYTMASK (BYTTAB-1)      /* bit mask for byt */ 
 162 #define NBYTS   ((CHRBITS+BYTBITS-1)/BYTBITS) 
 163 /* the definition of GETCOLOR(), below, assumes NBYTS <= 4 */ 
 168  * As soon as possible, we map chrs into equivalence classes -- "colors" -- 
 169  * which are of much more manageable number. 
 171 typedef short color
;            /* colors of characters */ 
 172 typedef int pcolor
;             /* what color promotes to */ 
 173 #define COLORLESS       (-1)    /* impossible color */ 
 174 #define WHITE           0       /* default color, parent of all others */ 
 179  * A colormap is a tree -- more precisely, a DAG -- indexed at each level 
 180  * by a byt of the chr, to map the chr to a color efficiently.  Because 
 181  * lower sections of the tree can be shared, it can exploit the usual 
 182  * sparseness of such a mapping table.  The tree is always NBYTS levels 
 183  * deep (in the past it was shallower during construction but was "filled" 
 184  * to full depth at the end of that); areas that are unaltered as yet point 
 185  * to "fill blocks" which are entirely WHITE in color. 
 188 /* the tree itself */ 
 190         color ccolor
[BYTTAB
]; 
 193         union tree 
*pptr
[BYTTAB
]; 
 196         struct colors colors
; 
 199 #define tcolor  colors.ccolor 
 200 #define tptr    ptrs.pptr 
 202 /* internal per-color structure for the color machinery */ 
 204         uchr nchrs
;             /* number of chars of this color */ 
 205         color sub
;              /* open subcolor (if any); free chain ptr */ 
 206 #               define  NOSUB   COLORLESS 
 207         struct arc 
*arcs
;       /* color chain */ 
 209 #               define  FREECOL 01      /* currently free */ 
 210 #               define  PSEUDO  02      /* pseudocolor, no real chars */ 
 211 #       define  UNUSEDCOLOR(cd) ((cd)->flags&FREECOL) 
 212         union tree 
*block
;      /* block of solid color, if any */ 
 215 /* the color map itself */ 
 218 #               define  CMMAGIC 0x876 
 219         struct vars 
*v
;                 /* for compile error reporting */ 
 220         size_t ncds
;                    /* number of colordescs */ 
 221         size_t max
;                     /* highest in use */ 
 222         color free
;                     /* beginning of free chain (if non-0) */ 
 223         struct colordesc 
*cd
; 
 224 #       define  CDEND(cm)       (&(cm)->cd[(cm)->max + 1]) 
 225 #               define  NINLINECDS      ((size_t)10) 
 226         struct colordesc cdspace
[NINLINECDS
]; 
 227         union tree tree
[NBYTS
];         /* tree top, plus fill blocks */ 
 230 /* optimization magic to do fast chr->color mapping */ 
 231 #define B0(c)   ((c) & BYTMASK) 
 232 #define B1(c)   (((c)>>BYTBITS) & BYTMASK) 
 233 #define B2(c)   (((c)>>(2*BYTBITS)) & BYTMASK) 
 234 #define B3(c)   (((c)>>(3*BYTBITS)) & BYTMASK) 
 236 #define GETCOLOR(cm, c) ((cm)->tree->tcolor[B0(c)]) 
 238 /* beware, for NBYTS>1, GETCOLOR() is unsafe -- 2nd arg used repeatedly */ 
 240 #define GETCOLOR(cm, c) ((cm)->tree->tptr[B1(c)]->tcolor[B0(c)]) 
 243 #define GETCOLOR(cm, c) ((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)]) 
 249  * Interface definitions for locale-interface functions in locale.c. 
 250  * Multi-character collating elements (MCCEs) cause most of the trouble. 
 253         int nchrs
;              /* number of chrs */ 
 254         int chrspace
;           /* number of chrs possible */ 
 255         chr 
*chrs
;              /* pointer to vector of chrs */ 
 256         int nranges
;            /* number of ranges (chr pairs) */ 
 257         int rangespace
;         /* number of chrs possible */ 
 258         chr 
*ranges
;            /* pointer to vector of chr pairs */ 
 259         int nmcces
;             /* number of MCCEs */ 
 260         int mccespace
;          /* number of MCCEs possible */ 
 261         int nmccechrs
;          /* number of chrs used for MCCEs */ 
 262         chr 
*mcces
[1];          /* pointers to 0-terminated MCCEs */ 
 263                                 /* and both batches of chrs are on the end */ 
 266 /* caution:  this value cannot be changed easily */ 
 267 #define MAXMCCE 2               /* length of longest MCCE */ 
 272  * definitions for NFA internal representation 
 274  * Having a "from" pointer within each arc may seem redundant, but it 
 275  * saves a lot of hassle. 
 281 #               define  ARCFREE '\0' 
 283         struct state 
*from
;     /* where it's from (and contained within) */ 
 284         struct state 
*to
;       /* where it's to */ 
 285         struct arc 
*outchain
;   /* *from's outs chain or free chain */ 
 286 #       define  freechain       outchain 
 287         struct arc 
*inchain
;    /* *to's ins chain */ 
 288         struct arc 
*colorchain
; /* color's arc chain */ 
 291 struct arcbatch 
{               /* for bulk allocation of arcs */ 
 292         struct arcbatch 
*next
; 
 294         struct arc a
[ABSIZE
]; 
 299 #               define  FREESTATE       (-1) 
 300         char flag
;              /* marks special states */ 
 301         int nins
;               /* number of inarcs */ 
 302         struct arc 
*ins
;        /* chain of inarcs */ 
 303         int nouts
;              /* number of outarcs */ 
 304         struct arc 
*outs
;       /* chain of outarcs */ 
 305         struct arc 
*free
;       /* chain of free arcs */ 
 306         struct state 
*tmp
;      /* temporary for traversal algorithms */ 
 307         struct state 
*next
;     /* chain for traversing all */ 
 308         struct state 
*prev
;     /* back chain */ 
 309         struct arcbatch oas
;    /* first arcbatch, avoid malloc in easy case */ 
 310         int noas
;               /* number of arcs used in first arcbatch */ 
 314         struct state 
*pre
;      /* pre-initial state */ 
 315         struct state 
*init
;     /* initial state */ 
 316         struct state 
*final
;    /* final state */ 
 317         struct state 
*post
;     /* post-final state */ 
 318         int nstates
;            /* for numbering states */ 
 319         struct state 
*states
;   /* state-chain header */ 
 320         struct state 
*slast
;    /* tail of the chain */ 
 321         struct state 
*free
;     /* free list */ 
 322         struct colormap 
*cm
;    /* the color map */ 
 323         color bos
[2];           /* colors, if any, assigned to BOS and BOL */ 
 324         color eos
[2];           /* colors, if any, assigned to EOS and EOL */ 
 325         struct vars 
*v
;         /* simplifies compile error reporting */ 
 326         struct nfa 
*parent
;     /* parent NFA, if any */ 
 332  * definitions for compacted NFA 
 335         color co
;               /* COLORLESS is list terminator */ 
 336         int to
;                 /* state number */ 
 340         int nstates
;            /* number of states */ 
 341         int ncolors
;            /* number of colors */ 
 343 #               define  HASLACONS       01      /* uses lookahead constraints */ 
 344         int pre
;                /* setup state number */ 
 345         int post
;               /* teardown state number */ 
 346         color bos
[2];           /* colors, if any, assigned to BOS and BOL */ 
 347         color eos
[2];           /* colors, if any, assigned to EOS and EOL */ 
 348         struct carc 
**states
;   /* vector of pointers to outarc lists */ 
 349         struct carc 
*arcs
;      /* the area for the lists */ 
 351 #define ZAPCNFA(cnfa)   ((cnfa).nstates = 0) 
 352 #define NULLCNFA(cnfa)  ((cnfa).nstates == 0) 
 360         char op
;                /* '|', '.' (concat), 'b' (backref), '(', '=' */ 
 362 #               define  LONGER  01      /* prefers longer match */ 
 363 #               define  SHORTER 02      /* prefers shorter match */ 
 364 #               define  MIXED   04      /* mixed preference below */ 
 365 #               define  CAP     010     /* capturing parens below */ 
 366 #               define  BACKR   020     /* back reference below */ 
 367 #               define  INUSE   0100    /* in use in final tree */ 
 368 #               define  LOCAL   03      /* bits which may not propagate up */ 
 369 #               define  LMIX(f) ((f)<<2)        /* LONGER -> MIXED */ 
 370 #               define  SMIX(f) ((f)<<1)        /* SHORTER -> MIXED */ 
 371 #               define  UP(f)   (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED)) 
 372 #               define  MESSY(f)        ((f)&(MIXED|CAP|BACKR)) 
 373 #               define  PREF(f) ((f)&LOCAL) 
 374 #               define  PREF2(f1, f2)   ((PREF(f1) != 0) ? PREF(f1) : PREF(f2)) 
 375 #               define  COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2)) 
 376         short retry
;            /* index into retry memory */ 
 377         int subno
;              /* subexpression number (for 'b' and '(') */ 
 378         short min
;              /* min repetitions, for backref only */ 
 379         short max
;              /* max repetitions, for backref only */ 
 380         struct subre 
*left
;     /* left child, if any (also freelist chain) */ 
 381         struct subre 
*right
;    /* right child, if any */ 
 382         struct state 
*begin
;    /* outarcs from here... */ 
 383         struct state 
*end
;      /* ...ending in inarcs here */ 
 384         struct cnfa cnfa
;       /* compacted NFA, if any */ 
 385         struct subre 
*chain
;    /* for bookkeeping and error cleanup */ 
 391  * table of function pointers for generic manipulation functions 
 392  * A regex_t's re_fns points to one of these. 
 395         VOID 
FUNCPTR(free
, (regex_t 
*)); 
 401  * the insides of a regex_t, hidden behind a void * 
 405 #               define  GUTSMAGIC       0xfed9 
 406         int cflags
;             /* copy of compile flags */ 
 407         long info
;              /* copy of re_info */ 
 408         size_t nsub
;            /* copy of re_nsub */ 
 410         struct cnfa search
;     /* for fast preliminary search */ 
 412         struct colormap cmap
; 
 413         int FUNCPTR(compare
, (CONST chr 
*, CONST chr 
*, size_t)); 
 414         struct subre 
*lacons
;   /* lookahead-constraint vector */ 
 415         int nlacons
;            /* size of lacons */