]>
git.saurik.com Git - wxWidgets.git/blob - src/regex/regguts.h
   2  * Internal interface definitions, etc., for the reg package 
   4  * Copyright (c) 1998, 1999 Henry Spencer.      All rights reserved. 
   6  * Development of this software was funded, in part, by Cray Research Inc., 
   7  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics 
   8  * Corporation, none of whom are responsible for the results.  The author 
  11  * Redistribution and use in source and binary forms -- with or without 
  12  * modification -- are permitted for any purpose, provided that 
  13  * redistributions in source form retain this entire copyright notice and 
  14  * indicate the origin and nature of any modifications. 
  16  * I'd appreciate being given credit for this package in the documentation 
  17  * of software which uses it, but that is not a requirement. 
  19  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, 
  20  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 
  21  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL 
  22  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
  23  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
  24  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
  25  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
  26  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 
  27  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
  28  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
  36  * Environmental customization.  It should not (I hope) be necessary to 
  37  * alter the file you are now reading -- regcustom.h should handle it all, 
  38  * given care here and elsewhere. 
  40 #include "regcustom.h" 
  45  * Things that regcustom.h might override. 
  53 #               define  NDEBUG                                  /* no assertions */ 
  59 // To do: assertion on WinCE 
  65 #define DISCARD void                    /* for throwing values away */ 
  68 #define VS(x)   ((void *)(x))   /* cast something to generic ptr */ 
  71 /* function-pointer declarator */ 
  73 #define FUNCPTR(name, args) (*name) args 
  76 /* memory allocation */ 
  78 #define MALLOC(n)       malloc(n) 
  81 #define REALLOC(p, n)   realloc(VS(p), n) 
  84 #define FREE(p)         free(VS(p)) 
  87 /* want size of a char in bits, and max value in bounded quantifiers */ 
  91 #ifndef _POSIX2_RE_DUP_MAX 
  92 #define _POSIX2_RE_DUP_MAX      255 /* normally from <limits.h> */ 
 104 #define DUPMAX  _POSIX2_RE_DUP_MAX 
 105 #define INFINITY        (DUPMAX+1) 
 107 #define REMAGIC 0xfed7                  /* magic number for main struct */ 
 112  * debugging facilities 
 115 /* FDEBUG does finite-state tracing */ 
 116 #define FDEBUG(arglist) { if (v->eflags®_FTRACE) printf arglist; } 
 117 /* MDEBUG does higher-level tracing */ 
 118 #define MDEBUG(arglist) { if (v->eflags®_MTRACE) printf arglist; } 
 120 #define FDEBUG(arglist) {} 
 121 #define MDEBUG(arglist) {} 
 127  * bitmap manipulation 
 129 #define UBITS   (CHAR_BIT * sizeof(unsigned)) 
 130 #define BSET(uv, sn)    ((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS)) 
 131 #define ISBSET(uv, sn)  ((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS))) 
 136  * We dissect a chr into byts for colormap table indexing.      Here we define 
 137  * a byt, which will be the same as a byte on most machines...  The exact 
 138  * size of a byt is not critical, but about 8 bits is good, and extraction 
 139  * of 8-bit chunks is sometimes especially fast. 
 142 #define BYTBITS 8                               /* bits in a byt */ 
 144 #define BYTTAB  (1<<BYTBITS)    /* size of table with one entry per byt 
 146 #define BYTMASK (BYTTAB-1)              /* bit mask for byt */ 
 147 #define NBYTS   ((CHRBITS+BYTBITS-1)/BYTBITS) 
 148 /* the definition of GETCOLOR(), below, assumes NBYTS <= 4 */ 
 153  * As soon as possible, we map chrs into equivalence classes -- "colors" -- 
 154  * which are of much more manageable number. 
 156 typedef short color
;                    /* colors of characters */ 
 157 typedef int pcolor
;                             /* what color promotes to */ 
 159 #define COLORLESS       (-1)            /* impossible color */ 
 160 #define WHITE           0                       /* default color, parent of all others */ 
 165  * A colormap is a tree -- more precisely, a DAG -- indexed at each level 
 166  * by a byt of the chr, to map the chr to a color efficiently.  Because 
 167  * lower sections of the tree can be shared, it can exploit the usual 
 168  * sparseness of such a mapping table.  The tree is always NBYTS levels 
 169  * deep (in the past it was shallower during construction but was "filled" 
 170  * to full depth at the end of that); areas that are unaltered as yet point 
 171  * to "fill blocks" which are entirely WHITE in color. 
 174 /* the tree itself */ 
 177         color           ccolor
[BYTTAB
]; 
 181         union tree 
*pptr
[BYTTAB
]; 
 185         struct colors colors
; 
 189 #define tcolor  colors.ccolor 
 190 #define tptr    ptrs.pptr 
 192 /* internal per-color structure for the color machinery */ 
 195         uchr            nchrs
;                  /* number of chars of this color */ 
 196         color           sub
;                    /* open subcolor (if any); free chain ptr */ 
 197 #define  NOSUB   COLORLESS 
 198         struct arc 
*arcs
;                       /* color chain */ 
 200 #define  FREECOL 01                             /* currently free */ 
 201 #define  PSEUDO  02                             /* pseudocolor, no real chars */ 
 202 #define  UNUSEDCOLOR(cd) ((cd)->flags&FREECOL) 
 203         union tree 
*block
;                      /* block of solid color, if any */ 
 206 /* the color map itself */ 
 210 #define  CMMAGIC 0x876 
 211         struct vars 
*v
;                         /* for compile error reporting */ 
 212         size_t          ncds
;                   /* number of colordescs */ 
 213         size_t          max
;                    /* highest in use */ 
 214         color           free
;                   /* beginning of free chain (if non-0) */ 
 215         struct colordesc 
*cd
; 
 216 #define  CDEND(cm)       (&(cm)->cd[(cm)->max + 1]) 
 217 #define  NINLINECDS  ((size_t)10) 
 218         struct colordesc cdspace
[NINLINECDS
]; 
 219         union tree      tree
[NBYTS
];    /* tree top, plus fill blocks */ 
 222 /* optimization magic to do fast chr->color mapping */ 
 223 #define B0(c)   ((c) & BYTMASK) 
 224 #define B1(c)   (((c)>>BYTBITS) & BYTMASK) 
 225 #define B2(c)   (((c)>>(2*BYTBITS)) & BYTMASK) 
 226 #define B3(c)   (((c)>>(3*BYTBITS)) & BYTMASK) 
 228 #define GETCOLOR(cm, c) ((cm)->tree->tcolor[B0(c)]) 
 230 /* beware, for NBYTS>1, GETCOLOR() is unsafe -- 2nd arg used repeatedly */ 
 232 #define GETCOLOR(cm, c) ((cm)->tree->tptr[B1(c)]->tcolor[B0(c)]) 
 235 #define GETCOLOR(cm, c) ((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)]) 
 241  * Interface definitions for locale-interface functions in locale.c. 
 242  * Multi-character collating elements (MCCEs) cause most of the trouble. 
 246         int                     nchrs
;                  /* number of chrs */ 
 247         int                     chrspace
;               /* number of chrs possible */ 
 248         chr                
*chrs
;                       /* pointer to vector of chrs */ 
 249         int                     nranges
;                /* number of ranges (chr pairs) */ 
 250         int                     rangespace
;             /* number of chrs possible */ 
 251         chr                
*ranges
;                     /* pointer to vector of chr pairs */ 
 252         int                     nmcces
;                 /* number of MCCEs */ 
 253         int                     mccespace
;              /* number of MCCEs possible */ 
 254         int                     nmccechrs
;              /* number of chrs used for MCCEs */ 
 255         chr                
*mcces
[1];           /* pointers to 0-terminated MCCEs */ 
 256         /* and both batches of chrs are on the end */ 
 259 /* caution:  this value cannot be changed easily */ 
 260 #define MAXMCCE 2                               /* length of longest MCCE */ 
 265  * definitions for NFA internal representation 
 267  * Having a "from" pointer within each arc may seem redundant, but it 
 268  * saves a lot of hassle. 
 277         struct state 
*from
;                     /* where it's from (and contained within) */ 
 278         struct state 
*to
;                       /* where it's to */ 
 279         struct arc 
*outchain
;           /* *from's outs chain or free chain */ 
 280 #define  freechain       outchain 
 281         struct arc 
*inchain
;            /* *to's ins chain */ 
 282         struct arc 
*colorchain
;         /* color's arc chain */ 
 286 {                                                               /* for bulk allocation of arcs */ 
 287         struct arcbatch 
*next
; 
 289         struct arc      a
[ABSIZE
]; 
 295 #define  FREESTATE       (-1) 
 296         char            flag
;                   /* marks special states */ 
 297         int                     nins
;                   /* number of inarcs */ 
 298         struct arc 
*ins
;                        /* chain of inarcs */ 
 299         int                     nouts
;                  /* number of outarcs */ 
 300         struct arc 
*outs
;                       /* chain of outarcs */ 
 301         struct arc 
*free
;                       /* chain of free arcs */ 
 302         struct state 
*tmp
;                      /* temporary for traversal algorithms */ 
 303         struct state 
*next
;                     /* chain for traversing all */ 
 304         struct state 
*prev
;                     /* back chain */ 
 305         struct arcbatch oas
;            /* first arcbatch, avoid malloc in easy 
 307         int                     noas
;                   /* number of arcs used in first arcbatch */ 
 312         struct state 
*pre
;                      /* pre-initial state */ 
 313         struct state 
*init
;                     /* initial state */ 
 314         struct state 
*final
;            /* final state */ 
 315         struct state 
*post
;                     /* post-final state */ 
 316         int                     nstates
;                /* for numbering states */ 
 317         struct state 
*states
;           /* state-chain header */ 
 318         struct state 
*slast
;            /* tail of the chain */ 
 319         struct state 
*free
;                     /* free list */ 
 320         struct colormap 
*cm
;            /* the color map */ 
 321         color           bos
[2];                 /* colors, if any, assigned to BOS and BOL */ 
 322         color           eos
[2];                 /* colors, if any, assigned to EOS and EOL */ 
 323         struct vars 
*v
;                         /* simplifies compile error reporting */ 
 324         struct nfa 
*parent
;                     /* parent NFA, if any */ 
 330  * definitions for compacted NFA 
 334         color           co
;                             /* COLORLESS is list terminator */ 
 335         int                     to
;                             /* state number */ 
 340         int                     nstates
;                /* number of states */ 
 341         int                     ncolors
;                /* number of colors */ 
 343 #define  HASLACONS       01                     /* uses lookahead constraints */ 
 344         int                     pre
;                    /* setup state number */ 
 345         int                     post
;                   /* teardown state number */ 
 346         color           bos
[2];                 /* colors, if any, assigned to BOS and BOL */ 
 347         color           eos
[2];                 /* colors, if any, assigned to EOS and EOL */ 
 348         struct carc 
**states
;           /* vector of pointers to outarc lists */ 
 349         struct carc 
*arcs
;                      /* the area for the lists */ 
 352 #define ZAPCNFA(cnfa)   ((cnfa).nstates = 0) 
 353 #define NULLCNFA(cnfa)  ((cnfa).nstates == 0) 
 362         char            op
;                             /* '|', '.' (concat), 'b' (backref), '(', 
 365 #define  LONGER  01                             /* prefers longer match */ 
 366 #define  SHORTER 02                             /* prefers shorter match */ 
 367 #define  MIXED   04                             /* mixed preference below */ 
 368 #define  CAP 010                                /* capturing parens below */ 
 369 #define  BACKR   020                    /* back reference below */ 
 370 #define  INUSE   0100                   /* in use in final tree */ 
 371 #define  LOCAL   03                             /* bits which may not propagate up */ 
 372 #define  LMIX(f) ((f)<<2)               /* LONGER -> MIXED */ 
 373 #define  SMIX(f) ((f)<<1)               /* SHORTER -> MIXED */ 
 374 #define  UP(f)   (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED)) 
 375 #define  MESSY(f)        ((f)&(MIXED|CAP|BACKR)) 
 376 #define  PREF(f) ((f)&LOCAL) 
 377 #define  PREF2(f1, f2)   ((PREF(f1) != 0) ? PREF(f1) : PREF(f2)) 
 378 #define  COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2)) 
 379         short           retry
;                  /* index into retry memory */ 
 380         int                     subno
;                  /* subexpression number (for 'b' and '(') */ 
 381         short           min
;                    /* min repetitions, for backref only */ 
 382         short           max
;                    /* max repetitions, for backref only */ 
 383         struct subre 
*left
;                     /* left child, if any (also freelist 
 385         struct subre 
*right
;            /* right child, if any */ 
 386         struct state 
*begin
;            /* outarcs from here... */ 
 387         struct state 
*end
;                      /* ...ending in inarcs here */ 
 388         struct cnfa cnfa
;                       /* compacted NFA, if any */ 
 389         struct subre 
*chain
;            /* for bookkeeping and error cleanup */ 
 395  * table of function pointers for generic manipulation functions 
 396  * A regex_t's re_fns points to one of these. 
 400         void            FUNCPTR(free
, (regex_t 
*)); 
 406  * the insides of a regex_t, hidden behind a void * 
 411 #define  GUTSMAGIC       0xfed9 
 412         int                     cflags
;                 /* copy of compile flags */ 
 413         long            info
;                   /* copy of re_info */ 
 414         size_t          nsub
;                   /* copy of re_nsub */ 
 416         struct cnfa search
;                     /* for fast preliminary search */ 
 418         struct colormap cmap
; 
 419         int                     FUNCPTR(compare
, (const chr 
*, const chr 
*, size_t)); 
 420         struct subre 
*lacons
;           /* lookahead-constraint vector */ 
 421         int                     nlacons
;                /* size of lacons */