]>
git.saurik.com Git - wxWidgets.git/blob - src/regex/regguts.h
   2  * Internal interface definitions, etc., for the reg package 
   4  * Copyright (c) 1998, 1999 Henry Spencer.      All rights reserved. 
   6  * Development of this software was funded, in part, by Cray Research Inc., 
   7  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics 
   8  * Corporation, none of whom are responsible for the results.  The author 
  11  * Redistribution and use in source and binary forms -- with or without 
  12  * modification -- are permitted for any purpose, provided that 
  13  * redistributions in source form retain this entire copyright notice and 
  14  * indicate the origin and nature of any modifications. 
  16  * I'd appreciate being given credit for this package in the documentation 
  17  * of software which uses it, but that is not a requirement. 
  19  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, 
  20  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 
  21  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL 
  22  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
  23  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
  24  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
  25  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
  26  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 
  27  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
  28  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
  36  * Environmental customization.  It should not (I hope) be necessary to 
  37  * alter the file you are now reading -- regcustom.h should handle it all, 
  38  * given care here and elsewhere. 
  40 #include "regcustom.h" 
  45  * Things that regcustom.h might override. 
  52 #               define  NDEBUG                                  /* no assertions */ 
  60 #define DISCARD void                    /* for throwing values away */ 
  63 #define VS(x)   ((void *)(x))   /* cast something to generic ptr */ 
  66 /* function-pointer declarator */ 
  68 #define FUNCPTR(name, args) (*name) args 
  71 /* memory allocation */ 
  73 #define MALLOC(n)       malloc(n) 
  76 #define REALLOC(p, n)   realloc(VS(p), n) 
  79 #define FREE(p)         free(VS(p)) 
  82 /* want size of a char in bits, and max value in bounded quantifiers */ 
  86 #ifndef _POSIX2_RE_DUP_MAX 
  87 #define _POSIX2_RE_DUP_MAX      255 /* normally from <limits.h> */ 
  99 #define DUPMAX  _POSIX2_RE_DUP_MAX 
 100 #define INFINITY        (DUPMAX+1) 
 102 #define REMAGIC 0xfed7                  /* magic number for main struct */ 
 107  * debugging facilities 
 110 /* FDEBUG does finite-state tracing */ 
 111 #define FDEBUG(arglist) { if (v->eflags®_FTRACE) printf arglist; } 
 112 /* MDEBUG does higher-level tracing */ 
 113 #define MDEBUG(arglist) { if (v->eflags®_MTRACE) printf arglist; } 
 115 #define FDEBUG(arglist) {} 
 116 #define MDEBUG(arglist) {} 
 122  * bitmap manipulation 
 124 #define UBITS   (CHAR_BIT * sizeof(unsigned)) 
 125 #define BSET(uv, sn)    ((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS)) 
 126 #define ISBSET(uv, sn)  ((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS))) 
 131  * We dissect a chr into byts for colormap table indexing.      Here we define 
 132  * a byt, which will be the same as a byte on most machines...  The exact 
 133  * size of a byt is not critical, but about 8 bits is good, and extraction 
 134  * of 8-bit chunks is sometimes especially fast. 
 137 #define BYTBITS 8                               /* bits in a byt */ 
 139 #define BYTTAB  (1<<BYTBITS)    /* size of table with one entry per byt 
 141 #define BYTMASK (BYTTAB-1)              /* bit mask for byt */ 
 142 #define NBYTS   ((CHRBITS+BYTBITS-1)/BYTBITS) 
 143 /* the definition of GETCOLOR(), below, assumes NBYTS <= 4 */ 
 148  * As soon as possible, we map chrs into equivalence classes -- "colors" -- 
 149  * which are of much more manageable number. 
 151 typedef short color
;                    /* colors of characters */ 
 152 typedef int pcolor
;                             /* what color promotes to */ 
 154 #define COLORLESS       (-1)            /* impossible color */ 
 155 #define WHITE           0                       /* default color, parent of all others */ 
 160  * A colormap is a tree -- more precisely, a DAG -- indexed at each level 
 161  * by a byt of the chr, to map the chr to a color efficiently.  Because 
 162  * lower sections of the tree can be shared, it can exploit the usual 
 163  * sparseness of such a mapping table.  The tree is always NBYTS levels 
 164  * deep (in the past it was shallower during construction but was "filled" 
 165  * to full depth at the end of that); areas that are unaltered as yet point 
 166  * to "fill blocks" which are entirely WHITE in color. 
 169 /* the tree itself */ 
 172         color           ccolor
[BYTTAB
]; 
 176         union tree 
*pptr
[BYTTAB
]; 
 180         struct colors colors
; 
 184 #define tcolor  colors.ccolor 
 185 #define tptr    ptrs.pptr 
 187 /* internal per-color structure for the color machinery */ 
 190         uchr            nchrs
;                  /* number of chars of this color */ 
 191         color           sub
;                    /* open subcolor (if any); free chain ptr */ 
 192 #define  NOSUB   COLORLESS 
 193         struct arc 
*arcs
;                       /* color chain */ 
 195 #define  FREECOL 01                             /* currently free */ 
 196 #define  PSEUDO  02                             /* pseudocolor, no real chars */ 
 197 #define  UNUSEDCOLOR(cd) ((cd)->flags&FREECOL) 
 198         union tree 
*block
;                      /* block of solid color, if any */ 
 201 /* the color map itself */ 
 205 #define  CMMAGIC 0x876 
 206         struct vars 
*v
;                         /* for compile error reporting */ 
 207         size_t          ncds
;                   /* number of colordescs */ 
 208         size_t          max
;                    /* highest in use */ 
 209         color           free
;                   /* beginning of free chain (if non-0) */ 
 210         struct colordesc 
*cd
; 
 211 #define  CDEND(cm)       (&(cm)->cd[(cm)->max + 1]) 
 212 #define  NINLINECDS  ((size_t)10) 
 213         struct colordesc cdspace
[NINLINECDS
]; 
 214         union tree      tree
[NBYTS
];    /* tree top, plus fill blocks */ 
 217 /* optimization magic to do fast chr->color mapping */ 
 218 #define B0(c)   ((c) & BYTMASK) 
 219 #define B1(c)   (((c)>>BYTBITS) & BYTMASK) 
 220 #define B2(c)   (((c)>>(2*BYTBITS)) & BYTMASK) 
 221 #define B3(c)   (((c)>>(3*BYTBITS)) & BYTMASK) 
 223 #define GETCOLOR(cm, c) ((cm)->tree->tcolor[B0(c)]) 
 225 /* beware, for NBYTS>1, GETCOLOR() is unsafe -- 2nd arg used repeatedly */ 
 227 #define GETCOLOR(cm, c) ((cm)->tree->tptr[B1(c)]->tcolor[B0(c)]) 
 230 #define GETCOLOR(cm, c) ((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)]) 
 236  * Interface definitions for locale-interface functions in locale.c. 
 237  * Multi-character collating elements (MCCEs) cause most of the trouble. 
 241         int                     nchrs
;                  /* number of chrs */ 
 242         int                     chrspace
;               /* number of chrs possible */ 
 243         chr                
*chrs
;                       /* pointer to vector of chrs */ 
 244         int                     nranges
;                /* number of ranges (chr pairs) */ 
 245         int                     rangespace
;             /* number of chrs possible */ 
 246         chr                
*ranges
;                     /* pointer to vector of chr pairs */ 
 247         int                     nmcces
;                 /* number of MCCEs */ 
 248         int                     mccespace
;              /* number of MCCEs possible */ 
 249         int                     nmccechrs
;              /* number of chrs used for MCCEs */ 
 250         chr                
*mcces
[1];           /* pointers to 0-terminated MCCEs */ 
 251         /* and both batches of chrs are on the end */ 
 254 /* caution:  this value cannot be changed easily */ 
 255 #define MAXMCCE 2                               /* length of longest MCCE */ 
 260  * definitions for NFA internal representation 
 262  * Having a "from" pointer within each arc may seem redundant, but it 
 263  * saves a lot of hassle. 
 272         struct state 
*from
;                     /* where it's from (and contained within) */ 
 273         struct state 
*to
;                       /* where it's to */ 
 274         struct arc 
*outchain
;           /* *from's outs chain or free chain */ 
 275 #define  freechain       outchain 
 276         struct arc 
*inchain
;            /* *to's ins chain */ 
 277         struct arc 
*colorchain
;         /* color's arc chain */ 
 281 {                                                               /* for bulk allocation of arcs */ 
 282         struct arcbatch 
*next
; 
 284         struct arc      a
[ABSIZE
]; 
 290 #define  FREESTATE       (-1) 
 291         char            flag
;                   /* marks special states */ 
 292         int                     nins
;                   /* number of inarcs */ 
 293         struct arc 
*ins
;                        /* chain of inarcs */ 
 294         int                     nouts
;                  /* number of outarcs */ 
 295         struct arc 
*outs
;                       /* chain of outarcs */ 
 296         struct arc 
*free
;                       /* chain of free arcs */ 
 297         struct state 
*tmp
;                      /* temporary for traversal algorithms */ 
 298         struct state 
*next
;                     /* chain for traversing all */ 
 299         struct state 
*prev
;                     /* back chain */ 
 300         struct arcbatch oas
;            /* first arcbatch, avoid malloc in easy 
 302         int                     noas
;                   /* number of arcs used in first arcbatch */ 
 307         struct state 
*pre
;                      /* pre-initial state */ 
 308         struct state 
*init
;                     /* initial state */ 
 309         struct state 
*final
;            /* final state */ 
 310         struct state 
*post
;                     /* post-final state */ 
 311         int                     nstates
;                /* for numbering states */ 
 312         struct state 
*states
;           /* state-chain header */ 
 313         struct state 
*slast
;            /* tail of the chain */ 
 314         struct state 
*free
;                     /* free list */ 
 315         struct colormap 
*cm
;            /* the color map */ 
 316         color           bos
[2];                 /* colors, if any, assigned to BOS and BOL */ 
 317         color           eos
[2];                 /* colors, if any, assigned to EOS and EOL */ 
 318         struct vars 
*v
;                         /* simplifies compile error reporting */ 
 319         struct nfa 
*parent
;                     /* parent NFA, if any */ 
 325  * definitions for compacted NFA 
 329         color           co
;                             /* COLORLESS is list terminator */ 
 330         int                     to
;                             /* state number */ 
 335         int                     nstates
;                /* number of states */ 
 336         int                     ncolors
;                /* number of colors */ 
 338 #define  HASLACONS       01                     /* uses lookahead constraints */ 
 339         int                     pre
;                    /* setup state number */ 
 340         int                     post
;                   /* teardown state number */ 
 341         color           bos
[2];                 /* colors, if any, assigned to BOS and BOL */ 
 342         color           eos
[2];                 /* colors, if any, assigned to EOS and EOL */ 
 343         struct carc 
**states
;           /* vector of pointers to outarc lists */ 
 344         struct carc 
*arcs
;                      /* the area for the lists */ 
 347 #define ZAPCNFA(cnfa)   ((cnfa).nstates = 0) 
 348 #define NULLCNFA(cnfa)  ((cnfa).nstates == 0) 
 357         char            op
;                             /* '|', '.' (concat), 'b' (backref), '(', 
 360 #define  LONGER  01                             /* prefers longer match */ 
 361 #define  SHORTER 02                             /* prefers shorter match */ 
 362 #define  MIXED   04                             /* mixed preference below */ 
 363 #define  CAP 010                                /* capturing parens below */ 
 364 #define  BACKR   020                    /* back reference below */ 
 365 #define  INUSE   0100                   /* in use in final tree */ 
 366 #define  LOCAL   03                             /* bits which may not propagate up */ 
 367 #define  LMIX(f) ((f)<<2)               /* LONGER -> MIXED */ 
 368 #define  SMIX(f) ((f)<<1)               /* SHORTER -> MIXED */ 
 369 #define  UP(f)   (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED)) 
 370 #define  MESSY(f)        ((f)&(MIXED|CAP|BACKR)) 
 371 #define  PREF(f) ((f)&LOCAL) 
 372 #define  PREF2(f1, f2)   ((PREF(f1) != 0) ? PREF(f1) : PREF(f2)) 
 373 #define  COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2)) 
 374         short           retry
;                  /* index into retry memory */ 
 375         int                     subno
;                  /* subexpression number (for 'b' and '(') */ 
 376         short           min
;                    /* min repetitions, for backref only */ 
 377         short           max
;                    /* max repetitions, for backref only */ 
 378         struct subre 
*left
;                     /* left child, if any (also freelist 
 380         struct subre 
*right
;            /* right child, if any */ 
 381         struct state 
*begin
;            /* outarcs from here... */ 
 382         struct state 
*end
;                      /* ...ending in inarcs here */ 
 383         struct cnfa cnfa
;                       /* compacted NFA, if any */ 
 384         struct subre 
*chain
;            /* for bookkeeping and error cleanup */ 
 390  * table of function pointers for generic manipulation functions 
 391  * A regex_t's re_fns points to one of these. 
 395         void            FUNCPTR(free
, (regex_t 
*)); 
 401  * the insides of a regex_t, hidden behind a void * 
 406 #define  GUTSMAGIC       0xfed9 
 407         int                     cflags
;                 /* copy of compile flags */ 
 408         long            info
;                   /* copy of re_info */ 
 409         size_t          nsub
;                   /* copy of re_nsub */ 
 411         struct cnfa search
;                     /* for fast preliminary search */ 
 413         struct colormap cmap
; 
 414         int                     FUNCPTR(compare
, (const chr 
*, const chr 
*, size_t)); 
 415         struct subre 
*lacons
;           /* lookahead-constraint vector */ 
 416         int                     nlacons
;                /* size of lacons */