]>
git.saurik.com Git - wxWidgets.git/blob - src/regex/rege_dfa.c
   3  * This file is #included by regexec.c. 
   5  * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved. 
   7  * Development of this software was funded, in part, by Cray Research Inc., 
   8  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics 
   9  * Corporation, none of whom are responsible for the results.  The author 
  12  * Redistribution and use in source and binary forms -- with or without 
  13  * modification -- are permitted for any purpose, provided that 
  14  * redistributions in source form retain this entire copyright notice and 
  15  * indicate the origin and nature of any modifications. 
  17  * I'd appreciate being given credit for this package in the documentation 
  18  * of software which uses it, but that is not a requirement. 
  20  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, 
  21  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 
  22  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL 
  23  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
  24  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
  25  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
  26  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
  27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 
  28  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
  29  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
  34  - longest - longest-preferred matching engine 
  35  ^ static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *); 
  37 static chr 
*                    /* endpoint, or NULL */ 
  38 longest(v
, d
, start
, stop
, hitstopp
) 
  39 struct vars 
*v
;                 /* used only for debug and exec flags */ 
  41 chr 
*start
;                     /* where the match should start */ 
  42 chr 
*stop
;                      /* match must end at or before here */ 
  43 int *hitstopp
;                  /* record whether hit v->stop, if non-NULL */ 
  46         chr 
*realstop 
= (stop 
== v
->stop
) ? stop 
: stop 
+ 1; 
  52         struct colormap 
*cm 
= d
->cm
; 
  55         css 
= initialize(v
, d
, start
); 
  61         FDEBUG(("+++ startup +++\n")); 
  63                 co 
= d
->cnfa
->bos
[(v
->eflags
®_NOTBOL
) ? 0 : 1]; 
  64                 FDEBUG(("color %ld\n", (long)co
)); 
  66                 co 
= GETCOLOR(cm
, *(cp 
- 1)); 
  67                 FDEBUG(("char %c, color %ld\n", (char)*(cp
-1), (long)co
)); 
  69         css 
= miss(v
, d
, css
, co
, cp
, start
); 
  75         if (v
->eflags
®_FTRACE
) 
  76                 while (cp 
< realstop
) { 
  77                         FDEBUG(("+++ at c%d +++\n", css 
- d
->ssets
)); 
  78                         co 
= GETCOLOR(cm
, *cp
); 
  79                         FDEBUG(("char %c, color %ld\n", (char)*cp
, (long)co
)); 
  82                                 ss 
= miss(v
, d
, css
, co
, cp
+1, start
); 
  84                                         break;  /* NOTE BREAK OUT */ 
  91                 while (cp 
< realstop
) { 
  92                         co 
= GETCOLOR(cm
, *cp
); 
  95                                 ss 
= miss(v
, d
, css
, co
, cp
+1, start
); 
  97                                         break;  /* NOTE BREAK OUT */ 
 105         FDEBUG(("+++ shutdown at c%d +++\n", css 
- d
->ssets
)); 
 106         if (cp 
== v
->stop 
&& stop 
== v
->stop
) { 
 107                 if (hitstopp 
!= NULL
) 
 109                 co 
= d
->cnfa
->eos
[(v
->eflags
®_NOTEOL
) ? 0 : 1]; 
 110                 FDEBUG(("color %ld\n", (long)co
)); 
 111                 ss 
= miss(v
, d
, css
, co
, cp
, start
); 
 112                 /* special case:  match ended at eol? */ 
 113                 if (ss 
!= NULL 
&& (ss
->flags
&POSTSTATE
)) 
 116                         ss
->lastseen 
= cp
;      /* to be tidy */ 
 119         /* find last match, if any */ 
 121         for (ss 
= d
->ssets
, i 
= d
->nssused
; i 
> 0; ss
++, i
--) 
 122                 if ((ss
->flags
&POSTSTATE
) && post 
!= ss
->lastseen 
&& 
 123                                         (post 
== NULL 
|| post 
< ss
->lastseen
)) 
 125         if (post 
!= NULL
)               /* found one */ 
 132  - shortest - shortest-preferred matching engine 
 133  ^ static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, 
 136 static chr 
*                    /* endpoint, or NULL */ 
 137 shortest(v
, d
, start
, min
, max
, coldp
, hitstopp
) 
 140 chr 
*start
;                     /* where the match should start */ 
 141 chr 
*min
;                       /* match must end at or after here */ 
 142 chr 
*max
;                       /* match must end at or before here */ 
 143 chr 
**coldp
;                    /* store coldstart pointer here, if nonNULL */ 
 144 int *hitstopp
;                  /* record whether hit v->stop, if non-NULL */ 
 147         chr 
*realmin 
= (min 
== v
->stop
) ? min 
: min 
+ 1; 
 148         chr 
*realmax 
= (max 
== v
->stop
) ? max 
: max 
+ 1; 
 152         struct colormap 
*cm 
= d
->cm
; 
 155         css 
= initialize(v
, d
, start
); 
 157         if (hitstopp 
!= NULL
) 
 161         FDEBUG(("--- startup ---\n")); 
 162         if (cp 
== v
->start
) { 
 163                 co 
= d
->cnfa
->bos
[(v
->eflags
®_NOTBOL
) ? 0 : 1]; 
 164                 FDEBUG(("color %ld\n", (long)co
)); 
 166                 co 
= GETCOLOR(cm
, *(cp 
- 1)); 
 167                 FDEBUG(("char %c, color %ld\n", (char)*(cp
-1), (long)co
)); 
 169         css 
= miss(v
, d
, css
, co
, cp
, start
); 
 176         if (v
->eflags
®_FTRACE
) 
 177                 while (cp 
< realmax
) { 
 178                         FDEBUG(("--- at c%d ---\n", css 
- d
->ssets
)); 
 179                         co 
= GETCOLOR(cm
, *cp
); 
 180                         FDEBUG(("char %c, color %ld\n", (char)*cp
, (long)co
)); 
 183                                 ss 
= miss(v
, d
, css
, co
, cp
+1, start
); 
 185                                         break;  /* NOTE BREAK OUT */ 
 190                         if ((ss
->flags
&POSTSTATE
) && cp 
>= realmin
) 
 191                                 break;          /* NOTE BREAK OUT */ 
 194                 while (cp 
< realmax
) { 
 195                         co 
= GETCOLOR(cm
, *cp
); 
 198                                 ss 
= miss(v
, d
, css
, co
, cp
+1, start
); 
 200                                         break;  /* NOTE BREAK OUT */ 
 205                         if ((ss
->flags
&POSTSTATE
) && cp 
>= realmin
) 
 206                                 break;          /* NOTE BREAK OUT */ 
 212         if (coldp 
!= NULL
)      /* report last no-progress state set, if any */ 
 213                 *coldp 
= lastcold(v
, d
); 
 215         if ((ss
->flags
&POSTSTATE
) && cp 
> min
) { 
 216                 assert(cp 
>= realmin
); 
 218         } else if (cp 
== v
->stop 
&& max 
== v
->stop
) { 
 219                 co 
= d
->cnfa
->eos
[(v
->eflags
®_NOTEOL
) ? 0 : 1]; 
 220                 FDEBUG(("color %ld\n", (long)co
)); 
 221                 ss 
= miss(v
, d
, css
, co
, cp
, start
); 
 222                 /* match might have ended at eol */ 
 223                 if ((ss 
== NULL 
|| !(ss
->flags
&POSTSTATE
)) && hitstopp 
!= NULL
) 
 227         if (ss 
== NULL 
|| !(ss
->flags
&POSTSTATE
)) 
 234  - lastcold - determine last point at which no progress had been made 
 235  ^ static chr *lastcold(struct vars *, struct dfa *); 
 237 static chr 
*                    /* endpoint, or NULL */ 
 249         for (ss 
= d
->ssets
, i 
= d
->nssused
; i 
> 0; ss
++, i
--) 
 250                 if ((ss
->flags
&NOPROGRESS
) && nopr 
< ss
->lastseen
) 
 256  - newdfa - set up a fresh DFA 
 257  ^ static struct dfa *newdfa(struct vars *, struct cnfa *, 
 258  ^      struct colormap *, struct smalldfa *); 
 261 /* FIXME Required for CW 8 on Mac since it's not in limits.h */ 
 263 #define __CHAR_BIT__ 8 
 267 newdfa(v
, cnfa
, cm
, small
) 
 271 struct smalldfa 
*small
;         /* preallocated space, may be NULL */ 
 274         size_t nss 
= cnfa
->nstates 
* 2; 
 275         int wordsper 
= (cnfa
->nstates 
+ UBITS 
- 1) / UBITS
; 
 276         struct smalldfa 
*smallwas 
= small
; 
 278         assert(cnfa 
!= NULL 
&& cnfa
->nstates 
!= 0); 
 280         if (nss 
<= FEWSTATES 
&& cnfa
->ncolors 
<= FEWCOLORS
) { 
 281                 assert(wordsper 
== 1); 
 283                         small 
= (struct smalldfa 
*)MALLOC( 
 284                                                 sizeof(struct smalldfa
)); 
 291                 d
->ssets 
= small
->ssets
; 
 292                 d
->statesarea 
= small
->statesarea
; 
 293                 d
->work 
= &d
->statesarea
[nss
]; 
 294                 d
->outsarea 
= small
->outsarea
; 
 295                 d
->incarea 
= small
->incarea
; 
 297                 d
->mallocarea 
= (smallwas 
== NULL
) ? (char *)small 
: NULL
; 
 299                 d 
= (struct dfa 
*)MALLOC(sizeof(struct dfa
)); 
 304                 d
->ssets 
= (struct sset 
*)MALLOC(nss 
* sizeof(struct sset
)); 
 305                 d
->statesarea 
= (unsigned *)MALLOC((nss
+WORK
) * wordsper 
* 
 307                 d
->work 
= &d
->statesarea
[nss 
* wordsper
]; 
 308                 d
->outsarea 
= (struct sset 
**)MALLOC(nss 
* cnfa
->ncolors 
* 
 309                                                         sizeof(struct sset 
*)); 
 310                 d
->incarea 
= (struct arcp 
*)MALLOC(nss 
* cnfa
->ncolors 
* 
 311                                                         sizeof(struct arcp
)); 
 313                 d
->mallocarea 
= (char *)d
; 
 314                 if (d
->ssets 
== NULL 
|| d
->statesarea 
== NULL 
|| 
 315                                 d
->outsarea 
== NULL 
|| d
->incarea 
== NULL
) { 
 322         d
->nssets 
= (v
->eflags
®_SMALL
) ? 7 : nss
; 
 324         d
->nstates 
= cnfa
->nstates
; 
 325         d
->ncolors 
= cnfa
->ncolors
; 
 326         d
->wordsper 
= wordsper
; 
 331         d
->search 
= d
->ssets
; 
 333         /* initialization of sset fields is done as needed */ 
 339  - freedfa - free a DFA 
 340  ^ static VOID freedfa(struct dfa *); 
 346         if (d
->cptsmalloced
) { 
 347                 if (d
->ssets 
!= NULL
) 
 349                 if (d
->statesarea 
!= NULL
) 
 351                 if (d
->outsarea 
!= NULL
) 
 353                 if (d
->incarea 
!= NULL
) 
 357         if (d
->mallocarea 
!= NULL
) 
 362  - hash - construct a hash code for a bitvector 
 363  * There are probably better ways, but they're more expensive. 
 364  ^ static unsigned hash(unsigned *, int); 
 375         for (i 
= 0; i 
< n
; i
++) 
 381  - initialize - hand-craft a cache entry for startup, otherwise get ready 
 382  ^ static struct sset *initialize(struct vars *, struct dfa *, chr *); 
 385 initialize(v
, d
, start
) 
 386 struct vars 
*v
;                 /* used only for debug flags */ 
 393         /* is previous one still there? */ 
 394         if (d
->nssused 
> 0 && (d
->ssets
[0].flags
&STARTER
)) 
 396         else {                          /* no, must (re)build it */ 
 397                 ss 
= getvacant(v
, d
, start
, start
); 
 398                 for (i 
= 0; i 
< d
->wordsper
; i
++) 
 400                 BSET(ss
->states
, d
->cnfa
->pre
); 
 401                 ss
->hash 
= HASH(ss
->states
, d
->wordsper
); 
 402                 assert(d
->cnfa
->pre 
!= d
->cnfa
->post
); 
 403                 ss
->flags 
= STARTER
|LOCKED
|NOPROGRESS
; 
 404                 /* lastseen dealt with below */ 
 407         for (i 
= 0; i 
< d
->nssused
; i
++) 
 408                 d
->ssets
[i
].lastseen 
= NULL
; 
 409         ss
->lastseen 
= start
;           /* maybe untrue, but harmless */ 
 416  - miss - handle a cache miss 
 417  ^ static struct sset *miss(struct vars *, struct dfa *, struct sset *, 
 418  ^      pcolor, chr *, chr *); 
 420 static struct sset 
*            /* NULL if goes to empty set */ 
 421 miss(v
, d
, css
, co
, cp
, start
) 
 422 struct vars 
*v
;                 /* used only for debug flags */ 
 426 chr 
*cp
;                        /* next chr */ 
 427 chr 
*start
;                     /* where the attempt got started */ 
 429         struct cnfa 
*cnfa 
= d
->cnfa
; 
 440         /* for convenience, we can be called even if it might not be a miss */ 
 441         if (css
->outs
[co
] != NULL
) { 
 443                 return css
->outs
[co
]; 
 447         /* first, what set of states would we end up in? */ 
 448         for (i 
= 0; i 
< d
->wordsper
; i
++) 
 453         for (i 
= 0; i 
< d
->nstates
; i
++) 
 454                 if (ISBSET(css
->states
, i
)) 
 455                         for (ca 
= cnfa
->states
[i
]+1; ca
->co 
!= COLORLESS
; ca
++) 
 457                                         BSET(d
->work
, ca
->to
); 
 459                                         if (ca
->to 
== cnfa
->post
) 
 461                                         if (!cnfa
->states
[ca
->to
]->co
) 
 463                                         FDEBUG(("%d -> %d\n", i
, ca
->to
)); 
 465         dolacons 
= (gotstate
) ? (cnfa
->flags
&HASLACONS
) : 0; 
 467         while (dolacons
) {              /* transitive closure */ 
 469                 for (i 
= 0; i 
< d
->nstates
; i
++) 
 470                         if (ISBSET(d
->work
, i
)) 
 471                                 for (ca 
= cnfa
->states
[i
]+1; ca
->co 
!= COLORLESS
; 
 473                                         if (ca
->co 
<= cnfa
->ncolors
) 
 474                                                 continue; /* NOTE CONTINUE */ 
 476                                         if (ISBSET(d
->work
, ca
->to
)) 
 477                                                 continue; /* NOTE CONTINUE */ 
 478                                         if (!lacon(v
, cnfa
, cp
, ca
->co
)) 
 479                                                 continue; /* NOTE CONTINUE */ 
 480                                         BSET(d
->work
, ca
->to
); 
 482                                         if (ca
->to 
== cnfa
->post
) 
 484                                         if (!cnfa
->states
[ca
->to
]->co
) 
 486                                         FDEBUG(("%d :> %d\n", i
, ca
->to
)); 
 491         h 
= HASH(d
->work
, d
->wordsper
); 
 493         /* next, is that in the cache? */ 
 494         for (p 
= d
->ssets
, i 
= d
->nssused
; i 
> 0; p
++, i
--) 
 495                 if (HIT(h
, d
->work
, p
, d
->wordsper
)) { 
 496                         FDEBUG(("cached c%d\n", p 
- d
->ssets
)); 
 497                         break;                  /* NOTE BREAK OUT */ 
 499         if (i 
== 0) {           /* nope, need a new cache entry */ 
 500                 p 
= getvacant(v
, d
, cp
, start
); 
 502                 for (i 
= 0; i 
< d
->wordsper
; i
++) 
 503                         p
->states
[i
] = d
->work
[i
]; 
 505                 p
->flags 
= (ispost
) ? POSTSTATE 
: 0; 
 507                         p
->flags 
|= NOPROGRESS
; 
 508                 /* lastseen to be dealt with by caller */ 
 511         if (!sawlacons
) {               /* lookahead conds. always cache miss */ 
 512                 FDEBUG(("c%d[%d]->c%d\n", css 
- d
->ssets
, co
, p 
- d
->ssets
)); 
 514                 css
->inchain
[co
] = p
->ins
; 
 516                 p
->ins
.co 
= (color
)co
; 
 522  - lacon - lookahead-constraint checker for miss() 
 523  ^ static int lacon(struct vars *, struct cnfa *, chr *, pcolor); 
 525 static int                      /* predicate:  constraint satisfied? */ 
 526 lacon(v
, pcnfa
, cp
, co
) 
 528 struct cnfa 
*pcnfa
;             /* parent cnfa */ 
 530 pcolor co
;                      /* "color" of the lookahead constraint */ 
 538         n 
= co 
- pcnfa
->ncolors
; 
 539         assert(n 
< v
->g
->nlacons 
&& v
->g
->lacons 
!= NULL
); 
 540         FDEBUG(("=== testing lacon %d\n", n
)); 
 541         sub 
= &v
->g
->lacons
[n
]; 
 542         d 
= newdfa(v
, &sub
->cnfa
, &v
->g
->cmap
, &sd
); 
 547         end 
= longest(v
, d
, cp
, v
->stop
, (int *)NULL
); 
 549         FDEBUG(("=== lacon %d match %d\n", n
, (end 
!= NULL
))); 
 550         return (sub
->subno
) ? (end 
!= NULL
) : (end 
== NULL
); 
 554  - getvacant - get a vacant state set 
 555  * This routine clears out the inarcs and outarcs, but does not otherwise 
 556  * clear the innards of the state set -- that's up to the caller. 
 557  ^ static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *); 
 560 getvacant(v
, d
, cp
, start
) 
 561 struct vars 
*v
;                 /* used only for debug flags */ 
 572     lastap
.ss 
= NULL
; lastap
.co 
= 0; // WX: suppress dummy gcc warnings 
 573         ss 
= pickss(v
, d
, cp
, start
); 
 574         assert(!(ss
->flags
&LOCKED
)); 
 576         /* clear out its inarcs, including self-referential ones */ 
 578         while ((p 
= ap
.ss
) != NULL
) { 
 580                 FDEBUG(("zapping c%d's %ld outarc\n", p 
- d
->ssets
, (long)co
)); 
 583                 p
->inchain
[co
].ss 
= NULL
;       /* paranoia */ 
 587         /* take it off the inarc chains of the ssets reached by its outarcs */ 
 588         for (i 
= 0; i 
< d
->ncolors
; i
++) { 
 590                 assert(p 
!= ss
);                /* not self-referential */ 
 592                         continue;               /* NOTE CONTINUE */ 
 593                 FDEBUG(("del outarc %d from c%d's in chn\n", i
, p 
- d
->ssets
)); 
 594                 if (p
->ins
.ss 
== ss 
&& p
->ins
.co 
== i
) 
 595                         p
->ins 
= ss
->inchain
[i
]; 
 597                         assert(p
->ins
.ss 
!= NULL
); 
 598                         for (ap 
= p
->ins
; ap
.ss 
!= NULL 
&& 
 599                                                 !(ap
.ss 
== ss 
&& ap
.co 
== i
); 
 600                                                 ap 
= ap
.ss
->inchain
[ap
.co
]) 
 602                         assert(ap
.ss 
!= NULL
); 
 603                         lastap
.ss
->inchain
[lastap
.co
] = ss
->inchain
[i
]; 
 606                 ss
->inchain
[i
].ss 
= NULL
; 
 609         /* if ss was a success state, may need to remember location */ 
 610         if ((ss
->flags
&POSTSTATE
) && ss
->lastseen 
!= d
->lastpost 
&& 
 611                         (d
->lastpost 
== NULL 
|| d
->lastpost 
< ss
->lastseen
)) 
 612                 d
->lastpost 
= ss
->lastseen
; 
 614         /* likewise for a no-progress state */ 
 615         if ((ss
->flags
&NOPROGRESS
) && ss
->lastseen 
!= d
->lastnopr 
&& 
 616                         (d
->lastnopr 
== NULL 
|| d
->lastnopr 
< ss
->lastseen
)) 
 617                 d
->lastnopr 
= ss
->lastseen
; 
 623  - pickss - pick the next stateset to be used 
 624  ^ static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *); 
 627 pickss(v
, d
, cp
, start
) 
 628 struct vars 
*v
;                 /* used only for debug flags */ 
 638         /* shortcut for cases where cache isn't full */ 
 639         if (d
->nssused 
< d
->nssets
) { 
 643                 FDEBUG(("new c%d\n", i
)); 
 645                 ss
->states 
= &d
->statesarea
[i 
* d
->wordsper
]; 
 648                 ss
->ins
.co 
= WHITE
;             /* give it some value */ 
 649                 ss
->outs 
= &d
->outsarea
[i 
* d
->ncolors
]; 
 650                 ss
->inchain 
= &d
->incarea
[i 
* d
->ncolors
]; 
 651                 for (i 
= 0; i 
< d
->ncolors
; i
++) { 
 653                         ss
->inchain
[i
].ss 
= NULL
; 
 658         /* look for oldest, or old enough anyway */ 
 659         if (cp 
- start 
> d
->nssets
*2/3)         /* oldest 33% are expendable */ 
 660                 ancient 
= cp 
- d
->nssets
*2/3; 
 663         for (ss 
= d
->search
, end 
= &d
->ssets
[d
->nssets
]; ss 
< end
; ss
++) 
 664                 if ((ss
->lastseen 
== NULL 
|| ss
->lastseen 
< ancient
) && 
 665                                                         !(ss
->flags
&LOCKED
)) { 
 667                         FDEBUG(("replacing c%d\n", ss 
- d
->ssets
)); 
 670         for (ss 
= d
->ssets
, end 
= d
->search
; ss 
< end
; ss
++) 
 671                 if ((ss
->lastseen 
== NULL 
|| ss
->lastseen 
< ancient
) && 
 672                                                         !(ss
->flags
&LOCKED
)) { 
 674                         FDEBUG(("replacing c%d\n", ss 
- d
->ssets
)); 
 678         /* nobody's old enough?!? -- something's really wrong */ 
 679         FDEBUG(("can't find victim to replace!\n"));