]>
git.saurik.com Git - wxWidgets.git/blob - src/regex/rege_dfa.c
   3  * This file is #included by regexec.c. 
   5  * Copyright (c) 1998, 1999 Henry Spencer.      All rights reserved. 
   7  * Development of this software was funded, in part, by Cray Research Inc., 
   8  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics 
   9  * Corporation, none of whom are responsible for the results.  The author 
  12  * Redistribution and use in source and binary forms -- with or without 
  13  * modification -- are permitted for any purpose, provided that 
  14  * redistributions in source form retain this entire copyright notice and 
  15  * indicate the origin and nature of any modifications. 
  17  * I'd appreciate being given credit for this package in the documentation 
  18  * of software which uses it, but that is not a requirement. 
  20  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, 
  21  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 
  22  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL 
  23  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
  24  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
  25  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
  26  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
  27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 
  28  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
  29  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
  36  * longest - longest-preferred matching engine 
  38 static chr 
*                                    /* endpoint, or NULL */ 
  39 longest(struct vars 
* v
,                /* used only for debug and exec flags */ 
  41                 chr 
*start
,                             /* where the match should start */ 
  42                 chr 
*stop
,                              /* match must end at or before here */ 
  43                 int *hitstopp
)                  /* record whether hit v->stop, if non-NULL */ 
  46         chr                
*realstop 
= (stop 
== v
->stop
) ? stop 
: stop 
+ 1; 
  52         struct colormap 
*cm 
= d
->cm
; 
  55         css 
= initialize(v
, d
, start
); 
  61         FDEBUG(("+++ startup +++\n")); 
  64                 co 
= d
->cnfa
->bos
[(v
->eflags 
& REG_NOTBOL
) ? 0 : 1]; 
  65                 FDEBUG(("color %ld\n", (long) co
)); 
  69                 co 
= GETCOLOR(cm
, *(cp 
- 1)); 
  70                 FDEBUG(("char %c, color %ld\n", (char) *(cp 
- 1), (long) co
)); 
  72         css 
= miss(v
, d
, css
, co
, cp
, start
); 
  78         if (v
->eflags 
& REG_FTRACE
) 
  81                         FDEBUG(("+++ at c%d +++\n", css 
- d
->ssets
)); 
  82                         co 
= GETCOLOR(cm
, *cp
); 
  83                         FDEBUG(("char %c, color %ld\n", (char) *cp
, (long) co
)); 
  87                                 ss 
= miss(v
, d
, css
, co
, cp 
+ 1, start
); 
  89                                         break;          /* NOTE BREAK OUT */ 
  98                         co 
= GETCOLOR(cm
, *cp
); 
 102                                 ss 
= miss(v
, d
, css
, co
, cp 
+ 1, start
); 
 104                                         break;          /* NOTE BREAK OUT */ 
 112         FDEBUG(("+++ shutdown at c%d +++\n", css 
- d
->ssets
)); 
 113         if (cp 
== v
->stop 
&& stop 
== v
->stop
) 
 115                 if (hitstopp 
!= NULL
) 
 117                 co 
= d
->cnfa
->eos
[(v
->eflags 
& REG_NOTEOL
) ? 0 : 1]; 
 118                 FDEBUG(("color %ld\n", (long) co
)); 
 119                 ss 
= miss(v
, d
, css
, co
, cp
, start
); 
 120                 /* special case:  match ended at eol? */ 
 121                 if (ss 
!= NULL 
&& (ss
->flags 
& POSTSTATE
)) 
 124                         ss
->lastseen 
= cp
;      /* to be tidy */ 
 127         /* find last match, if any */ 
 129         for (ss 
= d
->ssets
, i 
= d
->nssused
; i 
> 0; ss
++, i
--) 
 130                 if ((ss
->flags 
& POSTSTATE
) && post 
!= ss
->lastseen 
&& 
 131                         (post 
== NULL 
|| post 
< ss
->lastseen
)) 
 133         if (post 
!= NULL
)                       /* found one */ 
 140  * shortest - shortest-preferred matching engine 
 142 static chr 
*                                    /* endpoint, or NULL */ 
 143 shortest(struct vars 
* v
, 
 145                  chr 
*start
,                    /* where the match should start */ 
 146                  chr 
*min
,                              /* match must end at or after here */ 
 147                  chr 
*max
,                              /* match must end at or before here */ 
 148                  chr 
**coldp
,                   /* store coldstart pointer here, if 
 150                  int *hitstopp
)                 /* record whether hit v->stop, if non-NULL */ 
 153         chr                
*realmin 
= (min 
== v
->stop
) ? min 
: min 
+ 1; 
 154         chr                
*realmax 
= (max 
== v
->stop
) ? max 
: max 
+ 1; 
 158         struct colormap 
*cm 
= d
->cm
; 
 161         css 
= initialize(v
, d
, start
); 
 163         if (hitstopp 
!= NULL
) 
 167         FDEBUG(("--- startup ---\n")); 
 170                 co 
= d
->cnfa
->bos
[(v
->eflags 
& REG_NOTBOL
) ? 0 : 1]; 
 171                 FDEBUG(("color %ld\n", (long) co
)); 
 175                 co 
= GETCOLOR(cm
, *(cp 
- 1)); 
 176                 FDEBUG(("char %c, color %ld\n", (char) *(cp 
- 1), (long) co
)); 
 178         css 
= miss(v
, d
, css
, co
, cp
, start
); 
 185         if (v
->eflags 
& REG_FTRACE
) 
 188                         FDEBUG(("--- at c%d ---\n", css 
- d
->ssets
)); 
 189                         co 
= GETCOLOR(cm
, *cp
); 
 190                         FDEBUG(("char %c, color %ld\n", (char) *cp
, (long) co
)); 
 194                                 ss 
= miss(v
, d
, css
, co
, cp 
+ 1, start
); 
 196                                         break;          /* NOTE BREAK OUT */ 
 201                         if ((ss
->flags 
& POSTSTATE
) && cp 
>= realmin
) 
 202                                 break;                  /* NOTE BREAK OUT */ 
 207                         co 
= GETCOLOR(cm
, *cp
); 
 211                                 ss 
= miss(v
, d
, css
, co
, cp 
+ 1, start
); 
 213                                         break;          /* NOTE BREAK OUT */ 
 218                         if ((ss
->flags 
& POSTSTATE
) && cp 
>= realmin
) 
 219                                 break;                  /* NOTE BREAK OUT */ 
 225         if (coldp 
!= NULL
)                      /* report last no-progress state set, if 
 227                 *coldp 
= lastcold(v
, d
); 
 229         if ((ss
->flags 
& POSTSTATE
) && cp 
> min
) 
 231                 assert(cp 
>= realmin
); 
 234         else if (cp 
== v
->stop 
&& max 
== v
->stop
) 
 236                 co 
= d
->cnfa
->eos
[(v
->eflags 
& REG_NOTEOL
) ? 0 : 1]; 
 237                 FDEBUG(("color %ld\n", (long) co
)); 
 238                 ss 
= miss(v
, d
, css
, co
, cp
, start
); 
 239                 /* match might have ended at eol */ 
 240                 if ((ss 
== NULL 
|| !(ss
->flags 
& POSTSTATE
)) && hitstopp 
!= NULL
) 
 244         if (ss 
== NULL 
|| !(ss
->flags 
& POSTSTATE
)) 
 251  * lastcold - determine last point at which no progress had been made 
 253 static chr 
*                                    /* endpoint, or NULL */ 
 254 lastcold(struct vars 
* v
, 
 264         for (ss 
= d
->ssets
, i 
= d
->nssused
; i 
> 0; ss
++, i
--) 
 265                 if ((ss
->flags 
& NOPROGRESS
) && nopr 
< ss
->lastseen
) 
 271  * newdfa - set up a fresh DFA 
 274 newdfa(struct vars 
* v
, 
 276            struct colormap 
* cm
, 
 277            struct smalldfa 
* small
) /* preallocated space, may be NULL */ 
 280         size_t          nss 
= cnfa
->nstates 
* 2; 
 281         int                     wordsper 
= (cnfa
->nstates 
+ UBITS 
- 1) / UBITS
; 
 282         struct smalldfa 
*smallwas 
= small
; 
 284         assert(cnfa 
!= NULL 
&& cnfa
->nstates 
!= 0); 
 286         if (nss 
<= FEWSTATES 
&& cnfa
->ncolors 
<= FEWCOLORS
) 
 288                 assert(wordsper 
== 1); 
 291                         small 
= (struct smalldfa 
*) MALLOC( 
 292                                                                                            sizeof(struct smalldfa
)); 
 300                 d
->ssets 
= small
->ssets
; 
 301                 d
->statesarea 
= small
->statesarea
; 
 302                 d
->work 
= &d
->statesarea
[nss
]; 
 303                 d
->outsarea 
= small
->outsarea
; 
 304                 d
->incarea 
= small
->incarea
; 
 306                 d
->mallocarea 
= (smallwas 
== NULL
) ? (char *) small 
: NULL
; 
 310                 d 
= (struct dfa 
*) MALLOC(sizeof(struct dfa
)); 
 316                 d
->ssets 
= (struct sset 
*) MALLOC(nss 
* sizeof(struct sset
)); 
 317                 d
->statesarea 
= (unsigned *) MALLOC((nss 
+ WORK
) * wordsper 
* 
 319                 d
->work 
= &d
->statesarea
[nss 
* wordsper
]; 
 320                 d
->outsarea 
= (struct sset 
**) MALLOC(nss 
* cnfa
->ncolors 
* 
 321                                                                                           sizeof(struct sset 
*)); 
 322                 d
->incarea 
= (struct arcp 
*) MALLOC(nss 
* cnfa
->ncolors 
* 
 323                                                                                         sizeof(struct arcp
)); 
 325                 d
->mallocarea 
= (char *) d
; 
 326                 if (d
->ssets 
== NULL 
|| d
->statesarea 
== NULL 
|| 
 327                         d
->outsarea 
== NULL 
|| d
->incarea 
== NULL
) 
 335         d
->nssets 
= (v
->eflags 
& REG_SMALL
) ? 7 : nss
; 
 337         d
->nstates 
= cnfa
->nstates
; 
 338         d
->ncolors 
= cnfa
->ncolors
; 
 339         d
->wordsper 
= wordsper
; 
 344         d
->search 
= d
->ssets
; 
 346         /* initialization of sset fields is done as needed */ 
 352  * freedfa - free a DFA 
 355 freedfa(struct dfa 
* d
) 
 359                 if (d
->ssets 
!= NULL
) 
 361                 if (d
->statesarea 
!= NULL
) 
 363                 if (d
->outsarea 
!= NULL
) 
 365                 if (d
->incarea 
!= NULL
) 
 369         if (d
->mallocarea 
!= NULL
) 
 374  * hash - construct a hash code for a bitvector 
 376  * There are probably better ways, but they're more expensive. 
 386         for (i 
= 0; i 
< n
; i
++) 
 392  * initialize - hand-craft a cache entry for startup, otherwise get ready 
 395 initialize(struct vars 
* v
,             /* used only for debug flags */ 
 402         /* is previous one still there? */ 
 403         if (d
->nssused 
> 0 && (d
->ssets
[0].flags 
& STARTER
)) 
 406         {                                                       /* no, must (re)build it */ 
 407                 ss 
= getvacant(v
, d
, start
, start
); 
 408                 for (i 
= 0; i 
< d
->wordsper
; i
++) 
 410                 BSET(ss
->states
, d
->cnfa
->pre
); 
 411                 ss
->hash 
= HASH(ss
->states
, d
->wordsper
); 
 412                 assert(d
->cnfa
->pre 
!= d
->cnfa
->post
); 
 413                 ss
->flags 
= STARTER 
| LOCKED 
| NOPROGRESS
; 
 414                 /* lastseen dealt with below */ 
 417         for (i 
= 0; i 
< d
->nssused
; i
++) 
 418                 d
->ssets
[i
].lastseen 
= NULL
; 
 419         ss
->lastseen 
= start
;           /* maybe untrue, but harmless */ 
 426  * miss - handle a cache miss 
 428 static struct sset 
*                    /* NULL if goes to empty set */ 
 429 miss(struct vars 
* v
,                   /* used only for debug flags */ 
 433          chr 
*cp
,                                       /* next chr */ 
 434          chr 
*start
)                            /* where the attempt got started */ 
 436         struct cnfa 
*cnfa 
= d
->cnfa
; 
 447         /* for convenience, we can be called even if it might not be a miss */ 
 448         if (css
->outs
[co
] != NULL
) 
 451                 return css
->outs
[co
]; 
 455         /* first, what set of states would we end up in? */ 
 456         for (i 
= 0; i 
< d
->wordsper
; i
++) 
 461         for (i 
= 0; i 
< d
->nstates
; i
++) 
 462                 if (ISBSET(css
->states
, i
)) 
 463                         for (ca 
= cnfa
->states
[i
] + 1; ca
->co 
!= COLORLESS
; ca
++) 
 466                                         BSET(d
->work
, ca
->to
); 
 468                                         if (ca
->to 
== cnfa
->post
) 
 470                                         if (!cnfa
->states
[ca
->to
]->co
) 
 472                                         FDEBUG(("%d -> %d\n", i
, ca
->to
)); 
 474         dolacons 
= (gotstate
) ? (cnfa
->flags 
& HASLACONS
) : 0; 
 477         {                                                       /* transitive closure */ 
 479                 for (i 
= 0; i 
< d
->nstates
; i
++) 
 480                         if (ISBSET(d
->work
, i
)) 
 481                                 for (ca 
= cnfa
->states
[i
] + 1; ca
->co 
!= COLORLESS
; 
 484                                         if (ca
->co 
<= cnfa
->ncolors
) 
 485                                                 continue;               /* NOTE CONTINUE */ 
 487                                         if (ISBSET(d
->work
, ca
->to
)) 
 488                                                 continue;               /* NOTE CONTINUE */ 
 489                                         if (!lacon(v
, cnfa
, cp
, ca
->co
)) 
 490                                                 continue;               /* NOTE CONTINUE */ 
 491                                         BSET(d
->work
, ca
->to
); 
 493                                         if (ca
->to 
== cnfa
->post
) 
 495                                         if (!cnfa
->states
[ca
->to
]->co
) 
 497                                         FDEBUG(("%d :> %d\n", i
, ca
->to
)); 
 502         h 
= HASH(d
->work
, d
->wordsper
); 
 504         /* next, is that in the cache? */ 
 505         for (p 
= d
->ssets
, i 
= d
->nssused
; i 
> 0; p
++, i
--) 
 506                 if (HIT(h
, d
->work
, p
, d
->wordsper
)) 
 508                         FDEBUG(("cached c%d\n", p 
- d
->ssets
)); 
 509                         break;                          /* NOTE BREAK OUT */ 
 512         {                                                       /* nope, need a new cache entry */ 
 513                 p 
= getvacant(v
, d
, cp
, start
); 
 515                 for (i 
= 0; i 
< d
->wordsper
; i
++) 
 516                         p
->states
[i
] = d
->work
[i
]; 
 518                 p
->flags 
= (ispost
) ? POSTSTATE 
: 0; 
 520                         p
->flags 
|= NOPROGRESS
; 
 521                 /* lastseen to be dealt with by caller */ 
 525         {                                                       /* lookahead conds. always cache miss */ 
 526                 FDEBUG(("c%d[%d]->c%d\n", css 
- d
->ssets
, co
, p 
- d
->ssets
)); 
 528                 css
->inchain
[co
] = p
->ins
; 
 530                 p
->ins
.co 
= (color
) co
; 
 536  * lacon - lookahead-constraint checker for miss() 
 538 static int                                              /* predicate:  constraint satisfied? */ 
 539 lacon(struct vars 
* v
, 
 540           struct cnfa 
* pcnfa
,          /* parent cnfa */ 
 542           pcolor co
)                            /* "color" of the lookahead constraint */ 
 550         n 
= co 
- pcnfa
->ncolors
; 
 551         assert(n 
< v
->g
->nlacons 
&& v
->g
->lacons 
!= NULL
); 
 552         FDEBUG(("=== testing lacon %d\n", n
)); 
 553         sub 
= &v
->g
->lacons
[n
]; 
 554         d 
= newdfa(v
, &sub
->cnfa
, &v
->g
->cmap
, &sd
); 
 560         end 
= longest(v
, d
, cp
, v
->stop
, (int *) NULL
); 
 562         FDEBUG(("=== lacon %d match %d\n", n
, (end 
!= NULL
))); 
 563         return (sub
->subno
) ? (end 
!= NULL
) : (end 
== NULL
); 
 567  * getvacant - get a vacant state set 
 568  * This routine clears out the inarcs and outarcs, but does not otherwise 
 569  * clear the innards of the state set -- that's up to the caller. 
 572 getvacant(struct vars 
* v
,              /* used only for debug flags */ 
 584         ss 
= pickss(v
, d
, cp
, start
); 
 585         assert(!(ss
->flags 
& LOCKED
)); 
 587         /* clear out its inarcs, including self-referential ones */ 
 589         while ((p 
= ap
.ss
) != NULL
) 
 592                 FDEBUG(("zapping c%d's %ld outarc\n", p 
- d
->ssets
, (long) co
)); 
 595                 p
->inchain
[co
].ss 
= NULL
;               /* paranoia */ 
 599         /* take it off the inarc chains of the ssets reached by its outarcs */ 
 600         for (i 
= 0; i 
< d
->ncolors
; i
++) 
 603                 assert(p 
!= ss
);                /* not self-referential */ 
 605                         continue;                       /* NOTE CONTINUE */ 
 606                 FDEBUG(("del outarc %d from c%d's in chn\n", i
, p 
- d
->ssets
)); 
 607                 if (p
->ins
.ss 
== ss 
&& p
->ins
.co 
== i
) 
 608                         p
->ins 
= ss
->inchain
[i
]; 
 611                         assert(p
->ins
.ss 
!= NULL
); 
 612                         for (ap 
= p
->ins
; ap
.ss 
!= NULL 
&& 
 613                                  !(ap
.ss 
== ss 
&& ap
.co 
== i
); 
 614                                  ap 
= ap
.ss
->inchain
[ap
.co
]) 
 616                         assert(ap
.ss 
!= NULL
); 
 617                         lastap
.ss
->inchain
[lastap
.co
] = ss
->inchain
[i
]; 
 620                 ss
->inchain
[i
].ss 
= NULL
; 
 623         /* if ss was a success state, may need to remember location */ 
 624         if ((ss
->flags 
& POSTSTATE
) && ss
->lastseen 
!= d
->lastpost 
&& 
 625                 (d
->lastpost 
== NULL 
|| d
->lastpost 
< ss
->lastseen
)) 
 626                 d
->lastpost 
= ss
->lastseen
; 
 628         /* likewise for a no-progress state */ 
 629         if ((ss
->flags 
& NOPROGRESS
) && ss
->lastseen 
!= d
->lastnopr 
&& 
 630                 (d
->lastnopr 
== NULL 
|| d
->lastnopr 
< ss
->lastseen
)) 
 631                 d
->lastnopr 
= ss
->lastseen
; 
 637  * pickss - pick the next stateset to be used 
 640 pickss(struct vars 
* v
,                 /* used only for debug flags */ 
 650         /* shortcut for cases where cache isn't full */ 
 651         if (d
->nssused 
< d
->nssets
) 
 656                 FDEBUG(("new c%d\n", i
)); 
 658                 ss
->states 
= &d
->statesarea
[i 
* d
->wordsper
]; 
 661                 ss
->ins
.co 
= WHITE
;             /* give it some value */ 
 662                 ss
->outs 
= &d
->outsarea
[i 
* d
->ncolors
]; 
 663                 ss
->inchain 
= &d
->incarea
[i 
* d
->ncolors
]; 
 664                 for (i 
= 0; i 
< d
->ncolors
; i
++) 
 667                         ss
->inchain
[i
].ss 
= NULL
; 
 672         /* look for oldest, or old enough anyway */ 
 673         if (cp 
- start 
> d
->nssets 
* 2 / 3) /* oldest 33% are expendable */ 
 674                 ancient 
= cp 
- d
->nssets 
* 2 / 3; 
 677         for (ss 
= d
->search
, end 
= &d
->ssets
[d
->nssets
]; ss 
< end
; ss
++) 
 678                 if ((ss
->lastseen 
== NULL 
|| ss
->lastseen 
< ancient
) && 
 679                         !(ss
->flags 
& LOCKED
)) 
 682                         FDEBUG(("replacing c%d\n", ss 
- d
->ssets
)); 
 685         for (ss 
= d
->ssets
, end 
= d
->search
; ss 
< end
; ss
++) 
 686                 if ((ss
->lastseen 
== NULL 
|| ss
->lastseen 
< ancient
) && 
 687                         !(ss
->flags 
& LOCKED
)) 
 690                         FDEBUG(("replacing c%d\n", ss 
- d
->ssets
)); 
 694         /* nobody's old enough?!? -- something's really wrong */ 
 695         FDEBUG(("can't find victim to replace!\n"));