]>
git.saurik.com Git - wxWidgets.git/blob - src/regex/rege_dfa.c
3 * This file is #included by regexec.c.
5 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
7 * Development of this software was funded, in part, by Cray Research Inc.,
8 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
9 * Corporation, none of whom are responsible for the results. The author
12 * Redistribution and use in source and binary forms -- with or without
13 * modification -- are permitted for any purpose, provided that
14 * redistributions in source form retain this entire copyright notice and
15 * indicate the origin and nature of any modifications.
17 * I'd appreciate being given credit for this package in the documentation
18 * of software which uses it, but that is not a requirement.
20 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
21 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
22 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 - longest - longest-preferred matching engine
35 ^ static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *);
37 static chr
* /* endpoint, or NULL */
38 longest(v
, d
, start
, stop
, hitstopp
)
39 struct vars
*v
; /* used only for debug and exec flags */
41 chr
*start
; /* where the match should start */
42 chr
*stop
; /* match must end at or before here */
43 int *hitstopp
; /* record whether hit v->stop, if non-NULL */
46 chr
*realstop
= (stop
== v
->stop
) ? stop
: stop
+ 1;
52 struct colormap
*cm
= d
->cm
;
55 css
= initialize(v
, d
, start
);
61 FDEBUG(("+++ startup +++\n"));
63 co
= d
->cnfa
->bos
[(v
->eflags
®_NOTBOL
) ? 0 : 1];
64 FDEBUG(("color %ld\n", (long)co
));
66 co
= GETCOLOR(cm
, *(cp
- 1));
67 FDEBUG(("char %c, color %ld\n", (char)*(cp
-1), (long)co
));
69 css
= miss(v
, d
, css
, co
, cp
, start
);
75 if (v
->eflags
®_FTRACE
)
76 while (cp
< realstop
) {
77 FDEBUG(("+++ at c%d +++\n", css
- d
->ssets
));
78 co
= GETCOLOR(cm
, *cp
);
79 FDEBUG(("char %c, color %ld\n", (char)*cp
, (long)co
));
82 ss
= miss(v
, d
, css
, co
, cp
+1, start
);
84 break; /* NOTE BREAK OUT */
91 while (cp
< realstop
) {
92 co
= GETCOLOR(cm
, *cp
);
95 ss
= miss(v
, d
, css
, co
, cp
+1, start
);
97 break; /* NOTE BREAK OUT */
105 FDEBUG(("+++ shutdown at c%d +++\n", css
- d
->ssets
));
106 if (cp
== v
->stop
&& stop
== v
->stop
) {
107 if (hitstopp
!= NULL
)
109 co
= d
->cnfa
->eos
[(v
->eflags
®_NOTEOL
) ? 0 : 1];
110 FDEBUG(("color %ld\n", (long)co
));
111 ss
= miss(v
, d
, css
, co
, cp
, start
);
112 /* special case: match ended at eol? */
113 if (ss
!= NULL
&& (ss
->flags
&POSTSTATE
))
116 ss
->lastseen
= cp
; /* to be tidy */
119 /* find last match, if any */
121 for (ss
= d
->ssets
, i
= d
->nssused
; i
> 0; ss
++, i
--)
122 if ((ss
->flags
&POSTSTATE
) && post
!= ss
->lastseen
&&
123 (post
== NULL
|| post
< ss
->lastseen
))
125 if (post
!= NULL
) /* found one */
132 - shortest - shortest-preferred matching engine
133 ^ static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *,
136 static chr
* /* endpoint, or NULL */
137 shortest(v
, d
, start
, min
, max
, coldp
, hitstopp
)
140 chr
*start
; /* where the match should start */
141 chr
*min
; /* match must end at or after here */
142 chr
*max
; /* match must end at or before here */
143 chr
**coldp
; /* store coldstart pointer here, if nonNULL */
144 int *hitstopp
; /* record whether hit v->stop, if non-NULL */
147 chr
*realmin
= (min
== v
->stop
) ? min
: min
+ 1;
148 chr
*realmax
= (max
== v
->stop
) ? max
: max
+ 1;
152 struct colormap
*cm
= d
->cm
;
155 css
= initialize(v
, d
, start
);
157 if (hitstopp
!= NULL
)
161 FDEBUG(("--- startup ---\n"));
162 if (cp
== v
->start
) {
163 co
= d
->cnfa
->bos
[(v
->eflags
®_NOTBOL
) ? 0 : 1];
164 FDEBUG(("color %ld\n", (long)co
));
166 co
= GETCOLOR(cm
, *(cp
- 1));
167 FDEBUG(("char %c, color %ld\n", (char)*(cp
-1), (long)co
));
169 css
= miss(v
, d
, css
, co
, cp
, start
);
176 if (v
->eflags
®_FTRACE
)
177 while (cp
< realmax
) {
178 FDEBUG(("--- at c%d ---\n", css
- d
->ssets
));
179 co
= GETCOLOR(cm
, *cp
);
180 FDEBUG(("char %c, color %ld\n", (char)*cp
, (long)co
));
183 ss
= miss(v
, d
, css
, co
, cp
+1, start
);
185 break; /* NOTE BREAK OUT */
190 if ((ss
->flags
&POSTSTATE
) && cp
>= realmin
)
191 break; /* NOTE BREAK OUT */
194 while (cp
< realmax
) {
195 co
= GETCOLOR(cm
, *cp
);
198 ss
= miss(v
, d
, css
, co
, cp
+1, start
);
200 break; /* NOTE BREAK OUT */
205 if ((ss
->flags
&POSTSTATE
) && cp
>= realmin
)
206 break; /* NOTE BREAK OUT */
212 if (coldp
!= NULL
) /* report last no-progress state set, if any */
213 *coldp
= lastcold(v
, d
);
215 if ((ss
->flags
&POSTSTATE
) && cp
> min
) {
216 assert(cp
>= realmin
);
218 } else if (cp
== v
->stop
&& max
== v
->stop
) {
219 co
= d
->cnfa
->eos
[(v
->eflags
®_NOTEOL
) ? 0 : 1];
220 FDEBUG(("color %ld\n", (long)co
));
221 ss
= miss(v
, d
, css
, co
, cp
, start
);
222 /* match might have ended at eol */
223 if ((ss
== NULL
|| !(ss
->flags
&POSTSTATE
)) && hitstopp
!= NULL
)
227 if (ss
== NULL
|| !(ss
->flags
&POSTSTATE
))
234 - lastcold - determine last point at which no progress had been made
235 ^ static chr *lastcold(struct vars *, struct dfa *);
237 static chr
* /* endpoint, or NULL */
249 for (ss
= d
->ssets
, i
= d
->nssused
; i
> 0; ss
++, i
--)
250 if ((ss
->flags
&NOPROGRESS
) && nopr
< ss
->lastseen
)
256 - newdfa - set up a fresh DFA
257 ^ static struct dfa *newdfa(struct vars *, struct cnfa *,
258 ^ struct colormap *, struct smalldfa *);
261 newdfa(v
, cnfa
, cm
, small
)
265 struct smalldfa
*small
; /* preallocated space, may be NULL */
268 size_t nss
= cnfa
->nstates
* 2;
269 int wordsper
= (cnfa
->nstates
+ UBITS
- 1) / UBITS
;
270 struct smalldfa
*smallwas
= small
;
272 assert(cnfa
!= NULL
&& cnfa
->nstates
!= 0);
274 if (nss
<= FEWSTATES
&& cnfa
->ncolors
<= FEWCOLORS
) {
275 assert(wordsper
== 1);
277 small
= (struct smalldfa
*)MALLOC(
278 sizeof(struct smalldfa
));
285 d
->ssets
= small
->ssets
;
286 d
->statesarea
= small
->statesarea
;
287 d
->work
= &d
->statesarea
[nss
];
288 d
->outsarea
= small
->outsarea
;
289 d
->incarea
= small
->incarea
;
291 d
->mallocarea
= (smallwas
== NULL
) ? (char *)small
: NULL
;
293 d
= (struct dfa
*)MALLOC(sizeof(struct dfa
));
298 d
->ssets
= (struct sset
*)MALLOC(nss
* sizeof(struct sset
));
299 d
->statesarea
= (unsigned *)MALLOC((nss
+WORK
) * wordsper
*
301 d
->work
= &d
->statesarea
[nss
* wordsper
];
302 d
->outsarea
= (struct sset
**)MALLOC(nss
* cnfa
->ncolors
*
303 sizeof(struct sset
*));
304 d
->incarea
= (struct arcp
*)MALLOC(nss
* cnfa
->ncolors
*
305 sizeof(struct arcp
));
307 d
->mallocarea
= (char *)d
;
308 if (d
->ssets
== NULL
|| d
->statesarea
== NULL
||
309 d
->outsarea
== NULL
|| d
->incarea
== NULL
) {
316 d
->nssets
= (v
->eflags
®_SMALL
) ? 7 : nss
;
318 d
->nstates
= cnfa
->nstates
;
319 d
->ncolors
= cnfa
->ncolors
;
320 d
->wordsper
= wordsper
;
325 d
->search
= d
->ssets
;
327 /* initialization of sset fields is done as needed */
333 - freedfa - free a DFA
334 ^ static VOID freedfa(struct dfa *);
340 if (d
->cptsmalloced
) {
341 if (d
->ssets
!= NULL
)
343 if (d
->statesarea
!= NULL
)
345 if (d
->outsarea
!= NULL
)
347 if (d
->incarea
!= NULL
)
351 if (d
->mallocarea
!= NULL
)
356 - hash - construct a hash code for a bitvector
357 * There are probably better ways, but they're more expensive.
358 ^ static unsigned hash(unsigned *, int);
369 for (i
= 0; i
< n
; i
++)
375 - initialize - hand-craft a cache entry for startup, otherwise get ready
376 ^ static struct sset *initialize(struct vars *, struct dfa *, chr *);
379 initialize(v
, d
, start
)
380 struct vars
*v
; /* used only for debug flags */
387 /* is previous one still there? */
388 if (d
->nssused
> 0 && (d
->ssets
[0].flags
&STARTER
))
390 else { /* no, must (re)build it */
391 ss
= getvacant(v
, d
, start
, start
);
392 for (i
= 0; i
< d
->wordsper
; i
++)
394 BSET(ss
->states
, d
->cnfa
->pre
);
395 ss
->hash
= HASH(ss
->states
, d
->wordsper
);
396 assert(d
->cnfa
->pre
!= d
->cnfa
->post
);
397 ss
->flags
= STARTER
|LOCKED
|NOPROGRESS
;
398 /* lastseen dealt with below */
401 for (i
= 0; i
< d
->nssused
; i
++)
402 d
->ssets
[i
].lastseen
= NULL
;
403 ss
->lastseen
= start
; /* maybe untrue, but harmless */
410 - miss - handle a cache miss
411 ^ static struct sset *miss(struct vars *, struct dfa *, struct sset *,
412 ^ pcolor, chr *, chr *);
414 static struct sset
* /* NULL if goes to empty set */
415 miss(v
, d
, css
, co
, cp
, start
)
416 struct vars
*v
; /* used only for debug flags */
420 chr
*cp
; /* next chr */
421 chr
*start
; /* where the attempt got started */
423 struct cnfa
*cnfa
= d
->cnfa
;
434 /* for convenience, we can be called even if it might not be a miss */
435 if (css
->outs
[co
] != NULL
) {
437 return css
->outs
[co
];
441 /* first, what set of states would we end up in? */
442 for (i
= 0; i
< d
->wordsper
; i
++)
447 for (i
= 0; i
< d
->nstates
; i
++)
448 if (ISBSET(css
->states
, i
))
449 for (ca
= cnfa
->states
[i
]+1; ca
->co
!= COLORLESS
; ca
++)
451 BSET(d
->work
, ca
->to
);
453 if (ca
->to
== cnfa
->post
)
455 if (!cnfa
->states
[ca
->to
]->co
)
457 FDEBUG(("%d -> %d\n", i
, ca
->to
));
459 dolacons
= (gotstate
) ? (cnfa
->flags
&HASLACONS
) : 0;
461 while (dolacons
) { /* transitive closure */
463 for (i
= 0; i
< d
->nstates
; i
++)
464 if (ISBSET(d
->work
, i
))
465 for (ca
= cnfa
->states
[i
]+1; ca
->co
!= COLORLESS
;
467 if (ca
->co
<= cnfa
->ncolors
)
468 continue; /* NOTE CONTINUE */
470 if (ISBSET(d
->work
, ca
->to
))
471 continue; /* NOTE CONTINUE */
472 if (!lacon(v
, cnfa
, cp
, ca
->co
))
473 continue; /* NOTE CONTINUE */
474 BSET(d
->work
, ca
->to
);
476 if (ca
->to
== cnfa
->post
)
478 if (!cnfa
->states
[ca
->to
]->co
)
480 FDEBUG(("%d :> %d\n", i
, ca
->to
));
485 h
= HASH(d
->work
, d
->wordsper
);
487 /* next, is that in the cache? */
488 for (p
= d
->ssets
, i
= d
->nssused
; i
> 0; p
++, i
--)
489 if (HIT(h
, d
->work
, p
, d
->wordsper
)) {
490 FDEBUG(("cached c%d\n", p
- d
->ssets
));
491 break; /* NOTE BREAK OUT */
493 if (i
== 0) { /* nope, need a new cache entry */
494 p
= getvacant(v
, d
, cp
, start
);
496 for (i
= 0; i
< d
->wordsper
; i
++)
497 p
->states
[i
] = d
->work
[i
];
499 p
->flags
= (ispost
) ? POSTSTATE
: 0;
501 p
->flags
|= NOPROGRESS
;
502 /* lastseen to be dealt with by caller */
505 if (!sawlacons
) { /* lookahead conds. always cache miss */
506 FDEBUG(("c%d[%d]->c%d\n", css
- d
->ssets
, co
, p
- d
->ssets
));
508 css
->inchain
[co
] = p
->ins
;
510 p
->ins
.co
= (color
)co
;
516 - lacon - lookahead-constraint checker for miss()
517 ^ static int lacon(struct vars *, struct cnfa *, chr *, pcolor);
519 static int /* predicate: constraint satisfied? */
520 lacon(v
, pcnfa
, cp
, co
)
522 struct cnfa
*pcnfa
; /* parent cnfa */
524 pcolor co
; /* "color" of the lookahead constraint */
532 n
= co
- pcnfa
->ncolors
;
533 assert(n
< v
->g
->nlacons
&& v
->g
->lacons
!= NULL
);
534 FDEBUG(("=== testing lacon %d\n", n
));
535 sub
= &v
->g
->lacons
[n
];
536 d
= newdfa(v
, &sub
->cnfa
, &v
->g
->cmap
, &sd
);
541 end
= longest(v
, d
, cp
, v
->stop
, (int *)NULL
);
543 FDEBUG(("=== lacon %d match %d\n", n
, (end
!= NULL
)));
544 return (sub
->subno
) ? (end
!= NULL
) : (end
== NULL
);
548 - getvacant - get a vacant state set
549 * This routine clears out the inarcs and outarcs, but does not otherwise
550 * clear the innards of the state set -- that's up to the caller.
551 ^ static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *);
554 getvacant(v
, d
, cp
, start
)
555 struct vars
*v
; /* used only for debug flags */
567 ss
= pickss(v
, d
, cp
, start
);
568 assert(!(ss
->flags
&LOCKED
));
570 /* clear out its inarcs, including self-referential ones */
572 while ((p
= ap
.ss
) != NULL
) {
574 FDEBUG(("zapping c%d's %ld outarc\n", p
- d
->ssets
, (long)co
));
577 p
->inchain
[co
].ss
= NULL
; /* paranoia */
581 /* take it off the inarc chains of the ssets reached by its outarcs */
582 for (i
= 0; i
< d
->ncolors
; i
++) {
584 assert(p
!= ss
); /* not self-referential */
586 continue; /* NOTE CONTINUE */
587 FDEBUG(("del outarc %d from c%d's in chn\n", i
, p
- d
->ssets
));
588 if (p
->ins
.ss
== ss
&& p
->ins
.co
== i
)
589 p
->ins
= ss
->inchain
[i
];
591 assert(p
->ins
.ss
!= NULL
);
592 for (ap
= p
->ins
; ap
.ss
!= NULL
&&
593 !(ap
.ss
== ss
&& ap
.co
== i
);
594 ap
= ap
.ss
->inchain
[ap
.co
])
596 assert(ap
.ss
!= NULL
);
597 lastap
.ss
->inchain
[lastap
.co
] = ss
->inchain
[i
];
600 ss
->inchain
[i
].ss
= NULL
;
603 /* if ss was a success state, may need to remember location */
604 if ((ss
->flags
&POSTSTATE
) && ss
->lastseen
!= d
->lastpost
&&
605 (d
->lastpost
== NULL
|| d
->lastpost
< ss
->lastseen
))
606 d
->lastpost
= ss
->lastseen
;
608 /* likewise for a no-progress state */
609 if ((ss
->flags
&NOPROGRESS
) && ss
->lastseen
!= d
->lastnopr
&&
610 (d
->lastnopr
== NULL
|| d
->lastnopr
< ss
->lastseen
))
611 d
->lastnopr
= ss
->lastseen
;
617 - pickss - pick the next stateset to be used
618 ^ static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *);
621 pickss(v
, d
, cp
, start
)
622 struct vars
*v
; /* used only for debug flags */
632 /* shortcut for cases where cache isn't full */
633 if (d
->nssused
< d
->nssets
) {
637 FDEBUG(("new c%d\n", i
));
639 ss
->states
= &d
->statesarea
[i
* d
->wordsper
];
642 ss
->ins
.co
= WHITE
; /* give it some value */
643 ss
->outs
= &d
->outsarea
[i
* d
->ncolors
];
644 ss
->inchain
= &d
->incarea
[i
* d
->ncolors
];
645 for (i
= 0; i
< d
->ncolors
; i
++) {
647 ss
->inchain
[i
].ss
= NULL
;
652 /* look for oldest, or old enough anyway */
653 if (cp
- start
> d
->nssets
*2/3) /* oldest 33% are expendable */
654 ancient
= cp
- d
->nssets
*2/3;
657 for (ss
= d
->search
, end
= &d
->ssets
[d
->nssets
]; ss
< end
; ss
++)
658 if ((ss
->lastseen
== NULL
|| ss
->lastseen
< ancient
) &&
659 !(ss
->flags
&LOCKED
)) {
661 FDEBUG(("replacing c%d\n", ss
- d
->ssets
));
664 for (ss
= d
->ssets
, end
= d
->search
; ss
< end
; ss
++)
665 if ((ss
->lastseen
== NULL
|| ss
->lastseen
< ancient
) &&
666 !(ss
->flags
&LOCKED
)) {
668 FDEBUG(("replacing c%d\n", ss
- d
->ssets
));
672 /* nobody's old enough?!? -- something's really wrong */
673 FDEBUG(("can't find victim to replace!\n"));