3 * This file is #included by regexec.c.
5 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
7 * Development of this software was funded, in part, by Cray Research Inc.,
8 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
9 * Corporation, none of whom are responsible for the results. The author
12 * Redistribution and use in source and binary forms -- with or without
13 * modification -- are permitted for any purpose, provided that
14 * redistributions in source form retain this entire copyright notice and
15 * indicate the origin and nature of any modifications.
17 * I'd appreciate being given credit for this package in the documentation
18 * of software which uses it, but that is not a requirement.
20 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
21 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
22 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 * longest - longest-preferred matching engine
38 static chr
* /* endpoint, or NULL */
39 longest(struct vars
* v
, /* used only for debug and exec flags */
41 chr
*start
, /* where the match should start */
42 chr
*stop
, /* match must end at or before here */
43 int *hitstopp
) /* record whether hit v->stop, if non-NULL */
46 chr
*realstop
= (stop
== v
->stop
) ? stop
: stop
+ 1;
52 struct colormap
*cm
= d
->cm
;
55 css
= initialize(v
, d
, start
);
61 FDEBUG(("+++ startup +++\n"));
64 co
= d
->cnfa
->bos
[(v
->eflags
& REG_NOTBOL
) ? 0 : 1];
65 FDEBUG(("color %ld\n", (long) co
));
69 co
= GETCOLOR(cm
, *(cp
- 1));
70 FDEBUG(("char %c, color %ld\n", (char) *(cp
- 1), (long) co
));
72 css
= miss(v
, d
, css
, co
, cp
, start
);
78 if (v
->eflags
& REG_FTRACE
)
81 FDEBUG(("+++ at c%d +++\n", css
- d
->ssets
));
82 co
= GETCOLOR(cm
, *cp
);
83 FDEBUG(("char %c, color %ld\n", (char) *cp
, (long) co
));
87 ss
= miss(v
, d
, css
, co
, cp
+ 1, start
);
89 break; /* NOTE BREAK OUT */
98 co
= GETCOLOR(cm
, *cp
);
102 ss
= miss(v
, d
, css
, co
, cp
+ 1, start
);
104 break; /* NOTE BREAK OUT */
112 FDEBUG(("+++ shutdown at c%d +++\n", css
- d
->ssets
));
113 if (cp
== v
->stop
&& stop
== v
->stop
)
115 if (hitstopp
!= NULL
)
117 co
= d
->cnfa
->eos
[(v
->eflags
& REG_NOTEOL
) ? 0 : 1];
118 FDEBUG(("color %ld\n", (long) co
));
119 ss
= miss(v
, d
, css
, co
, cp
, start
);
120 /* special case: match ended at eol? */
121 if (ss
!= NULL
&& (ss
->flags
& POSTSTATE
))
124 ss
->lastseen
= cp
; /* to be tidy */
127 /* find last match, if any */
129 for (ss
= d
->ssets
, i
= d
->nssused
; i
> 0; ss
++, i
--)
130 if ((ss
->flags
& POSTSTATE
) && post
!= ss
->lastseen
&&
131 (post
== NULL
|| post
< ss
->lastseen
))
133 if (post
!= NULL
) /* found one */
140 * shortest - shortest-preferred matching engine
142 static chr
* /* endpoint, or NULL */
143 shortest(struct vars
* v
,
145 chr
*start
, /* where the match should start */
146 chr
*min
, /* match must end at or after here */
147 chr
*max
, /* match must end at or before here */
148 chr
**coldp
, /* store coldstart pointer here, if
150 int *hitstopp
) /* record whether hit v->stop, if non-NULL */
153 chr
*realmin
= (min
== v
->stop
) ? min
: min
+ 1;
154 chr
*realmax
= (max
== v
->stop
) ? max
: max
+ 1;
158 struct colormap
*cm
= d
->cm
;
161 css
= initialize(v
, d
, start
);
163 if (hitstopp
!= NULL
)
167 FDEBUG(("--- startup ---\n"));
170 co
= d
->cnfa
->bos
[(v
->eflags
& REG_NOTBOL
) ? 0 : 1];
171 FDEBUG(("color %ld\n", (long) co
));
175 co
= GETCOLOR(cm
, *(cp
- 1));
176 FDEBUG(("char %c, color %ld\n", (char) *(cp
- 1), (long) co
));
178 css
= miss(v
, d
, css
, co
, cp
, start
);
185 if (v
->eflags
& REG_FTRACE
)
188 FDEBUG(("--- at c%d ---\n", css
- d
->ssets
));
189 co
= GETCOLOR(cm
, *cp
);
190 FDEBUG(("char %c, color %ld\n", (char) *cp
, (long) co
));
194 ss
= miss(v
, d
, css
, co
, cp
+ 1, start
);
196 break; /* NOTE BREAK OUT */
201 if ((ss
->flags
& POSTSTATE
) && cp
>= realmin
)
202 break; /* NOTE BREAK OUT */
207 co
= GETCOLOR(cm
, *cp
);
211 ss
= miss(v
, d
, css
, co
, cp
+ 1, start
);
213 break; /* NOTE BREAK OUT */
218 if ((ss
->flags
& POSTSTATE
) && cp
>= realmin
)
219 break; /* NOTE BREAK OUT */
225 if (coldp
!= NULL
) /* report last no-progress state set, if
227 *coldp
= lastcold(v
, d
);
229 if ((ss
->flags
& POSTSTATE
) && cp
> min
)
231 assert(cp
>= realmin
);
234 else if (cp
== v
->stop
&& max
== v
->stop
)
236 co
= d
->cnfa
->eos
[(v
->eflags
& REG_NOTEOL
) ? 0 : 1];
237 FDEBUG(("color %ld\n", (long) co
));
238 ss
= miss(v
, d
, css
, co
, cp
, start
);
239 /* match might have ended at eol */
240 if ((ss
== NULL
|| !(ss
->flags
& POSTSTATE
)) && hitstopp
!= NULL
)
244 if (ss
== NULL
|| !(ss
->flags
& POSTSTATE
))
251 * lastcold - determine last point at which no progress had been made
253 static chr
* /* endpoint, or NULL */
254 lastcold(struct vars
* v
,
264 for (ss
= d
->ssets
, i
= d
->nssused
; i
> 0; ss
++, i
--)
265 if ((ss
->flags
& NOPROGRESS
) && nopr
< ss
->lastseen
)
271 * newdfa - set up a fresh DFA
274 newdfa(struct vars
* v
,
276 struct colormap
* cm
,
277 struct smalldfa
* small
) /* preallocated space, may be NULL */
280 size_t nss
= cnfa
->nstates
* 2;
281 int wordsper
= (cnfa
->nstates
+ UBITS
- 1) / UBITS
;
282 struct smalldfa
*smallwas
= small
;
284 assert(cnfa
!= NULL
&& cnfa
->nstates
!= 0);
286 if (nss
<= FEWSTATES
&& cnfa
->ncolors
<= FEWCOLORS
)
288 assert(wordsper
== 1);
291 small
= (struct smalldfa
*) MALLOC(
292 sizeof(struct smalldfa
));
300 d
->ssets
= small
->ssets
;
301 d
->statesarea
= small
->statesarea
;
302 d
->work
= &d
->statesarea
[nss
];
303 d
->outsarea
= small
->outsarea
;
304 d
->incarea
= small
->incarea
;
306 d
->mallocarea
= (smallwas
== NULL
) ? (char *) small
: NULL
;
310 d
= (struct dfa
*) MALLOC(sizeof(struct dfa
));
316 d
->ssets
= (struct sset
*) MALLOC(nss
* sizeof(struct sset
));
317 d
->statesarea
= (unsigned *) MALLOC((nss
+ WORK
) * wordsper
*
319 d
->work
= &d
->statesarea
[nss
* wordsper
];
320 d
->outsarea
= (struct sset
**) MALLOC(nss
* cnfa
->ncolors
*
321 sizeof(struct sset
*));
322 d
->incarea
= (struct arcp
*) MALLOC(nss
* cnfa
->ncolors
*
323 sizeof(struct arcp
));
325 d
->mallocarea
= (char *) d
;
326 if (d
->ssets
== NULL
|| d
->statesarea
== NULL
||
327 d
->outsarea
== NULL
|| d
->incarea
== NULL
)
335 d
->nssets
= (v
->eflags
& REG_SMALL
) ? 7 : nss
;
337 d
->nstates
= cnfa
->nstates
;
338 d
->ncolors
= cnfa
->ncolors
;
339 d
->wordsper
= wordsper
;
344 d
->search
= d
->ssets
;
346 /* initialization of sset fields is done as needed */
352 * freedfa - free a DFA
355 freedfa(struct dfa
* d
)
359 if (d
->ssets
!= NULL
)
361 if (d
->statesarea
!= NULL
)
363 if (d
->outsarea
!= NULL
)
365 if (d
->incarea
!= NULL
)
369 if (d
->mallocarea
!= NULL
)
374 * hash - construct a hash code for a bitvector
376 * There are probably better ways, but they're more expensive.
386 for (i
= 0; i
< n
; i
++)
392 * initialize - hand-craft a cache entry for startup, otherwise get ready
395 initialize(struct vars
* v
, /* used only for debug flags */
402 /* is previous one still there? */
403 if (d
->nssused
> 0 && (d
->ssets
[0].flags
& STARTER
))
406 { /* no, must (re)build it */
407 ss
= getvacant(v
, d
, start
, start
);
408 for (i
= 0; i
< d
->wordsper
; i
++)
410 BSET(ss
->states
, d
->cnfa
->pre
);
411 ss
->hash
= HASH(ss
->states
, d
->wordsper
);
412 assert(d
->cnfa
->pre
!= d
->cnfa
->post
);
413 ss
->flags
= STARTER
| LOCKED
| NOPROGRESS
;
414 /* lastseen dealt with below */
417 for (i
= 0; i
< d
->nssused
; i
++)
418 d
->ssets
[i
].lastseen
= NULL
;
419 ss
->lastseen
= start
; /* maybe untrue, but harmless */
426 * miss - handle a cache miss
428 static struct sset
* /* NULL if goes to empty set */
429 miss(struct vars
* v
, /* used only for debug flags */
433 chr
*cp
, /* next chr */
434 chr
*start
) /* where the attempt got started */
436 struct cnfa
*cnfa
= d
->cnfa
;
447 /* for convenience, we can be called even if it might not be a miss */
448 if (css
->outs
[co
] != NULL
)
451 return css
->outs
[co
];
455 /* first, what set of states would we end up in? */
456 for (i
= 0; i
< d
->wordsper
; i
++)
461 for (i
= 0; i
< d
->nstates
; i
++)
462 if (ISBSET(css
->states
, i
))
463 for (ca
= cnfa
->states
[i
] + 1; ca
->co
!= COLORLESS
; ca
++)
466 BSET(d
->work
, ca
->to
);
468 if (ca
->to
== cnfa
->post
)
470 if (!cnfa
->states
[ca
->to
]->co
)
472 FDEBUG(("%d -> %d\n", i
, ca
->to
));
474 dolacons
= (gotstate
) ? (cnfa
->flags
& HASLACONS
) : 0;
477 { /* transitive closure */
479 for (i
= 0; i
< d
->nstates
; i
++)
480 if (ISBSET(d
->work
, i
))
481 for (ca
= cnfa
->states
[i
] + 1; ca
->co
!= COLORLESS
;
484 if (ca
->co
<= cnfa
->ncolors
)
485 continue; /* NOTE CONTINUE */
487 if (ISBSET(d
->work
, ca
->to
))
488 continue; /* NOTE CONTINUE */
489 if (!lacon(v
, cnfa
, cp
, ca
->co
))
490 continue; /* NOTE CONTINUE */
491 BSET(d
->work
, ca
->to
);
493 if (ca
->to
== cnfa
->post
)
495 if (!cnfa
->states
[ca
->to
]->co
)
497 FDEBUG(("%d :> %d\n", i
, ca
->to
));
502 h
= HASH(d
->work
, d
->wordsper
);
504 /* next, is that in the cache? */
505 for (p
= d
->ssets
, i
= d
->nssused
; i
> 0; p
++, i
--)
506 if (HIT(h
, d
->work
, p
, d
->wordsper
))
508 FDEBUG(("cached c%d\n", p
- d
->ssets
));
509 break; /* NOTE BREAK OUT */
512 { /* nope, need a new cache entry */
513 p
= getvacant(v
, d
, cp
, start
);
515 for (i
= 0; i
< d
->wordsper
; i
++)
516 p
->states
[i
] = d
->work
[i
];
518 p
->flags
= (ispost
) ? POSTSTATE
: 0;
520 p
->flags
|= NOPROGRESS
;
521 /* lastseen to be dealt with by caller */
525 { /* lookahead conds. always cache miss */
526 FDEBUG(("c%d[%d]->c%d\n", css
- d
->ssets
, co
, p
- d
->ssets
));
528 css
->inchain
[co
] = p
->ins
;
530 p
->ins
.co
= (color
) co
;
536 * lacon - lookahead-constraint checker for miss()
538 static int /* predicate: constraint satisfied? */
539 lacon(struct vars
* v
,
540 struct cnfa
* pcnfa
, /* parent cnfa */
542 pcolor co
) /* "color" of the lookahead constraint */
550 n
= co
- pcnfa
->ncolors
;
551 assert(n
< v
->g
->nlacons
&& v
->g
->lacons
!= NULL
);
552 FDEBUG(("=== testing lacon %d\n", n
));
553 sub
= &v
->g
->lacons
[n
];
554 d
= newdfa(v
, &sub
->cnfa
, &v
->g
->cmap
, &sd
);
560 end
= longest(v
, d
, cp
, v
->stop
, (int *) NULL
);
562 FDEBUG(("=== lacon %d match %d\n", n
, (end
!= NULL
)));
563 return (sub
->subno
) ? (end
!= NULL
) : (end
== NULL
);
567 * getvacant - get a vacant state set
568 * This routine clears out the inarcs and outarcs, but does not otherwise
569 * clear the innards of the state set -- that's up to the caller.
572 getvacant(struct vars
* v
, /* used only for debug flags */
584 ss
= pickss(v
, d
, cp
, start
);
585 assert(!(ss
->flags
& LOCKED
));
587 /* clear out its inarcs, including self-referential ones */
589 while ((p
= ap
.ss
) != NULL
)
592 FDEBUG(("zapping c%d's %ld outarc\n", p
- d
->ssets
, (long) co
));
595 p
->inchain
[co
].ss
= NULL
; /* paranoia */
599 /* take it off the inarc chains of the ssets reached by its outarcs */
600 for (i
= 0; i
< d
->ncolors
; i
++)
603 assert(p
!= ss
); /* not self-referential */
605 continue; /* NOTE CONTINUE */
606 FDEBUG(("del outarc %d from c%d's in chn\n", i
, p
- d
->ssets
));
607 if (p
->ins
.ss
== ss
&& p
->ins
.co
== i
)
608 p
->ins
= ss
->inchain
[i
];
611 assert(p
->ins
.ss
!= NULL
);
612 for (ap
= p
->ins
; ap
.ss
!= NULL
&&
613 !(ap
.ss
== ss
&& ap
.co
== i
);
614 ap
= ap
.ss
->inchain
[ap
.co
])
616 assert(ap
.ss
!= NULL
);
617 lastap
.ss
->inchain
[lastap
.co
] = ss
->inchain
[i
];
620 ss
->inchain
[i
].ss
= NULL
;
623 /* if ss was a success state, may need to remember location */
624 if ((ss
->flags
& POSTSTATE
) && ss
->lastseen
!= d
->lastpost
&&
625 (d
->lastpost
== NULL
|| d
->lastpost
< ss
->lastseen
))
626 d
->lastpost
= ss
->lastseen
;
628 /* likewise for a no-progress state */
629 if ((ss
->flags
& NOPROGRESS
) && ss
->lastseen
!= d
->lastnopr
&&
630 (d
->lastnopr
== NULL
|| d
->lastnopr
< ss
->lastseen
))
631 d
->lastnopr
= ss
->lastseen
;
637 * pickss - pick the next stateset to be used
640 pickss(struct vars
* v
, /* used only for debug flags */
650 /* shortcut for cases where cache isn't full */
651 if (d
->nssused
< d
->nssets
)
656 FDEBUG(("new c%d\n", i
));
658 ss
->states
= &d
->statesarea
[i
* d
->wordsper
];
661 ss
->ins
.co
= WHITE
; /* give it some value */
662 ss
->outs
= &d
->outsarea
[i
* d
->ncolors
];
663 ss
->inchain
= &d
->incarea
[i
* d
->ncolors
];
664 for (i
= 0; i
< d
->ncolors
; i
++)
667 ss
->inchain
[i
].ss
= NULL
;
672 /* look for oldest, or old enough anyway */
673 if (cp
- start
> d
->nssets
* 2 / 3) /* oldest 33% are expendable */
674 ancient
= cp
- d
->nssets
* 2 / 3;
677 for (ss
= d
->search
, end
= &d
->ssets
[d
->nssets
]; ss
< end
; ss
++)
678 if ((ss
->lastseen
== NULL
|| ss
->lastseen
< ancient
) &&
679 !(ss
->flags
& LOCKED
))
682 FDEBUG(("replacing c%d\n", ss
- d
->ssets
));
685 for (ss
= d
->ssets
, end
= d
->search
; ss
< end
; ss
++)
686 if ((ss
->lastseen
== NULL
|| ss
->lastseen
< ancient
) &&
687 !(ss
->flags
& LOCKED
))
690 FDEBUG(("replacing c%d\n", ss
- d
->ssets
));
694 /* nobody's old enough?!? -- something's really wrong */
695 FDEBUG(("can't find victim to replace!\n"));