]>
Commit | Line | Data |
---|---|---|
48bf5def RN |
1 | /* |
2 | * DFA routines | |
3 | * This file is #included by regexec.c. | |
4 | * | |
3ca4086b VS |
5 | * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. |
6 | * | |
48bf5def RN |
7 | * Development of this software was funded, in part, by Cray Research Inc., |
8 | * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics | |
9 | * Corporation, none of whom are responsible for the results. The author | |
3ca4086b VS |
10 | * thanks all of them. |
11 | * | |
48bf5def RN |
12 | * Redistribution and use in source and binary forms -- with or without |
13 | * modification -- are permitted for any purpose, provided that | |
14 | * redistributions in source form retain this entire copyright notice and | |
15 | * indicate the origin and nature of any modifications. | |
3ca4086b | 16 | * |
48bf5def RN |
17 | * I'd appreciate being given credit for this package in the documentation |
18 | * of software which uses it, but that is not a requirement. | |
3ca4086b | 19 | * |
48bf5def RN |
20 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, |
21 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY | |
22 | * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL | |
23 | * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
24 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
25 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; | |
26 | * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | |
27 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | |
28 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF | |
29 | * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
30 | * | |
48bf5def RN |
31 | */ |
32 | ||
33 | /* | |
3ca4086b VS |
34 | - longest - longest-preferred matching engine |
35 | ^ static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *); | |
48bf5def | 36 | */ |
3ca4086b VS |
37 | static chr * /* endpoint, or NULL */ |
38 | longest(v, d, start, stop, hitstopp) | |
39 | struct vars *v; /* used only for debug and exec flags */ | |
40 | struct dfa *d; | |
41 | chr *start; /* where the match should start */ | |
42 | chr *stop; /* match must end at or before here */ | |
43 | int *hitstopp; /* record whether hit v->stop, if non-NULL */ | |
48bf5def | 44 | { |
3ca4086b VS |
45 | chr *cp; |
46 | chr *realstop = (stop == v->stop) ? stop : stop + 1; | |
47 | color co; | |
48bf5def RN |
48 | struct sset *css; |
49 | struct sset *ss; | |
3ca4086b VS |
50 | chr *post; |
51 | int i; | |
48bf5def RN |
52 | struct colormap *cm = d->cm; |
53 | ||
54 | /* initialize */ | |
55 | css = initialize(v, d, start); | |
56 | cp = start; | |
57 | if (hitstopp != NULL) | |
58 | *hitstopp = 0; | |
59 | ||
60 | /* startup */ | |
61 | FDEBUG(("+++ startup +++\n")); | |
3ca4086b VS |
62 | if (cp == v->start) { |
63 | co = d->cnfa->bos[(v->eflags®_NOTBOL) ? 0 : 1]; | |
64 | FDEBUG(("color %ld\n", (long)co)); | |
65 | } else { | |
48bf5def | 66 | co = GETCOLOR(cm, *(cp - 1)); |
3ca4086b | 67 | FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co)); |
48bf5def RN |
68 | } |
69 | css = miss(v, d, css, co, cp, start); | |
70 | if (css == NULL) | |
71 | return NULL; | |
72 | css->lastseen = cp; | |
73 | ||
74 | /* main loop */ | |
3ca4086b VS |
75 | if (v->eflags®_FTRACE) |
76 | while (cp < realstop) { | |
48bf5def RN |
77 | FDEBUG(("+++ at c%d +++\n", css - d->ssets)); |
78 | co = GETCOLOR(cm, *cp); | |
3ca4086b | 79 | FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co)); |
48bf5def | 80 | ss = css->outs[co]; |
3ca4086b VS |
81 | if (ss == NULL) { |
82 | ss = miss(v, d, css, co, cp+1, start); | |
48bf5def | 83 | if (ss == NULL) |
3ca4086b | 84 | break; /* NOTE BREAK OUT */ |
48bf5def RN |
85 | } |
86 | cp++; | |
87 | ss->lastseen = cp; | |
88 | css = ss; | |
89 | } | |
90 | else | |
3ca4086b | 91 | while (cp < realstop) { |
48bf5def RN |
92 | co = GETCOLOR(cm, *cp); |
93 | ss = css->outs[co]; | |
3ca4086b VS |
94 | if (ss == NULL) { |
95 | ss = miss(v, d, css, co, cp+1, start); | |
48bf5def | 96 | if (ss == NULL) |
3ca4086b | 97 | break; /* NOTE BREAK OUT */ |
48bf5def RN |
98 | } |
99 | cp++; | |
100 | ss->lastseen = cp; | |
101 | css = ss; | |
102 | } | |
103 | ||
104 | /* shutdown */ | |
105 | FDEBUG(("+++ shutdown at c%d +++\n", css - d->ssets)); | |
3ca4086b | 106 | if (cp == v->stop && stop == v->stop) { |
48bf5def RN |
107 | if (hitstopp != NULL) |
108 | *hitstopp = 1; | |
3ca4086b VS |
109 | co = d->cnfa->eos[(v->eflags®_NOTEOL) ? 0 : 1]; |
110 | FDEBUG(("color %ld\n", (long)co)); | |
48bf5def RN |
111 | ss = miss(v, d, css, co, cp, start); |
112 | /* special case: match ended at eol? */ | |
3ca4086b | 113 | if (ss != NULL && (ss->flags&POSTSTATE)) |
48bf5def RN |
114 | return cp; |
115 | else if (ss != NULL) | |
116 | ss->lastseen = cp; /* to be tidy */ | |
117 | } | |
118 | ||
119 | /* find last match, if any */ | |
120 | post = d->lastpost; | |
121 | for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--) | |
3ca4086b VS |
122 | if ((ss->flags&POSTSTATE) && post != ss->lastseen && |
123 | (post == NULL || post < ss->lastseen)) | |
48bf5def | 124 | post = ss->lastseen; |
3ca4086b | 125 | if (post != NULL) /* found one */ |
48bf5def RN |
126 | return post - 1; |
127 | ||
128 | return NULL; | |
129 | } | |
130 | ||
131 | /* | |
3ca4086b VS |
132 | - shortest - shortest-preferred matching engine |
133 | ^ static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, | |
134 | ^ chr **, int *); | |
48bf5def | 135 | */ |
3ca4086b VS |
136 | static chr * /* endpoint, or NULL */ |
137 | shortest(v, d, start, min, max, coldp, hitstopp) | |
138 | struct vars *v; | |
139 | struct dfa *d; | |
140 | chr *start; /* where the match should start */ | |
141 | chr *min; /* match must end at or after here */ | |
142 | chr *max; /* match must end at or before here */ | |
143 | chr **coldp; /* store coldstart pointer here, if nonNULL */ | |
144 | int *hitstopp; /* record whether hit v->stop, if non-NULL */ | |
48bf5def | 145 | { |
3ca4086b VS |
146 | chr *cp; |
147 | chr *realmin = (min == v->stop) ? min : min + 1; | |
148 | chr *realmax = (max == v->stop) ? max : max + 1; | |
149 | color co; | |
48bf5def RN |
150 | struct sset *css; |
151 | struct sset *ss; | |
152 | struct colormap *cm = d->cm; | |
153 | ||
154 | /* initialize */ | |
155 | css = initialize(v, d, start); | |
156 | cp = start; | |
157 | if (hitstopp != NULL) | |
158 | *hitstopp = 0; | |
159 | ||
160 | /* startup */ | |
161 | FDEBUG(("--- startup ---\n")); | |
3ca4086b VS |
162 | if (cp == v->start) { |
163 | co = d->cnfa->bos[(v->eflags®_NOTBOL) ? 0 : 1]; | |
164 | FDEBUG(("color %ld\n", (long)co)); | |
165 | } else { | |
48bf5def | 166 | co = GETCOLOR(cm, *(cp - 1)); |
3ca4086b | 167 | FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co)); |
48bf5def RN |
168 | } |
169 | css = miss(v, d, css, co, cp, start); | |
170 | if (css == NULL) | |
171 | return NULL; | |
172 | css->lastseen = cp; | |
173 | ss = css; | |
174 | ||
175 | /* main loop */ | |
3ca4086b VS |
176 | if (v->eflags®_FTRACE) |
177 | while (cp < realmax) { | |
48bf5def RN |
178 | FDEBUG(("--- at c%d ---\n", css - d->ssets)); |
179 | co = GETCOLOR(cm, *cp); | |
3ca4086b | 180 | FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co)); |
48bf5def | 181 | ss = css->outs[co]; |
3ca4086b VS |
182 | if (ss == NULL) { |
183 | ss = miss(v, d, css, co, cp+1, start); | |
48bf5def | 184 | if (ss == NULL) |
3ca4086b | 185 | break; /* NOTE BREAK OUT */ |
48bf5def RN |
186 | } |
187 | cp++; | |
188 | ss->lastseen = cp; | |
189 | css = ss; | |
3ca4086b VS |
190 | if ((ss->flags&POSTSTATE) && cp >= realmin) |
191 | break; /* NOTE BREAK OUT */ | |
48bf5def RN |
192 | } |
193 | else | |
3ca4086b | 194 | while (cp < realmax) { |
48bf5def RN |
195 | co = GETCOLOR(cm, *cp); |
196 | ss = css->outs[co]; | |
3ca4086b VS |
197 | if (ss == NULL) { |
198 | ss = miss(v, d, css, co, cp+1, start); | |
48bf5def | 199 | if (ss == NULL) |
3ca4086b | 200 | break; /* NOTE BREAK OUT */ |
48bf5def RN |
201 | } |
202 | cp++; | |
203 | ss->lastseen = cp; | |
204 | css = ss; | |
3ca4086b VS |
205 | if ((ss->flags&POSTSTATE) && cp >= realmin) |
206 | break; /* NOTE BREAK OUT */ | |
48bf5def RN |
207 | } |
208 | ||
209 | if (ss == NULL) | |
210 | return NULL; | |
211 | ||
3ca4086b | 212 | if (coldp != NULL) /* report last no-progress state set, if any */ |
48bf5def RN |
213 | *coldp = lastcold(v, d); |
214 | ||
3ca4086b | 215 | if ((ss->flags&POSTSTATE) && cp > min) { |
48bf5def RN |
216 | assert(cp >= realmin); |
217 | cp--; | |
3ca4086b VS |
218 | } else if (cp == v->stop && max == v->stop) { |
219 | co = d->cnfa->eos[(v->eflags®_NOTEOL) ? 0 : 1]; | |
220 | FDEBUG(("color %ld\n", (long)co)); | |
48bf5def RN |
221 | ss = miss(v, d, css, co, cp, start); |
222 | /* match might have ended at eol */ | |
3ca4086b | 223 | if ((ss == NULL || !(ss->flags&POSTSTATE)) && hitstopp != NULL) |
48bf5def RN |
224 | *hitstopp = 1; |
225 | } | |
226 | ||
3ca4086b | 227 | if (ss == NULL || !(ss->flags&POSTSTATE)) |
48bf5def RN |
228 | return NULL; |
229 | ||
230 | return cp; | |
231 | } | |
232 | ||
233 | /* | |
3ca4086b VS |
234 | - lastcold - determine last point at which no progress had been made |
235 | ^ static chr *lastcold(struct vars *, struct dfa *); | |
48bf5def | 236 | */ |
3ca4086b VS |
237 | static chr * /* endpoint, or NULL */ |
238 | lastcold(v, d) | |
239 | struct vars *v; | |
240 | struct dfa *d; | |
48bf5def RN |
241 | { |
242 | struct sset *ss; | |
3ca4086b VS |
243 | chr *nopr; |
244 | int i; | |
48bf5def RN |
245 | |
246 | nopr = d->lastnopr; | |
247 | if (nopr == NULL) | |
248 | nopr = v->start; | |
249 | for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--) | |
3ca4086b | 250 | if ((ss->flags&NOPROGRESS) && nopr < ss->lastseen) |
48bf5def RN |
251 | nopr = ss->lastseen; |
252 | return nopr; | |
253 | } | |
254 | ||
255 | /* | |
3ca4086b VS |
256 | - newdfa - set up a fresh DFA |
257 | ^ static struct dfa *newdfa(struct vars *, struct cnfa *, | |
258 | ^ struct colormap *, struct smalldfa *); | |
48bf5def RN |
259 | */ |
260 | static struct dfa * | |
3ca4086b VS |
261 | newdfa(v, cnfa, cm, small) |
262 | struct vars *v; | |
263 | struct cnfa *cnfa; | |
264 | struct colormap *cm; | |
265 | struct smalldfa *small; /* preallocated space, may be NULL */ | |
48bf5def RN |
266 | { |
267 | struct dfa *d; | |
3ca4086b VS |
268 | size_t nss = cnfa->nstates * 2; |
269 | int wordsper = (cnfa->nstates + UBITS - 1) / UBITS; | |
48bf5def RN |
270 | struct smalldfa *smallwas = small; |
271 | ||
272 | assert(cnfa != NULL && cnfa->nstates != 0); | |
273 | ||
3ca4086b | 274 | if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS) { |
48bf5def | 275 | assert(wordsper == 1); |
3ca4086b VS |
276 | if (small == NULL) { |
277 | small = (struct smalldfa *)MALLOC( | |
278 | sizeof(struct smalldfa)); | |
279 | if (small == NULL) { | |
48bf5def RN |
280 | ERR(REG_ESPACE); |
281 | return NULL; | |
282 | } | |
283 | } | |
284 | d = &small->dfa; | |
285 | d->ssets = small->ssets; | |
286 | d->statesarea = small->statesarea; | |
287 | d->work = &d->statesarea[nss]; | |
288 | d->outsarea = small->outsarea; | |
289 | d->incarea = small->incarea; | |
290 | d->cptsmalloced = 0; | |
3ca4086b VS |
291 | d->mallocarea = (smallwas == NULL) ? (char *)small : NULL; |
292 | } else { | |
293 | d = (struct dfa *)MALLOC(sizeof(struct dfa)); | |
294 | if (d == NULL) { | |
48bf5def RN |
295 | ERR(REG_ESPACE); |
296 | return NULL; | |
297 | } | |
3ca4086b VS |
298 | d->ssets = (struct sset *)MALLOC(nss * sizeof(struct sset)); |
299 | d->statesarea = (unsigned *)MALLOC((nss+WORK) * wordsper * | |
300 | sizeof(unsigned)); | |
48bf5def | 301 | d->work = &d->statesarea[nss * wordsper]; |
3ca4086b VS |
302 | d->outsarea = (struct sset **)MALLOC(nss * cnfa->ncolors * |
303 | sizeof(struct sset *)); | |
304 | d->incarea = (struct arcp *)MALLOC(nss * cnfa->ncolors * | |
305 | sizeof(struct arcp)); | |
48bf5def | 306 | d->cptsmalloced = 1; |
3ca4086b | 307 | d->mallocarea = (char *)d; |
48bf5def | 308 | if (d->ssets == NULL || d->statesarea == NULL || |
3ca4086b | 309 | d->outsarea == NULL || d->incarea == NULL) { |
48bf5def RN |
310 | freedfa(d); |
311 | ERR(REG_ESPACE); | |
312 | return NULL; | |
313 | } | |
314 | } | |
315 | ||
3ca4086b | 316 | d->nssets = (v->eflags®_SMALL) ? 7 : nss; |
48bf5def RN |
317 | d->nssused = 0; |
318 | d->nstates = cnfa->nstates; | |
319 | d->ncolors = cnfa->ncolors; | |
320 | d->wordsper = wordsper; | |
321 | d->cnfa = cnfa; | |
322 | d->cm = cm; | |
323 | d->lastpost = NULL; | |
324 | d->lastnopr = NULL; | |
325 | d->search = d->ssets; | |
326 | ||
327 | /* initialization of sset fields is done as needed */ | |
328 | ||
329 | return d; | |
330 | } | |
331 | ||
332 | /* | |
3ca4086b VS |
333 | - freedfa - free a DFA |
334 | ^ static VOID freedfa(struct dfa *); | |
48bf5def | 335 | */ |
3ca4086b VS |
336 | static VOID |
337 | freedfa(d) | |
338 | struct dfa *d; | |
48bf5def | 339 | { |
3ca4086b | 340 | if (d->cptsmalloced) { |
48bf5def RN |
341 | if (d->ssets != NULL) |
342 | FREE(d->ssets); | |
343 | if (d->statesarea != NULL) | |
344 | FREE(d->statesarea); | |
345 | if (d->outsarea != NULL) | |
346 | FREE(d->outsarea); | |
347 | if (d->incarea != NULL) | |
348 | FREE(d->incarea); | |
349 | } | |
350 | ||
351 | if (d->mallocarea != NULL) | |
352 | FREE(d->mallocarea); | |
353 | } | |
354 | ||
355 | /* | |
3ca4086b | 356 | - hash - construct a hash code for a bitvector |
48bf5def | 357 | * There are probably better ways, but they're more expensive. |
3ca4086b | 358 | ^ static unsigned hash(unsigned *, int); |
48bf5def RN |
359 | */ |
/*
 * Compute a hash code for a bitvector of n words by XORing the words
 * together; yields 0 when n is not positive.
 */
static unsigned
hash(uv, n)
unsigned *uv;
int n;
{
	unsigned acc = 0;
	unsigned *word = uv;
	int left;

	for (left = n; left > 0; left--)
		acc ^= *word++;
	return acc;
}
373 | ||
374 | /* | |
3ca4086b VS |
375 | - initialize - hand-craft a cache entry for startup, otherwise get ready |
376 | ^ static struct sset *initialize(struct vars *, struct dfa *, chr *); | |
48bf5def RN |
377 | */ |
378 | static struct sset * | |
3ca4086b VS |
379 | initialize(v, d, start) |
380 | struct vars *v; /* used only for debug flags */ | |
381 | struct dfa *d; | |
382 | chr *start; | |
48bf5def RN |
383 | { |
384 | struct sset *ss; | |
3ca4086b | 385 | int i; |
48bf5def RN |
386 | |
387 | /* is previous one still there? */ | |
3ca4086b | 388 | if (d->nssused > 0 && (d->ssets[0].flags&STARTER)) |
48bf5def | 389 | ss = &d->ssets[0]; |
3ca4086b | 390 | else { /* no, must (re)build it */ |
48bf5def RN |
391 | ss = getvacant(v, d, start, start); |
392 | for (i = 0; i < d->wordsper; i++) | |
393 | ss->states[i] = 0; | |
394 | BSET(ss->states, d->cnfa->pre); | |
395 | ss->hash = HASH(ss->states, d->wordsper); | |
396 | assert(d->cnfa->pre != d->cnfa->post); | |
3ca4086b | 397 | ss->flags = STARTER|LOCKED|NOPROGRESS; |
48bf5def RN |
398 | /* lastseen dealt with below */ |
399 | } | |
400 | ||
401 | for (i = 0; i < d->nssused; i++) | |
402 | d->ssets[i].lastseen = NULL; | |
403 | ss->lastseen = start; /* maybe untrue, but harmless */ | |
404 | d->lastpost = NULL; | |
405 | d->lastnopr = NULL; | |
406 | return ss; | |
407 | } | |
408 | ||
409 | /* | |
3ca4086b VS |
410 | - miss - handle a cache miss |
411 | ^ static struct sset *miss(struct vars *, struct dfa *, struct sset *, | |
412 | ^ pcolor, chr *, chr *); | |
48bf5def | 413 | */ |
3ca4086b VS |
414 | static struct sset * /* NULL if goes to empty set */ |
415 | miss(v, d, css, co, cp, start) | |
416 | struct vars *v; /* used only for debug flags */ | |
417 | struct dfa *d; | |
418 | struct sset *css; | |
419 | pcolor co; | |
420 | chr *cp; /* next chr */ | |
421 | chr *start; /* where the attempt got started */ | |
48bf5def RN |
422 | { |
423 | struct cnfa *cnfa = d->cnfa; | |
3ca4086b VS |
424 | int i; |
425 | unsigned h; | |
48bf5def RN |
426 | struct carc *ca; |
427 | struct sset *p; | |
3ca4086b VS |
428 | int ispost; |
429 | int noprogress; | |
430 | int gotstate; | |
431 | int dolacons; | |
432 | int sawlacons; | |
48bf5def RN |
433 | |
434 | /* for convenience, we can be called even if it might not be a miss */ | |
3ca4086b | 435 | if (css->outs[co] != NULL) { |
48bf5def RN |
436 | FDEBUG(("hit\n")); |
437 | return css->outs[co]; | |
438 | } | |
439 | FDEBUG(("miss\n")); | |
440 | ||
441 | /* first, what set of states would we end up in? */ | |
442 | for (i = 0; i < d->wordsper; i++) | |
443 | d->work[i] = 0; | |
444 | ispost = 0; | |
445 | noprogress = 1; | |
446 | gotstate = 0; | |
447 | for (i = 0; i < d->nstates; i++) | |
448 | if (ISBSET(css->states, i)) | |
3ca4086b VS |
449 | for (ca = cnfa->states[i]+1; ca->co != COLORLESS; ca++) |
450 | if (ca->co == co) { | |
48bf5def RN |
451 | BSET(d->work, ca->to); |
452 | gotstate = 1; | |
453 | if (ca->to == cnfa->post) | |
454 | ispost = 1; | |
455 | if (!cnfa->states[ca->to]->co) | |
456 | noprogress = 0; | |
457 | FDEBUG(("%d -> %d\n", i, ca->to)); | |
458 | } | |
3ca4086b | 459 | dolacons = (gotstate) ? (cnfa->flags&HASLACONS) : 0; |
48bf5def | 460 | sawlacons = 0; |
3ca4086b | 461 | while (dolacons) { /* transitive closure */ |
48bf5def RN |
462 | dolacons = 0; |
463 | for (i = 0; i < d->nstates; i++) | |
464 | if (ISBSET(d->work, i)) | |
3ca4086b VS |
465 | for (ca = cnfa->states[i]+1; ca->co != COLORLESS; |
466 | ca++) { | |
48bf5def | 467 | if (ca->co <= cnfa->ncolors) |
3ca4086b | 468 | continue; /* NOTE CONTINUE */ |
48bf5def RN |
469 | sawlacons = 1; |
470 | if (ISBSET(d->work, ca->to)) | |
3ca4086b | 471 | continue; /* NOTE CONTINUE */ |
48bf5def | 472 | if (!lacon(v, cnfa, cp, ca->co)) |
3ca4086b | 473 | continue; /* NOTE CONTINUE */ |
48bf5def RN |
474 | BSET(d->work, ca->to); |
475 | dolacons = 1; | |
476 | if (ca->to == cnfa->post) | |
477 | ispost = 1; | |
478 | if (!cnfa->states[ca->to]->co) | |
479 | noprogress = 0; | |
480 | FDEBUG(("%d :> %d\n", i, ca->to)); | |
481 | } | |
482 | } | |
483 | if (!gotstate) | |
484 | return NULL; | |
485 | h = HASH(d->work, d->wordsper); | |
486 | ||
487 | /* next, is that in the cache? */ | |
488 | for (p = d->ssets, i = d->nssused; i > 0; p++, i--) | |
3ca4086b | 489 | if (HIT(h, d->work, p, d->wordsper)) { |
48bf5def | 490 | FDEBUG(("cached c%d\n", p - d->ssets)); |
3ca4086b | 491 | break; /* NOTE BREAK OUT */ |
48bf5def | 492 | } |
3ca4086b | 493 | if (i == 0) { /* nope, need a new cache entry */ |
48bf5def RN |
494 | p = getvacant(v, d, cp, start); |
495 | assert(p != css); | |
496 | for (i = 0; i < d->wordsper; i++) | |
497 | p->states[i] = d->work[i]; | |
498 | p->hash = h; | |
499 | p->flags = (ispost) ? POSTSTATE : 0; | |
500 | if (noprogress) | |
501 | p->flags |= NOPROGRESS; | |
502 | /* lastseen to be dealt with by caller */ | |
503 | } | |
504 | ||
3ca4086b | 505 | if (!sawlacons) { /* lookahead conds. always cache miss */ |
48bf5def RN |
506 | FDEBUG(("c%d[%d]->c%d\n", css - d->ssets, co, p - d->ssets)); |
507 | css->outs[co] = p; | |
508 | css->inchain[co] = p->ins; | |
509 | p->ins.ss = css; | |
3ca4086b | 510 | p->ins.co = (color)co; |
48bf5def RN |
511 | } |
512 | return p; | |
513 | } | |
514 | ||
515 | /* | |
3ca4086b VS |
516 | - lacon - lookahead-constraint checker for miss() |
517 | ^ static int lacon(struct vars *, struct cnfa *, chr *, pcolor); | |
48bf5def | 518 | */ |
3ca4086b VS |
519 | static int /* predicate: constraint satisfied? */ |
520 | lacon(v, pcnfa, cp, co) | |
521 | struct vars *v; | |
522 | struct cnfa *pcnfa; /* parent cnfa */ | |
523 | chr *cp; | |
524 | pcolor co; /* "color" of the lookahead constraint */ | |
48bf5def | 525 | { |
3ca4086b | 526 | int n; |
48bf5def RN |
527 | struct subre *sub; |
528 | struct dfa *d; | |
529 | struct smalldfa sd; | |
3ca4086b | 530 | chr *end; |
48bf5def RN |
531 | |
532 | n = co - pcnfa->ncolors; | |
533 | assert(n < v->g->nlacons && v->g->lacons != NULL); | |
534 | FDEBUG(("=== testing lacon %d\n", n)); | |
535 | sub = &v->g->lacons[n]; | |
536 | d = newdfa(v, &sub->cnfa, &v->g->cmap, &sd); | |
3ca4086b | 537 | if (d == NULL) { |
48bf5def RN |
538 | ERR(REG_ESPACE); |
539 | return 0; | |
540 | } | |
3ca4086b | 541 | end = longest(v, d, cp, v->stop, (int *)NULL); |
48bf5def RN |
542 | freedfa(d); |
543 | FDEBUG(("=== lacon %d match %d\n", n, (end != NULL))); | |
544 | return (sub->subno) ? (end != NULL) : (end == NULL); | |
545 | } | |
546 | ||
547 | /* | |
3ca4086b | 548 | - getvacant - get a vacant state set |
48bf5def RN |
549 | * This routine clears out the inarcs and outarcs, but does not otherwise |
550 | * clear the innards of the state set -- that's up to the caller. | |
3ca4086b | 551 | ^ static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *); |
48bf5def RN |
552 | */ |
553 | static struct sset * | |
3ca4086b VS |
554 | getvacant(v, d, cp, start) |
555 | struct vars *v; /* used only for debug flags */ | |
556 | struct dfa *d; | |
557 | chr *cp; | |
558 | chr *start; | |
48bf5def | 559 | { |
3ca4086b | 560 | int i; |
48bf5def RN |
561 | struct sset *ss; |
562 | struct sset *p; | |
563 | struct arcp ap; | |
564 | struct arcp lastap; | |
3ca4086b | 565 | color co; |
48bf5def RN |
566 | |
567 | ss = pickss(v, d, cp, start); | |
3ca4086b | 568 | assert(!(ss->flags&LOCKED)); |
48bf5def RN |
569 | |
570 | /* clear out its inarcs, including self-referential ones */ | |
571 | ap = ss->ins; | |
3ca4086b | 572 | while ((p = ap.ss) != NULL) { |
48bf5def | 573 | co = ap.co; |
3ca4086b | 574 | FDEBUG(("zapping c%d's %ld outarc\n", p - d->ssets, (long)co)); |
48bf5def RN |
575 | p->outs[co] = NULL; |
576 | ap = p->inchain[co]; | |
3ca4086b | 577 | p->inchain[co].ss = NULL; /* paranoia */ |
48bf5def RN |
578 | } |
579 | ss->ins.ss = NULL; | |
580 | ||
581 | /* take it off the inarc chains of the ssets reached by its outarcs */ | |
3ca4086b | 582 | for (i = 0; i < d->ncolors; i++) { |
48bf5def RN |
583 | p = ss->outs[i]; |
584 | assert(p != ss); /* not self-referential */ | |
585 | if (p == NULL) | |
3ca4086b | 586 | continue; /* NOTE CONTINUE */ |
48bf5def RN |
587 | FDEBUG(("del outarc %d from c%d's in chn\n", i, p - d->ssets)); |
588 | if (p->ins.ss == ss && p->ins.co == i) | |
589 | p->ins = ss->inchain[i]; | |
3ca4086b | 590 | else { |
48bf5def RN |
591 | assert(p->ins.ss != NULL); |
592 | for (ap = p->ins; ap.ss != NULL && | |
3ca4086b VS |
593 | !(ap.ss == ss && ap.co == i); |
594 | ap = ap.ss->inchain[ap.co]) | |
48bf5def RN |
595 | lastap = ap; |
596 | assert(ap.ss != NULL); | |
597 | lastap.ss->inchain[lastap.co] = ss->inchain[i]; | |
598 | } | |
599 | ss->outs[i] = NULL; | |
600 | ss->inchain[i].ss = NULL; | |
601 | } | |
602 | ||
603 | /* if ss was a success state, may need to remember location */ | |
3ca4086b VS |
604 | if ((ss->flags&POSTSTATE) && ss->lastseen != d->lastpost && |
605 | (d->lastpost == NULL || d->lastpost < ss->lastseen)) | |
48bf5def RN |
606 | d->lastpost = ss->lastseen; |
607 | ||
608 | /* likewise for a no-progress state */ | |
3ca4086b VS |
609 | if ((ss->flags&NOPROGRESS) && ss->lastseen != d->lastnopr && |
610 | (d->lastnopr == NULL || d->lastnopr < ss->lastseen)) | |
48bf5def RN |
611 | d->lastnopr = ss->lastseen; |
612 | ||
613 | return ss; | |
614 | } | |
615 | ||
616 | /* | |
3ca4086b VS |
617 | - pickss - pick the next stateset to be used |
618 | ^ static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *); | |
48bf5def RN |
619 | */ |
620 | static struct sset * | |
3ca4086b VS |
621 | pickss(v, d, cp, start) |
622 | struct vars *v; /* used only for debug flags */ | |
623 | struct dfa *d; | |
624 | chr *cp; | |
625 | chr *start; | |
48bf5def | 626 | { |
3ca4086b | 627 | int i; |
48bf5def RN |
628 | struct sset *ss; |
629 | struct sset *end; | |
3ca4086b | 630 | chr *ancient; |
48bf5def RN |
631 | |
632 | /* shortcut for cases where cache isn't full */ | |
3ca4086b | 633 | if (d->nssused < d->nssets) { |
48bf5def RN |
634 | i = d->nssused; |
635 | d->nssused++; | |
636 | ss = &d->ssets[i]; | |
637 | FDEBUG(("new c%d\n", i)); | |
638 | /* set up innards */ | |
639 | ss->states = &d->statesarea[i * d->wordsper]; | |
640 | ss->flags = 0; | |
641 | ss->ins.ss = NULL; | |
642 | ss->ins.co = WHITE; /* give it some value */ | |
643 | ss->outs = &d->outsarea[i * d->ncolors]; | |
644 | ss->inchain = &d->incarea[i * d->ncolors]; | |
3ca4086b | 645 | for (i = 0; i < d->ncolors; i++) { |
48bf5def RN |
646 | ss->outs[i] = NULL; |
647 | ss->inchain[i].ss = NULL; | |
648 | } | |
649 | return ss; | |
650 | } | |
651 | ||
652 | /* look for oldest, or old enough anyway */ | |
3ca4086b VS |
653 | if (cp - start > d->nssets*2/3) /* oldest 33% are expendable */ |
654 | ancient = cp - d->nssets*2/3; | |
48bf5def RN |
655 | else |
656 | ancient = start; | |
657 | for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++) | |
658 | if ((ss->lastseen == NULL || ss->lastseen < ancient) && | |
3ca4086b | 659 | !(ss->flags&LOCKED)) { |
48bf5def RN |
660 | d->search = ss + 1; |
661 | FDEBUG(("replacing c%d\n", ss - d->ssets)); | |
662 | return ss; | |
663 | } | |
664 | for (ss = d->ssets, end = d->search; ss < end; ss++) | |
665 | if ((ss->lastseen == NULL || ss->lastseen < ancient) && | |
3ca4086b | 666 | !(ss->flags&LOCKED)) { |
48bf5def RN |
667 | d->search = ss + 1; |
668 | FDEBUG(("replacing c%d\n", ss - d->ssets)); | |
669 | return ss; | |
670 | } | |
671 | ||
672 | /* nobody's old enough?!? -- something's really wrong */ | |
673 | FDEBUG(("can't find victim to replace!\n")); | |
674 | assert(NOTREACHED); | |
675 | ERR(REG_ASSERT); | |
676 | return d->ssets; | |
19f0995a | 677 | } |