]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * DFA routines | |
3 | * This file is #included by regexec.c. | |
4 | * | |
5 | * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. | |
6 | * | |
7 | * Development of this software was funded, in part, by Cray Research Inc., | |
8 | * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics | |
9 | * Corporation, none of whom are responsible for the results. The author | |
10 | * thanks all of them. | |
11 | * | |
12 | * Redistribution and use in source and binary forms -- with or without | |
13 | * modification -- are permitted for any purpose, provided that | |
14 | * redistributions in source form retain this entire copyright notice and | |
15 | * indicate the origin and nature of any modifications. | |
16 | * | |
17 | * I'd appreciate being given credit for this package in the documentation | |
18 | * of software which uses it, but that is not a requirement. | |
19 | * | |
20 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, | |
21 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY | |
22 | * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL | |
23 | * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
24 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
25 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; | |
26 | * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | |
27 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | |
28 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF | |
29 | * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
30 | * | |
31 | * $Header$ | |
32 | * | |
33 | */ | |
34 | ||
35 | /* | |
36 | * longest - longest-preferred matching engine | |
37 | */ | |
38 | static chr * /* endpoint, or NULL */ | |
39 | longest(struct vars * v, /* used only for debug and exec flags */ | |
40 | struct dfa * d, | |
41 | chr *start, /* where the match should start */ | |
42 | chr *stop, /* match must end at or before here */ | |
43 | int *hitstopp) /* record whether hit v->stop, if non-NULL */ | |
44 | { | |
45 | chr *cp; | |
46 | chr *realstop = (stop == v->stop) ? stop : stop + 1; | |
47 | color co; | |
48 | struct sset *css; | |
49 | struct sset *ss; | |
50 | chr *post; | |
51 | int i; | |
52 | struct colormap *cm = d->cm; | |
53 | ||
54 | /* initialize */ | |
55 | css = initialize(v, d, start); | |
56 | cp = start; | |
57 | if (hitstopp != NULL) | |
58 | *hitstopp = 0; | |
59 | ||
60 | /* startup */ | |
61 | FDEBUG(("+++ startup +++\n")); | |
62 | if (cp == v->start) | |
63 | { | |
64 | co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1]; | |
65 | FDEBUG(("color %ld\n", (long) co)); | |
66 | } | |
67 | else | |
68 | { | |
69 | co = GETCOLOR(cm, *(cp - 1)); | |
70 | FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co)); | |
71 | } | |
72 | css = miss(v, d, css, co, cp, start); | |
73 | if (css == NULL) | |
74 | return NULL; | |
75 | css->lastseen = cp; | |
76 | ||
77 | /* main loop */ | |
78 | if (v->eflags & REG_FTRACE) | |
79 | while (cp < realstop) | |
80 | { | |
81 | FDEBUG(("+++ at c%d +++\n", css - d->ssets)); | |
82 | co = GETCOLOR(cm, *cp); | |
83 | FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co)); | |
84 | ss = css->outs[co]; | |
85 | if (ss == NULL) | |
86 | { | |
87 | ss = miss(v, d, css, co, cp + 1, start); | |
88 | if (ss == NULL) | |
89 | break; /* NOTE BREAK OUT */ | |
90 | } | |
91 | cp++; | |
92 | ss->lastseen = cp; | |
93 | css = ss; | |
94 | } | |
95 | else | |
96 | while (cp < realstop) | |
97 | { | |
98 | co = GETCOLOR(cm, *cp); | |
99 | ss = css->outs[co]; | |
100 | if (ss == NULL) | |
101 | { | |
102 | ss = miss(v, d, css, co, cp + 1, start); | |
103 | if (ss == NULL) | |
104 | break; /* NOTE BREAK OUT */ | |
105 | } | |
106 | cp++; | |
107 | ss->lastseen = cp; | |
108 | css = ss; | |
109 | } | |
110 | ||
111 | /* shutdown */ | |
112 | FDEBUG(("+++ shutdown at c%d +++\n", css - d->ssets)); | |
113 | if (cp == v->stop && stop == v->stop) | |
114 | { | |
115 | if (hitstopp != NULL) | |
116 | *hitstopp = 1; | |
117 | co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1]; | |
118 | FDEBUG(("color %ld\n", (long) co)); | |
119 | ss = miss(v, d, css, co, cp, start); | |
120 | /* special case: match ended at eol? */ | |
121 | if (ss != NULL && (ss->flags & POSTSTATE)) | |
122 | return cp; | |
123 | else if (ss != NULL) | |
124 | ss->lastseen = cp; /* to be tidy */ | |
125 | } | |
126 | ||
127 | /* find last match, if any */ | |
128 | post = d->lastpost; | |
129 | for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--) | |
130 | if ((ss->flags & POSTSTATE) && post != ss->lastseen && | |
131 | (post == NULL || post < ss->lastseen)) | |
132 | post = ss->lastseen; | |
133 | if (post != NULL) /* found one */ | |
134 | return post - 1; | |
135 | ||
136 | return NULL; | |
137 | } | |
138 | ||
139 | /* | |
140 | * shortest - shortest-preferred matching engine | |
141 | */ | |
142 | static chr * /* endpoint, or NULL */ | |
143 | shortest(struct vars * v, | |
144 | struct dfa * d, | |
145 | chr *start, /* where the match should start */ | |
146 | chr *min, /* match must end at or after here */ | |
147 | chr *max, /* match must end at or before here */ | |
148 | chr **coldp, /* store coldstart pointer here, if | |
149 | * nonNULL */ | |
150 | int *hitstopp) /* record whether hit v->stop, if non-NULL */ | |
151 | { | |
152 | chr *cp; | |
153 | chr *realmin = (min == v->stop) ? min : min + 1; | |
154 | chr *realmax = (max == v->stop) ? max : max + 1; | |
155 | color co; | |
156 | struct sset *css; | |
157 | struct sset *ss; | |
158 | struct colormap *cm = d->cm; | |
159 | ||
160 | /* initialize */ | |
161 | css = initialize(v, d, start); | |
162 | cp = start; | |
163 | if (hitstopp != NULL) | |
164 | *hitstopp = 0; | |
165 | ||
166 | /* startup */ | |
167 | FDEBUG(("--- startup ---\n")); | |
168 | if (cp == v->start) | |
169 | { | |
170 | co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1]; | |
171 | FDEBUG(("color %ld\n", (long) co)); | |
172 | } | |
173 | else | |
174 | { | |
175 | co = GETCOLOR(cm, *(cp - 1)); | |
176 | FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co)); | |
177 | } | |
178 | css = miss(v, d, css, co, cp, start); | |
179 | if (css == NULL) | |
180 | return NULL; | |
181 | css->lastseen = cp; | |
182 | ss = css; | |
183 | ||
184 | /* main loop */ | |
185 | if (v->eflags & REG_FTRACE) | |
186 | while (cp < realmax) | |
187 | { | |
188 | FDEBUG(("--- at c%d ---\n", css - d->ssets)); | |
189 | co = GETCOLOR(cm, *cp); | |
190 | FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co)); | |
191 | ss = css->outs[co]; | |
192 | if (ss == NULL) | |
193 | { | |
194 | ss = miss(v, d, css, co, cp + 1, start); | |
195 | if (ss == NULL) | |
196 | break; /* NOTE BREAK OUT */ | |
197 | } | |
198 | cp++; | |
199 | ss->lastseen = cp; | |
200 | css = ss; | |
201 | if ((ss->flags & POSTSTATE) && cp >= realmin) | |
202 | break; /* NOTE BREAK OUT */ | |
203 | } | |
204 | else | |
205 | while (cp < realmax) | |
206 | { | |
207 | co = GETCOLOR(cm, *cp); | |
208 | ss = css->outs[co]; | |
209 | if (ss == NULL) | |
210 | { | |
211 | ss = miss(v, d, css, co, cp + 1, start); | |
212 | if (ss == NULL) | |
213 | break; /* NOTE BREAK OUT */ | |
214 | } | |
215 | cp++; | |
216 | ss->lastseen = cp; | |
217 | css = ss; | |
218 | if ((ss->flags & POSTSTATE) && cp >= realmin) | |
219 | break; /* NOTE BREAK OUT */ | |
220 | } | |
221 | ||
222 | if (ss == NULL) | |
223 | return NULL; | |
224 | ||
225 | if (coldp != NULL) /* report last no-progress state set, if | |
226 | * any */ | |
227 | *coldp = lastcold(v, d); | |
228 | ||
229 | if ((ss->flags & POSTSTATE) && cp > min) | |
230 | { | |
231 | assert(cp >= realmin); | |
232 | cp--; | |
233 | } | |
234 | else if (cp == v->stop && max == v->stop) | |
235 | { | |
236 | co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1]; | |
237 | FDEBUG(("color %ld\n", (long) co)); | |
238 | ss = miss(v, d, css, co, cp, start); | |
239 | /* match might have ended at eol */ | |
240 | if ((ss == NULL || !(ss->flags & POSTSTATE)) && hitstopp != NULL) | |
241 | *hitstopp = 1; | |
242 | } | |
243 | ||
244 | if (ss == NULL || !(ss->flags & POSTSTATE)) | |
245 | return NULL; | |
246 | ||
247 | return cp; | |
248 | } | |
249 | ||
250 | /* | |
251 | * lastcold - determine last point at which no progress had been made | |
252 | */ | |
253 | static chr * /* endpoint, or NULL */ | |
254 | lastcold(struct vars * v, | |
255 | struct dfa * d) | |
256 | { | |
257 | struct sset *ss; | |
258 | chr *nopr; | |
259 | int i; | |
260 | ||
261 | nopr = d->lastnopr; | |
262 | if (nopr == NULL) | |
263 | nopr = v->start; | |
264 | for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--) | |
265 | if ((ss->flags & NOPROGRESS) && nopr < ss->lastseen) | |
266 | nopr = ss->lastseen; | |
267 | return nopr; | |
268 | } | |
269 | ||
270 | /* | |
271 | * newdfa - set up a fresh DFA | |
272 | */ | |
273 | static struct dfa * | |
274 | newdfa(struct vars * v, | |
275 | struct cnfa * cnfa, | |
276 | struct colormap * cm, | |
277 | struct smalldfa * small) /* preallocated space, may be NULL */ | |
278 | { | |
279 | struct dfa *d; | |
280 | size_t nss = cnfa->nstates * 2; | |
281 | int wordsper = (cnfa->nstates + UBITS - 1) / UBITS; | |
282 | struct smalldfa *smallwas = small; | |
283 | ||
284 | assert(cnfa != NULL && cnfa->nstates != 0); | |
285 | ||
286 | if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS) | |
287 | { | |
288 | assert(wordsper == 1); | |
289 | if (small == NULL) | |
290 | { | |
291 | small = (struct smalldfa *) MALLOC( | |
292 | sizeof(struct smalldfa)); | |
293 | if (small == NULL) | |
294 | { | |
295 | ERR(REG_ESPACE); | |
296 | return NULL; | |
297 | } | |
298 | } | |
299 | d = &small->dfa; | |
300 | d->ssets = small->ssets; | |
301 | d->statesarea = small->statesarea; | |
302 | d->work = &d->statesarea[nss]; | |
303 | d->outsarea = small->outsarea; | |
304 | d->incarea = small->incarea; | |
305 | d->cptsmalloced = 0; | |
306 | d->mallocarea = (smallwas == NULL) ? (char *) small : NULL; | |
307 | } | |
308 | else | |
309 | { | |
310 | d = (struct dfa *) MALLOC(sizeof(struct dfa)); | |
311 | if (d == NULL) | |
312 | { | |
313 | ERR(REG_ESPACE); | |
314 | return NULL; | |
315 | } | |
316 | d->ssets = (struct sset *) MALLOC(nss * sizeof(struct sset)); | |
317 | d->statesarea = (unsigned *) MALLOC((nss + WORK) * wordsper * | |
318 | sizeof(unsigned)); | |
319 | d->work = &d->statesarea[nss * wordsper]; | |
320 | d->outsarea = (struct sset **) MALLOC(nss * cnfa->ncolors * | |
321 | sizeof(struct sset *)); | |
322 | d->incarea = (struct arcp *) MALLOC(nss * cnfa->ncolors * | |
323 | sizeof(struct arcp)); | |
324 | d->cptsmalloced = 1; | |
325 | d->mallocarea = (char *) d; | |
326 | if (d->ssets == NULL || d->statesarea == NULL || | |
327 | d->outsarea == NULL || d->incarea == NULL) | |
328 | { | |
329 | freedfa(d); | |
330 | ERR(REG_ESPACE); | |
331 | return NULL; | |
332 | } | |
333 | } | |
334 | ||
335 | d->nssets = (v->eflags & REG_SMALL) ? 7 : nss; | |
336 | d->nssused = 0; | |
337 | d->nstates = cnfa->nstates; | |
338 | d->ncolors = cnfa->ncolors; | |
339 | d->wordsper = wordsper; | |
340 | d->cnfa = cnfa; | |
341 | d->cm = cm; | |
342 | d->lastpost = NULL; | |
343 | d->lastnopr = NULL; | |
344 | d->search = d->ssets; | |
345 | ||
346 | /* initialization of sset fields is done as needed */ | |
347 | ||
348 | return d; | |
349 | } | |
350 | ||
351 | /* | |
352 | * freedfa - free a DFA | |
353 | */ | |
354 | static void | |
355 | freedfa(struct dfa * d) | |
356 | { | |
357 | if (d->cptsmalloced) | |
358 | { | |
359 | if (d->ssets != NULL) | |
360 | FREE(d->ssets); | |
361 | if (d->statesarea != NULL) | |
362 | FREE(d->statesarea); | |
363 | if (d->outsarea != NULL) | |
364 | FREE(d->outsarea); | |
365 | if (d->incarea != NULL) | |
366 | FREE(d->incarea); | |
367 | } | |
368 | ||
369 | if (d->mallocarea != NULL) | |
370 | FREE(d->mallocarea); | |
371 | } | |
372 | ||
373 | /* | |
374 | * hash - construct a hash code for a bitvector | |
375 | * | |
376 | * There are probably better ways, but they're more expensive. | |
377 | */ | |
378 | static unsigned | |
379 | hash(unsigned *uv, | |
380 | int n) | |
381 | { | |
382 | int i; | |
383 | unsigned h; | |
384 | ||
385 | h = 0; | |
386 | for (i = 0; i < n; i++) | |
387 | h ^= uv[i]; | |
388 | return h; | |
389 | } | |
390 | ||
391 | /* | |
392 | * initialize - hand-craft a cache entry for startup, otherwise get ready | |
393 | */ | |
394 | static struct sset * | |
395 | initialize(struct vars * v, /* used only for debug flags */ | |
396 | struct dfa * d, | |
397 | chr *start) | |
398 | { | |
399 | struct sset *ss; | |
400 | int i; | |
401 | ||
402 | /* is previous one still there? */ | |
403 | if (d->nssused > 0 && (d->ssets[0].flags & STARTER)) | |
404 | ss = &d->ssets[0]; | |
405 | else | |
406 | { /* no, must (re)build it */ | |
407 | ss = getvacant(v, d, start, start); | |
408 | for (i = 0; i < d->wordsper; i++) | |
409 | ss->states[i] = 0; | |
410 | BSET(ss->states, d->cnfa->pre); | |
411 | ss->hash = HASH(ss->states, d->wordsper); | |
412 | assert(d->cnfa->pre != d->cnfa->post); | |
413 | ss->flags = STARTER | LOCKED | NOPROGRESS; | |
414 | /* lastseen dealt with below */ | |
415 | } | |
416 | ||
417 | for (i = 0; i < d->nssused; i++) | |
418 | d->ssets[i].lastseen = NULL; | |
419 | ss->lastseen = start; /* maybe untrue, but harmless */ | |
420 | d->lastpost = NULL; | |
421 | d->lastnopr = NULL; | |
422 | return ss; | |
423 | } | |
424 | ||
425 | /* | |
426 | * miss - handle a cache miss | |
427 | */ | |
428 | static struct sset * /* NULL if goes to empty set */ | |
429 | miss(struct vars * v, /* used only for debug flags */ | |
430 | struct dfa * d, | |
431 | struct sset * css, | |
432 | pcolor co, | |
433 | chr *cp, /* next chr */ | |
434 | chr *start) /* where the attempt got started */ | |
435 | { | |
436 | struct cnfa *cnfa = d->cnfa; | |
437 | int i; | |
438 | unsigned h; | |
439 | struct carc *ca; | |
440 | struct sset *p; | |
441 | int ispost; | |
442 | int noprogress; | |
443 | int gotstate; | |
444 | int dolacons; | |
445 | int sawlacons; | |
446 | ||
447 | /* for convenience, we can be called even if it might not be a miss */ | |
448 | if (css->outs[co] != NULL) | |
449 | { | |
450 | FDEBUG(("hit\n")); | |
451 | return css->outs[co]; | |
452 | } | |
453 | FDEBUG(("miss\n")); | |
454 | ||
455 | /* first, what set of states would we end up in? */ | |
456 | for (i = 0; i < d->wordsper; i++) | |
457 | d->work[i] = 0; | |
458 | ispost = 0; | |
459 | noprogress = 1; | |
460 | gotstate = 0; | |
461 | for (i = 0; i < d->nstates; i++) | |
462 | if (ISBSET(css->states, i)) | |
463 | for (ca = cnfa->states[i] + 1; ca->co != COLORLESS; ca++) | |
464 | if (ca->co == co) | |
465 | { | |
466 | BSET(d->work, ca->to); | |
467 | gotstate = 1; | |
468 | if (ca->to == cnfa->post) | |
469 | ispost = 1; | |
470 | if (!cnfa->states[ca->to]->co) | |
471 | noprogress = 0; | |
472 | FDEBUG(("%d -> %d\n", i, ca->to)); | |
473 | } | |
474 | dolacons = (gotstate) ? (cnfa->flags & HASLACONS) : 0; | |
475 | sawlacons = 0; | |
476 | while (dolacons) | |
477 | { /* transitive closure */ | |
478 | dolacons = 0; | |
479 | for (i = 0; i < d->nstates; i++) | |
480 | if (ISBSET(d->work, i)) | |
481 | for (ca = cnfa->states[i] + 1; ca->co != COLORLESS; | |
482 | ca++) | |
483 | { | |
484 | if (ca->co <= cnfa->ncolors) | |
485 | continue; /* NOTE CONTINUE */ | |
486 | sawlacons = 1; | |
487 | if (ISBSET(d->work, ca->to)) | |
488 | continue; /* NOTE CONTINUE */ | |
489 | if (!lacon(v, cnfa, cp, ca->co)) | |
490 | continue; /* NOTE CONTINUE */ | |
491 | BSET(d->work, ca->to); | |
492 | dolacons = 1; | |
493 | if (ca->to == cnfa->post) | |
494 | ispost = 1; | |
495 | if (!cnfa->states[ca->to]->co) | |
496 | noprogress = 0; | |
497 | FDEBUG(("%d :> %d\n", i, ca->to)); | |
498 | } | |
499 | } | |
500 | if (!gotstate) | |
501 | return NULL; | |
502 | h = HASH(d->work, d->wordsper); | |
503 | ||
504 | /* next, is that in the cache? */ | |
505 | for (p = d->ssets, i = d->nssused; i > 0; p++, i--) | |
506 | if (HIT(h, d->work, p, d->wordsper)) | |
507 | { | |
508 | FDEBUG(("cached c%d\n", p - d->ssets)); | |
509 | break; /* NOTE BREAK OUT */ | |
510 | } | |
511 | if (i == 0) | |
512 | { /* nope, need a new cache entry */ | |
513 | p = getvacant(v, d, cp, start); | |
514 | assert(p != css); | |
515 | for (i = 0; i < d->wordsper; i++) | |
516 | p->states[i] = d->work[i]; | |
517 | p->hash = h; | |
518 | p->flags = (ispost) ? POSTSTATE : 0; | |
519 | if (noprogress) | |
520 | p->flags |= NOPROGRESS; | |
521 | /* lastseen to be dealt with by caller */ | |
522 | } | |
523 | ||
524 | if (!sawlacons) | |
525 | { /* lookahead conds. always cache miss */ | |
526 | FDEBUG(("c%d[%d]->c%d\n", css - d->ssets, co, p - d->ssets)); | |
527 | css->outs[co] = p; | |
528 | css->inchain[co] = p->ins; | |
529 | p->ins.ss = css; | |
530 | p->ins.co = (color) co; | |
531 | } | |
532 | return p; | |
533 | } | |
534 | ||
535 | /* | |
536 | * lacon - lookahead-constraint checker for miss() | |
537 | */ | |
538 | static int /* predicate: constraint satisfied? */ | |
539 | lacon(struct vars * v, | |
540 | struct cnfa * pcnfa, /* parent cnfa */ | |
541 | chr *cp, | |
542 | pcolor co) /* "color" of the lookahead constraint */ | |
543 | { | |
544 | int n; | |
545 | struct subre *sub; | |
546 | struct dfa *d; | |
547 | struct smalldfa sd; | |
548 | chr *end; | |
549 | ||
550 | n = co - pcnfa->ncolors; | |
551 | assert(n < v->g->nlacons && v->g->lacons != NULL); | |
552 | FDEBUG(("=== testing lacon %d\n", n)); | |
553 | sub = &v->g->lacons[n]; | |
554 | d = newdfa(v, &sub->cnfa, &v->g->cmap, &sd); | |
555 | if (d == NULL) | |
556 | { | |
557 | ERR(REG_ESPACE); | |
558 | return 0; | |
559 | } | |
560 | end = longest(v, d, cp, v->stop, (int *) NULL); | |
561 | freedfa(d); | |
562 | FDEBUG(("=== lacon %d match %d\n", n, (end != NULL))); | |
563 | return (sub->subno) ? (end != NULL) : (end == NULL); | |
564 | } | |
565 | ||
566 | /* | |
567 | * getvacant - get a vacant state set | |
568 | * This routine clears out the inarcs and outarcs, but does not otherwise | |
569 | * clear the innards of the state set -- that's up to the caller. | |
570 | */ | |
571 | static struct sset * | |
572 | getvacant(struct vars * v, /* used only for debug flags */ | |
573 | struct dfa * d, | |
574 | chr *cp, | |
575 | chr *start) | |
576 | { | |
577 | int i; | |
578 | struct sset *ss; | |
579 | struct sset *p; | |
580 | struct arcp ap; | |
581 | struct arcp lastap; | |
582 | color co; | |
583 | ||
584 | ss = pickss(v, d, cp, start); | |
585 | assert(!(ss->flags & LOCKED)); | |
586 | ||
587 | /* clear out its inarcs, including self-referential ones */ | |
588 | ap = ss->ins; | |
589 | while ((p = ap.ss) != NULL) | |
590 | { | |
591 | co = ap.co; | |
592 | FDEBUG(("zapping c%d's %ld outarc\n", p - d->ssets, (long) co)); | |
593 | p->outs[co] = NULL; | |
594 | ap = p->inchain[co]; | |
595 | p->inchain[co].ss = NULL; /* paranoia */ | |
596 | } | |
597 | ss->ins.ss = NULL; | |
598 | ||
599 | /* take it off the inarc chains of the ssets reached by its outarcs */ | |
600 | for (i = 0; i < d->ncolors; i++) | |
601 | { | |
602 | p = ss->outs[i]; | |
603 | assert(p != ss); /* not self-referential */ | |
604 | if (p == NULL) | |
605 | continue; /* NOTE CONTINUE */ | |
606 | FDEBUG(("del outarc %d from c%d's in chn\n", i, p - d->ssets)); | |
607 | if (p->ins.ss == ss && p->ins.co == i) | |
608 | p->ins = ss->inchain[i]; | |
609 | else | |
610 | { | |
611 | assert(p->ins.ss != NULL); | |
612 | for (ap = p->ins; ap.ss != NULL && | |
613 | !(ap.ss == ss && ap.co == i); | |
614 | ap = ap.ss->inchain[ap.co]) | |
615 | lastap = ap; | |
616 | assert(ap.ss != NULL); | |
617 | lastap.ss->inchain[lastap.co] = ss->inchain[i]; | |
618 | } | |
619 | ss->outs[i] = NULL; | |
620 | ss->inchain[i].ss = NULL; | |
621 | } | |
622 | ||
623 | /* if ss was a success state, may need to remember location */ | |
624 | if ((ss->flags & POSTSTATE) && ss->lastseen != d->lastpost && | |
625 | (d->lastpost == NULL || d->lastpost < ss->lastseen)) | |
626 | d->lastpost = ss->lastseen; | |
627 | ||
628 | /* likewise for a no-progress state */ | |
629 | if ((ss->flags & NOPROGRESS) && ss->lastseen != d->lastnopr && | |
630 | (d->lastnopr == NULL || d->lastnopr < ss->lastseen)) | |
631 | d->lastnopr = ss->lastseen; | |
632 | ||
633 | return ss; | |
634 | } | |
635 | ||
636 | /* | |
637 | * pickss - pick the next stateset to be used | |
638 | */ | |
639 | static struct sset * | |
640 | pickss(struct vars * v, /* used only for debug flags */ | |
641 | struct dfa * d, | |
642 | chr *cp, | |
643 | chr *start) | |
644 | { | |
645 | int i; | |
646 | struct sset *ss; | |
647 | struct sset *end; | |
648 | chr *ancient; | |
649 | ||
650 | /* shortcut for cases where cache isn't full */ | |
651 | if (d->nssused < d->nssets) | |
652 | { | |
653 | i = d->nssused; | |
654 | d->nssused++; | |
655 | ss = &d->ssets[i]; | |
656 | FDEBUG(("new c%d\n", i)); | |
657 | /* set up innards */ | |
658 | ss->states = &d->statesarea[i * d->wordsper]; | |
659 | ss->flags = 0; | |
660 | ss->ins.ss = NULL; | |
661 | ss->ins.co = WHITE; /* give it some value */ | |
662 | ss->outs = &d->outsarea[i * d->ncolors]; | |
663 | ss->inchain = &d->incarea[i * d->ncolors]; | |
664 | for (i = 0; i < d->ncolors; i++) | |
665 | { | |
666 | ss->outs[i] = NULL; | |
667 | ss->inchain[i].ss = NULL; | |
668 | } | |
669 | return ss; | |
670 | } | |
671 | ||
672 | /* look for oldest, or old enough anyway */ | |
673 | if (cp - start > d->nssets * 2 / 3) /* oldest 33% are expendable */ | |
674 | ancient = cp - d->nssets * 2 / 3; | |
675 | else | |
676 | ancient = start; | |
677 | for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++) | |
678 | if ((ss->lastseen == NULL || ss->lastseen < ancient) && | |
679 | !(ss->flags & LOCKED)) | |
680 | { | |
681 | d->search = ss + 1; | |
682 | FDEBUG(("replacing c%d\n", ss - d->ssets)); | |
683 | return ss; | |
684 | } | |
685 | for (ss = d->ssets, end = d->search; ss < end; ss++) | |
686 | if ((ss->lastseen == NULL || ss->lastseen < ancient) && | |
687 | !(ss->flags & LOCKED)) | |
688 | { | |
689 | d->search = ss + 1; | |
690 | FDEBUG(("replacing c%d\n", ss - d->ssets)); | |
691 | return ss; | |
692 | } | |
693 | ||
694 | /* nobody's old enough?!? -- something's really wrong */ | |
695 | FDEBUG(("can't find victim to replace!\n")); | |
696 | assert(NOTREACHED); | |
697 | ERR(REG_ASSERT); | |
698 | return d->ssets; | |
699 | } |