]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * DFA routines | |
3 | * This file is #included by regexec.c. | |
4 | * | |
5 | * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. | |
6 | * | |
7 | * Development of this software was funded, in part, by Cray Research Inc., | |
8 | * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics | |
9 | * Corporation, none of whom are responsible for the results. The author | |
10 | * thanks all of them. | |
11 | * | |
12 | * Redistribution and use in source and binary forms -- with or without | |
13 | * modification -- are permitted for any purpose, provided that | |
14 | * redistributions in source form retain this entire copyright notice and | |
15 | * indicate the origin and nature of any modifications. | |
16 | * | |
17 | * I'd appreciate being given credit for this package in the documentation | |
18 | * of software which uses it, but that is not a requirement. | |
19 | * | |
20 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, | |
21 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY | |
22 | * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL | |
23 | * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
24 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
25 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; | |
26 | * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | |
27 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | |
28 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF | |
29 | * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
30 | * | |
31 | */ | |
32 | ||
33 | /* | |
34 | - longest - longest-preferred matching engine | |
35 | ^ static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *); | |
36 | */ | |
37 | static chr * /* endpoint, or NULL */ | |
38 | longest(v, d, start, stop, hitstopp) | |
39 | struct vars *v; /* used only for debug and exec flags */ | |
40 | struct dfa *d; | |
41 | chr *start; /* where the match should start */ | |
42 | chr *stop; /* match must end at or before here */ | |
43 | int *hitstopp; /* record whether hit v->stop, if non-NULL */ | |
44 | { | |
45 | chr *cp; | |
46 | chr *realstop = (stop == v->stop) ? stop : stop + 1; | |
47 | color co; | |
48 | struct sset *css; | |
49 | struct sset *ss; | |
50 | chr *post; | |
51 | int i; | |
52 | struct colormap *cm = d->cm; | |
53 | ||
54 | /* initialize */ | |
55 | css = initialize(v, d, start); | |
56 | cp = start; | |
57 | if (hitstopp != NULL) | |
58 | *hitstopp = 0; | |
59 | ||
60 | /* startup */ | |
61 | FDEBUG(("+++ startup +++\n")); | |
62 | if (cp == v->start) { | |
63 | co = d->cnfa->bos[(v->eflags®_NOTBOL) ? 0 : 1]; | |
64 | FDEBUG(("color %ld\n", (long)co)); | |
65 | } else { | |
66 | co = GETCOLOR(cm, *(cp - 1)); | |
67 | FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co)); | |
68 | } | |
69 | css = miss(v, d, css, co, cp, start); | |
70 | if (css == NULL) | |
71 | return NULL; | |
72 | css->lastseen = cp; | |
73 | ||
74 | /* main loop */ | |
75 | if (v->eflags®_FTRACE) | |
76 | while (cp < realstop) { | |
77 | FDEBUG(("+++ at c%d +++\n", css - d->ssets)); | |
78 | co = GETCOLOR(cm, *cp); | |
79 | FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co)); | |
80 | ss = css->outs[co]; | |
81 | if (ss == NULL) { | |
82 | ss = miss(v, d, css, co, cp+1, start); | |
83 | if (ss == NULL) | |
84 | break; /* NOTE BREAK OUT */ | |
85 | } | |
86 | cp++; | |
87 | ss->lastseen = cp; | |
88 | css = ss; | |
89 | } | |
90 | else | |
91 | while (cp < realstop) { | |
92 | co = GETCOLOR(cm, *cp); | |
93 | ss = css->outs[co]; | |
94 | if (ss == NULL) { | |
95 | ss = miss(v, d, css, co, cp+1, start); | |
96 | if (ss == NULL) | |
97 | break; /* NOTE BREAK OUT */ | |
98 | } | |
99 | cp++; | |
100 | ss->lastseen = cp; | |
101 | css = ss; | |
102 | } | |
103 | ||
104 | /* shutdown */ | |
105 | FDEBUG(("+++ shutdown at c%d +++\n", css - d->ssets)); | |
106 | if (cp == v->stop && stop == v->stop) { | |
107 | if (hitstopp != NULL) | |
108 | *hitstopp = 1; | |
109 | co = d->cnfa->eos[(v->eflags®_NOTEOL) ? 0 : 1]; | |
110 | FDEBUG(("color %ld\n", (long)co)); | |
111 | ss = miss(v, d, css, co, cp, start); | |
112 | /* special case: match ended at eol? */ | |
113 | if (ss != NULL && (ss->flags&POSTSTATE)) | |
114 | return cp; | |
115 | else if (ss != NULL) | |
116 | ss->lastseen = cp; /* to be tidy */ | |
117 | } | |
118 | ||
119 | /* find last match, if any */ | |
120 | post = d->lastpost; | |
121 | for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--) | |
122 | if ((ss->flags&POSTSTATE) && post != ss->lastseen && | |
123 | (post == NULL || post < ss->lastseen)) | |
124 | post = ss->lastseen; | |
125 | if (post != NULL) /* found one */ | |
126 | return post - 1; | |
127 | ||
128 | return NULL; | |
129 | } | |
130 | ||
131 | /* | |
132 | - shortest - shortest-preferred matching engine | |
133 | ^ static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, | |
134 | ^ chr **, int *); | |
135 | */ | |
136 | static chr * /* endpoint, or NULL */ | |
137 | shortest(v, d, start, min, max, coldp, hitstopp) | |
138 | struct vars *v; | |
139 | struct dfa *d; | |
140 | chr *start; /* where the match should start */ | |
141 | chr *min; /* match must end at or after here */ | |
142 | chr *max; /* match must end at or before here */ | |
143 | chr **coldp; /* store coldstart pointer here, if nonNULL */ | |
144 | int *hitstopp; /* record whether hit v->stop, if non-NULL */ | |
145 | { | |
146 | chr *cp; | |
147 | chr *realmin = (min == v->stop) ? min : min + 1; | |
148 | chr *realmax = (max == v->stop) ? max : max + 1; | |
149 | color co; | |
150 | struct sset *css; | |
151 | struct sset *ss; | |
152 | struct colormap *cm = d->cm; | |
153 | ||
154 | /* initialize */ | |
155 | css = initialize(v, d, start); | |
156 | cp = start; | |
157 | if (hitstopp != NULL) | |
158 | *hitstopp = 0; | |
159 | ||
160 | /* startup */ | |
161 | FDEBUG(("--- startup ---\n")); | |
162 | if (cp == v->start) { | |
163 | co = d->cnfa->bos[(v->eflags®_NOTBOL) ? 0 : 1]; | |
164 | FDEBUG(("color %ld\n", (long)co)); | |
165 | } else { | |
166 | co = GETCOLOR(cm, *(cp - 1)); | |
167 | FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co)); | |
168 | } | |
169 | css = miss(v, d, css, co, cp, start); | |
170 | if (css == NULL) | |
171 | return NULL; | |
172 | css->lastseen = cp; | |
173 | ss = css; | |
174 | ||
175 | /* main loop */ | |
176 | if (v->eflags®_FTRACE) | |
177 | while (cp < realmax) { | |
178 | FDEBUG(("--- at c%d ---\n", css - d->ssets)); | |
179 | co = GETCOLOR(cm, *cp); | |
180 | FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co)); | |
181 | ss = css->outs[co]; | |
182 | if (ss == NULL) { | |
183 | ss = miss(v, d, css, co, cp+1, start); | |
184 | if (ss == NULL) | |
185 | break; /* NOTE BREAK OUT */ | |
186 | } | |
187 | cp++; | |
188 | ss->lastseen = cp; | |
189 | css = ss; | |
190 | if ((ss->flags&POSTSTATE) && cp >= realmin) | |
191 | break; /* NOTE BREAK OUT */ | |
192 | } | |
193 | else | |
194 | while (cp < realmax) { | |
195 | co = GETCOLOR(cm, *cp); | |
196 | ss = css->outs[co]; | |
197 | if (ss == NULL) { | |
198 | ss = miss(v, d, css, co, cp+1, start); | |
199 | if (ss == NULL) | |
200 | break; /* NOTE BREAK OUT */ | |
201 | } | |
202 | cp++; | |
203 | ss->lastseen = cp; | |
204 | css = ss; | |
205 | if ((ss->flags&POSTSTATE) && cp >= realmin) | |
206 | break; /* NOTE BREAK OUT */ | |
207 | } | |
208 | ||
209 | if (ss == NULL) | |
210 | return NULL; | |
211 | ||
212 | if (coldp != NULL) /* report last no-progress state set, if any */ | |
213 | *coldp = lastcold(v, d); | |
214 | ||
215 | if ((ss->flags&POSTSTATE) && cp > min) { | |
216 | assert(cp >= realmin); | |
217 | cp--; | |
218 | } else if (cp == v->stop && max == v->stop) { | |
219 | co = d->cnfa->eos[(v->eflags®_NOTEOL) ? 0 : 1]; | |
220 | FDEBUG(("color %ld\n", (long)co)); | |
221 | ss = miss(v, d, css, co, cp, start); | |
222 | /* match might have ended at eol */ | |
223 | if ((ss == NULL || !(ss->flags&POSTSTATE)) && hitstopp != NULL) | |
224 | *hitstopp = 1; | |
225 | } | |
226 | ||
227 | if (ss == NULL || !(ss->flags&POSTSTATE)) | |
228 | return NULL; | |
229 | ||
230 | return cp; | |
231 | } | |
232 | ||
233 | /* | |
234 | - lastcold - determine last point at which no progress had been made | |
235 | ^ static chr *lastcold(struct vars *, struct dfa *); | |
236 | */ | |
237 | static chr * /* endpoint, or NULL */ | |
238 | lastcold(v, d) | |
239 | struct vars *v; | |
240 | struct dfa *d; | |
241 | { | |
242 | struct sset *ss; | |
243 | chr *nopr; | |
244 | int i; | |
245 | ||
246 | nopr = d->lastnopr; | |
247 | if (nopr == NULL) | |
248 | nopr = v->start; | |
249 | for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--) | |
250 | if ((ss->flags&NOPROGRESS) && nopr < ss->lastseen) | |
251 | nopr = ss->lastseen; | |
252 | return nopr; | |
253 | } | |
254 | ||
255 | /* | |
256 | - newdfa - set up a fresh DFA | |
257 | ^ static struct dfa *newdfa(struct vars *, struct cnfa *, | |
258 | ^ struct colormap *, struct smalldfa *); | |
259 | */ | |
260 | static struct dfa * | |
261 | newdfa(v, cnfa, cm, small) | |
262 | struct vars *v; | |
263 | struct cnfa *cnfa; | |
264 | struct colormap *cm; | |
265 | struct smalldfa *small; /* preallocated space, may be NULL */ | |
266 | { | |
267 | struct dfa *d; | |
268 | size_t nss = cnfa->nstates * 2; | |
269 | int wordsper = (cnfa->nstates + UBITS - 1) / UBITS; | |
270 | struct smalldfa *smallwas = small; | |
271 | ||
272 | assert(cnfa != NULL && cnfa->nstates != 0); | |
273 | ||
274 | if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS) { | |
275 | assert(wordsper == 1); | |
276 | if (small == NULL) { | |
277 | small = (struct smalldfa *)MALLOC( | |
278 | sizeof(struct smalldfa)); | |
279 | if (small == NULL) { | |
280 | ERR(REG_ESPACE); | |
281 | return NULL; | |
282 | } | |
283 | } | |
284 | d = &small->dfa; | |
285 | d->ssets = small->ssets; | |
286 | d->statesarea = small->statesarea; | |
287 | d->work = &d->statesarea[nss]; | |
288 | d->outsarea = small->outsarea; | |
289 | d->incarea = small->incarea; | |
290 | d->cptsmalloced = 0; | |
291 | d->mallocarea = (smallwas == NULL) ? (char *)small : NULL; | |
292 | } else { | |
293 | d = (struct dfa *)MALLOC(sizeof(struct dfa)); | |
294 | if (d == NULL) { | |
295 | ERR(REG_ESPACE); | |
296 | return NULL; | |
297 | } | |
298 | d->ssets = (struct sset *)MALLOC(nss * sizeof(struct sset)); | |
299 | d->statesarea = (unsigned *)MALLOC((nss+WORK) * wordsper * | |
300 | sizeof(unsigned)); | |
301 | d->work = &d->statesarea[nss * wordsper]; | |
302 | d->outsarea = (struct sset **)MALLOC(nss * cnfa->ncolors * | |
303 | sizeof(struct sset *)); | |
304 | d->incarea = (struct arcp *)MALLOC(nss * cnfa->ncolors * | |
305 | sizeof(struct arcp)); | |
306 | d->cptsmalloced = 1; | |
307 | d->mallocarea = (char *)d; | |
308 | if (d->ssets == NULL || d->statesarea == NULL || | |
309 | d->outsarea == NULL || d->incarea == NULL) { | |
310 | freedfa(d); | |
311 | ERR(REG_ESPACE); | |
312 | return NULL; | |
313 | } | |
314 | } | |
315 | ||
316 | d->nssets = (v->eflags®_SMALL) ? 7 : nss; | |
317 | d->nssused = 0; | |
318 | d->nstates = cnfa->nstates; | |
319 | d->ncolors = cnfa->ncolors; | |
320 | d->wordsper = wordsper; | |
321 | d->cnfa = cnfa; | |
322 | d->cm = cm; | |
323 | d->lastpost = NULL; | |
324 | d->lastnopr = NULL; | |
325 | d->search = d->ssets; | |
326 | ||
327 | /* initialization of sset fields is done as needed */ | |
328 | ||
329 | return d; | |
330 | } | |
331 | ||
332 | /* | |
333 | - freedfa - free a DFA | |
334 | ^ static VOID freedfa(struct dfa *); | |
335 | */ | |
336 | static VOID | |
337 | freedfa(d) | |
338 | struct dfa *d; | |
339 | { | |
340 | if (d->cptsmalloced) { | |
341 | if (d->ssets != NULL) | |
342 | FREE(d->ssets); | |
343 | if (d->statesarea != NULL) | |
344 | FREE(d->statesarea); | |
345 | if (d->outsarea != NULL) | |
346 | FREE(d->outsarea); | |
347 | if (d->incarea != NULL) | |
348 | FREE(d->incarea); | |
349 | } | |
350 | ||
351 | if (d->mallocarea != NULL) | |
352 | FREE(d->mallocarea); | |
353 | } | |
354 | ||
/*
 - hash - construct a hash code for a bitvector
 * There are probably better ways, but they're more expensive.
 * The code is the exclusive-or of the first n words of uv; it is 0 when
 * n is not positive.
 ^ static unsigned hash(unsigned *, int);
 */
static unsigned
hash(uv, n)
unsigned *uv;			/* the bitvector's words */
int n;				/* how many words to fold in */
{
	int i;
	unsigned h;

	h = 0;
	for (i = 0; i < n; i++)
		h ^= uv[i];
	return h;
}
373 | ||
374 | /* | |
375 | - initialize - hand-craft a cache entry for startup, otherwise get ready | |
376 | ^ static struct sset *initialize(struct vars *, struct dfa *, chr *); | |
377 | */ | |
378 | static struct sset * | |
379 | initialize(v, d, start) | |
380 | struct vars *v; /* used only for debug flags */ | |
381 | struct dfa *d; | |
382 | chr *start; | |
383 | { | |
384 | struct sset *ss; | |
385 | int i; | |
386 | ||
387 | /* is previous one still there? */ | |
388 | if (d->nssused > 0 && (d->ssets[0].flags&STARTER)) | |
389 | ss = &d->ssets[0]; | |
390 | else { /* no, must (re)build it */ | |
391 | ss = getvacant(v, d, start, start); | |
392 | for (i = 0; i < d->wordsper; i++) | |
393 | ss->states[i] = 0; | |
394 | BSET(ss->states, d->cnfa->pre); | |
395 | ss->hash = HASH(ss->states, d->wordsper); | |
396 | assert(d->cnfa->pre != d->cnfa->post); | |
397 | ss->flags = STARTER|LOCKED|NOPROGRESS; | |
398 | /* lastseen dealt with below */ | |
399 | } | |
400 | ||
401 | for (i = 0; i < d->nssused; i++) | |
402 | d->ssets[i].lastseen = NULL; | |
403 | ss->lastseen = start; /* maybe untrue, but harmless */ | |
404 | d->lastpost = NULL; | |
405 | d->lastnopr = NULL; | |
406 | return ss; | |
407 | } | |
408 | ||
409 | /* | |
410 | - miss - handle a cache miss | |
411 | ^ static struct sset *miss(struct vars *, struct dfa *, struct sset *, | |
412 | ^ pcolor, chr *, chr *); | |
413 | */ | |
414 | static struct sset * /* NULL if goes to empty set */ | |
415 | miss(v, d, css, co, cp, start) | |
416 | struct vars *v; /* used only for debug flags */ | |
417 | struct dfa *d; | |
418 | struct sset *css; | |
419 | pcolor co; | |
420 | chr *cp; /* next chr */ | |
421 | chr *start; /* where the attempt got started */ | |
422 | { | |
423 | struct cnfa *cnfa = d->cnfa; | |
424 | int i; | |
425 | unsigned h; | |
426 | struct carc *ca; | |
427 | struct sset *p; | |
428 | int ispost; | |
429 | int noprogress; | |
430 | int gotstate; | |
431 | int dolacons; | |
432 | int sawlacons; | |
433 | ||
434 | /* for convenience, we can be called even if it might not be a miss */ | |
435 | if (css->outs[co] != NULL) { | |
436 | FDEBUG(("hit\n")); | |
437 | return css->outs[co]; | |
438 | } | |
439 | FDEBUG(("miss\n")); | |
440 | ||
441 | /* first, what set of states would we end up in? */ | |
442 | for (i = 0; i < d->wordsper; i++) | |
443 | d->work[i] = 0; | |
444 | ispost = 0; | |
445 | noprogress = 1; | |
446 | gotstate = 0; | |
447 | for (i = 0; i < d->nstates; i++) | |
448 | if (ISBSET(css->states, i)) | |
449 | for (ca = cnfa->states[i]+1; ca->co != COLORLESS; ca++) | |
450 | if (ca->co == co) { | |
451 | BSET(d->work, ca->to); | |
452 | gotstate = 1; | |
453 | if (ca->to == cnfa->post) | |
454 | ispost = 1; | |
455 | if (!cnfa->states[ca->to]->co) | |
456 | noprogress = 0; | |
457 | FDEBUG(("%d -> %d\n", i, ca->to)); | |
458 | } | |
459 | dolacons = (gotstate) ? (cnfa->flags&HASLACONS) : 0; | |
460 | sawlacons = 0; | |
461 | while (dolacons) { /* transitive closure */ | |
462 | dolacons = 0; | |
463 | for (i = 0; i < d->nstates; i++) | |
464 | if (ISBSET(d->work, i)) | |
465 | for (ca = cnfa->states[i]+1; ca->co != COLORLESS; | |
466 | ca++) { | |
467 | if (ca->co <= cnfa->ncolors) | |
468 | continue; /* NOTE CONTINUE */ | |
469 | sawlacons = 1; | |
470 | if (ISBSET(d->work, ca->to)) | |
471 | continue; /* NOTE CONTINUE */ | |
472 | if (!lacon(v, cnfa, cp, ca->co)) | |
473 | continue; /* NOTE CONTINUE */ | |
474 | BSET(d->work, ca->to); | |
475 | dolacons = 1; | |
476 | if (ca->to == cnfa->post) | |
477 | ispost = 1; | |
478 | if (!cnfa->states[ca->to]->co) | |
479 | noprogress = 0; | |
480 | FDEBUG(("%d :> %d\n", i, ca->to)); | |
481 | } | |
482 | } | |
483 | if (!gotstate) | |
484 | return NULL; | |
485 | h = HASH(d->work, d->wordsper); | |
486 | ||
487 | /* next, is that in the cache? */ | |
488 | for (p = d->ssets, i = d->nssused; i > 0; p++, i--) | |
489 | if (HIT(h, d->work, p, d->wordsper)) { | |
490 | FDEBUG(("cached c%d\n", p - d->ssets)); | |
491 | break; /* NOTE BREAK OUT */ | |
492 | } | |
493 | if (i == 0) { /* nope, need a new cache entry */ | |
494 | p = getvacant(v, d, cp, start); | |
495 | assert(p != css); | |
496 | for (i = 0; i < d->wordsper; i++) | |
497 | p->states[i] = d->work[i]; | |
498 | p->hash = h; | |
499 | p->flags = (ispost) ? POSTSTATE : 0; | |
500 | if (noprogress) | |
501 | p->flags |= NOPROGRESS; | |
502 | /* lastseen to be dealt with by caller */ | |
503 | } | |
504 | ||
505 | if (!sawlacons) { /* lookahead conds. always cache miss */ | |
506 | FDEBUG(("c%d[%d]->c%d\n", css - d->ssets, co, p - d->ssets)); | |
507 | css->outs[co] = p; | |
508 | css->inchain[co] = p->ins; | |
509 | p->ins.ss = css; | |
510 | p->ins.co = (color)co; | |
511 | } | |
512 | return p; | |
513 | } | |
514 | ||
515 | /* | |
516 | - lacon - lookahead-constraint checker for miss() | |
517 | ^ static int lacon(struct vars *, struct cnfa *, chr *, pcolor); | |
518 | */ | |
519 | static int /* predicate: constraint satisfied? */ | |
520 | lacon(v, pcnfa, cp, co) | |
521 | struct vars *v; | |
522 | struct cnfa *pcnfa; /* parent cnfa */ | |
523 | chr *cp; | |
524 | pcolor co; /* "color" of the lookahead constraint */ | |
525 | { | |
526 | int n; | |
527 | struct subre *sub; | |
528 | struct dfa *d; | |
529 | struct smalldfa sd; | |
530 | chr *end; | |
531 | ||
532 | n = co - pcnfa->ncolors; | |
533 | assert(n < v->g->nlacons && v->g->lacons != NULL); | |
534 | FDEBUG(("=== testing lacon %d\n", n)); | |
535 | sub = &v->g->lacons[n]; | |
536 | d = newdfa(v, &sub->cnfa, &v->g->cmap, &sd); | |
537 | if (d == NULL) { | |
538 | ERR(REG_ESPACE); | |
539 | return 0; | |
540 | } | |
541 | end = longest(v, d, cp, v->stop, (int *)NULL); | |
542 | freedfa(d); | |
543 | FDEBUG(("=== lacon %d match %d\n", n, (end != NULL))); | |
544 | return (sub->subno) ? (end != NULL) : (end == NULL); | |
545 | } | |
546 | ||
547 | /* | |
548 | - getvacant - get a vacant state set | |
549 | * This routine clears out the inarcs and outarcs, but does not otherwise | |
550 | * clear the innards of the state set -- that's up to the caller. | |
551 | ^ static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *); | |
552 | */ | |
553 | static struct sset * | |
554 | getvacant(v, d, cp, start) | |
555 | struct vars *v; /* used only for debug flags */ | |
556 | struct dfa *d; | |
557 | chr *cp; | |
558 | chr *start; | |
559 | { | |
560 | int i; | |
561 | struct sset *ss; | |
562 | struct sset *p; | |
563 | struct arcp ap; | |
564 | struct arcp lastap; | |
565 | color co; | |
566 | ||
567 | ss = pickss(v, d, cp, start); | |
568 | assert(!(ss->flags&LOCKED)); | |
569 | ||
570 | /* clear out its inarcs, including self-referential ones */ | |
571 | ap = ss->ins; | |
572 | while ((p = ap.ss) != NULL) { | |
573 | co = ap.co; | |
574 | FDEBUG(("zapping c%d's %ld outarc\n", p - d->ssets, (long)co)); | |
575 | p->outs[co] = NULL; | |
576 | ap = p->inchain[co]; | |
577 | p->inchain[co].ss = NULL; /* paranoia */ | |
578 | } | |
579 | ss->ins.ss = NULL; | |
580 | ||
581 | /* take it off the inarc chains of the ssets reached by its outarcs */ | |
582 | for (i = 0; i < d->ncolors; i++) { | |
583 | p = ss->outs[i]; | |
584 | assert(p != ss); /* not self-referential */ | |
585 | if (p == NULL) | |
586 | continue; /* NOTE CONTINUE */ | |
587 | FDEBUG(("del outarc %d from c%d's in chn\n", i, p - d->ssets)); | |
588 | if (p->ins.ss == ss && p->ins.co == i) | |
589 | p->ins = ss->inchain[i]; | |
590 | else { | |
591 | assert(p->ins.ss != NULL); | |
592 | for (ap = p->ins; ap.ss != NULL && | |
593 | !(ap.ss == ss && ap.co == i); | |
594 | ap = ap.ss->inchain[ap.co]) | |
595 | lastap = ap; | |
596 | assert(ap.ss != NULL); | |
597 | lastap.ss->inchain[lastap.co] = ss->inchain[i]; | |
598 | } | |
599 | ss->outs[i] = NULL; | |
600 | ss->inchain[i].ss = NULL; | |
601 | } | |
602 | ||
603 | /* if ss was a success state, may need to remember location */ | |
604 | if ((ss->flags&POSTSTATE) && ss->lastseen != d->lastpost && | |
605 | (d->lastpost == NULL || d->lastpost < ss->lastseen)) | |
606 | d->lastpost = ss->lastseen; | |
607 | ||
608 | /* likewise for a no-progress state */ | |
609 | if ((ss->flags&NOPROGRESS) && ss->lastseen != d->lastnopr && | |
610 | (d->lastnopr == NULL || d->lastnopr < ss->lastseen)) | |
611 | d->lastnopr = ss->lastseen; | |
612 | ||
613 | return ss; | |
614 | } | |
615 | ||
616 | /* | |
617 | - pickss - pick the next stateset to be used | |
618 | ^ static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *); | |
619 | */ | |
620 | static struct sset * | |
621 | pickss(v, d, cp, start) | |
622 | struct vars *v; /* used only for debug flags */ | |
623 | struct dfa *d; | |
624 | chr *cp; | |
625 | chr *start; | |
626 | { | |
627 | int i; | |
628 | struct sset *ss; | |
629 | struct sset *end; | |
630 | chr *ancient; | |
631 | ||
632 | /* shortcut for cases where cache isn't full */ | |
633 | if (d->nssused < d->nssets) { | |
634 | i = d->nssused; | |
635 | d->nssused++; | |
636 | ss = &d->ssets[i]; | |
637 | FDEBUG(("new c%d\n", i)); | |
638 | /* set up innards */ | |
639 | ss->states = &d->statesarea[i * d->wordsper]; | |
640 | ss->flags = 0; | |
641 | ss->ins.ss = NULL; | |
642 | ss->ins.co = WHITE; /* give it some value */ | |
643 | ss->outs = &d->outsarea[i * d->ncolors]; | |
644 | ss->inchain = &d->incarea[i * d->ncolors]; | |
645 | for (i = 0; i < d->ncolors; i++) { | |
646 | ss->outs[i] = NULL; | |
647 | ss->inchain[i].ss = NULL; | |
648 | } | |
649 | return ss; | |
650 | } | |
651 | ||
652 | /* look for oldest, or old enough anyway */ | |
653 | if (cp - start > d->nssets*2/3) /* oldest 33% are expendable */ | |
654 | ancient = cp - d->nssets*2/3; | |
655 | else | |
656 | ancient = start; | |
657 | for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++) | |
658 | if ((ss->lastseen == NULL || ss->lastseen < ancient) && | |
659 | !(ss->flags&LOCKED)) { | |
660 | d->search = ss + 1; | |
661 | FDEBUG(("replacing c%d\n", ss - d->ssets)); | |
662 | return ss; | |
663 | } | |
664 | for (ss = d->ssets, end = d->search; ss < end; ss++) | |
665 | if ((ss->lastseen == NULL || ss->lastseen < ancient) && | |
666 | !(ss->flags&LOCKED)) { | |
667 | d->search = ss + 1; | |
668 | FDEBUG(("replacing c%d\n", ss - d->ssets)); | |
669 | return ss; | |
670 | } | |
671 | ||
672 | /* nobody's old enough?!? -- something's really wrong */ | |
673 | FDEBUG(("can't find victim to replace!\n")); | |
674 | assert(NOTREACHED); | |
675 | ERR(REG_ASSERT); | |
676 | return d->ssets; | |
677 | } |