/*
 * DFA routines
 * This file is #included by regexec.c.
 *
 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
 *
 * Development of this software was funded, in part, by Cray Research Inc.,
 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
 * Corporation, none of whom are responsible for the results. The author
 * thanks all of them.
 *
 * Redistribution and use in source and binary forms -- with or without
 * modification -- are permitted for any purpose, provided that
 * redistributions in source form retain this entire copyright notice and
 * indicate the origin and nature of any modifications.
 *
 * I'd appreciate being given credit for this package in the documentation
 * of software which uses it, but that is not a requirement.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

33 | /* | |
34 | - longest - longest-preferred matching engine | |
35 | ^ static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *); | |
36 | */ | |
37 | static chr * /* endpoint, or NULL */ | |
38 | longest(v, d, start, stop, hitstopp) | |
39 | struct vars *v; /* used only for debug and exec flags */ | |
40 | struct dfa *d; | |
41 | chr *start; /* where the match should start */ | |
42 | chr *stop; /* match must end at or before here */ | |
43 | int *hitstopp; /* record whether hit v->stop, if non-NULL */ | |
44 | { | |
45 | chr *cp; | |
46 | chr *realstop = (stop == v->stop) ? stop : stop + 1; | |
47 | color co; | |
48 | struct sset *css; | |
49 | struct sset *ss; | |
50 | chr *post; | |
51 | int i; | |
52 | struct colormap *cm = d->cm; | |
53 | ||
54 | /* initialize */ | |
55 | css = initialize(v, d, start); | |
56 | cp = start; | |
57 | if (hitstopp != NULL) | |
58 | *hitstopp = 0; | |
59 | ||
60 | /* startup */ | |
61 | FDEBUG(("+++ startup +++\n")); | |
62 | if (cp == v->start) { | |
63 | co = d->cnfa->bos[(v->eflags®_NOTBOL) ? 0 : 1]; | |
64 | FDEBUG(("color %ld\n", (long)co)); | |
65 | } else { | |
66 | co = GETCOLOR(cm, *(cp - 1)); | |
67 | FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co)); | |
68 | } | |
69 | css = miss(v, d, css, co, cp, start); | |
70 | if (css == NULL) | |
71 | return NULL; | |
72 | css->lastseen = cp; | |
73 | ||
74 | /* main loop */ | |
75 | if (v->eflags®_FTRACE) | |
76 | while (cp < realstop) { | |
77 | FDEBUG(("+++ at c%d +++\n", css - d->ssets)); | |
78 | co = GETCOLOR(cm, *cp); | |
79 | FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co)); | |
80 | ss = css->outs[co]; | |
81 | if (ss == NULL) { | |
82 | ss = miss(v, d, css, co, cp+1, start); | |
83 | if (ss == NULL) | |
84 | break; /* NOTE BREAK OUT */ | |
85 | } | |
86 | cp++; | |
87 | ss->lastseen = cp; | |
88 | css = ss; | |
89 | } | |
90 | else | |
91 | while (cp < realstop) { | |
92 | co = GETCOLOR(cm, *cp); | |
93 | ss = css->outs[co]; | |
94 | if (ss == NULL) { | |
95 | ss = miss(v, d, css, co, cp+1, start); | |
96 | if (ss == NULL) | |
97 | break; /* NOTE BREAK OUT */ | |
98 | } | |
99 | cp++; | |
100 | ss->lastseen = cp; | |
101 | css = ss; | |
102 | } | |
103 | ||
104 | /* shutdown */ | |
105 | FDEBUG(("+++ shutdown at c%d +++\n", css - d->ssets)); | |
106 | if (cp == v->stop && stop == v->stop) { | |
107 | if (hitstopp != NULL) | |
108 | *hitstopp = 1; | |
109 | co = d->cnfa->eos[(v->eflags®_NOTEOL) ? 0 : 1]; | |
110 | FDEBUG(("color %ld\n", (long)co)); | |
111 | ss = miss(v, d, css, co, cp, start); | |
112 | /* special case: match ended at eol? */ | |
113 | if (ss != NULL && (ss->flags&POSTSTATE)) | |
114 | return cp; | |
115 | else if (ss != NULL) | |
116 | ss->lastseen = cp; /* to be tidy */ | |
117 | } | |
118 | ||
119 | /* find last match, if any */ | |
120 | post = d->lastpost; | |
121 | for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--) | |
122 | if ((ss->flags&POSTSTATE) && post != ss->lastseen && | |
123 | (post == NULL || post < ss->lastseen)) | |
124 | post = ss->lastseen; | |
125 | if (post != NULL) /* found one */ | |
126 | return post - 1; | |
127 | ||
128 | return NULL; | |
129 | } | |
130 | ||
131 | /* | |
132 | - shortest - shortest-preferred matching engine | |
133 | ^ static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, | |
134 | ^ chr **, int *); | |
135 | */ | |
136 | static chr * /* endpoint, or NULL */ | |
137 | shortest(v, d, start, min, max, coldp, hitstopp) | |
138 | struct vars *v; | |
139 | struct dfa *d; | |
140 | chr *start; /* where the match should start */ | |
141 | chr *min; /* match must end at or after here */ | |
142 | chr *max; /* match must end at or before here */ | |
143 | chr **coldp; /* store coldstart pointer here, if nonNULL */ | |
144 | int *hitstopp; /* record whether hit v->stop, if non-NULL */ | |
145 | { | |
146 | chr *cp; | |
147 | chr *realmin = (min == v->stop) ? min : min + 1; | |
148 | chr *realmax = (max == v->stop) ? max : max + 1; | |
149 | color co; | |
150 | struct sset *css; | |
151 | struct sset *ss; | |
152 | struct colormap *cm = d->cm; | |
153 | ||
154 | /* initialize */ | |
155 | css = initialize(v, d, start); | |
156 | cp = start; | |
157 | if (hitstopp != NULL) | |
158 | *hitstopp = 0; | |
159 | ||
160 | /* startup */ | |
161 | FDEBUG(("--- startup ---\n")); | |
162 | if (cp == v->start) { | |
163 | co = d->cnfa->bos[(v->eflags®_NOTBOL) ? 0 : 1]; | |
164 | FDEBUG(("color %ld\n", (long)co)); | |
165 | } else { | |
166 | co = GETCOLOR(cm, *(cp - 1)); | |
167 | FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co)); | |
168 | } | |
169 | css = miss(v, d, css, co, cp, start); | |
170 | if (css == NULL) | |
171 | return NULL; | |
172 | css->lastseen = cp; | |
173 | ss = css; | |
174 | ||
175 | /* main loop */ | |
176 | if (v->eflags®_FTRACE) | |
177 | while (cp < realmax) { | |
178 | FDEBUG(("--- at c%d ---\n", css - d->ssets)); | |
179 | co = GETCOLOR(cm, *cp); | |
180 | FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co)); | |
181 | ss = css->outs[co]; | |
182 | if (ss == NULL) { | |
183 | ss = miss(v, d, css, co, cp+1, start); | |
184 | if (ss == NULL) | |
185 | break; /* NOTE BREAK OUT */ | |
186 | } | |
187 | cp++; | |
188 | ss->lastseen = cp; | |
189 | css = ss; | |
190 | if ((ss->flags&POSTSTATE) && cp >= realmin) | |
191 | break; /* NOTE BREAK OUT */ | |
192 | } | |
193 | else | |
194 | while (cp < realmax) { | |
195 | co = GETCOLOR(cm, *cp); | |
196 | ss = css->outs[co]; | |
197 | if (ss == NULL) { | |
198 | ss = miss(v, d, css, co, cp+1, start); | |
199 | if (ss == NULL) | |
200 | break; /* NOTE BREAK OUT */ | |
201 | } | |
202 | cp++; | |
203 | ss->lastseen = cp; | |
204 | css = ss; | |
205 | if ((ss->flags&POSTSTATE) && cp >= realmin) | |
206 | break; /* NOTE BREAK OUT */ | |
207 | } | |
208 | ||
209 | if (ss == NULL) | |
210 | return NULL; | |
211 | ||
212 | if (coldp != NULL) /* report last no-progress state set, if any */ | |
213 | *coldp = lastcold(v, d); | |
214 | ||
215 | if ((ss->flags&POSTSTATE) && cp > min) { | |
216 | assert(cp >= realmin); | |
217 | cp--; | |
218 | } else if (cp == v->stop && max == v->stop) { | |
219 | co = d->cnfa->eos[(v->eflags®_NOTEOL) ? 0 : 1]; | |
220 | FDEBUG(("color %ld\n", (long)co)); | |
221 | ss = miss(v, d, css, co, cp, start); | |
222 | /* match might have ended at eol */ | |
223 | if ((ss == NULL || !(ss->flags&POSTSTATE)) && hitstopp != NULL) | |
224 | *hitstopp = 1; | |
225 | } | |
226 | ||
227 | if (ss == NULL || !(ss->flags&POSTSTATE)) | |
228 | return NULL; | |
229 | ||
230 | return cp; | |
231 | } | |
232 | ||
233 | /* | |
234 | - lastcold - determine last point at which no progress had been made | |
235 | ^ static chr *lastcold(struct vars *, struct dfa *); | |
236 | */ | |
237 | static chr * /* endpoint, or NULL */ | |
238 | lastcold(v, d) | |
239 | struct vars *v; | |
240 | struct dfa *d; | |
241 | { | |
242 | struct sset *ss; | |
243 | chr *nopr; | |
244 | int i; | |
245 | ||
246 | nopr = d->lastnopr; | |
247 | if (nopr == NULL) | |
248 | nopr = v->start; | |
249 | for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--) | |
250 | if ((ss->flags&NOPROGRESS) && nopr < ss->lastseen) | |
251 | nopr = ss->lastseen; | |
252 | return nopr; | |
253 | } | |
254 | ||
255 | /* | |
256 | - newdfa - set up a fresh DFA | |
257 | ^ static struct dfa *newdfa(struct vars *, struct cnfa *, | |
258 | ^ struct colormap *, struct smalldfa *); | |
259 | */ | |
260 | ||
261 | /* FIXME Required for CW 8 on Mac since it's not in limits.h */ | |
262 | #ifndef __CHAR_BIT__ | |
263 | #define __CHAR_BIT__ 8 | |
264 | #endif | |
265 | ||
266 | static struct dfa * | |
267 | newdfa(v, cnfa, cm, small) | |
268 | struct vars *v; | |
269 | struct cnfa *cnfa; | |
270 | struct colormap *cm; | |
271 | struct smalldfa *small; /* preallocated space, may be NULL */ | |
272 | { | |
273 | struct dfa *d; | |
274 | size_t nss = cnfa->nstates * 2; | |
275 | int wordsper = (cnfa->nstates + UBITS - 1) / UBITS; | |
276 | struct smalldfa *smallwas = small; | |
277 | ||
278 | assert(cnfa != NULL && cnfa->nstates != 0); | |
279 | ||
280 | if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS) { | |
281 | assert(wordsper == 1); | |
282 | if (small == NULL) { | |
283 | small = (struct smalldfa *)MALLOC( | |
284 | sizeof(struct smalldfa)); | |
285 | if (small == NULL) { | |
286 | ERR(REG_ESPACE); | |
287 | return NULL; | |
288 | } | |
289 | } | |
290 | d = &small->dfa; | |
291 | d->ssets = small->ssets; | |
292 | d->statesarea = small->statesarea; | |
293 | d->work = &d->statesarea[nss]; | |
294 | d->outsarea = small->outsarea; | |
295 | d->incarea = small->incarea; | |
296 | d->cptsmalloced = 0; | |
297 | d->mallocarea = (smallwas == NULL) ? (char *)small : NULL; | |
298 | } else { | |
299 | d = (struct dfa *)MALLOC(sizeof(struct dfa)); | |
300 | if (d == NULL) { | |
301 | ERR(REG_ESPACE); | |
302 | return NULL; | |
303 | } | |
304 | d->ssets = (struct sset *)MALLOC(nss * sizeof(struct sset)); | |
305 | d->statesarea = (unsigned *)MALLOC((nss+WORK) * wordsper * | |
306 | sizeof(unsigned)); | |
307 | d->work = &d->statesarea[nss * wordsper]; | |
308 | d->outsarea = (struct sset **)MALLOC(nss * cnfa->ncolors * | |
309 | sizeof(struct sset *)); | |
310 | d->incarea = (struct arcp *)MALLOC(nss * cnfa->ncolors * | |
311 | sizeof(struct arcp)); | |
312 | d->cptsmalloced = 1; | |
313 | d->mallocarea = (char *)d; | |
314 | if (d->ssets == NULL || d->statesarea == NULL || | |
315 | d->outsarea == NULL || d->incarea == NULL) { | |
316 | freedfa(d); | |
317 | ERR(REG_ESPACE); | |
318 | return NULL; | |
319 | } | |
320 | } | |
321 | ||
322 | d->nssets = (v->eflags®_SMALL) ? 7 : nss; | |
323 | d->nssused = 0; | |
324 | d->nstates = cnfa->nstates; | |
325 | d->ncolors = cnfa->ncolors; | |
326 | d->wordsper = wordsper; | |
327 | d->cnfa = cnfa; | |
328 | d->cm = cm; | |
329 | d->lastpost = NULL; | |
330 | d->lastnopr = NULL; | |
331 | d->search = d->ssets; | |
332 | ||
333 | /* initialization of sset fields is done as needed */ | |
334 | ||
335 | return d; | |
336 | } | |
337 | ||
338 | /* | |
339 | - freedfa - free a DFA | |
340 | ^ static VOID freedfa(struct dfa *); | |
341 | */ | |
342 | static VOID | |
343 | freedfa(d) | |
344 | struct dfa *d; | |
345 | { | |
346 | if (d->cptsmalloced) { | |
347 | if (d->ssets != NULL) | |
348 | FREE(d->ssets); | |
349 | if (d->statesarea != NULL) | |
350 | FREE(d->statesarea); | |
351 | if (d->outsarea != NULL) | |
352 | FREE(d->outsarea); | |
353 | if (d->incarea != NULL) | |
354 | FREE(d->incarea); | |
355 | } | |
356 | ||
357 | if (d->mallocarea != NULL) | |
358 | FREE(d->mallocarea); | |
359 | } | |
360 | ||
/*
 - hash - construct a hash code for a bitvector
 * There are probably better ways, but they're more expensive.
 ^ static unsigned hash(unsigned *, int);
 * The code is the exclusive-or of the first n words of uv; it is 0 when n
 * is not positive.
 */
static unsigned
hash(uv, n)
unsigned *uv;
int n;
{
	unsigned acc = 0;

	/* fold the words together one at a time */
	while (n-- > 0)
		acc ^= *uv++;
	return acc;
}

380 | /* | |
381 | - initialize - hand-craft a cache entry for startup, otherwise get ready | |
382 | ^ static struct sset *initialize(struct vars *, struct dfa *, chr *); | |
383 | */ | |
384 | static struct sset * | |
385 | initialize(v, d, start) | |
386 | struct vars *v; /* used only for debug flags */ | |
387 | struct dfa *d; | |
388 | chr *start; | |
389 | { | |
390 | struct sset *ss; | |
391 | int i; | |
392 | ||
393 | /* is previous one still there? */ | |
394 | if (d->nssused > 0 && (d->ssets[0].flags&STARTER)) | |
395 | ss = &d->ssets[0]; | |
396 | else { /* no, must (re)build it */ | |
397 | ss = getvacant(v, d, start, start); | |
398 | for (i = 0; i < d->wordsper; i++) | |
399 | ss->states[i] = 0; | |
400 | BSET(ss->states, d->cnfa->pre); | |
401 | ss->hash = HASH(ss->states, d->wordsper); | |
402 | assert(d->cnfa->pre != d->cnfa->post); | |
403 | ss->flags = STARTER|LOCKED|NOPROGRESS; | |
404 | /* lastseen dealt with below */ | |
405 | } | |
406 | ||
407 | for (i = 0; i < d->nssused; i++) | |
408 | d->ssets[i].lastseen = NULL; | |
409 | ss->lastseen = start; /* maybe untrue, but harmless */ | |
410 | d->lastpost = NULL; | |
411 | d->lastnopr = NULL; | |
412 | return ss; | |
413 | } | |
414 | ||
415 | /* | |
416 | - miss - handle a cache miss | |
417 | ^ static struct sset *miss(struct vars *, struct dfa *, struct sset *, | |
418 | ^ pcolor, chr *, chr *); | |
419 | */ | |
420 | static struct sset * /* NULL if goes to empty set */ | |
421 | miss(v, d, css, co, cp, start) | |
422 | struct vars *v; /* used only for debug flags */ | |
423 | struct dfa *d; | |
424 | struct sset *css; | |
425 | pcolor co; | |
426 | chr *cp; /* next chr */ | |
427 | chr *start; /* where the attempt got started */ | |
428 | { | |
429 | struct cnfa *cnfa = d->cnfa; | |
430 | int i; | |
431 | unsigned h; | |
432 | struct carc *ca; | |
433 | struct sset *p; | |
434 | int ispost; | |
435 | int noprogress; | |
436 | int gotstate; | |
437 | int dolacons; | |
438 | int sawlacons; | |
439 | ||
440 | /* for convenience, we can be called even if it might not be a miss */ | |
441 | if (css->outs[co] != NULL) { | |
442 | FDEBUG(("hit\n")); | |
443 | return css->outs[co]; | |
444 | } | |
445 | FDEBUG(("miss\n")); | |
446 | ||
447 | /* first, what set of states would we end up in? */ | |
448 | for (i = 0; i < d->wordsper; i++) | |
449 | d->work[i] = 0; | |
450 | ispost = 0; | |
451 | noprogress = 1; | |
452 | gotstate = 0; | |
453 | for (i = 0; i < d->nstates; i++) | |
454 | if (ISBSET(css->states, i)) | |
455 | for (ca = cnfa->states[i]+1; ca->co != COLORLESS; ca++) | |
456 | if (ca->co == co) { | |
457 | BSET(d->work, ca->to); | |
458 | gotstate = 1; | |
459 | if (ca->to == cnfa->post) | |
460 | ispost = 1; | |
461 | if (!cnfa->states[ca->to]->co) | |
462 | noprogress = 0; | |
463 | FDEBUG(("%d -> %d\n", i, ca->to)); | |
464 | } | |
465 | dolacons = (gotstate) ? (cnfa->flags&HASLACONS) : 0; | |
466 | sawlacons = 0; | |
467 | while (dolacons) { /* transitive closure */ | |
468 | dolacons = 0; | |
469 | for (i = 0; i < d->nstates; i++) | |
470 | if (ISBSET(d->work, i)) | |
471 | for (ca = cnfa->states[i]+1; ca->co != COLORLESS; | |
472 | ca++) { | |
473 | if (ca->co <= cnfa->ncolors) | |
474 | continue; /* NOTE CONTINUE */ | |
475 | sawlacons = 1; | |
476 | if (ISBSET(d->work, ca->to)) | |
477 | continue; /* NOTE CONTINUE */ | |
478 | if (!lacon(v, cnfa, cp, ca->co)) | |
479 | continue; /* NOTE CONTINUE */ | |
480 | BSET(d->work, ca->to); | |
481 | dolacons = 1; | |
482 | if (ca->to == cnfa->post) | |
483 | ispost = 1; | |
484 | if (!cnfa->states[ca->to]->co) | |
485 | noprogress = 0; | |
486 | FDEBUG(("%d :> %d\n", i, ca->to)); | |
487 | } | |
488 | } | |
489 | if (!gotstate) | |
490 | return NULL; | |
491 | h = HASH(d->work, d->wordsper); | |
492 | ||
493 | /* next, is that in the cache? */ | |
494 | for (p = d->ssets, i = d->nssused; i > 0; p++, i--) | |
495 | if (HIT(h, d->work, p, d->wordsper)) { | |
496 | FDEBUG(("cached c%d\n", p - d->ssets)); | |
497 | break; /* NOTE BREAK OUT */ | |
498 | } | |
499 | if (i == 0) { /* nope, need a new cache entry */ | |
500 | p = getvacant(v, d, cp, start); | |
501 | assert(p != css); | |
502 | for (i = 0; i < d->wordsper; i++) | |
503 | p->states[i] = d->work[i]; | |
504 | p->hash = h; | |
505 | p->flags = (ispost) ? POSTSTATE : 0; | |
506 | if (noprogress) | |
507 | p->flags |= NOPROGRESS; | |
508 | /* lastseen to be dealt with by caller */ | |
509 | } | |
510 | ||
511 | if (!sawlacons) { /* lookahead conds. always cache miss */ | |
512 | FDEBUG(("c%d[%d]->c%d\n", css - d->ssets, co, p - d->ssets)); | |
513 | css->outs[co] = p; | |
514 | css->inchain[co] = p->ins; | |
515 | p->ins.ss = css; | |
516 | p->ins.co = (color)co; | |
517 | } | |
518 | return p; | |
519 | } | |
520 | ||
521 | /* | |
522 | - lacon - lookahead-constraint checker for miss() | |
523 | ^ static int lacon(struct vars *, struct cnfa *, chr *, pcolor); | |
524 | */ | |
525 | static int /* predicate: constraint satisfied? */ | |
526 | lacon(v, pcnfa, cp, co) | |
527 | struct vars *v; | |
528 | struct cnfa *pcnfa; /* parent cnfa */ | |
529 | chr *cp; | |
530 | pcolor co; /* "color" of the lookahead constraint */ | |
531 | { | |
532 | int n; | |
533 | struct subre *sub; | |
534 | struct dfa *d; | |
535 | struct smalldfa sd; | |
536 | chr *end; | |
537 | ||
538 | n = co - pcnfa->ncolors; | |
539 | assert(n < v->g->nlacons && v->g->lacons != NULL); | |
540 | FDEBUG(("=== testing lacon %d\n", n)); | |
541 | sub = &v->g->lacons[n]; | |
542 | d = newdfa(v, &sub->cnfa, &v->g->cmap, &sd); | |
543 | if (d == NULL) { | |
544 | ERR(REG_ESPACE); | |
545 | return 0; | |
546 | } | |
547 | end = longest(v, d, cp, v->stop, (int *)NULL); | |
548 | freedfa(d); | |
549 | FDEBUG(("=== lacon %d match %d\n", n, (end != NULL))); | |
550 | return (sub->subno) ? (end != NULL) : (end == NULL); | |
551 | } | |
552 | ||
553 | /* | |
554 | - getvacant - get a vacant state set | |
555 | * This routine clears out the inarcs and outarcs, but does not otherwise | |
556 | * clear the innards of the state set -- that's up to the caller. | |
557 | ^ static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *); | |
558 | */ | |
559 | static struct sset * | |
560 | getvacant(v, d, cp, start) | |
561 | struct vars *v; /* used only for debug flags */ | |
562 | struct dfa *d; | |
563 | chr *cp; | |
564 | chr *start; | |
565 | { | |
566 | int i; | |
567 | struct sset *ss; | |
568 | struct sset *p; | |
569 | struct arcp ap; | |
570 | struct arcp lastap; | |
571 | color co; | |
572 | lastap.ss = NULL; lastap.co = 0; // WX: suppress dummy gcc warnings | |
573 | ss = pickss(v, d, cp, start); | |
574 | assert(!(ss->flags&LOCKED)); | |
575 | ||
576 | /* clear out its inarcs, including self-referential ones */ | |
577 | ap = ss->ins; | |
578 | while ((p = ap.ss) != NULL) { | |
579 | co = ap.co; | |
580 | FDEBUG(("zapping c%d's %ld outarc\n", p - d->ssets, (long)co)); | |
581 | p->outs[co] = NULL; | |
582 | ap = p->inchain[co]; | |
583 | p->inchain[co].ss = NULL; /* paranoia */ | |
584 | } | |
585 | ss->ins.ss = NULL; | |
586 | ||
587 | /* take it off the inarc chains of the ssets reached by its outarcs */ | |
588 | for (i = 0; i < d->ncolors; i++) { | |
589 | p = ss->outs[i]; | |
590 | assert(p != ss); /* not self-referential */ | |
591 | if (p == NULL) | |
592 | continue; /* NOTE CONTINUE */ | |
593 | FDEBUG(("del outarc %d from c%d's in chn\n", i, p - d->ssets)); | |
594 | if (p->ins.ss == ss && p->ins.co == i) | |
595 | p->ins = ss->inchain[i]; | |
596 | else { | |
597 | assert(p->ins.ss != NULL); | |
598 | for (ap = p->ins; ap.ss != NULL && | |
599 | !(ap.ss == ss && ap.co == i); | |
600 | ap = ap.ss->inchain[ap.co]) | |
601 | lastap = ap; | |
602 | assert(ap.ss != NULL); | |
603 | lastap.ss->inchain[lastap.co] = ss->inchain[i]; | |
604 | } | |
605 | ss->outs[i] = NULL; | |
606 | ss->inchain[i].ss = NULL; | |
607 | } | |
608 | ||
609 | /* if ss was a success state, may need to remember location */ | |
610 | if ((ss->flags&POSTSTATE) && ss->lastseen != d->lastpost && | |
611 | (d->lastpost == NULL || d->lastpost < ss->lastseen)) | |
612 | d->lastpost = ss->lastseen; | |
613 | ||
614 | /* likewise for a no-progress state */ | |
615 | if ((ss->flags&NOPROGRESS) && ss->lastseen != d->lastnopr && | |
616 | (d->lastnopr == NULL || d->lastnopr < ss->lastseen)) | |
617 | d->lastnopr = ss->lastseen; | |
618 | ||
619 | return ss; | |
620 | } | |
621 | ||
622 | /* | |
623 | - pickss - pick the next stateset to be used | |
624 | ^ static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *); | |
625 | */ | |
626 | static struct sset * | |
627 | pickss(v, d, cp, start) | |
628 | struct vars *v; /* used only for debug flags */ | |
629 | struct dfa *d; | |
630 | chr *cp; | |
631 | chr *start; | |
632 | { | |
633 | int i; | |
634 | struct sset *ss; | |
635 | struct sset *end; | |
636 | chr *ancient; | |
637 | ||
638 | /* shortcut for cases where cache isn't full */ | |
639 | if (d->nssused < d->nssets) { | |
640 | i = d->nssused; | |
641 | d->nssused++; | |
642 | ss = &d->ssets[i]; | |
643 | FDEBUG(("new c%d\n", i)); | |
644 | /* set up innards */ | |
645 | ss->states = &d->statesarea[i * d->wordsper]; | |
646 | ss->flags = 0; | |
647 | ss->ins.ss = NULL; | |
648 | ss->ins.co = WHITE; /* give it some value */ | |
649 | ss->outs = &d->outsarea[i * d->ncolors]; | |
650 | ss->inchain = &d->incarea[i * d->ncolors]; | |
651 | for (i = 0; i < d->ncolors; i++) { | |
652 | ss->outs[i] = NULL; | |
653 | ss->inchain[i].ss = NULL; | |
654 | } | |
655 | return ss; | |
656 | } | |
657 | ||
658 | /* look for oldest, or old enough anyway */ | |
659 | if (cp - start > d->nssets*2/3) /* oldest 33% are expendable */ | |
660 | ancient = cp - d->nssets*2/3; | |
661 | else | |
662 | ancient = start; | |
663 | for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++) | |
664 | if ((ss->lastseen == NULL || ss->lastseen < ancient) && | |
665 | !(ss->flags&LOCKED)) { | |
666 | d->search = ss + 1; | |
667 | FDEBUG(("replacing c%d\n", ss - d->ssets)); | |
668 | return ss; | |
669 | } | |
670 | for (ss = d->ssets, end = d->search; ss < end; ss++) | |
671 | if ((ss->lastseen == NULL || ss->lastseen < ancient) && | |
672 | !(ss->flags&LOCKED)) { | |
673 | d->search = ss + 1; | |
674 | FDEBUG(("replacing c%d\n", ss - d->ssets)); | |
675 | return ss; | |
676 | } | |
677 | ||
678 | /* nobody's old enough?!? -- something's really wrong */ | |
679 | FDEBUG(("can't find victim to replace!\n")); | |
680 | assert(NOTREACHED); | |
681 | ERR(REG_ASSERT); | |
682 | return d->ssets; | |
683 | } |