Commit | Line | Data |
---|---|---|
48bf5def RN |
1 | /* |
2 | * DFA routines | |
3 | * This file is #included by regexec.c. | |
4 | * | |
3ca4086b VS |
5 | * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. |
6 | * | |
48bf5def RN |
7 | * Development of this software was funded, in part, by Cray Research Inc., |
8 | * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics | |
9 | * Corporation, none of whom are responsible for the results. The author | |
3ca4086b VS |
10 | * thanks all of them. |
11 | * | |
48bf5def RN |
12 | * Redistribution and use in source and binary forms -- with or without |
13 | * modification -- are permitted for any purpose, provided that | |
14 | * redistributions in source form retain this entire copyright notice and | |
15 | * indicate the origin and nature of any modifications. | |
3ca4086b | 16 | * |
48bf5def RN |
17 | * I'd appreciate being given credit for this package in the documentation |
18 | * of software which uses it, but that is not a requirement. | |
3ca4086b | 19 | * |
48bf5def RN |
20 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, |
21 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY | |
22 | * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL | |
23 | * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
24 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
25 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; | |
26 | * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | |
27 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | |
28 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF | |
29 | * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
30 | * | |
48bf5def RN |
31 | */ |
32 | ||
33 | /* | |
3ca4086b VS |
34 | - longest - longest-preferred matching engine |
35 | ^ static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *); | |
48bf5def | 36 | */ |
3ca4086b VS |
37 | static chr * /* endpoint, or NULL */ |
38 | longest(v, d, start, stop, hitstopp) | |
39 | struct vars *v; /* used only for debug and exec flags */ | |
40 | struct dfa *d; | |
41 | chr *start; /* where the match should start */ | |
42 | chr *stop; /* match must end at or before here */ | |
43 | int *hitstopp; /* record whether hit v->stop, if non-NULL */ | |
48bf5def | 44 | { |
3ca4086b VS |
45 | chr *cp; |
46 | chr *realstop = (stop == v->stop) ? stop : stop + 1; | |
47 | color co; | |
48bf5def RN |
48 | struct sset *css; |
49 | struct sset *ss; | |
3ca4086b VS |
50 | chr *post; |
51 | int i; | |
48bf5def RN |
52 | struct colormap *cm = d->cm; |
53 | ||
54 | /* initialize */ | |
55 | css = initialize(v, d, start); | |
56 | cp = start; | |
57 | if (hitstopp != NULL) | |
58 | *hitstopp = 0; | |
59 | ||
60 | /* startup */ | |
61 | FDEBUG(("+++ startup +++\n")); | |
3ca4086b VS |
62 | if (cp == v->start) { |
63 | co = d->cnfa->bos[(v->eflags®_NOTBOL) ? 0 : 1]; | |
64 | FDEBUG(("color %ld\n", (long)co)); | |
65 | } else { | |
48bf5def | 66 | co = GETCOLOR(cm, *(cp - 1)); |
3ca4086b | 67 | FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co)); |
48bf5def RN |
68 | } |
69 | css = miss(v, d, css, co, cp, start); | |
70 | if (css == NULL) | |
71 | return NULL; | |
72 | css->lastseen = cp; | |
73 | ||
74 | /* main loop */ | |
3ca4086b VS |
75 | if (v->eflags®_FTRACE) |
76 | while (cp < realstop) { | |
48bf5def RN |
77 | FDEBUG(("+++ at c%d +++\n", css - d->ssets)); |
78 | co = GETCOLOR(cm, *cp); | |
3ca4086b | 79 | FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co)); |
48bf5def | 80 | ss = css->outs[co]; |
3ca4086b VS |
81 | if (ss == NULL) { |
82 | ss = miss(v, d, css, co, cp+1, start); | |
48bf5def | 83 | if (ss == NULL) |
3ca4086b | 84 | break; /* NOTE BREAK OUT */ |
48bf5def RN |
85 | } |
86 | cp++; | |
87 | ss->lastseen = cp; | |
88 | css = ss; | |
89 | } | |
90 | else | |
3ca4086b | 91 | while (cp < realstop) { |
48bf5def RN |
92 | co = GETCOLOR(cm, *cp); |
93 | ss = css->outs[co]; | |
3ca4086b VS |
94 | if (ss == NULL) { |
95 | ss = miss(v, d, css, co, cp+1, start); | |
48bf5def | 96 | if (ss == NULL) |
3ca4086b | 97 | break; /* NOTE BREAK OUT */ |
48bf5def RN |
98 | } |
99 | cp++; | |
100 | ss->lastseen = cp; | |
101 | css = ss; | |
102 | } | |
103 | ||
104 | /* shutdown */ | |
105 | FDEBUG(("+++ shutdown at c%d +++\n", css - d->ssets)); | |
3ca4086b | 106 | if (cp == v->stop && stop == v->stop) { |
48bf5def RN |
107 | if (hitstopp != NULL) |
108 | *hitstopp = 1; | |
3ca4086b VS |
109 | co = d->cnfa->eos[(v->eflags®_NOTEOL) ? 0 : 1]; |
110 | FDEBUG(("color %ld\n", (long)co)); | |
48bf5def RN |
111 | ss = miss(v, d, css, co, cp, start); |
112 | /* special case: match ended at eol? */ | |
3ca4086b | 113 | if (ss != NULL && (ss->flags&POSTSTATE)) |
48bf5def RN |
114 | return cp; |
115 | else if (ss != NULL) | |
116 | ss->lastseen = cp; /* to be tidy */ | |
117 | } | |
118 | ||
119 | /* find last match, if any */ | |
120 | post = d->lastpost; | |
121 | for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--) | |
3ca4086b VS |
122 | if ((ss->flags&POSTSTATE) && post != ss->lastseen && |
123 | (post == NULL || post < ss->lastseen)) | |
48bf5def | 124 | post = ss->lastseen; |
3ca4086b | 125 | if (post != NULL) /* found one */ |
48bf5def RN |
126 | return post - 1; |
127 | ||
128 | return NULL; | |
129 | } | |
130 | ||
131 | /* | |
3ca4086b VS |
132 | - shortest - shortest-preferred matching engine |
133 | ^ static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, | |
134 | ^ chr **, int *); | |
48bf5def | 135 | */ |
3ca4086b VS |
136 | static chr * /* endpoint, or NULL */ |
137 | shortest(v, d, start, min, max, coldp, hitstopp) | |
138 | struct vars *v; | |
139 | struct dfa *d; | |
140 | chr *start; /* where the match should start */ | |
141 | chr *min; /* match must end at or after here */ | |
142 | chr *max; /* match must end at or before here */ | |
143 | chr **coldp; /* store coldstart pointer here, if nonNULL */ | |
144 | int *hitstopp; /* record whether hit v->stop, if non-NULL */ | |
48bf5def | 145 | { |
3ca4086b VS |
146 | chr *cp; |
147 | chr *realmin = (min == v->stop) ? min : min + 1; | |
148 | chr *realmax = (max == v->stop) ? max : max + 1; | |
149 | color co; | |
48bf5def RN |
150 | struct sset *css; |
151 | struct sset *ss; | |
152 | struct colormap *cm = d->cm; | |
153 | ||
154 | /* initialize */ | |
155 | css = initialize(v, d, start); | |
156 | cp = start; | |
157 | if (hitstopp != NULL) | |
158 | *hitstopp = 0; | |
159 | ||
160 | /* startup */ | |
161 | FDEBUG(("--- startup ---\n")); | |
3ca4086b VS |
162 | if (cp == v->start) { |
163 | co = d->cnfa->bos[(v->eflags®_NOTBOL) ? 0 : 1]; | |
164 | FDEBUG(("color %ld\n", (long)co)); | |
165 | } else { | |
48bf5def | 166 | co = GETCOLOR(cm, *(cp - 1)); |
3ca4086b | 167 | FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co)); |
48bf5def RN |
168 | } |
169 | css = miss(v, d, css, co, cp, start); | |
170 | if (css == NULL) | |
171 | return NULL; | |
172 | css->lastseen = cp; | |
173 | ss = css; | |
174 | ||
175 | /* main loop */ | |
3ca4086b VS |
176 | if (v->eflags®_FTRACE) |
177 | while (cp < realmax) { | |
48bf5def RN |
178 | FDEBUG(("--- at c%d ---\n", css - d->ssets)); |
179 | co = GETCOLOR(cm, *cp); | |
3ca4086b | 180 | FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co)); |
48bf5def | 181 | ss = css->outs[co]; |
3ca4086b VS |
182 | if (ss == NULL) { |
183 | ss = miss(v, d, css, co, cp+1, start); | |
48bf5def | 184 | if (ss == NULL) |
3ca4086b | 185 | break; /* NOTE BREAK OUT */ |
48bf5def RN |
186 | } |
187 | cp++; | |
188 | ss->lastseen = cp; | |
189 | css = ss; | |
3ca4086b VS |
190 | if ((ss->flags&POSTSTATE) && cp >= realmin) |
191 | break; /* NOTE BREAK OUT */ | |
48bf5def RN |
192 | } |
193 | else | |
3ca4086b | 194 | while (cp < realmax) { |
48bf5def RN |
195 | co = GETCOLOR(cm, *cp); |
196 | ss = css->outs[co]; | |
3ca4086b VS |
197 | if (ss == NULL) { |
198 | ss = miss(v, d, css, co, cp+1, start); | |
48bf5def | 199 | if (ss == NULL) |
3ca4086b | 200 | break; /* NOTE BREAK OUT */ |
48bf5def RN |
201 | } |
202 | cp++; | |
203 | ss->lastseen = cp; | |
204 | css = ss; | |
3ca4086b VS |
205 | if ((ss->flags&POSTSTATE) && cp >= realmin) |
206 | break; /* NOTE BREAK OUT */ | |
48bf5def RN |
207 | } |
208 | ||
209 | if (ss == NULL) | |
210 | return NULL; | |
211 | ||
3ca4086b | 212 | if (coldp != NULL) /* report last no-progress state set, if any */ |
48bf5def RN |
213 | *coldp = lastcold(v, d); |
214 | ||
3ca4086b | 215 | if ((ss->flags&POSTSTATE) && cp > min) { |
48bf5def RN |
216 | assert(cp >= realmin); |
217 | cp--; | |
3ca4086b VS |
218 | } else if (cp == v->stop && max == v->stop) { |
219 | co = d->cnfa->eos[(v->eflags®_NOTEOL) ? 0 : 1]; | |
220 | FDEBUG(("color %ld\n", (long)co)); | |
48bf5def RN |
221 | ss = miss(v, d, css, co, cp, start); |
222 | /* match might have ended at eol */ | |
3ca4086b | 223 | if ((ss == NULL || !(ss->flags&POSTSTATE)) && hitstopp != NULL) |
48bf5def RN |
224 | *hitstopp = 1; |
225 | } | |
226 | ||
3ca4086b | 227 | if (ss == NULL || !(ss->flags&POSTSTATE)) |
48bf5def RN |
228 | return NULL; |
229 | ||
230 | return cp; | |
231 | } | |
232 | ||
233 | /* | |
3ca4086b VS |
234 | - lastcold - determine last point at which no progress had been made |
235 | ^ static chr *lastcold(struct vars *, struct dfa *); | |
48bf5def | 236 | */ |
3ca4086b VS |
237 | static chr * /* endpoint, or NULL */ |
238 | lastcold(v, d) | |
239 | struct vars *v; | |
240 | struct dfa *d; | |
48bf5def RN |
241 | { |
242 | struct sset *ss; | |
3ca4086b VS |
243 | chr *nopr; |
244 | int i; | |
48bf5def RN |
245 | |
246 | nopr = d->lastnopr; | |
247 | if (nopr == NULL) | |
248 | nopr = v->start; | |
249 | for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--) | |
3ca4086b | 250 | if ((ss->flags&NOPROGRESS) && nopr < ss->lastseen) |
48bf5def RN |
251 | nopr = ss->lastseen; |
252 | return nopr; | |
253 | } | |
254 | ||
255 | /* | |
3ca4086b VS |
256 | - newdfa - set up a fresh DFA |
257 | ^ static struct dfa *newdfa(struct vars *, struct cnfa *, | |
258 | ^ struct colormap *, struct smalldfa *); | |
48bf5def | 259 | */ |
0f0fd26d JS |
260 | |
261 | /* FIXME Required for CW 8 on Mac since it's not in limits.h */ | |
262 | #ifndef __CHAR_BIT__ | |
263 | #define __CHAR_BIT__ 8 | |
264 | #endif | |
265 | ||
48bf5def | 266 | static struct dfa * |
3ca4086b VS |
267 | newdfa(v, cnfa, cm, small) |
268 | struct vars *v; | |
269 | struct cnfa *cnfa; | |
270 | struct colormap *cm; | |
271 | struct smalldfa *small; /* preallocated space, may be NULL */ | |
48bf5def RN |
272 | { |
273 | struct dfa *d; | |
3ca4086b VS |
274 | size_t nss = cnfa->nstates * 2; |
275 | int wordsper = (cnfa->nstates + UBITS - 1) / UBITS; | |
48bf5def RN |
276 | struct smalldfa *smallwas = small; |
277 | ||
278 | assert(cnfa != NULL && cnfa->nstates != 0); | |
279 | ||
3ca4086b | 280 | if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS) { |
48bf5def | 281 | assert(wordsper == 1); |
3ca4086b VS |
282 | if (small == NULL) { |
283 | small = (struct smalldfa *)MALLOC( | |
284 | sizeof(struct smalldfa)); | |
285 | if (small == NULL) { | |
48bf5def RN |
286 | ERR(REG_ESPACE); |
287 | return NULL; | |
288 | } | |
289 | } | |
290 | d = &small->dfa; | |
291 | d->ssets = small->ssets; | |
292 | d->statesarea = small->statesarea; | |
293 | d->work = &d->statesarea[nss]; | |
294 | d->outsarea = small->outsarea; | |
295 | d->incarea = small->incarea; | |
296 | d->cptsmalloced = 0; | |
3ca4086b VS |
297 | d->mallocarea = (smallwas == NULL) ? (char *)small : NULL; |
298 | } else { | |
299 | d = (struct dfa *)MALLOC(sizeof(struct dfa)); | |
300 | if (d == NULL) { | |
48bf5def RN |
301 | ERR(REG_ESPACE); |
302 | return NULL; | |
303 | } | |
3ca4086b VS |
304 | d->ssets = (struct sset *)MALLOC(nss * sizeof(struct sset)); |
305 | d->statesarea = (unsigned *)MALLOC((nss+WORK) * wordsper * | |
306 | sizeof(unsigned)); | |
48bf5def | 307 | d->work = &d->statesarea[nss * wordsper]; |
3ca4086b VS |
308 | d->outsarea = (struct sset **)MALLOC(nss * cnfa->ncolors * |
309 | sizeof(struct sset *)); | |
310 | d->incarea = (struct arcp *)MALLOC(nss * cnfa->ncolors * | |
311 | sizeof(struct arcp)); | |
48bf5def | 312 | d->cptsmalloced = 1; |
3ca4086b | 313 | d->mallocarea = (char *)d; |
48bf5def | 314 | if (d->ssets == NULL || d->statesarea == NULL || |
3ca4086b | 315 | d->outsarea == NULL || d->incarea == NULL) { |
48bf5def RN |
316 | freedfa(d); |
317 | ERR(REG_ESPACE); | |
318 | return NULL; | |
319 | } | |
320 | } | |
321 | ||
3ca4086b | 322 | d->nssets = (v->eflags®_SMALL) ? 7 : nss; |
48bf5def RN |
323 | d->nssused = 0; |
324 | d->nstates = cnfa->nstates; | |
325 | d->ncolors = cnfa->ncolors; | |
326 | d->wordsper = wordsper; | |
327 | d->cnfa = cnfa; | |
328 | d->cm = cm; | |
329 | d->lastpost = NULL; | |
330 | d->lastnopr = NULL; | |
331 | d->search = d->ssets; | |
332 | ||
333 | /* initialization of sset fields is done as needed */ | |
334 | ||
335 | return d; | |
336 | } | |
337 | ||
338 | /* | |
3ca4086b VS |
339 | - freedfa - free a DFA |
340 | ^ static VOID freedfa(struct dfa *); | |
48bf5def | 341 | */ |
3ca4086b VS |
342 | static VOID |
343 | freedfa(d) | |
344 | struct dfa *d; | |
48bf5def | 345 | { |
3ca4086b | 346 | if (d->cptsmalloced) { |
48bf5def RN |
347 | if (d->ssets != NULL) |
348 | FREE(d->ssets); | |
349 | if (d->statesarea != NULL) | |
350 | FREE(d->statesarea); | |
351 | if (d->outsarea != NULL) | |
352 | FREE(d->outsarea); | |
353 | if (d->incarea != NULL) | |
354 | FREE(d->incarea); | |
355 | } | |
356 | ||
357 | if (d->mallocarea != NULL) | |
358 | FREE(d->mallocarea); | |
359 | } | |
360 | ||
361 | /* | |
3ca4086b | 362 | - hash - construct a hash code for a bitvector |
48bf5def | 363 | * There are probably better ways, but they're more expensive. |
3ca4086b | 364 | ^ static unsigned hash(unsigned *, int); |
48bf5def RN |
365 | */ |
366 | static unsigned | |
3ca4086b VS |
367 | hash(uv, n) |
368 | unsigned *uv; | |
369 | int n; | |
48bf5def | 370 | { |
3ca4086b VS |
371 | int i; |
372 | unsigned h; | |
48bf5def RN |
373 | |
374 | h = 0; | |
375 | for (i = 0; i < n; i++) | |
376 | h ^= uv[i]; | |
377 | return h; | |
378 | } | |
379 | ||
380 | /* | |
3ca4086b VS |
381 | - initialize - hand-craft a cache entry for startup, otherwise get ready |
382 | ^ static struct sset *initialize(struct vars *, struct dfa *, chr *); | |
48bf5def RN |
383 | */ |
384 | static struct sset * | |
3ca4086b VS |
385 | initialize(v, d, start) |
386 | struct vars *v; /* used only for debug flags */ | |
387 | struct dfa *d; | |
388 | chr *start; | |
48bf5def RN |
389 | { |
390 | struct sset *ss; | |
3ca4086b | 391 | int i; |
48bf5def RN |
392 | |
393 | /* is previous one still there? */ | |
3ca4086b | 394 | if (d->nssused > 0 && (d->ssets[0].flags&STARTER)) |
48bf5def | 395 | ss = &d->ssets[0]; |
3ca4086b | 396 | else { /* no, must (re)build it */ |
48bf5def RN |
397 | ss = getvacant(v, d, start, start); |
398 | for (i = 0; i < d->wordsper; i++) | |
399 | ss->states[i] = 0; | |
400 | BSET(ss->states, d->cnfa->pre); | |
401 | ss->hash = HASH(ss->states, d->wordsper); | |
402 | assert(d->cnfa->pre != d->cnfa->post); | |
3ca4086b | 403 | ss->flags = STARTER|LOCKED|NOPROGRESS; |
48bf5def RN |
404 | /* lastseen dealt with below */ |
405 | } | |
406 | ||
407 | for (i = 0; i < d->nssused; i++) | |
408 | d->ssets[i].lastseen = NULL; | |
409 | ss->lastseen = start; /* maybe untrue, but harmless */ | |
410 | d->lastpost = NULL; | |
411 | d->lastnopr = NULL; | |
412 | return ss; | |
413 | } | |
414 | ||
415 | /* | |
3ca4086b VS |
416 | - miss - handle a cache miss |
417 | ^ static struct sset *miss(struct vars *, struct dfa *, struct sset *, | |
418 | ^ pcolor, chr *, chr *); | |
48bf5def | 419 | */ |
3ca4086b VS |
420 | static struct sset * /* NULL if goes to empty set */ |
421 | miss(v, d, css, co, cp, start) | |
422 | struct vars *v; /* used only for debug flags */ | |
423 | struct dfa *d; | |
424 | struct sset *css; | |
425 | pcolor co; | |
426 | chr *cp; /* next chr */ | |
427 | chr *start; /* where the attempt got started */ | |
48bf5def RN |
428 | { |
429 | struct cnfa *cnfa = d->cnfa; | |
3ca4086b VS |
430 | int i; |
431 | unsigned h; | |
48bf5def RN |
432 | struct carc *ca; |
433 | struct sset *p; | |
3ca4086b VS |
434 | int ispost; |
435 | int noprogress; | |
436 | int gotstate; | |
437 | int dolacons; | |
438 | int sawlacons; | |
48bf5def RN |
439 | |
440 | /* for convenience, we can be called even if it might not be a miss */ | |
3ca4086b | 441 | if (css->outs[co] != NULL) { |
48bf5def RN |
442 | FDEBUG(("hit\n")); |
443 | return css->outs[co]; | |
444 | } | |
445 | FDEBUG(("miss\n")); | |
446 | ||
447 | /* first, what set of states would we end up in? */ | |
448 | for (i = 0; i < d->wordsper; i++) | |
449 | d->work[i] = 0; | |
450 | ispost = 0; | |
451 | noprogress = 1; | |
452 | gotstate = 0; | |
453 | for (i = 0; i < d->nstates; i++) | |
454 | if (ISBSET(css->states, i)) | |
3ca4086b VS |
455 | for (ca = cnfa->states[i]+1; ca->co != COLORLESS; ca++) |
456 | if (ca->co == co) { | |
48bf5def RN |
457 | BSET(d->work, ca->to); |
458 | gotstate = 1; | |
459 | if (ca->to == cnfa->post) | |
460 | ispost = 1; | |
461 | if (!cnfa->states[ca->to]->co) | |
462 | noprogress = 0; | |
463 | FDEBUG(("%d -> %d\n", i, ca->to)); | |
464 | } | |
3ca4086b | 465 | dolacons = (gotstate) ? (cnfa->flags&HASLACONS) : 0; |
48bf5def | 466 | sawlacons = 0; |
3ca4086b | 467 | while (dolacons) { /* transitive closure */ |
48bf5def RN |
468 | dolacons = 0; |
469 | for (i = 0; i < d->nstates; i++) | |
470 | if (ISBSET(d->work, i)) | |
3ca4086b VS |
471 | for (ca = cnfa->states[i]+1; ca->co != COLORLESS; |
472 | ca++) { | |
48bf5def | 473 | if (ca->co <= cnfa->ncolors) |
3ca4086b | 474 | continue; /* NOTE CONTINUE */ |
48bf5def RN |
475 | sawlacons = 1; |
476 | if (ISBSET(d->work, ca->to)) | |
3ca4086b | 477 | continue; /* NOTE CONTINUE */ |
48bf5def | 478 | if (!lacon(v, cnfa, cp, ca->co)) |
3ca4086b | 479 | continue; /* NOTE CONTINUE */ |
48bf5def RN |
480 | BSET(d->work, ca->to); |
481 | dolacons = 1; | |
482 | if (ca->to == cnfa->post) | |
483 | ispost = 1; | |
484 | if (!cnfa->states[ca->to]->co) | |
485 | noprogress = 0; | |
486 | FDEBUG(("%d :> %d\n", i, ca->to)); | |
487 | } | |
488 | } | |
489 | if (!gotstate) | |
490 | return NULL; | |
491 | h = HASH(d->work, d->wordsper); | |
492 | ||
493 | /* next, is that in the cache? */ | |
494 | for (p = d->ssets, i = d->nssused; i > 0; p++, i--) | |
3ca4086b | 495 | if (HIT(h, d->work, p, d->wordsper)) { |
48bf5def | 496 | FDEBUG(("cached c%d\n", p - d->ssets)); |
3ca4086b | 497 | break; /* NOTE BREAK OUT */ |
48bf5def | 498 | } |
3ca4086b | 499 | if (i == 0) { /* nope, need a new cache entry */ |
48bf5def RN |
500 | p = getvacant(v, d, cp, start); |
501 | assert(p != css); | |
502 | for (i = 0; i < d->wordsper; i++) | |
503 | p->states[i] = d->work[i]; | |
504 | p->hash = h; | |
505 | p->flags = (ispost) ? POSTSTATE : 0; | |
506 | if (noprogress) | |
507 | p->flags |= NOPROGRESS; | |
508 | /* lastseen to be dealt with by caller */ | |
509 | } | |
510 | ||
3ca4086b | 511 | if (!sawlacons) { /* lookahead conds. always cache miss */ |
48bf5def RN |
512 | FDEBUG(("c%d[%d]->c%d\n", css - d->ssets, co, p - d->ssets)); |
513 | css->outs[co] = p; | |
514 | css->inchain[co] = p->ins; | |
515 | p->ins.ss = css; | |
3ca4086b | 516 | p->ins.co = (color)co; |
48bf5def RN |
517 | } |
518 | return p; | |
519 | } | |
520 | ||
521 | /* | |
3ca4086b VS |
522 | - lacon - lookahead-constraint checker for miss() |
523 | ^ static int lacon(struct vars *, struct cnfa *, chr *, pcolor); | |
48bf5def | 524 | */ |
3ca4086b VS |
525 | static int /* predicate: constraint satisfied? */ |
526 | lacon(v, pcnfa, cp, co) | |
527 | struct vars *v; | |
528 | struct cnfa *pcnfa; /* parent cnfa */ | |
529 | chr *cp; | |
530 | pcolor co; /* "color" of the lookahead constraint */ | |
48bf5def | 531 | { |
3ca4086b | 532 | int n; |
48bf5def RN |
533 | struct subre *sub; |
534 | struct dfa *d; | |
535 | struct smalldfa sd; | |
3ca4086b | 536 | chr *end; |
48bf5def RN |
537 | |
538 | n = co - pcnfa->ncolors; | |
539 | assert(n < v->g->nlacons && v->g->lacons != NULL); | |
540 | FDEBUG(("=== testing lacon %d\n", n)); | |
541 | sub = &v->g->lacons[n]; | |
542 | d = newdfa(v, &sub->cnfa, &v->g->cmap, &sd); | |
3ca4086b | 543 | if (d == NULL) { |
48bf5def RN |
544 | ERR(REG_ESPACE); |
545 | return 0; | |
546 | } | |
3ca4086b | 547 | end = longest(v, d, cp, v->stop, (int *)NULL); |
48bf5def RN |
548 | freedfa(d); |
549 | FDEBUG(("=== lacon %d match %d\n", n, (end != NULL))); | |
550 | return (sub->subno) ? (end != NULL) : (end == NULL); | |
551 | } | |
552 | ||
553 | /* | |
3ca4086b | 554 | - getvacant - get a vacant state set |
48bf5def RN |
555 | * This routine clears out the inarcs and outarcs, but does not otherwise |
556 | * clear the innards of the state set -- that's up to the caller. | |
3ca4086b | 557 | ^ static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *); |
48bf5def RN |
558 | */ |
559 | static struct sset * | |
3ca4086b VS |
560 | getvacant(v, d, cp, start) |
561 | struct vars *v; /* used only for debug flags */ | |
562 | struct dfa *d; | |
563 | chr *cp; | |
564 | chr *start; | |
48bf5def | 565 | { |
3ca4086b | 566 | int i; |
48bf5def RN |
567 | struct sset *ss; |
568 | struct sset *p; | |
569 | struct arcp ap; | |
570 | struct arcp lastap; | |
3ca4086b | 571 | color co; |
ceed155e | 572 | lastap.ss = NULL; lastap.co = 0; // WX: suppress dummy gcc warnings |
48bf5def | 573 | ss = pickss(v, d, cp, start); |
3ca4086b | 574 | assert(!(ss->flags&LOCKED)); |
48bf5def RN |
575 | |
576 | /* clear out its inarcs, including self-referential ones */ | |
577 | ap = ss->ins; | |
3ca4086b | 578 | while ((p = ap.ss) != NULL) { |
48bf5def | 579 | co = ap.co; |
3ca4086b | 580 | FDEBUG(("zapping c%d's %ld outarc\n", p - d->ssets, (long)co)); |
48bf5def RN |
581 | p->outs[co] = NULL; |
582 | ap = p->inchain[co]; | |
3ca4086b | 583 | p->inchain[co].ss = NULL; /* paranoia */ |
48bf5def RN |
584 | } |
585 | ss->ins.ss = NULL; | |
586 | ||
587 | /* take it off the inarc chains of the ssets reached by its outarcs */ | |
3ca4086b | 588 | for (i = 0; i < d->ncolors; i++) { |
48bf5def RN |
589 | p = ss->outs[i]; |
590 | assert(p != ss); /* not self-referential */ | |
591 | if (p == NULL) | |
3ca4086b | 592 | continue; /* NOTE CONTINUE */ |
48bf5def RN |
593 | FDEBUG(("del outarc %d from c%d's in chn\n", i, p - d->ssets)); |
594 | if (p->ins.ss == ss && p->ins.co == i) | |
595 | p->ins = ss->inchain[i]; | |
3ca4086b | 596 | else { |
48bf5def RN |
597 | assert(p->ins.ss != NULL); |
598 | for (ap = p->ins; ap.ss != NULL && | |
3ca4086b VS |
599 | !(ap.ss == ss && ap.co == i); |
600 | ap = ap.ss->inchain[ap.co]) | |
48bf5def RN |
601 | lastap = ap; |
602 | assert(ap.ss != NULL); | |
603 | lastap.ss->inchain[lastap.co] = ss->inchain[i]; | |
604 | } | |
605 | ss->outs[i] = NULL; | |
606 | ss->inchain[i].ss = NULL; | |
607 | } | |
608 | ||
609 | /* if ss was a success state, may need to remember location */ | |
3ca4086b VS |
610 | if ((ss->flags&POSTSTATE) && ss->lastseen != d->lastpost && |
611 | (d->lastpost == NULL || d->lastpost < ss->lastseen)) | |
48bf5def RN |
612 | d->lastpost = ss->lastseen; |
613 | ||
614 | /* likewise for a no-progress state */ | |
3ca4086b VS |
615 | if ((ss->flags&NOPROGRESS) && ss->lastseen != d->lastnopr && |
616 | (d->lastnopr == NULL || d->lastnopr < ss->lastseen)) | |
48bf5def RN |
617 | d->lastnopr = ss->lastseen; |
618 | ||
619 | return ss; | |
620 | } | |
621 | ||
622 | /* | |
3ca4086b VS |
623 | - pickss - pick the next stateset to be used |
624 | ^ static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *); | |
48bf5def RN |
625 | */ |
626 | static struct sset * | |
3ca4086b VS |
627 | pickss(v, d, cp, start) |
628 | struct vars *v; /* used only for debug flags */ | |
629 | struct dfa *d; | |
630 | chr *cp; | |
631 | chr *start; | |
48bf5def | 632 | { |
3ca4086b | 633 | int i; |
48bf5def RN |
634 | struct sset *ss; |
635 | struct sset *end; | |
3ca4086b | 636 | chr *ancient; |
48bf5def RN |
637 | |
638 | /* shortcut for cases where cache isn't full */ | |
3ca4086b | 639 | if (d->nssused < d->nssets) { |
48bf5def RN |
640 | i = d->nssused; |
641 | d->nssused++; | |
642 | ss = &d->ssets[i]; | |
643 | FDEBUG(("new c%d\n", i)); | |
644 | /* set up innards */ | |
645 | ss->states = &d->statesarea[i * d->wordsper]; | |
646 | ss->flags = 0; | |
647 | ss->ins.ss = NULL; | |
648 | ss->ins.co = WHITE; /* give it some value */ | |
649 | ss->outs = &d->outsarea[i * d->ncolors]; | |
650 | ss->inchain = &d->incarea[i * d->ncolors]; | |
3ca4086b | 651 | for (i = 0; i < d->ncolors; i++) { |
48bf5def RN |
652 | ss->outs[i] = NULL; |
653 | ss->inchain[i].ss = NULL; | |
654 | } | |
655 | return ss; | |
656 | } | |
657 | ||
658 | /* look for oldest, or old enough anyway */ | |
3ca4086b VS |
659 | if (cp - start > d->nssets*2/3) /* oldest 33% are expendable */ |
660 | ancient = cp - d->nssets*2/3; | |
48bf5def RN |
661 | else |
662 | ancient = start; | |
663 | for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++) | |
664 | if ((ss->lastseen == NULL || ss->lastseen < ancient) && | |
3ca4086b | 665 | !(ss->flags&LOCKED)) { |
48bf5def RN |
666 | d->search = ss + 1; |
667 | FDEBUG(("replacing c%d\n", ss - d->ssets)); | |
668 | return ss; | |
669 | } | |
670 | for (ss = d->ssets, end = d->search; ss < end; ss++) | |
671 | if ((ss->lastseen == NULL || ss->lastseen < ancient) && | |
3ca4086b | 672 | !(ss->flags&LOCKED)) { |
48bf5def RN |
673 | d->search = ss + 1; |
674 | FDEBUG(("replacing c%d\n", ss - d->ssets)); | |
675 | return ss; | |
676 | } | |
677 | ||
678 | /* nobody's old enough?!? -- something's really wrong */ | |
679 | FDEBUG(("can't find victim to replace!\n")); | |
680 | assert(NOTREACHED); | |
681 | ERR(REG_ASSERT); | |
682 | return d->ssets; | |
19f0995a | 683 | } |