]> git.saurik.com Git - wxWidgets.git/blob - src/regex/regc_nfa.c
added raw key code and flags support (based on patch from Bryce Denney)
[wxWidgets.git] / src / regex / regc_nfa.c
1 /*
2 * NFA utilities.
3 * This file is #included by regcomp.c.
4 *
5 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
6 *
7 * Development of this software was funded, in part, by Cray Research Inc.,
8 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
9 * Corporation, none of whom are responsible for the results. The author
10 * thanks all of them.
11 *
12 * Redistribution and use in source and binary forms -- with or without
13 * modification -- are permitted for any purpose, provided that
14 * redistributions in source form retain this entire copyright notice and
15 * indicate the origin and nature of any modifications.
16 *
17 * I'd appreciate being given credit for this package in the documentation
18 * of software which uses it, but that is not a requirement.
19 *
20 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
21 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
22 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 * $Header$
32 *
33 *
34 * One or two things that technically ought to be in here
35 * are actually in color.c, thanks to some incestuous relationships in
36 * the color chains.
37 */
38
39 #define NISERR() VISERR(nfa->v)
40 #define NERR(e) VERR(nfa->v, (e))
41
42
43 /*
44 * newnfa - set up an NFA
45 */
46 static struct nfa * /* the NFA, or NULL */
47 newnfa(struct vars * v,
48 struct colormap * cm,
49 struct nfa * parent) /* NULL if primary NFA */
50 {
51 struct nfa *nfa;
52
53 nfa = (struct nfa *) MALLOC(sizeof(struct nfa));
54 if (nfa == NULL)
55 return NULL;
56
57 nfa->states = NULL;
58 nfa->slast = NULL;
59 nfa->free = NULL;
60 nfa->nstates = 0;
61 nfa->cm = cm;
62 nfa->v = v;
63 nfa->bos[0] = nfa->bos[1] = COLORLESS;
64 nfa->eos[0] = nfa->eos[1] = COLORLESS;
65 nfa->post = newfstate(nfa, '@'); /* number 0 */
66 nfa->pre = newfstate(nfa, '>'); /* number 1 */
67 nfa->parent = parent;
68
69 nfa->init = newstate(nfa); /* may become invalid later */
70 nfa->final = newstate(nfa);
71 if (ISERR())
72 {
73 freenfa(nfa);
74 return NULL;
75 }
76 rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->pre, nfa->init);
77 newarc(nfa, '^', 1, nfa->pre, nfa->init);
78 newarc(nfa, '^', 0, nfa->pre, nfa->init);
79 rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->final, nfa->post);
80 newarc(nfa, '$', 1, nfa->final, nfa->post);
81 newarc(nfa, '$', 0, nfa->final, nfa->post);
82
83 if (ISERR())
84 {
85 freenfa(nfa);
86 return NULL;
87 }
88 return nfa;
89 }
90
91 /*
92 * freenfa - free an entire NFA
93 */
94 static void
95 freenfa(struct nfa * nfa)
96 {
97 struct state *s;
98
99 while ((s = nfa->states) != NULL)
100 {
101 s->nins = s->nouts = 0; /* don't worry about arcs */
102 freestate(nfa, s);
103 }
104 while ((s = nfa->free) != NULL)
105 {
106 nfa->free = s->next;
107 destroystate(nfa, s);
108 }
109
110 nfa->slast = NULL;
111 nfa->nstates = -1;
112 nfa->pre = NULL;
113 nfa->post = NULL;
114 FREE(nfa);
115 }
116
117 /*
118 * newstate - allocate an NFA state, with zero flag value
119 */
120 static struct state * /* NULL on error */
121 newstate(struct nfa * nfa)
122 {
123 struct state *s;
124
125 if (nfa->free != NULL)
126 {
127 s = nfa->free;
128 nfa->free = s->next;
129 }
130 else
131 {
132 s = (struct state *) MALLOC(sizeof(struct state));
133 if (s == NULL)
134 {
135 NERR(REG_ESPACE);
136 return NULL;
137 }
138 s->oas.next = NULL;
139 s->free = NULL;
140 s->noas = 0;
141 }
142
143 assert(nfa->nstates >= 0);
144 s->no = nfa->nstates++;
145 s->flag = 0;
146 if (nfa->states == NULL)
147 nfa->states = s;
148 s->nins = 0;
149 s->ins = NULL;
150 s->nouts = 0;
151 s->outs = NULL;
152 s->tmp = NULL;
153 s->next = NULL;
154 if (nfa->slast != NULL)
155 {
156 assert(nfa->slast->next == NULL);
157 nfa->slast->next = s;
158 }
159 s->prev = nfa->slast;
160 nfa->slast = s;
161 return s;
162 }
163
164 /*
165 * newfstate - allocate an NFA state with a specified flag value
166 */
167 static struct state * /* NULL on error */
168 newfstate(struct nfa * nfa, int flag)
169 {
170 struct state *s;
171
172 s = newstate(nfa);
173 if (s != NULL)
174 s->flag = (char) flag;
175 return s;
176 }
177
178 /*
179 * dropstate - delete a state's inarcs and outarcs and free it
180 */
181 static void
182 dropstate(struct nfa * nfa,
183 struct state * s)
184 {
185 struct arc *a;
186
187 while ((a = s->ins) != NULL)
188 freearc(nfa, a);
189 while ((a = s->outs) != NULL)
190 freearc(nfa, a);
191 freestate(nfa, s);
192 }
193
194 /*
195 * freestate - free a state, which has no in-arcs or out-arcs
196 */
197 static void
198 freestate(struct nfa * nfa,
199 struct state * s)
200 {
201 assert(s != NULL);
202 assert(s->nins == 0 && s->nouts == 0);
203
204 s->no = FREESTATE;
205 s->flag = 0;
206 if (s->next != NULL)
207 s->next->prev = s->prev;
208 else
209 {
210 assert(s == nfa->slast);
211 nfa->slast = s->prev;
212 }
213 if (s->prev != NULL)
214 s->prev->next = s->next;
215 else
216 {
217 assert(s == nfa->states);
218 nfa->states = s->next;
219 }
220 s->prev = NULL;
221 s->next = nfa->free; /* don't delete it, put it on the free
222 * list */
223 nfa->free = s;
224 }
225
226 /*
227 * destroystate - really get rid of an already-freed state
228 */
229 static void
230 destroystate(struct nfa * nfa,
231 struct state * s)
232 {
233 struct arcbatch *ab;
234 struct arcbatch *abnext;
235
236 assert(s->no == FREESTATE);
237 for (ab = s->oas.next; ab != NULL; ab = abnext)
238 {
239 abnext = ab->next;
240 FREE(ab);
241 }
242 s->ins = NULL;
243 s->outs = NULL;
244 s->next = NULL;
245 FREE(s);
246 }
247
248 /*
249 * newarc - set up a new arc within an NFA
250 */
251 static void
252 newarc(struct nfa * nfa,
253 int t,
254 pcolor co,
255 struct state * from,
256 struct state * to)
257 {
258 struct arc *a;
259
260 assert(from != NULL && to != NULL);
261
262 /* check for duplicates */
263 for (a = from->outs; a != NULL; a = a->outchain)
264 if (a->to == to && a->co == co && a->type == t)
265 return;
266
267 a = allocarc(nfa, from);
268 if (NISERR())
269 return;
270 assert(a != NULL);
271
272 a->type = t;
273 a->co = (color) co;
274 a->to = to;
275 a->from = from;
276
277 /*
278 * Put the new arc on the beginning, not the end, of the chains. Not
279 * only is this easier, it has the very useful side effect that
280 * deleting the most-recently-added arc is the cheapest case rather
281 * than the most expensive one.
282 */
283 a->inchain = to->ins;
284 to->ins = a;
285 a->outchain = from->outs;
286 from->outs = a;
287
288 from->nouts++;
289 to->nins++;
290
291 if (COLORED(a) && nfa->parent == NULL)
292 colorchain(nfa->cm, a);
293
294 return;
295 }
296
297 /*
298 * allocarc - allocate a new out-arc within a state
299 */
300 static struct arc * /* NULL for failure */
301 allocarc(struct nfa * nfa,
302 struct state * s)
303 {
304 struct arc *a;
305 struct arcbatch *new;
306 int i;
307
308 /* shortcut */
309 if (s->free == NULL && s->noas < ABSIZE)
310 {
311 a = &s->oas.a[s->noas];
312 s->noas++;
313 return a;
314 }
315
316 /* if none at hand, get more */
317 if (s->free == NULL)
318 {
319 new = (struct arcbatch *) MALLOC(sizeof(struct arcbatch));
320 if (new == NULL)
321 {
322 NERR(REG_ESPACE);
323 return NULL;
324 }
325 new->next = s->oas.next;
326 s->oas.next = new;
327
328 for (i = 0; i < ABSIZE; i++)
329 {
330 new->a[i].type = 0;
331 new->a[i].freechain = &new->a[i + 1];
332 }
333 new->a[ABSIZE - 1].freechain = NULL;
334 s->free = &new->a[0];
335 }
336 assert(s->free != NULL);
337
338 a = s->free;
339 s->free = a->freechain;
340 return a;
341 }
342
343 /*
344 * freearc - free an arc
345 */
346 static void
347 freearc(struct nfa * nfa,
348 struct arc * victim)
349 {
350 struct state *from = victim->from;
351 struct state *to = victim->to;
352 struct arc *a;
353
354 assert(victim->type != 0);
355
356 /* take it off color chain if necessary */
357 if (COLORED(victim) && nfa->parent == NULL)
358 uncolorchain(nfa->cm, victim);
359
360 /* take it off source's out-chain */
361 assert(from != NULL);
362 assert(from->outs != NULL);
363 a = from->outs;
364 if (a == victim) /* simple case: first in chain */
365 from->outs = victim->outchain;
366 else
367 {
368 for (; a != NULL && a->outchain != victim; a = a->outchain)
369 continue;
370 assert(a != NULL);
371 a->outchain = victim->outchain;
372 }
373 from->nouts--;
374
375 /* take it off target's in-chain */
376 assert(to != NULL);
377 assert(to->ins != NULL);
378 a = to->ins;
379 if (a == victim) /* simple case: first in chain */
380 to->ins = victim->inchain;
381 else
382 {
383 for (; a != NULL && a->inchain != victim; a = a->inchain)
384 continue;
385 assert(a != NULL);
386 a->inchain = victim->inchain;
387 }
388 to->nins--;
389
390 /* clean up and place on free list */
391 victim->type = 0;
392 victim->from = NULL; /* precautions... */
393 victim->to = NULL;
394 victim->inchain = NULL;
395 victim->outchain = NULL;
396 victim->freechain = from->free;
397 from->free = victim;
398 }
399
400 /*
401 * findarc - find arc, if any, from given source with given type and color
402 * If there is more than one such arc, the result is random.
403 */
404 static struct arc *
405 findarc(struct state * s,
406 int type,
407 pcolor co)
408 {
409 struct arc *a;
410
411 for (a = s->outs; a != NULL; a = a->outchain)
412 if (a->type == type && a->co == co)
413 return a;
414 return NULL;
415 }
416
417 /*
418 * cparc - allocate a new arc within an NFA, copying details from old one
419 */
420 static void
421 cparc(struct nfa * nfa,
422 struct arc * oa,
423 struct state * from,
424 struct state * to)
425 {
426 newarc(nfa, oa->type, oa->co, from, to);
427 }
428
429 /*
430 * moveins - move all in arcs of a state to another state
431 *
432 * You might think this could be done better by just updating the
433 * existing arcs, and you would be right if it weren't for the desire
434 * for duplicate suppression, which makes it easier to just make new
435 * ones to exploit the suppression built into newarc.
436 */
437 static void
438 moveins(struct nfa * nfa,
439 struct state * old,
440 struct state * new)
441 {
442 struct arc *a;
443
444 assert(old != new);
445
446 while ((a = old->ins) != NULL)
447 {
448 cparc(nfa, a, a->from, new);
449 freearc(nfa, a);
450 }
451 assert(old->nins == 0);
452 assert(old->ins == NULL);
453 }
454
455 /*
456 * copyins - copy all in arcs of a state to another state
457 */
458 static void
459 copyins(struct nfa * nfa,
460 struct state * old,
461 struct state * new)
462 {
463 struct arc *a;
464
465 assert(old != new);
466
467 for (a = old->ins; a != NULL; a = a->inchain)
468 cparc(nfa, a, a->from, new);
469 }
470
471 /*
472 * moveouts - move all out arcs of a state to another state
473 */
474 static void
475 moveouts(struct nfa * nfa,
476 struct state * old,
477 struct state * new)
478 {
479 struct arc *a;
480
481 assert(old != new);
482
483 while ((a = old->outs) != NULL)
484 {
485 cparc(nfa, a, new, a->to);
486 freearc(nfa, a);
487 }
488 }
489
490 /*
491 * copyouts - copy all out arcs of a state to another state
492 */
493 static void
494 copyouts(struct nfa * nfa,
495 struct state * old,
496 struct state * new)
497 {
498 struct arc *a;
499
500 assert(old != new);
501
502 for (a = old->outs; a != NULL; a = a->outchain)
503 cparc(nfa, a, new, a->to);
504 }
505
506 /*
507 * cloneouts - copy out arcs of a state to another state pair, modifying type
508 */
509 static void
510 cloneouts(struct nfa * nfa,
511 struct state * old,
512 struct state * from,
513 struct state * to,
514 int type)
515 {
516 struct arc *a;
517
518 assert(old != from);
519
520 for (a = old->outs; a != NULL; a = a->outchain)
521 newarc(nfa, type, a->co, from, to);
522 }
523
524 /*
525 * delsub - delete a sub-NFA, updating subre pointers if necessary
526 *
527 * This uses a recursive traversal of the sub-NFA, marking already-seen
528 * states using their tmp pointer.
529 */
530 static void
531 delsub(struct nfa * nfa,
532 struct state * lp, /* the sub-NFA goes from here... */
533 struct state * rp) /* ...to here, *not* inclusive */
534 {
535 assert(lp != rp);
536
537 rp->tmp = rp; /* mark end */
538
539 deltraverse(nfa, lp, lp);
540 assert(lp->nouts == 0 && rp->nins == 0); /* did the job */
541 assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */
542
543 rp->tmp = NULL; /* unmark end */
544 lp->tmp = NULL; /* and begin, marked by deltraverse */
545 }
546
547 /*
548 * deltraverse - the recursive heart of delsub
549 * This routine's basic job is to destroy all out-arcs of the state.
550 */
551 static void
552 deltraverse(struct nfa * nfa,
553 struct state * leftend,
554 struct state * s)
555 {
556 struct arc *a;
557 struct state *to;
558
559 if (s->nouts == 0)
560 return; /* nothing to do */
561 if (s->tmp != NULL)
562 return; /* already in progress */
563
564 s->tmp = s; /* mark as in progress */
565
566 while ((a = s->outs) != NULL)
567 {
568 to = a->to;
569 deltraverse(nfa, leftend, to);
570 assert(to->nouts == 0 || to->tmp != NULL);
571 freearc(nfa, a);
572 if (to->nins == 0 && to->tmp == NULL)
573 {
574 assert(to->nouts == 0);
575 freestate(nfa, to);
576 }
577 }
578
579 assert(s->no != FREESTATE); /* we're still here */
580 assert(s == leftend || s->nins != 0); /* and still reachable */
581 assert(s->nouts == 0); /* but have no outarcs */
582
583 s->tmp = NULL; /* we're done here */
584 }
585
586 /*
587 * dupnfa - duplicate sub-NFA
588 *
589 * Another recursive traversal, this time using tmp to point to duplicates
590 * as well as mark already-seen states. (You knew there was a reason why
591 * it's a state pointer, didn't you? :-))
592 */
593 static void
594 dupnfa(struct nfa * nfa,
595 struct state * start, /* duplicate of subNFA starting here */
596 struct state * stop, /* and stopping here */
597 struct state * from, /* stringing duplicate from here */
598 struct state * to) /* to here */
599 {
600 if (start == stop)
601 {
602 newarc(nfa, EMPTY, 0, from, to);
603 return;
604 }
605
606 stop->tmp = to;
607 duptraverse(nfa, start, from);
608 /* done, except for clearing out the tmp pointers */
609
610 stop->tmp = NULL;
611 cleartraverse(nfa, start);
612 }
613
614 /*
615 * duptraverse - recursive heart of dupnfa
616 */
617 static void
618 duptraverse(struct nfa * nfa,
619 struct state * s,
620 struct state * stmp) /* s's duplicate, or NULL */
621 {
622 struct arc *a;
623
624 if (s->tmp != NULL)
625 return; /* already done */
626
627 s->tmp = (stmp == NULL) ? newstate(nfa) : stmp;
628 if (s->tmp == NULL)
629 {
630 assert(NISERR());
631 return;
632 }
633
634 for (a = s->outs; a != NULL && !NISERR(); a = a->outchain)
635 {
636 duptraverse(nfa, a->to, (struct state *) NULL);
637 assert(a->to->tmp != NULL);
638 cparc(nfa, a, s->tmp, a->to->tmp);
639 }
640 }
641
642 /*
643 * cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set
644 */
645 static void
646 cleartraverse(struct nfa * nfa,
647 struct state * s)
648 {
649 struct arc *a;
650
651 if (s->tmp == NULL)
652 return;
653 s->tmp = NULL;
654
655 for (a = s->outs; a != NULL; a = a->outchain)
656 cleartraverse(nfa, a->to);
657 }
658
659 /*
660 * specialcolors - fill in special colors for an NFA
661 */
662 static void
663 specialcolors(struct nfa * nfa)
664 {
665 /* false colors for BOS, BOL, EOS, EOL */
666 if (nfa->parent == NULL)
667 {
668 nfa->bos[0] = pseudocolor(nfa->cm);
669 nfa->bos[1] = pseudocolor(nfa->cm);
670 nfa->eos[0] = pseudocolor(nfa->cm);
671 nfa->eos[1] = pseudocolor(nfa->cm);
672 }
673 else
674 {
675 assert(nfa->parent->bos[0] != COLORLESS);
676 nfa->bos[0] = nfa->parent->bos[0];
677 assert(nfa->parent->bos[1] != COLORLESS);
678 nfa->bos[1] = nfa->parent->bos[1];
679 assert(nfa->parent->eos[0] != COLORLESS);
680 nfa->eos[0] = nfa->parent->eos[0];
681 assert(nfa->parent->eos[1] != COLORLESS);
682 nfa->eos[1] = nfa->parent->eos[1];
683 }
684 }
685
686 /*
687 * optimize - optimize an NFA
688 */
689 static long /* re_info bits */
690 optimize(struct nfa * nfa,
691 FILE *f) /* for debug output; NULL none */
692 {
693 #ifdef REG_DEBUG
694 int verbose = (f != NULL) ? 1 : 0;
695
696 if (verbose)
697 fprintf(f, "\ninitial cleanup:\n");
698 #endif
699 cleanup(nfa); /* may simplify situation */
700 #ifdef REG_DEBUG
701 if (verbose)
702 dumpnfa(nfa, f);
703 if (verbose)
704 fprintf(f, "\nempties:\n");
705 #endif
706 fixempties(nfa, f); /* get rid of EMPTY arcs */
707 #ifdef REG_DEBUG
708 if (verbose)
709 fprintf(f, "\nconstraints:\n");
710 #endif
711 pullback(nfa, f); /* pull back constraints backward */
712 pushfwd(nfa, f); /* push fwd constraints forward */
713 #ifdef REG_DEBUG
714 if (verbose)
715 fprintf(f, "\nfinal cleanup:\n");
716 #endif
717 cleanup(nfa); /* final tidying */
718 return analyze(nfa); /* and analysis */
719 }
720
721 /*
722 * pullback - pull back constraints backward to (with luck) eliminate them
723 */
724 static void
725 pullback(struct nfa * nfa,
726 FILE *f) /* for debug output; NULL none */
727 {
728 struct state *s;
729 struct state *nexts;
730 struct arc *a;
731 struct arc *nexta;
732 int progress;
733
734 /* find and pull until there are no more */
735 do
736 {
737 progress = 0;
738 for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
739 {
740 nexts = s->next;
741 for (a = s->outs; a != NULL && !NISERR(); a = nexta)
742 {
743 nexta = a->outchain;
744 if (a->type == '^' || a->type == BEHIND)
745 if (pull(nfa, a))
746 progress = 1;
747 assert(nexta == NULL || s->no != FREESTATE);
748 }
749 }
750 if (progress && f != NULL)
751 dumpnfa(nfa, f);
752 } while (progress && !NISERR());
753 if (NISERR())
754 return;
755
756 for (a = nfa->pre->outs; a != NULL; a = nexta)
757 {
758 nexta = a->outchain;
759 if (a->type == '^')
760 {
761 assert(a->co == 0 || a->co == 1);
762 newarc(nfa, PLAIN, nfa->bos[a->co], a->from, a->to);
763 freearc(nfa, a);
764 }
765 }
766 }
767
768 /*
769 * pull - pull a back constraint backward past its source state
770 * A significant property of this function is that it deletes at most
771 * one state -- the constraint's from state -- and only if the constraint
772 * was that state's last outarc.
773 */
774 static int /* 0 couldn't, 1 could */
775 pull(struct nfa * nfa,
776 struct arc * con)
777 {
778 struct state *from = con->from;
779 struct state *to = con->to;
780 struct arc *a;
781 struct arc *nexta;
782 struct state *s;
783
784 if (from == to)
785 { /* circular constraint is pointless */
786 freearc(nfa, con);
787 return 1;
788 }
789 if (from->flag) /* can't pull back beyond start */
790 return 0;
791 if (from->nins == 0)
792 { /* unreachable */
793 freearc(nfa, con);
794 return 1;
795 }
796
797 /* first, clone from state if necessary to avoid other outarcs */
798 if (from->nouts > 1)
799 {
800 s = newstate(nfa);
801 if (NISERR())
802 return 0;
803 assert(to != from); /* con is not an inarc */
804 copyins(nfa, from, s); /* duplicate inarcs */
805 cparc(nfa, con, s, to); /* move constraint arc */
806 freearc(nfa, con);
807 from = s;
808 con = from->outs;
809 }
810 assert(from->nouts == 1);
811
812 /* propagate the constraint into the from state's inarcs */
813 for (a = from->ins; a != NULL; a = nexta)
814 {
815 nexta = a->inchain;
816 switch (combine(con, a))
817 {
818 case INCOMPATIBLE: /* destroy the arc */
819 freearc(nfa, a);
820 break;
821 case SATISFIED: /* no action needed */
822 break;
823 case COMPATIBLE: /* swap the two arcs, more or less */
824 s = newstate(nfa);
825 if (NISERR())
826 return 0;
827 cparc(nfa, a, s, to); /* anticipate move */
828 cparc(nfa, con, a->from, s);
829 if (NISERR())
830 return 0;
831 freearc(nfa, a);
832 break;
833 default:
834 assert(NOTREACHED);
835 break;
836 }
837 }
838
839 /* remaining inarcs, if any, incorporate the constraint */
840 moveins(nfa, from, to);
841 dropstate(nfa, from); /* will free the constraint */
842 return 1;
843 }
844
845 /*
846 * pushfwd - push forward constraints forward to (with luck) eliminate them
847 */
848 static void
849 pushfwd(struct nfa * nfa,
850 FILE *f) /* for debug output; NULL none */
851 {
852 struct state *s;
853 struct state *nexts;
854 struct arc *a;
855 struct arc *nexta;
856 int progress;
857
858 /* find and push until there are no more */
859 do
860 {
861 progress = 0;
862 for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
863 {
864 nexts = s->next;
865 for (a = s->ins; a != NULL && !NISERR(); a = nexta)
866 {
867 nexta = a->inchain;
868 if (a->type == '$' || a->type == AHEAD)
869 if (push(nfa, a))
870 progress = 1;
871 assert(nexta == NULL || s->no != FREESTATE);
872 }
873 }
874 if (progress && f != NULL)
875 dumpnfa(nfa, f);
876 } while (progress && !NISERR());
877 if (NISERR())
878 return;
879
880 for (a = nfa->post->ins; a != NULL; a = nexta)
881 {
882 nexta = a->inchain;
883 if (a->type == '$')
884 {
885 assert(a->co == 0 || a->co == 1);
886 newarc(nfa, PLAIN, nfa->eos[a->co], a->from, a->to);
887 freearc(nfa, a);
888 }
889 }
890 }
891
892 /*
893 * push - push a forward constraint forward past its destination state
894 * A significant property of this function is that it deletes at most
895 * one state -- the constraint's to state -- and only if the constraint
896 * was that state's last inarc.
897 */
898 static int /* 0 couldn't, 1 could */
899 push(struct nfa * nfa,
900 struct arc * con)
901 {
902 struct state *from = con->from;
903 struct state *to = con->to;
904 struct arc *a;
905 struct arc *nexta;
906 struct state *s;
907
908 if (to == from)
909 { /* circular constraint is pointless */
910 freearc(nfa, con);
911 return 1;
912 }
913 if (to->flag) /* can't push forward beyond end */
914 return 0;
915 if (to->nouts == 0)
916 { /* dead end */
917 freearc(nfa, con);
918 return 1;
919 }
920
921 /* first, clone to state if necessary to avoid other inarcs */
922 if (to->nins > 1)
923 {
924 s = newstate(nfa);
925 if (NISERR())
926 return 0;
927 copyouts(nfa, to, s); /* duplicate outarcs */
928 cparc(nfa, con, from, s); /* move constraint */
929 freearc(nfa, con);
930 to = s;
931 con = to->ins;
932 }
933 assert(to->nins == 1);
934
935 /* propagate the constraint into the to state's outarcs */
936 for (a = to->outs; a != NULL; a = nexta)
937 {
938 nexta = a->outchain;
939 switch (combine(con, a))
940 {
941 case INCOMPATIBLE: /* destroy the arc */
942 freearc(nfa, a);
943 break;
944 case SATISFIED: /* no action needed */
945 break;
946 case COMPATIBLE: /* swap the two arcs, more or less */
947 s = newstate(nfa);
948 if (NISERR())
949 return 0;
950 cparc(nfa, con, s, a->to); /* anticipate move */
951 cparc(nfa, a, from, s);
952 if (NISERR())
953 return 0;
954 freearc(nfa, a);
955 break;
956 default:
957 assert(NOTREACHED);
958 break;
959 }
960 }
961
962 /* remaining outarcs, if any, incorporate the constraint */
963 moveouts(nfa, to, from);
964 dropstate(nfa, to); /* will free the constraint */
965 return 1;
966 }
967
968 /*
969 * combine - constraint lands on an arc, what happens?
970 *
971 * #def INCOMPATIBLE 1 // destroys arc
972 * #def SATISFIED 2 // constraint satisfied
973 * #def COMPATIBLE 3 // compatible but not satisfied yet
974 */
975 static int
976 combine(struct arc * con,
977 struct arc * a)
978 {
979 #define CA(ct,at) (((ct)<<CHAR_BIT) | (at))
980
981 switch (CA(con->type, a->type))
982 {
983 case CA('^', PLAIN): /* newlines are handled separately */
984 case CA('$', PLAIN):
985 return INCOMPATIBLE;
986 break;
987 case CA(AHEAD, PLAIN): /* color constraints meet colors */
988 case CA(BEHIND, PLAIN):
989 if (con->co == a->co)
990 return SATISFIED;
991 return INCOMPATIBLE;
992 break;
993 case CA('^', '^'): /* collision, similar constraints */
994 case CA('$', '$'):
995 case CA(AHEAD, AHEAD):
996 case CA(BEHIND, BEHIND):
997 if (con->co == a->co) /* true duplication */
998 return SATISFIED;
999 return INCOMPATIBLE;
1000 break;
1001 case CA('^', BEHIND): /* collision, dissimilar constraints */
1002 case CA(BEHIND, '^'):
1003 case CA('$', AHEAD):
1004 case CA(AHEAD, '$'):
1005 return INCOMPATIBLE;
1006 break;
1007 case CA('^', '$'): /* constraints passing each other */
1008 case CA('^', AHEAD):
1009 case CA(BEHIND, '$'):
1010 case CA(BEHIND, AHEAD):
1011 case CA('$', '^'):
1012 case CA('$', BEHIND):
1013 case CA(AHEAD, '^'):
1014 case CA(AHEAD, BEHIND):
1015 case CA('^', LACON):
1016 case CA(BEHIND, LACON):
1017 case CA('$', LACON):
1018 case CA(AHEAD, LACON):
1019 return COMPATIBLE;
1020 break;
1021 }
1022 assert(NOTREACHED);
1023 return INCOMPATIBLE; /* for benefit of blind compilers */
1024 }
1025
1026 /*
1027 * fixempties - get rid of EMPTY arcs
1028 */
1029 static void
1030 fixempties(struct nfa * nfa,
1031 FILE *f) /* for debug output; NULL none */
1032 {
1033 struct state *s;
1034 struct state *nexts;
1035 struct arc *a;
1036 struct arc *nexta;
1037 int progress;
1038
1039 /* find and eliminate empties until there are no more */
1040 do
1041 {
1042 progress = 0;
1043 for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
1044 {
1045 nexts = s->next;
1046 for (a = s->outs; a != NULL && !NISERR(); a = nexta)
1047 {
1048 nexta = a->outchain;
1049 if (a->type == EMPTY && unempty(nfa, a))
1050 progress = 1;
1051 assert(nexta == NULL || s->no != FREESTATE);
1052 }
1053 }
1054 if (progress && f != NULL)
1055 dumpnfa(nfa, f);
1056 } while (progress && !NISERR());
1057 }
1058
1059 /*
1060 * unempty - optimize out an EMPTY arc, if possible
1061 *
1062 * Actually, as it stands this function always succeeds, but the return
1063 * value is kept with an eye on possible future changes.
1064 */
1065 static int /* 0 couldn't, 1 could */
1066 unempty(struct nfa * nfa,
1067 struct arc * a)
1068 {
1069 struct state *from = a->from;
1070 struct state *to = a->to;
1071 int usefrom; /* work on from, as opposed to to? */
1072
1073 assert(a->type == EMPTY);
1074 assert(from != nfa->pre && to != nfa->post);
1075
1076 if (from == to)
1077 { /* vacuous loop */
1078 freearc(nfa, a);
1079 return 1;
1080 }
1081
1082 /* decide which end to work on */
1083 usefrom = 1; /* default: attack from */
1084 if (from->nouts > to->nins)
1085 usefrom = 0;
1086 else if (from->nouts == to->nins)
1087 {
1088 /* decide on secondary issue: move/copy fewest arcs */
1089 if (from->nins > to->nouts)
1090 usefrom = 0;
1091 }
1092
1093 freearc(nfa, a);
1094 if (usefrom)
1095 {
1096 if (from->nouts == 0)
1097 {
1098 /* was the state's only outarc */
1099 moveins(nfa, from, to);
1100 freestate(nfa, from);
1101 }
1102 else
1103 copyins(nfa, from, to);
1104 }
1105 else
1106 {
1107 if (to->nins == 0)
1108 {
1109 /* was the state's only inarc */
1110 moveouts(nfa, to, from);
1111 freestate(nfa, to);
1112 }
1113 else
1114 copyouts(nfa, to, from);
1115 }
1116
1117 return 1;
1118 }
1119
1120 /*
1121 * cleanup - clean up NFA after optimizations
1122 */
1123 static void
1124 cleanup(struct nfa * nfa)
1125 {
1126 struct state *s;
1127 struct state *nexts;
1128 int n;
1129
1130 /* clear out unreachable or dead-end states */
1131 /* use pre to mark reachable, then post to mark can-reach-post */
1132 markreachable(nfa, nfa->pre, (struct state *) NULL, nfa->pre);
1133 markcanreach(nfa, nfa->post, nfa->pre, nfa->post);
1134 for (s = nfa->states; s != NULL; s = nexts)
1135 {
1136 nexts = s->next;
1137 if (s->tmp != nfa->post && !s->flag)
1138 dropstate(nfa, s);
1139 }
1140 assert(nfa->post->nins == 0 || nfa->post->tmp == nfa->post);
1141 cleartraverse(nfa, nfa->pre);
1142 assert(nfa->post->nins == 0 || nfa->post->tmp == NULL);
1143 /* the nins==0 (final unreachable) case will be caught later */
1144
1145 /* renumber surviving states */
1146 n = 0;
1147 for (s = nfa->states; s != NULL; s = s->next)
1148 s->no = n++;
1149 nfa->nstates = n;
1150 }
1151
1152 /*
1153 * markreachable - recursive marking of reachable states
1154 */
1155 static void
1156 markreachable(struct nfa * nfa,
1157 struct state * s,
1158 struct state * okay, /* consider only states with this
1159 * mark */
1160 struct state * mark) /* the value to mark with */
1161 {
1162 struct arc *a;
1163
1164 if (s->tmp != okay)
1165 return;
1166 s->tmp = mark;
1167
1168 for (a = s->outs; a != NULL; a = a->outchain)
1169 markreachable(nfa, a->to, okay, mark);
1170 }
1171
1172 /*
1173 * markcanreach - recursive marking of states which can reach here
1174 */
1175 static void
1176 markcanreach(struct nfa * nfa,
1177 struct state * s,
1178 struct state * okay, /* consider only states with this
1179 * mark */
1180 struct state * mark) /* the value to mark with */
1181 {
1182 struct arc *a;
1183
1184 if (s->tmp != okay)
1185 return;
1186 s->tmp = mark;
1187
1188 for (a = s->ins; a != NULL; a = a->inchain)
1189 markcanreach(nfa, a->from, okay, mark);
1190 }
1191
1192 /*
1193 * analyze - ascertain potentially-useful facts about an optimized NFA
1194 */
1195 static long /* re_info bits to be ORed in */
1196 analyze(struct nfa * nfa)
1197 {
1198 struct arc *a;
1199 struct arc *aa;
1200
1201 if (nfa->pre->outs == NULL)
1202 return REG_UIMPOSSIBLE;
1203 for (a = nfa->pre->outs; a != NULL; a = a->outchain)
1204 for (aa = a->to->outs; aa != NULL; aa = aa->outchain)
1205 if (aa->to == nfa->post)
1206 return REG_UEMPTYMATCH;
1207 return 0;
1208 }
1209
1210 /*
1211 * compact - compact an NFA
1212 */
1213 static void
1214 compact(struct nfa * nfa,
1215 struct cnfa * cnfa)
1216 {
1217 struct state *s;
1218 struct arc *a;
1219 size_t nstates;
1220 size_t narcs;
1221 struct carc *ca;
1222 struct carc *first;
1223
1224 assert(!NISERR());
1225
1226 nstates = 0;
1227 narcs = 0;
1228 for (s = nfa->states; s != NULL; s = s->next)
1229 {
1230 nstates++;
1231 narcs += 1 + s->nouts + 1;
1232 /* 1 as a fake for flags, nouts for arcs, 1 as endmarker */
1233 }
1234
1235 cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *));
1236 cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc));
1237 if (cnfa->states == NULL || cnfa->arcs == NULL)
1238 {
1239 if (cnfa->states != NULL)
1240 FREE(cnfa->states);
1241 if (cnfa->arcs != NULL)
1242 FREE(cnfa->arcs);
1243 NERR(REG_ESPACE);
1244 return;
1245 }
1246 cnfa->nstates = nstates;
1247 cnfa->pre = nfa->pre->no;
1248 cnfa->post = nfa->post->no;
1249 cnfa->bos[0] = nfa->bos[0];
1250 cnfa->bos[1] = nfa->bos[1];
1251 cnfa->eos[0] = nfa->eos[0];
1252 cnfa->eos[1] = nfa->eos[1];
1253 cnfa->ncolors = maxcolor(nfa->cm) + 1;
1254 cnfa->flags = 0;
1255
1256 ca = cnfa->arcs;
1257 for (s = nfa->states; s != NULL; s = s->next)
1258 {
1259 assert((size_t) s->no < nstates);
1260 cnfa->states[s->no] = ca;
1261 ca->co = 0; /* clear and skip flags "arc" */
1262 ca++;
1263 first = ca;
1264 for (a = s->outs; a != NULL; a = a->outchain)
1265 switch (a->type)
1266 {
1267 case PLAIN:
1268 ca->co = a->co;
1269 ca->to = a->to->no;
1270 ca++;
1271 break;
1272 case LACON:
1273 assert(s->no != cnfa->pre);
1274 ca->co = (color) (cnfa->ncolors + a->co);
1275 ca->to = a->to->no;
1276 ca++;
1277 cnfa->flags |= HASLACONS;
1278 break;
1279 default:
1280 assert(NOTREACHED);
1281 break;
1282 }
1283 carcsort(first, ca - 1);
1284 ca->co = COLORLESS;
1285 ca->to = 0;
1286 ca++;
1287 }
1288 assert(ca == &cnfa->arcs[narcs]);
1289 assert(cnfa->nstates != 0);
1290
1291 /* mark no-progress states */
1292 for (a = nfa->pre->outs; a != NULL; a = a->outchain)
1293 cnfa->states[a->to->no]->co = 1;
1294 cnfa->states[nfa->pre->no]->co = 1;
1295 }
1296
1297 /*
1298 * carcsort - sort compacted-NFA arcs by color
1299 *
1300 * Really dumb algorithm, but if the list is long enough for that to matter,
1301 * you're in real trouble anyway.
1302 */
1303 static void
1304 carcsort(struct carc * first,
1305 struct carc * last)
1306 {
1307 struct carc *p;
1308 struct carc *q;
1309 struct carc tmp;
1310
1311 if (last - first <= 1)
1312 return;
1313
1314 for (p = first; p <= last; p++)
1315 for (q = p; q <= last; q++)
1316 if (p->co > q->co ||
1317 (p->co == q->co && p->to > q->to))
1318 {
1319 assert(p != q);
1320 tmp = *p;
1321 *p = *q;
1322 *q = tmp;
1323 }
1324 }
1325
1326 /*
1327 * freecnfa - free a compacted NFA
1328 */
1329 static void
1330 freecnfa(struct cnfa * cnfa)
1331 {
1332 assert(cnfa->nstates != 0); /* not empty already */
1333 cnfa->nstates = 0;
1334 FREE(cnfa->states);
1335 FREE(cnfa->arcs);
1336 }
1337
1338 /*
1339 * dumpnfa - dump an NFA in human-readable form
1340 */
1341 static void
1342 dumpnfa(struct nfa * nfa,
1343 FILE *f)
1344 {
1345 #ifdef REG_DEBUG
1346 struct state *s;
1347
1348 fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
1349 if (nfa->bos[0] != COLORLESS)
1350 fprintf(f, ", bos [%ld]", (long) nfa->bos[0]);
1351 if (nfa->bos[1] != COLORLESS)
1352 fprintf(f, ", bol [%ld]", (long) nfa->bos[1]);
1353 if (nfa->eos[0] != COLORLESS)
1354 fprintf(f, ", eos [%ld]", (long) nfa->eos[0]);
1355 if (nfa->eos[1] != COLORLESS)
1356 fprintf(f, ", eol [%ld]", (long) nfa->eos[1]);
1357 fprintf(f, "\n");
1358 for (s = nfa->states; s != NULL; s = s->next)
1359 dumpstate(s, f);
1360 if (nfa->parent == NULL)
1361 dumpcolors(nfa->cm, f);
1362 fflush(f);
1363 #endif
1364 }
1365
1366 #ifdef REG_DEBUG /* subordinates of dumpnfa */
1367
1368 /*
1369 * dumpstate - dump an NFA state in human-readable form
1370 */
1371 static void
1372 dumpstate(struct state * s,
1373 FILE *f)
1374 {
1375 struct arc *a;
1376
1377 fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "",
1378 (s->flag) ? s->flag : '.');
1379 if (s->prev != NULL && s->prev->next != s)
1380 fprintf(f, "\tstate chain bad\n");
1381 if (s->nouts == 0)
1382 fprintf(f, "\tno out arcs\n");
1383 else
1384 dumparcs(s, f);
1385 fflush(f);
1386 for (a = s->ins; a != NULL; a = a->inchain)
1387 {
1388 if (a->to != s)
1389 fprintf(f, "\tlink from %d to %d on %d's in-chain\n",
1390 a->from->no, a->to->no, s->no);
1391 }
1392 }
1393
1394 /*
1395 * dumparcs - dump out-arcs in human-readable form
1396 */
1397 static void
1398 dumparcs(struct state * s,
1399 FILE *f)
1400 {
1401 int pos;
1402
1403 assert(s->nouts > 0);
1404 /* printing arcs in reverse order is usually clearer */
1405 pos = dumprarcs(s->outs, s, f, 1);
1406 if (pos != 1)
1407 fprintf(f, "\n");
1408 }
1409
1410 /*
1411 * dumprarcs - dump remaining outarcs, recursively, in reverse order
1412 */
1413 static int /* resulting print position */
1414 dumprarcs(struct arc * a,
1415 struct state * s,
1416 FILE *f,
1417 int pos) /* initial print position */
1418 {
1419 if (a->outchain != NULL)
1420 pos = dumprarcs(a->outchain, s, f, pos);
1421 dumparc(a, s, f);
1422 if (pos == 5)
1423 {
1424 fprintf(f, "\n");
1425 pos = 1;
1426 }
1427 else
1428 pos++;
1429 return pos;
1430 }
1431
1432 /*
1433 * dumparc - dump one outarc in readable form, including prefixing tab
1434 */
1435 static void
1436 dumparc(struct arc * a,
1437 struct state * s,
1438 FILE *f)
1439 {
1440 struct arc *aa;
1441 struct arcbatch *ab;
1442
1443 fprintf(f, "\t");
1444 switch (a->type)
1445 {
1446 case PLAIN:
1447 fprintf(f, "[%ld]", (long) a->co);
1448 break;
1449 case AHEAD:
1450 fprintf(f, ">%ld>", (long) a->co);
1451 break;
1452 case BEHIND:
1453 fprintf(f, "<%ld<", (long) a->co);
1454 break;
1455 case LACON:
1456 fprintf(f, ":%ld:", (long) a->co);
1457 break;
1458 case '^':
1459 case '$':
1460 fprintf(f, "%c%d", a->type, (int) a->co);
1461 break;
1462 case EMPTY:
1463 break;
1464 default:
1465 fprintf(f, "0x%x/0%lo", a->type, (long) a->co);
1466 break;
1467 }
1468 if (a->from != s)
1469 fprintf(f, "?%d?", a->from->no);
1470 for (ab = &a->from->oas; ab != NULL; ab = ab->next)
1471 {
1472 for (aa = &ab->a[0]; aa < &ab->a[ABSIZE]; aa++)
1473 if (aa == a)
1474 break; /* NOTE BREAK OUT */
1475 if (aa < &ab->a[ABSIZE]) /* propagate break */
1476 break; /* NOTE BREAK OUT */
1477 }
1478 if (ab == NULL)
1479 fprintf(f, "?!?"); /* not in allocated space */
1480 fprintf(f, "->");
1481 if (a->to == NULL)
1482 {
1483 fprintf(f, "NULL");
1484 return;
1485 }
1486 fprintf(f, "%d", a->to->no);
1487 for (aa = a->to->ins; aa != NULL; aa = aa->inchain)
1488 if (aa == a)
1489 break; /* NOTE BREAK OUT */
1490 if (aa == NULL)
1491 fprintf(f, "?!?"); /* missing from in-chain */
1492 }
1493 #endif /* REG_DEBUG */
1494
1495 /*
1496 * dumpcnfa - dump a compacted NFA in human-readable form
1497 */
1498 #ifdef REG_DEBUG
1499 static void
1500 dumpcnfa(struct cnfa * cnfa,
1501 FILE *f)
1502 {
1503 int st;
1504
1505 fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post);
1506 if (cnfa->bos[0] != COLORLESS)
1507 fprintf(f, ", bos [%ld]", (long) cnfa->bos[0]);
1508 if (cnfa->bos[1] != COLORLESS)
1509 fprintf(f, ", bol [%ld]", (long) cnfa->bos[1]);
1510 if (cnfa->eos[0] != COLORLESS)
1511 fprintf(f, ", eos [%ld]", (long) cnfa->eos[0]);
1512 if (cnfa->eos[1] != COLORLESS)
1513 fprintf(f, ", eol [%ld]", (long) cnfa->eos[1]);
1514 if (cnfa->flags & HASLACONS)
1515 fprintf(f, ", haslacons");
1516 fprintf(f, "\n");
1517 for (st = 0; st < cnfa->nstates; st++)
1518 dumpcstate(st, cnfa->states[st], cnfa, f);
1519 fflush(f);
1520 }
1521 #endif
1522
1523 #ifdef REG_DEBUG /* subordinates of dumpcnfa */
1524
1525 /*
1526 * dumpcstate - dump a compacted-NFA state in human-readable form
1527 */
1528 static void
1529 dumpcstate(int st,
1530 struct carc * ca,
1531 struct cnfa * cnfa,
1532 FILE *f)
1533 {
1534 int i;
1535 int pos;
1536
1537 fprintf(f, "%d%s", st, (ca[0].co) ? ":" : ".");
1538 pos = 1;
1539 for (i = 1; ca[i].co != COLORLESS; i++)
1540 {
1541 if (ca[i].co < cnfa->ncolors)
1542 fprintf(f, "\t[%ld]->%d", (long) ca[i].co, ca[i].to);
1543 else
1544 fprintf(f, "\t:%ld:->%d", (long) ca[i].co - cnfa->ncolors,
1545 ca[i].to);
1546 if (pos == 5)
1547 {
1548 fprintf(f, "\n");
1549 pos = 1;
1550 }
1551 else
1552 pos++;
1553 }
1554 if (i == 1 || pos != 1)
1555 fprintf(f, "\n");
1556 fflush(f);
1557 }
1558
1559 #endif /* REG_DEBUG */