3 * This file is #included by regcomp.c.
5 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
7 * Development of this software was funded, in part, by Cray Research Inc.,
8 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
9 * Corporation, none of whom are responsible for the results. The author
12 * Redistribution and use in source and binary forms -- with or without
13 * modification -- are permitted for any purpose, provided that
14 * redistributions in source form retain this entire copyright notice and
15 * indicate the origin and nature of any modifications.
17 * I'd appreciate being given credit for this package in the documentation
18 * of software which uses it, but that is not a requirement.
20 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
21 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
22 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 * One or two things that technically ought to be in here
35 * are actually in color.c, thanks to some incestuous relationships in
39 #define NISERR() VISERR(nfa->v)
40 #define NERR(e) VERR(nfa->v, (e))
44 * newnfa - set up an NFA
46 static struct nfa
* /* the NFA, or NULL */
47 newnfa(struct vars
* v
,
49 struct nfa
* parent
) /* NULL if primary NFA */
53 nfa
= (struct nfa
*) MALLOC(sizeof(struct nfa
));
63 nfa
->bos
[0] = nfa
->bos
[1] = COLORLESS
;
64 nfa
->eos
[0] = nfa
->eos
[1] = COLORLESS
;
65 nfa
->post
= newfstate(nfa
, '@'); /* number 0 */
66 nfa
->pre
= newfstate(nfa
, '>'); /* number 1 */
69 nfa
->init
= newstate(nfa
); /* may become invalid later */
70 nfa
->final
= newstate(nfa
);
76 rainbow(nfa
, nfa
->cm
, PLAIN
, COLORLESS
, nfa
->pre
, nfa
->init
);
77 newarc(nfa
, '^', 1, nfa
->pre
, nfa
->init
);
78 newarc(nfa
, '^', 0, nfa
->pre
, nfa
->init
);
79 rainbow(nfa
, nfa
->cm
, PLAIN
, COLORLESS
, nfa
->final
, nfa
->post
);
80 newarc(nfa
, '$', 1, nfa
->final
, nfa
->post
);
81 newarc(nfa
, '$', 0, nfa
->final
, nfa
->post
);
92 * freenfa - free an entire NFA
95 freenfa(struct nfa
* nfa
)
99 while ((s
= nfa
->states
) != NULL
)
101 s
->nins
= s
->nouts
= 0; /* don't worry about arcs */
104 while ((s
= nfa
->free
) != NULL
)
107 destroystate(nfa
, s
);
118 * newstate - allocate an NFA state, with zero flag value
120 static struct state
* /* NULL on error */
121 newstate(struct nfa
* nfa
)
125 if (nfa
->free
!= NULL
)
132 s
= (struct state
*) MALLOC(sizeof(struct state
));
143 assert(nfa
->nstates
>= 0);
144 s
->no
= nfa
->nstates
++;
146 if (nfa
->states
== NULL
)
154 if (nfa
->slast
!= NULL
)
156 assert(nfa
->slast
->next
== NULL
);
157 nfa
->slast
->next
= s
;
159 s
->prev
= nfa
->slast
;
165 * newfstate - allocate an NFA state with a specified flag value
167 static struct state
* /* NULL on error */
168 newfstate(struct nfa
* nfa
, int flag
)
174 s
->flag
= (char) flag
;
179 * dropstate - delete a state's inarcs and outarcs and free it
182 dropstate(struct nfa
* nfa
,
187 while ((a
= s
->ins
) != NULL
)
189 while ((a
= s
->outs
) != NULL
)
195 * freestate - free a state, which has no in-arcs or out-arcs
198 freestate(struct nfa
* nfa
,
202 assert(s
->nins
== 0 && s
->nouts
== 0);
207 s
->next
->prev
= s
->prev
;
210 assert(s
== nfa
->slast
);
211 nfa
->slast
= s
->prev
;
214 s
->prev
->next
= s
->next
;
217 assert(s
== nfa
->states
);
218 nfa
->states
= s
->next
;
221 s
->next
= nfa
->free
; /* don't delete it, put it on the free
227 * destroystate - really get rid of an already-freed state
230 destroystate(struct nfa
* nfa
,
234 struct arcbatch
*abnext
;
236 assert(s
->no
== FREESTATE
);
237 for (ab
= s
->oas
.next
; ab
!= NULL
; ab
= abnext
)
249 * newarc - set up a new arc within an NFA
252 newarc(struct nfa
* nfa
,
260 assert(from
!= NULL
&& to
!= NULL
);
262 /* check for duplicates */
263 for (a
= from
->outs
; a
!= NULL
; a
= a
->outchain
)
264 if (a
->to
== to
&& a
->co
== co
&& a
->type
== t
)
267 a
= allocarc(nfa
, from
);
278 * Put the new arc on the beginning, not the end, of the chains. Not
279 * only is this easier, it has the very useful side effect that
280 * deleting the most-recently-added arc is the cheapest case rather
281 * than the most expensive one.
283 a
->inchain
= to
->ins
;
285 a
->outchain
= from
->outs
;
291 if (COLORED(a
) && nfa
->parent
== NULL
)
292 colorchain(nfa
->cm
, a
);
298 * allocarc - allocate a new out-arc within a state
300 static struct arc
* /* NULL for failure */
301 allocarc(struct nfa
* nfa
,
305 struct arcbatch
*new;
309 if (s
->free
== NULL
&& s
->noas
< ABSIZE
)
311 a
= &s
->oas
.a
[s
->noas
];
316 /* if none at hand, get more */
319 new = (struct arcbatch
*) MALLOC(sizeof(struct arcbatch
));
325 new->next
= s
->oas
.next
;
328 for (i
= 0; i
< ABSIZE
; i
++)
331 new->a
[i
].freechain
= &new->a
[i
+ 1];
333 new->a
[ABSIZE
- 1].freechain
= NULL
;
334 s
->free
= &new->a
[0];
336 assert(s
->free
!= NULL
);
339 s
->free
= a
->freechain
;
344 * freearc - free an arc
347 freearc(struct nfa
* nfa
,
350 struct state
*from
= victim
->from
;
351 struct state
*to
= victim
->to
;
354 assert(victim
->type
!= 0);
356 /* take it off color chain if necessary */
357 if (COLORED(victim
) && nfa
->parent
== NULL
)
358 uncolorchain(nfa
->cm
, victim
);
360 /* take it off source's out-chain */
361 assert(from
!= NULL
);
362 assert(from
->outs
!= NULL
);
364 if (a
== victim
) /* simple case: first in chain */
365 from
->outs
= victim
->outchain
;
368 for (; a
!= NULL
&& a
->outchain
!= victim
; a
= a
->outchain
)
371 a
->outchain
= victim
->outchain
;
375 /* take it off target's in-chain */
377 assert(to
->ins
!= NULL
);
379 if (a
== victim
) /* simple case: first in chain */
380 to
->ins
= victim
->inchain
;
383 for (; a
!= NULL
&& a
->inchain
!= victim
; a
= a
->inchain
)
386 a
->inchain
= victim
->inchain
;
390 /* clean up and place on free list */
392 victim
->from
= NULL
; /* precautions... */
394 victim
->inchain
= NULL
;
395 victim
->outchain
= NULL
;
396 victim
->freechain
= from
->free
;
401 * findarc - find arc, if any, from given source with given type and color
402 * If there is more than one such arc, the result is random.
405 findarc(struct state
* s
,
411 for (a
= s
->outs
; a
!= NULL
; a
= a
->outchain
)
412 if (a
->type
== type
&& a
->co
== co
)
418 * cparc - allocate a new arc within an NFA, copying details from old one
421 cparc(struct nfa
* nfa
,
426 newarc(nfa
, oa
->type
, oa
->co
, from
, to
);
430 * moveins - move all in arcs of a state to another state
432 * You might think this could be done better by just updating the
433 * existing arcs, and you would be right if it weren't for the desire
434 * for duplicate suppression, which makes it easier to just make new
435 * ones to exploit the suppression built into newarc.
438 moveins(struct nfa
* nfa
,
446 while ((a
= old
->ins
) != NULL
)
448 cparc(nfa
, a
, a
->from
, new);
451 assert(old
->nins
== 0);
452 assert(old
->ins
== NULL
);
456 * copyins - copy all in arcs of a state to another state
459 copyins(struct nfa
* nfa
,
467 for (a
= old
->ins
; a
!= NULL
; a
= a
->inchain
)
468 cparc(nfa
, a
, a
->from
, new);
472 * moveouts - move all out arcs of a state to another state
475 moveouts(struct nfa
* nfa
,
483 while ((a
= old
->outs
) != NULL
)
485 cparc(nfa
, a
, new, a
->to
);
491 * copyouts - copy all out arcs of a state to another state
494 copyouts(struct nfa
* nfa
,
502 for (a
= old
->outs
; a
!= NULL
; a
= a
->outchain
)
503 cparc(nfa
, a
, new, a
->to
);
507 * cloneouts - copy out arcs of a state to another state pair, modifying type
510 cloneouts(struct nfa
* nfa
,
520 for (a
= old
->outs
; a
!= NULL
; a
= a
->outchain
)
521 newarc(nfa
, type
, a
->co
, from
, to
);
525 * delsub - delete a sub-NFA, updating subre pointers if necessary
527 * This uses a recursive traversal of the sub-NFA, marking already-seen
528 * states using their tmp pointer.
531 delsub(struct nfa
* nfa
,
532 struct state
* lp
, /* the sub-NFA goes from here... */
533 struct state
* rp
) /* ...to here, *not* inclusive */
537 rp
->tmp
= rp
; /* mark end */
539 deltraverse(nfa
, lp
, lp
);
540 assert(lp
->nouts
== 0 && rp
->nins
== 0); /* did the job */
541 assert(lp
->no
!= FREESTATE
&& rp
->no
!= FREESTATE
); /* no more */
543 rp
->tmp
= NULL
; /* unmark end */
544 lp
->tmp
= NULL
; /* and begin, marked by deltraverse */
548 * deltraverse - the recursive heart of delsub
549 * This routine's basic job is to destroy all out-arcs of the state.
552 deltraverse(struct nfa
* nfa
,
553 struct state
* leftend
,
560 return; /* nothing to do */
562 return; /* already in progress */
564 s
->tmp
= s
; /* mark as in progress */
566 while ((a
= s
->outs
) != NULL
)
569 deltraverse(nfa
, leftend
, to
);
570 assert(to
->nouts
== 0 || to
->tmp
!= NULL
);
572 if (to
->nins
== 0 && to
->tmp
== NULL
)
574 assert(to
->nouts
== 0);
579 assert(s
->no
!= FREESTATE
); /* we're still here */
580 assert(s
== leftend
|| s
->nins
!= 0); /* and still reachable */
581 assert(s
->nouts
== 0); /* but have no outarcs */
583 s
->tmp
= NULL
; /* we're done here */
587 * dupnfa - duplicate sub-NFA
589 * Another recursive traversal, this time using tmp to point to duplicates
590 * as well as mark already-seen states. (You knew there was a reason why
591 * it's a state pointer, didn't you? :-))
594 dupnfa(struct nfa
* nfa
,
595 struct state
* start
, /* duplicate of subNFA starting here */
596 struct state
* stop
, /* and stopping here */
597 struct state
* from
, /* stringing duplicate from here */
598 struct state
* to
) /* to here */
602 newarc(nfa
, EMPTY
, 0, from
, to
);
607 duptraverse(nfa
, start
, from
);
608 /* done, except for clearing out the tmp pointers */
611 cleartraverse(nfa
, start
);
615 * duptraverse - recursive heart of dupnfa
618 duptraverse(struct nfa
* nfa
,
620 struct state
* stmp
) /* s's duplicate, or NULL */
625 return; /* already done */
627 s
->tmp
= (stmp
== NULL
) ? newstate(nfa
) : stmp
;
634 for (a
= s
->outs
; a
!= NULL
&& !NISERR(); a
= a
->outchain
)
636 duptraverse(nfa
, a
->to
, (struct state
*) NULL
);
637 assert(a
->to
->tmp
!= NULL
);
638 cparc(nfa
, a
, s
->tmp
, a
->to
->tmp
);
643 * cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set
646 cleartraverse(struct nfa
* nfa
,
655 for (a
= s
->outs
; a
!= NULL
; a
= a
->outchain
)
656 cleartraverse(nfa
, a
->to
);
660 * specialcolors - fill in special colors for an NFA
663 specialcolors(struct nfa
* nfa
)
665 /* false colors for BOS, BOL, EOS, EOL */
666 if (nfa
->parent
== NULL
)
668 nfa
->bos
[0] = pseudocolor(nfa
->cm
);
669 nfa
->bos
[1] = pseudocolor(nfa
->cm
);
670 nfa
->eos
[0] = pseudocolor(nfa
->cm
);
671 nfa
->eos
[1] = pseudocolor(nfa
->cm
);
675 assert(nfa
->parent
->bos
[0] != COLORLESS
);
676 nfa
->bos
[0] = nfa
->parent
->bos
[0];
677 assert(nfa
->parent
->bos
[1] != COLORLESS
);
678 nfa
->bos
[1] = nfa
->parent
->bos
[1];
679 assert(nfa
->parent
->eos
[0] != COLORLESS
);
680 nfa
->eos
[0] = nfa
->parent
->eos
[0];
681 assert(nfa
->parent
->eos
[1] != COLORLESS
);
682 nfa
->eos
[1] = nfa
->parent
->eos
[1];
687 * optimize - optimize an NFA
689 static long /* re_info bits */
690 optimize(struct nfa
* nfa
,
691 FILE *f
) /* for debug output; NULL none */
694 int verbose
= (f
!= NULL
) ? 1 : 0;
697 fprintf(f
, "\ninitial cleanup:\n");
699 cleanup(nfa
); /* may simplify situation */
704 fprintf(f
, "\nempties:\n");
706 fixempties(nfa
, f
); /* get rid of EMPTY arcs */
709 fprintf(f
, "\nconstraints:\n");
711 pullback(nfa
, f
); /* pull back constraints backward */
712 pushfwd(nfa
, f
); /* push fwd constraints forward */
715 fprintf(f
, "\nfinal cleanup:\n");
717 cleanup(nfa
); /* final tidying */
718 return analyze(nfa
); /* and analysis */
722 * pullback - pull back constraints backward to (with luck) eliminate them
725 pullback(struct nfa
* nfa
,
726 FILE *f
) /* for debug output; NULL none */
734 /* find and pull until there are no more */
738 for (s
= nfa
->states
; s
!= NULL
&& !NISERR(); s
= nexts
)
741 for (a
= s
->outs
; a
!= NULL
&& !NISERR(); a
= nexta
)
744 if (a
->type
== '^' || a
->type
== BEHIND
)
747 assert(nexta
== NULL
|| s
->no
!= FREESTATE
);
750 if (progress
&& f
!= NULL
)
752 } while (progress
&& !NISERR());
756 for (a
= nfa
->pre
->outs
; a
!= NULL
; a
= nexta
)
761 assert(a
->co
== 0 || a
->co
== 1);
762 newarc(nfa
, PLAIN
, nfa
->bos
[a
->co
], a
->from
, a
->to
);
769 * pull - pull a back constraint backward past its source state
770 * A significant property of this function is that it deletes at most
771 * one state -- the constraint's from state -- and only if the constraint
772 * was that state's last outarc.
774 static int /* 0 couldn't, 1 could */
775 pull(struct nfa
* nfa
,
778 struct state
*from
= con
->from
;
779 struct state
*to
= con
->to
;
785 { /* circular constraint is pointless */
789 if (from
->flag
) /* can't pull back beyond start */
797 /* first, clone from state if necessary to avoid other outarcs */
803 assert(to
!= from
); /* con is not an inarc */
804 copyins(nfa
, from
, s
); /* duplicate inarcs */
805 cparc(nfa
, con
, s
, to
); /* move constraint arc */
810 assert(from
->nouts
== 1);
812 /* propagate the constraint into the from state's inarcs */
813 for (a
= from
->ins
; a
!= NULL
; a
= nexta
)
816 switch (combine(con
, a
))
818 case INCOMPATIBLE
: /* destroy the arc */
821 case SATISFIED
: /* no action needed */
823 case COMPATIBLE
: /* swap the two arcs, more or less */
827 cparc(nfa
, a
, s
, to
); /* anticipate move */
828 cparc(nfa
, con
, a
->from
, s
);
839 /* remaining inarcs, if any, incorporate the constraint */
840 moveins(nfa
, from
, to
);
841 dropstate(nfa
, from
); /* will free the constraint */
846 * pushfwd - push forward constraints forward to (with luck) eliminate them
849 pushfwd(struct nfa
* nfa
,
850 FILE *f
) /* for debug output; NULL none */
858 /* find and push until there are no more */
862 for (s
= nfa
->states
; s
!= NULL
&& !NISERR(); s
= nexts
)
865 for (a
= s
->ins
; a
!= NULL
&& !NISERR(); a
= nexta
)
868 if (a
->type
== '$' || a
->type
== AHEAD
)
871 assert(nexta
== NULL
|| s
->no
!= FREESTATE
);
874 if (progress
&& f
!= NULL
)
876 } while (progress
&& !NISERR());
880 for (a
= nfa
->post
->ins
; a
!= NULL
; a
= nexta
)
885 assert(a
->co
== 0 || a
->co
== 1);
886 newarc(nfa
, PLAIN
, nfa
->eos
[a
->co
], a
->from
, a
->to
);
893 * push - push a forward constraint forward past its destination state
894 * A significant property of this function is that it deletes at most
895 * one state -- the constraint's to state -- and only if the constraint
896 * was that state's last inarc.
898 static int /* 0 couldn't, 1 could */
899 push(struct nfa
* nfa
,
902 struct state
*from
= con
->from
;
903 struct state
*to
= con
->to
;
909 { /* circular constraint is pointless */
913 if (to
->flag
) /* can't push forward beyond end */
921 /* first, clone to state if necessary to avoid other inarcs */
927 copyouts(nfa
, to
, s
); /* duplicate outarcs */
928 cparc(nfa
, con
, from
, s
); /* move constraint */
933 assert(to
->nins
== 1);
935 /* propagate the constraint into the to state's outarcs */
936 for (a
= to
->outs
; a
!= NULL
; a
= nexta
)
939 switch (combine(con
, a
))
941 case INCOMPATIBLE
: /* destroy the arc */
944 case SATISFIED
: /* no action needed */
946 case COMPATIBLE
: /* swap the two arcs, more or less */
950 cparc(nfa
, con
, s
, a
->to
); /* anticipate move */
951 cparc(nfa
, a
, from
, s
);
962 /* remaining outarcs, if any, incorporate the constraint */
963 moveouts(nfa
, to
, from
);
964 dropstate(nfa
, to
); /* will free the constraint */
969 * combine - constraint lands on an arc, what happens?
971 * #def INCOMPATIBLE 1 // destroys arc
972 * #def SATISFIED 2 // constraint satisfied
973 * #def COMPATIBLE 3 // compatible but not satisfied yet
976 combine(struct arc
* con
,
979 #define CA(ct,at) (((ct)<<CHAR_BIT) | (at))
981 switch (CA(con
->type
, a
->type
))
983 case CA('^', PLAIN
): /* newlines are handled separately */
987 case CA(AHEAD
, PLAIN
): /* color constraints meet colors */
988 case CA(BEHIND
, PLAIN
):
989 if (con
->co
== a
->co
)
993 case CA('^', '^'): /* collision, similar constraints */
995 case CA(AHEAD
, AHEAD
):
996 case CA(BEHIND
, BEHIND
):
997 if (con
->co
== a
->co
) /* true duplication */
1001 case CA('^', BEHIND
): /* collision, dissimilar constraints */
1002 case CA(BEHIND
, '^'):
1003 case CA('$', AHEAD
):
1004 case CA(AHEAD
, '$'):
1005 return INCOMPATIBLE
;
1007 case CA('^', '$'): /* constraints passing each other */
1008 case CA('^', AHEAD
):
1009 case CA(BEHIND
, '$'):
1010 case CA(BEHIND
, AHEAD
):
1012 case CA('$', BEHIND
):
1013 case CA(AHEAD
, '^'):
1014 case CA(AHEAD
, BEHIND
):
1015 case CA('^', LACON
):
1016 case CA(BEHIND
, LACON
):
1017 case CA('$', LACON
):
1018 case CA(AHEAD
, LACON
):
1023 return INCOMPATIBLE
; /* for benefit of blind compilers */
1027 * fixempties - get rid of EMPTY arcs
1030 fixempties(struct nfa
* nfa
,
1031 FILE *f
) /* for debug output; NULL none */
1034 struct state
*nexts
;
1039 /* find and eliminate empties until there are no more */
1043 for (s
= nfa
->states
; s
!= NULL
&& !NISERR(); s
= nexts
)
1046 for (a
= s
->outs
; a
!= NULL
&& !NISERR(); a
= nexta
)
1048 nexta
= a
->outchain
;
1049 if (a
->type
== EMPTY
&& unempty(nfa
, a
))
1051 assert(nexta
== NULL
|| s
->no
!= FREESTATE
);
1054 if (progress
&& f
!= NULL
)
1056 } while (progress
&& !NISERR());
1060 * unempty - optimize out an EMPTY arc, if possible
1062 * Actually, as it stands this function always succeeds, but the return
1063 * value is kept with an eye on possible future changes.
1065 static int /* 0 couldn't, 1 could */
1066 unempty(struct nfa
* nfa
,
1069 struct state
*from
= a
->from
;
1070 struct state
*to
= a
->to
;
1071 int usefrom
; /* work on from, as opposed to to? */
1073 assert(a
->type
== EMPTY
);
1074 assert(from
!= nfa
->pre
&& to
!= nfa
->post
);
1077 { /* vacuous loop */
1082 /* decide which end to work on */
1083 usefrom
= 1; /* default: attack from */
1084 if (from
->nouts
> to
->nins
)
1086 else if (from
->nouts
== to
->nins
)
1088 /* decide on secondary issue: move/copy fewest arcs */
1089 if (from
->nins
> to
->nouts
)
1096 if (from
->nouts
== 0)
1098 /* was the state's only outarc */
1099 moveins(nfa
, from
, to
);
1100 freestate(nfa
, from
);
1103 copyins(nfa
, from
, to
);
1109 /* was the state's only inarc */
1110 moveouts(nfa
, to
, from
);
1114 copyouts(nfa
, to
, from
);
1121 * cleanup - clean up NFA after optimizations
1124 cleanup(struct nfa
* nfa
)
1127 struct state
*nexts
;
1130 /* clear out unreachable or dead-end states */
1131 /* use pre to mark reachable, then post to mark can-reach-post */
1132 markreachable(nfa
, nfa
->pre
, (struct state
*) NULL
, nfa
->pre
);
1133 markcanreach(nfa
, nfa
->post
, nfa
->pre
, nfa
->post
);
1134 for (s
= nfa
->states
; s
!= NULL
; s
= nexts
)
1137 if (s
->tmp
!= nfa
->post
&& !s
->flag
)
1140 assert(nfa
->post
->nins
== 0 || nfa
->post
->tmp
== nfa
->post
);
1141 cleartraverse(nfa
, nfa
->pre
);
1142 assert(nfa
->post
->nins
== 0 || nfa
->post
->tmp
== NULL
);
1143 /* the nins==0 (final unreachable) case will be caught later */
1145 /* renumber surviving states */
1147 for (s
= nfa
->states
; s
!= NULL
; s
= s
->next
)
1153 * markreachable - recursive marking of reachable states
1156 markreachable(struct nfa
* nfa
,
1158 struct state
* okay
, /* consider only states with this
1160 struct state
* mark
) /* the value to mark with */
1168 for (a
= s
->outs
; a
!= NULL
; a
= a
->outchain
)
1169 markreachable(nfa
, a
->to
, okay
, mark
);
1173 * markcanreach - recursive marking of states which can reach here
1176 markcanreach(struct nfa
* nfa
,
1178 struct state
* okay
, /* consider only states with this
1180 struct state
* mark
) /* the value to mark with */
1188 for (a
= s
->ins
; a
!= NULL
; a
= a
->inchain
)
1189 markcanreach(nfa
, a
->from
, okay
, mark
);
1193 * analyze - ascertain potentially-useful facts about an optimized NFA
1195 static long /* re_info bits to be ORed in */
1196 analyze(struct nfa
* nfa
)
1201 if (nfa
->pre
->outs
== NULL
)
1202 return REG_UIMPOSSIBLE
;
1203 for (a
= nfa
->pre
->outs
; a
!= NULL
; a
= a
->outchain
)
1204 for (aa
= a
->to
->outs
; aa
!= NULL
; aa
= aa
->outchain
)
1205 if (aa
->to
== nfa
->post
)
1206 return REG_UEMPTYMATCH
;
1211 * compact - compact an NFA
1214 compact(struct nfa
* nfa
,
1228 for (s
= nfa
->states
; s
!= NULL
; s
= s
->next
)
1231 narcs
+= 1 + s
->nouts
+ 1;
1232 /* 1 as a fake for flags, nouts for arcs, 1 as endmarker */
1235 cnfa
->states
= (struct carc
**) MALLOC(nstates
* sizeof(struct carc
*));
1236 cnfa
->arcs
= (struct carc
*) MALLOC(narcs
* sizeof(struct carc
));
1237 if (cnfa
->states
== NULL
|| cnfa
->arcs
== NULL
)
1239 if (cnfa
->states
!= NULL
)
1241 if (cnfa
->arcs
!= NULL
)
1246 cnfa
->nstates
= nstates
;
1247 cnfa
->pre
= nfa
->pre
->no
;
1248 cnfa
->post
= nfa
->post
->no
;
1249 cnfa
->bos
[0] = nfa
->bos
[0];
1250 cnfa
->bos
[1] = nfa
->bos
[1];
1251 cnfa
->eos
[0] = nfa
->eos
[0];
1252 cnfa
->eos
[1] = nfa
->eos
[1];
1253 cnfa
->ncolors
= maxcolor(nfa
->cm
) + 1;
1257 for (s
= nfa
->states
; s
!= NULL
; s
= s
->next
)
1259 assert((size_t) s
->no
< nstates
);
1260 cnfa
->states
[s
->no
] = ca
;
1261 ca
->co
= 0; /* clear and skip flags "arc" */
1264 for (a
= s
->outs
; a
!= NULL
; a
= a
->outchain
)
1273 assert(s
->no
!= cnfa
->pre
);
1274 ca
->co
= (color
) (cnfa
->ncolors
+ a
->co
);
1277 cnfa
->flags
|= HASLACONS
;
1283 carcsort(first
, ca
- 1);
1288 assert(ca
== &cnfa
->arcs
[narcs
]);
1289 assert(cnfa
->nstates
!= 0);
1291 /* mark no-progress states */
1292 for (a
= nfa
->pre
->outs
; a
!= NULL
; a
= a
->outchain
)
1293 cnfa
->states
[a
->to
->no
]->co
= 1;
1294 cnfa
->states
[nfa
->pre
->no
]->co
= 1;
1298 * carcsort - sort compacted-NFA arcs by color
1300 * Really dumb algorithm, but if the list is long enough for that to matter,
1301 * you're in real trouble anyway.
1304 carcsort(struct carc
* first
,
1311 if (last
- first
<= 1)
1314 for (p
= first
; p
<= last
; p
++)
1315 for (q
= p
; q
<= last
; q
++)
1316 if (p
->co
> q
->co
||
1317 (p
->co
== q
->co
&& p
->to
> q
->to
))
1327 * freecnfa - free a compacted NFA
1330 freecnfa(struct cnfa
* cnfa
)
1332 assert(cnfa
->nstates
!= 0); /* not empty already */
1339 * dumpnfa - dump an NFA in human-readable form
1342 dumpnfa(struct nfa
* nfa
,
1348 fprintf(f
, "pre %d, post %d", nfa
->pre
->no
, nfa
->post
->no
);
1349 if (nfa
->bos
[0] != COLORLESS
)
1350 fprintf(f
, ", bos [%ld]", (long) nfa
->bos
[0]);
1351 if (nfa
->bos
[1] != COLORLESS
)
1352 fprintf(f
, ", bol [%ld]", (long) nfa
->bos
[1]);
1353 if (nfa
->eos
[0] != COLORLESS
)
1354 fprintf(f
, ", eos [%ld]", (long) nfa
->eos
[0]);
1355 if (nfa
->eos
[1] != COLORLESS
)
1356 fprintf(f
, ", eol [%ld]", (long) nfa
->eos
[1]);
1358 for (s
= nfa
->states
; s
!= NULL
; s
= s
->next
)
1360 if (nfa
->parent
== NULL
)
1361 dumpcolors(nfa
->cm
, f
);
1366 #ifdef REG_DEBUG /* subordinates of dumpnfa */
1369 * dumpstate - dump an NFA state in human-readable form
1372 dumpstate(struct state
* s
,
1377 fprintf(f
, "%d%s%c", s
->no
, (s
->tmp
!= NULL
) ? "T" : "",
1378 (s
->flag
) ? s
->flag
: '.');
1379 if (s
->prev
!= NULL
&& s
->prev
->next
!= s
)
1380 fprintf(f
, "\tstate chain bad\n");
1382 fprintf(f
, "\tno out arcs\n");
1386 for (a
= s
->ins
; a
!= NULL
; a
= a
->inchain
)
1389 fprintf(f
, "\tlink from %d to %d on %d's in-chain\n",
1390 a
->from
->no
, a
->to
->no
, s
->no
);
1395 * dumparcs - dump out-arcs in human-readable form
1398 dumparcs(struct state
* s
,
1403 assert(s
->nouts
> 0);
1404 /* printing arcs in reverse order is usually clearer */
1405 pos
= dumprarcs(s
->outs
, s
, f
, 1);
1411 * dumprarcs - dump remaining outarcs, recursively, in reverse order
1413 static int /* resulting print position */
1414 dumprarcs(struct arc
* a
,
1417 int pos
) /* initial print position */
1419 if (a
->outchain
!= NULL
)
1420 pos
= dumprarcs(a
->outchain
, s
, f
, pos
);
1433 * dumparc - dump one outarc in readable form, including prefixing tab
1436 dumparc(struct arc
* a
,
1441 struct arcbatch
*ab
;
1447 fprintf(f
, "[%ld]", (long) a
->co
);
1450 fprintf(f
, ">%ld>", (long) a
->co
);
1453 fprintf(f
, "<%ld<", (long) a
->co
);
1456 fprintf(f
, ":%ld:", (long) a
->co
);
1460 fprintf(f
, "%c%d", a
->type
, (int) a
->co
);
1465 fprintf(f
, "0x%x/0%lo", a
->type
, (long) a
->co
);
1469 fprintf(f
, "?%d?", a
->from
->no
);
1470 for (ab
= &a
->from
->oas
; ab
!= NULL
; ab
= ab
->next
)
1472 for (aa
= &ab
->a
[0]; aa
< &ab
->a
[ABSIZE
]; aa
++)
1474 break; /* NOTE BREAK OUT */
1475 if (aa
< &ab
->a
[ABSIZE
]) /* propagate break */
1476 break; /* NOTE BREAK OUT */
1479 fprintf(f
, "?!?"); /* not in allocated space */
1486 fprintf(f
, "%d", a
->to
->no
);
1487 for (aa
= a
->to
->ins
; aa
!= NULL
; aa
= aa
->inchain
)
1489 break; /* NOTE BREAK OUT */
1491 fprintf(f
, "?!?"); /* missing from in-chain */
1493 #endif /* REG_DEBUG */
1496 * dumpcnfa - dump a compacted NFA in human-readable form
1500 dumpcnfa(struct cnfa
* cnfa
,
1505 fprintf(f
, "pre %d, post %d", cnfa
->pre
, cnfa
->post
);
1506 if (cnfa
->bos
[0] != COLORLESS
)
1507 fprintf(f
, ", bos [%ld]", (long) cnfa
->bos
[0]);
1508 if (cnfa
->bos
[1] != COLORLESS
)
1509 fprintf(f
, ", bol [%ld]", (long) cnfa
->bos
[1]);
1510 if (cnfa
->eos
[0] != COLORLESS
)
1511 fprintf(f
, ", eos [%ld]", (long) cnfa
->eos
[0]);
1512 if (cnfa
->eos
[1] != COLORLESS
)
1513 fprintf(f
, ", eol [%ld]", (long) cnfa
->eos
[1]);
1514 if (cnfa
->flags
& HASLACONS
)
1515 fprintf(f
, ", haslacons");
1517 for (st
= 0; st
< cnfa
->nstates
; st
++)
1518 dumpcstate(st
, cnfa
->states
[st
], cnfa
, f
);
1523 #ifdef REG_DEBUG /* subordinates of dumpcnfa */
1526 * dumpcstate - dump a compacted-NFA state in human-readable form
1537 fprintf(f
, "%d%s", st
, (ca
[0].co
) ? ":" : ".");
1539 for (i
= 1; ca
[i
].co
!= COLORLESS
; i
++)
1541 if (ca
[i
].co
< cnfa
->ncolors
)
1542 fprintf(f
, "\t[%ld]->%d", (long) ca
[i
].co
, ca
[i
].to
);
1544 fprintf(f
, "\t:%ld:->%d", (long) ca
[i
].co
- cnfa
->ncolors
,
1554 if (i
== 1 || pos
!= 1)
1559 #endif /* REG_DEBUG */