3 * This file is #included by regcomp.c.
5 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
7 * Development of this software was funded, in part, by Cray Research Inc.,
8 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
9 * Corporation, none of whom are responsible for the results. The author
12 * Redistribution and use in source and binary forms -- with or without
13 * modification -- are permitted for any purpose, provided that
14 * redistributions in source form retain this entire copyright notice and
15 * indicate the origin and nature of any modifications.
17 * I'd appreciate being given credit for this package in the documentation
18 * of software which uses it, but that is not a requirement.
20 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
21 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
22 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 * One or two things that technically ought to be in here
34 * are actually in color.c, thanks to some incestuous relationships in
38 #define NISERR() VISERR(nfa->v)
39 #define NERR(e) (void)VERR(nfa->v, (e))
43 - newnfa - set up an NFA
44 ^ static struct nfa *newnfa(struct vars *, struct colormap *, struct nfa *);
46 static struct nfa
* /* the NFA, or NULL */
50 struct nfa
*parent
; /* NULL if primary NFA */
54 nfa
= (struct nfa
*)MALLOC(sizeof(struct nfa
));
64 nfa
->bos
[0] = nfa
->bos
[1] = COLORLESS
;
65 nfa
->eos
[0] = nfa
->eos
[1] = COLORLESS
;
66 nfa
->post
= newfstate(nfa
, '@'); /* number 0 */
67 nfa
->pre
= newfstate(nfa
, '>'); /* number 1 */
70 nfa
->init
= newstate(nfa
); /* may become invalid later */
71 nfa
->final
= newstate(nfa
);
76 rainbow(nfa
, nfa
->cm
, PLAIN
, COLORLESS
, nfa
->pre
, nfa
->init
);
77 newarc(nfa
, '^', 1, nfa
->pre
, nfa
->init
);
78 newarc(nfa
, '^', 0, nfa
->pre
, nfa
->init
);
79 rainbow(nfa
, nfa
->cm
, PLAIN
, COLORLESS
, nfa
->final
, nfa
->post
);
80 newarc(nfa
, '$', 1, nfa
->final
, nfa
->post
);
81 newarc(nfa
, '$', 0, nfa
->final
, nfa
->post
);
91 - freenfa - free an entire NFA
92 ^ static VOID freenfa(struct nfa *);
100 while ((s
= nfa
->states
) != NULL
) {
101 s
->nins
= s
->nouts
= 0; /* don't worry about arcs */
104 while ((s
= nfa
->free
) != NULL
) {
106 destroystate(nfa
, s
);
117 - newstate - allocate an NFA state, with zero flag value
118 ^ static struct state *newstate(struct nfa *);
120 static struct state
* /* NULL on error */
126 if (nfa
->free
!= NULL
) {
130 s
= (struct state
*)MALLOC(sizeof(struct state
));
140 assert(nfa
->nstates
>= 0);
141 s
->no
= nfa
->nstates
++;
143 if (nfa
->states
== NULL
)
151 if (nfa
->slast
!= NULL
) {
152 assert(nfa
->slast
->next
== NULL
);
153 nfa
->slast
->next
= s
;
155 s
->prev
= nfa
->slast
;
161 - newfstate - allocate an NFA state with a specified flag value
162 ^ static struct state *newfstate(struct nfa *, int flag);
164 static struct state
* /* NULL on error */
173 s
->flag
= (char)flag
;
178 - dropstate - delete a state's inarcs and outarcs and free it
179 ^ static VOID dropstate(struct nfa *, struct state *);
188 while ((a
= s
->ins
) != NULL
)
190 while ((a
= s
->outs
) != NULL
)
196 - freestate - free a state, which has no in-arcs or out-arcs
197 ^ static VOID freestate(struct nfa *, struct state *);
205 assert(s
->nins
== 0 && s
->nouts
== 0);
210 s
->next
->prev
= s
->prev
;
212 assert(s
== nfa
->slast
);
213 nfa
->slast
= s
->prev
;
216 s
->prev
->next
= s
->next
;
218 assert(s
== nfa
->states
);
219 nfa
->states
= s
->next
;
222 s
->next
= nfa
->free
; /* don't delete it, put it on the free list */
227 - destroystate - really get rid of an already-freed state
228 ^ static VOID destroystate(struct nfa *, struct state *);
236 struct arcbatch
*abnext
;
238 assert(s
->no
== FREESTATE
);
239 for (ab
= s
->oas
.next
; ab
!= NULL
; ab
= abnext
) {
250 - newarc - set up a new arc within an NFA
251 ^ static VOID newarc(struct nfa *, int, pcolor, struct state *,
255 newarc(nfa
, t
, co
, from
, to
)
264 assert(from
!= NULL
&& to
!= NULL
);
266 /* check for duplicates */
267 for (a
= from
->outs
; a
!= NULL
; a
= a
->outchain
)
268 if (a
->to
== to
&& a
->co
== co
&& a
->type
== t
)
271 a
= allocarc(nfa
, from
);
282 * Put the new arc on the beginning, not the end, of the chains.
283 * Not only is this easier, it has the very useful side effect that
284 * deleting the most-recently-added arc is the cheapest case rather
285 * than the most expensive one.
287 a
->inchain
= to
->ins
;
289 a
->outchain
= from
->outs
;
295 if (COLORED(a
) && nfa
->parent
== NULL
)
296 colorchain(nfa
->cm
, a
);
302 - allocarc - allocate a new out-arc within a state
303 ^ static struct arc *allocarc(struct nfa *, struct state *);
305 static struct arc
* /* NULL for failure */
311 struct arcbatch
*new;
315 if (s
->free
== NULL
&& s
->noas
< ABSIZE
) {
316 a
= &s
->oas
.a
[s
->noas
];
321 /* if none at hand, get more */
322 if (s
->free
== NULL
) {
323 new = (struct arcbatch
*)MALLOC(sizeof(struct arcbatch
));
328 new->next
= s
->oas
.next
;
331 for (i
= 0; i
< ABSIZE
; i
++) {
333 new->a
[i
].freechain
= &new->a
[i
+1];
335 new->a
[ABSIZE
-1].freechain
= NULL
;
336 s
->free
= &new->a
[0];
338 assert(s
->free
!= NULL
);
341 s
->free
= a
->freechain
;
346 - freearc - free an arc
347 ^ static VOID freearc(struct nfa *, struct arc *);
354 struct state
*from
= victim
->from
;
355 struct state
*to
= victim
->to
;
358 assert(victim
->type
!= 0);
360 /* take it off color chain if necessary */
361 if (COLORED(victim
) && nfa
->parent
== NULL
)
362 uncolorchain(nfa
->cm
, victim
);
364 /* take it off source's out-chain */
365 assert(from
!= NULL
);
366 assert(from
->outs
!= NULL
);
368 if (a
== victim
) /* simple case: first in chain */
369 from
->outs
= victim
->outchain
;
371 for (; a
!= NULL
&& a
->outchain
!= victim
; a
= a
->outchain
)
374 a
->outchain
= victim
->outchain
;
378 /* take it off target's in-chain */
380 assert(to
->ins
!= NULL
);
382 if (a
== victim
) /* simple case: first in chain */
383 to
->ins
= victim
->inchain
;
385 for (; a
!= NULL
&& a
->inchain
!= victim
; a
= a
->inchain
)
388 a
->inchain
= victim
->inchain
;
392 /* clean up and place on free list */
394 victim
->from
= NULL
; /* precautions... */
396 victim
->inchain
= NULL
;
397 victim
->outchain
= NULL
;
398 victim
->freechain
= from
->free
;
403 - findarc - find arc, if any, from given source with given type and color
404 * If there is more than one such arc, the result is random.
405 ^ static struct arc *findarc(struct state *, int, pcolor);
415 for (a
= s
->outs
; a
!= NULL
; a
= a
->outchain
)
416 if (a
->type
== type
&& a
->co
== co
)
422 - cparc - allocate a new arc within an NFA, copying details from old one
423 ^ static VOID cparc(struct nfa *, struct arc *, struct state *,
427 cparc(nfa
, oa
, from
, to
)
433 newarc(nfa
, oa
->type
, oa
->co
, from
, to
);
437 - moveins - move all in arcs of a state to another state
438 * You might think this could be done better by just updating the
439 * existing arcs, and you would be right if it weren't for the desire
440 * for duplicate suppression, which makes it easier to just make new
441 * ones to exploit the suppression built into newarc.
442 ^ static VOID moveins(struct nfa *, struct state *, struct state *);
445 moveins(nfa
, old
, new)
454 while ((a
= old
->ins
) != NULL
) {
455 cparc(nfa
, a
, a
->from
, new);
458 assert(old
->nins
== 0);
459 assert(old
->ins
== NULL
);
463 - copyins - copy all in arcs of a state to another state
464 ^ static VOID copyins(struct nfa *, struct state *, struct state *);
467 copyins(nfa
, old
, new)
476 for (a
= old
->ins
; a
!= NULL
; a
= a
->inchain
)
477 cparc(nfa
, a
, a
->from
, new);
481 - moveouts - move all out arcs of a state to another state
482 ^ static VOID moveouts(struct nfa *, struct state *, struct state *);
485 moveouts(nfa
, old
, new)
494 while ((a
= old
->outs
) != NULL
) {
495 cparc(nfa
, a
, new, a
->to
);
501 - copyouts - copy all out arcs of a state to another state
502 ^ static VOID copyouts(struct nfa *, struct state *, struct state *);
505 copyouts(nfa
, old
, new)
514 for (a
= old
->outs
; a
!= NULL
; a
= a
->outchain
)
515 cparc(nfa
, a
, new, a
->to
);
519 - cloneouts - copy out arcs of a state to another state pair, modifying type
520 ^ static VOID cloneouts(struct nfa *, struct state *, struct state *,
521 ^ struct state *, int);
524 cloneouts(nfa
, old
, from
, to
, type
)
535 for (a
= old
->outs
; a
!= NULL
; a
= a
->outchain
)
536 newarc(nfa
, type
, a
->co
, from
, to
);
540 - delsub - delete a sub-NFA, updating subre pointers if necessary
541 * This uses a recursive traversal of the sub-NFA, marking already-seen
542 * states using their tmp pointer.
543 ^ static VOID delsub(struct nfa *, struct state *, struct state *);
548 struct state
*lp
; /* the sub-NFA goes from here... */
549 struct state
*rp
; /* ...to here, *not* inclusive */
553 rp
->tmp
= rp
; /* mark end */
555 deltraverse(nfa
, lp
, lp
);
556 assert(lp
->nouts
== 0 && rp
->nins
== 0); /* did the job */
557 assert(lp
->no
!= FREESTATE
&& rp
->no
!= FREESTATE
); /* no more */
559 rp
->tmp
= NULL
; /* unmark end */
560 lp
->tmp
= NULL
; /* and begin, marked by deltraverse */
564 - deltraverse - the recursive heart of delsub
565 * This routine's basic job is to destroy all out-arcs of the state.
566 ^ static VOID deltraverse(struct nfa *, struct state *, struct state *);
569 deltraverse(nfa
, leftend
, s
)
571 struct state
*leftend
;
578 return; /* nothing to do */
580 return; /* already in progress */
582 s
->tmp
= s
; /* mark as in progress */
584 while ((a
= s
->outs
) != NULL
) {
586 deltraverse(nfa
, leftend
, to
);
587 assert(to
->nouts
== 0 || to
->tmp
!= NULL
);
589 if (to
->nins
== 0 && to
->tmp
== NULL
) {
590 assert(to
->nouts
== 0);
595 assert(s
->no
!= FREESTATE
); /* we're still here */
596 assert(s
== leftend
|| s
->nins
!= 0); /* and still reachable */
597 assert(s
->nouts
== 0); /* but have no outarcs */
599 s
->tmp
= NULL
; /* we're done here */
603 - dupnfa - duplicate sub-NFA
604 * Another recursive traversal, this time using tmp to point to duplicates
605 * as well as mark already-seen states. (You knew there was a reason why
606 * it's a state pointer, didn't you? :-))
607 ^ static VOID dupnfa(struct nfa *, struct state *, struct state *,
608 ^ struct state *, struct state *);
611 dupnfa(nfa
, start
, stop
, from
, to
)
613 struct state
*start
; /* duplicate of subNFA starting here */
614 struct state
*stop
; /* and stopping here */
615 struct state
*from
; /* stringing duplicate from here */
616 struct state
*to
; /* to here */
619 newarc(nfa
, EMPTY
, 0, from
, to
);
624 duptraverse(nfa
, start
, from
);
625 /* done, except for clearing out the tmp pointers */
628 cleartraverse(nfa
, start
);
632 - duptraverse - recursive heart of dupnfa
633 ^ static VOID duptraverse(struct nfa *, struct state *, struct state *);
636 duptraverse(nfa
, s
, stmp
)
639 struct state
*stmp
; /* s's duplicate, or NULL */
644 return; /* already done */
646 s
->tmp
= (stmp
== NULL
) ? newstate(nfa
) : stmp
;
647 if (s
->tmp
== NULL
) {
652 for (a
= s
->outs
; a
!= NULL
&& !NISERR(); a
= a
->outchain
) {
653 duptraverse(nfa
, a
->to
, (struct state
*)NULL
);
654 assert(a
->to
->tmp
!= NULL
);
655 cparc(nfa
, a
, s
->tmp
, a
->to
->tmp
);
660 - cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set
661 ^ static VOID cleartraverse(struct nfa *, struct state *);
664 cleartraverse(nfa
, s
)
674 for (a
= s
->outs
; a
!= NULL
; a
= a
->outchain
)
675 cleartraverse(nfa
, a
->to
);
679 - specialcolors - fill in special colors for an NFA
680 ^ static VOID specialcolors(struct nfa *);
686 /* false colors for BOS, BOL, EOS, EOL */
687 if (nfa
->parent
== NULL
) {
688 nfa
->bos
[0] = pseudocolor(nfa
->cm
);
689 nfa
->bos
[1] = pseudocolor(nfa
->cm
);
690 nfa
->eos
[0] = pseudocolor(nfa
->cm
);
691 nfa
->eos
[1] = pseudocolor(nfa
->cm
);
693 assert(nfa
->parent
->bos
[0] != COLORLESS
);
694 nfa
->bos
[0] = nfa
->parent
->bos
[0];
695 assert(nfa
->parent
->bos
[1] != COLORLESS
);
696 nfa
->bos
[1] = nfa
->parent
->bos
[1];
697 assert(nfa
->parent
->eos
[0] != COLORLESS
);
698 nfa
->eos
[0] = nfa
->parent
->eos
[0];
699 assert(nfa
->parent
->eos
[1] != COLORLESS
);
700 nfa
->eos
[1] = nfa
->parent
->eos
[1];
705 - optimize - optimize an NFA
706 ^ static long optimize(struct nfa *, FILE *);
708 static long /* re_info bits */
711 FILE *f
; /* for debug output; NULL none */
713 int verbose
= (f
!= NULL
) ? 1 : 0;
716 fprintf(f
, "\ninitial cleanup:\n");
717 cleanup(nfa
); /* may simplify situation */
721 fprintf(f
, "\nempties:\n");
722 fixempties(nfa
, f
); /* get rid of EMPTY arcs */
724 fprintf(f
, "\nconstraints:\n");
725 pullback(nfa
, f
); /* pull back constraints backward */
726 pushfwd(nfa
, f
); /* push fwd constraints forward */
728 fprintf(f
, "\nfinal cleanup:\n");
729 cleanup(nfa
); /* final tidying */
730 return analyze(nfa
); /* and analysis */
734 - pullback - pull back constraints backward to (with luck) eliminate them
735 ^ static VOID pullback(struct nfa *, FILE *);
740 FILE *f
; /* for debug output; NULL none */
748 /* find and pull until there are no more */
751 for (s
= nfa
->states
; s
!= NULL
&& !NISERR(); s
= nexts
) {
753 for (a
= s
->outs
; a
!= NULL
&& !NISERR(); a
= nexta
) {
755 if (a
->type
== '^' || a
->type
== BEHIND
)
758 assert(nexta
== NULL
|| s
->no
!= FREESTATE
);
761 if (progress
&& f
!= NULL
)
763 } while (progress
&& !NISERR());
767 for (a
= nfa
->pre
->outs
; a
!= NULL
; a
= nexta
) {
769 if (a
->type
== '^') {
770 assert(a
->co
== 0 || a
->co
== 1);
771 newarc(nfa
, PLAIN
, nfa
->bos
[a
->co
], a
->from
, a
->to
);
778 - pull - pull a back constraint backward past its source state
779 * A significant property of this function is that it deletes at most
780 * one state -- the constraint's from state -- and only if the constraint
781 * was that state's last outarc.
782 ^ static int pull(struct nfa *, struct arc *);
784 static int /* 0 couldn't, 1 could */
789 struct state
*from
= con
->from
;
790 struct state
*to
= con
->to
;
795 if (from
== to
) { /* circular constraint is pointless */
799 if (from
->flag
) /* can't pull back beyond start */
801 if (from
->nins
== 0) { /* unreachable */
806 /* first, clone from state if necessary to avoid other outarcs */
807 if (from
->nouts
> 1) {
811 assert(to
!= from
); /* con is not an inarc */
812 copyins(nfa
, from
, s
); /* duplicate inarcs */
813 cparc(nfa
, con
, s
, to
); /* move constraint arc */
818 assert(from
->nouts
== 1);
820 /* propagate the constraint into the from state's inarcs */
821 for (a
= from
->ins
; a
!= NULL
; a
= nexta
) {
823 switch (combine(con
, a
)) {
824 case INCOMPATIBLE
: /* destroy the arc */
827 case SATISFIED
: /* no action needed */
829 case COMPATIBLE
: /* swap the two arcs, more or less */
833 cparc(nfa
, a
, s
, to
); /* anticipate move */
834 cparc(nfa
, con
, a
->from
, s
);
845 /* remaining inarcs, if any, incorporate the constraint */
846 moveins(nfa
, from
, to
);
847 dropstate(nfa
, from
); /* will free the constraint */
852 - pushfwd - push forward constraints forward to (with luck) eliminate them
853 ^ static VOID pushfwd(struct nfa *, FILE *);
858 FILE *f
; /* for debug output; NULL none */
866 /* find and push until there are no more */
869 for (s
= nfa
->states
; s
!= NULL
&& !NISERR(); s
= nexts
) {
871 for (a
= s
->ins
; a
!= NULL
&& !NISERR(); a
= nexta
) {
873 if (a
->type
== '$' || a
->type
== AHEAD
)
876 assert(nexta
== NULL
|| s
->no
!= FREESTATE
);
879 if (progress
&& f
!= NULL
)
881 } while (progress
&& !NISERR());
885 for (a
= nfa
->post
->ins
; a
!= NULL
; a
= nexta
) {
887 if (a
->type
== '$') {
888 assert(a
->co
== 0 || a
->co
== 1);
889 newarc(nfa
, PLAIN
, nfa
->eos
[a
->co
], a
->from
, a
->to
);
896 - push - push a forward constraint forward past its destination state
897 * A significant property of this function is that it deletes at most
898 * one state -- the constraint's to state -- and only if the constraint
899 * was that state's last inarc.
900 ^ static int push(struct nfa *, struct arc *);
902 static int /* 0 couldn't, 1 could */
907 struct state
*from
= con
->from
;
908 struct state
*to
= con
->to
;
913 if (to
== from
) { /* circular constraint is pointless */
917 if (to
->flag
) /* can't push forward beyond end */
919 if (to
->nouts
== 0) { /* dead end */
924 /* first, clone to state if necessary to avoid other inarcs */
929 copyouts(nfa
, to
, s
); /* duplicate outarcs */
930 cparc(nfa
, con
, from
, s
); /* move constraint */
935 assert(to
->nins
== 1);
937 /* propagate the constraint into the to state's outarcs */
938 for (a
= to
->outs
; a
!= NULL
; a
= nexta
) {
940 switch (combine(con
, a
)) {
941 case INCOMPATIBLE
: /* destroy the arc */
944 case SATISFIED
: /* no action needed */
946 case COMPATIBLE
: /* swap the two arcs, more or less */
950 cparc(nfa
, con
, s
, a
->to
); /* anticipate move */
951 cparc(nfa
, a
, from
, s
);
962 /* remaining outarcs, if any, incorporate the constraint */
963 moveouts(nfa
, to
, from
);
964 dropstate(nfa
, to
); /* will free the constraint */
969 - combine - constraint lands on an arc, what happens?
970 ^ #def INCOMPATIBLE 1 // destroys arc
971 ^ #def SATISFIED 2 // constraint satisfied
972 ^ #def COMPATIBLE 3 // compatible but not satisfied yet
973 ^ static int combine(struct arc *, struct arc *);
976 /* FIXME Required for CW 8 on Mac since it's not in limits.h */
978 #define __CHAR_BIT__ 8
987 # define CA(ct,at) (((ct)<<CHAR_BIT) | (at))
989 switch (CA(con
->type
, a
->type
)) {
990 case CA('^', PLAIN
): /* newlines are handled separately */
994 case CA(AHEAD
, PLAIN
): /* color constraints meet colors */
995 case CA(BEHIND
, PLAIN
):
996 if (con
->co
== a
->co
)
1000 case CA('^', '^'): /* collision, similar constraints */
1002 case CA(AHEAD
, AHEAD
):
1003 case CA(BEHIND
, BEHIND
):
1004 if (con
->co
== a
->co
) /* true duplication */
1006 return INCOMPATIBLE
;
1008 case CA('^', BEHIND
): /* collision, dissimilar constraints */
1009 case CA(BEHIND
, '^'):
1010 case CA('$', AHEAD
):
1011 case CA(AHEAD
, '$'):
1012 return INCOMPATIBLE
;
1014 case CA('^', '$'): /* constraints passing each other */
1015 case CA('^', AHEAD
):
1016 case CA(BEHIND
, '$'):
1017 case CA(BEHIND
, AHEAD
):
1019 case CA('$', BEHIND
):
1020 case CA(AHEAD
, '^'):
1021 case CA(AHEAD
, BEHIND
):
1022 case CA('^', LACON
):
1023 case CA(BEHIND
, LACON
):
1024 case CA('$', LACON
):
1025 case CA(AHEAD
, LACON
):
1030 return INCOMPATIBLE
; /* for benefit of blind compilers */
1034 - fixempties - get rid of EMPTY arcs
1035 ^ static VOID fixempties(struct nfa *, FILE *);
1040 FILE *f
; /* for debug output; NULL none */
1043 struct state
*nexts
;
1048 /* find and eliminate empties until there are no more */
1051 for (s
= nfa
->states
; s
!= NULL
&& !NISERR(); s
= nexts
) {
1053 for (a
= s
->outs
; a
!= NULL
&& !NISERR(); a
= nexta
) {
1054 nexta
= a
->outchain
;
1055 if (a
->type
== EMPTY
&& unempty(nfa
, a
))
1057 assert(nexta
== NULL
|| s
->no
!= FREESTATE
);
1060 if (progress
&& f
!= NULL
)
1062 } while (progress
&& !NISERR());
1066 - unempty - optimize out an EMPTY arc, if possible
1067 * Actually, as it stands this function always succeeds, but the return
1068 * value is kept with an eye on possible future changes.
1069 ^ static int unempty(struct nfa *, struct arc *);
1071 static int /* 0 couldn't, 1 could */
1076 struct state
*from
= a
->from
;
1077 struct state
*to
= a
->to
;
1078 int usefrom
; /* work on from, as opposed to to? */
1080 assert(a
->type
== EMPTY
);
1081 assert(from
!= nfa
->pre
&& to
!= nfa
->post
);
1083 if (from
== to
) { /* vacuous loop */
1088 /* decide which end to work on */
1089 usefrom
= 1; /* default: attack from */
1090 if (from
->nouts
> to
->nins
)
1092 else if (from
->nouts
== to
->nins
) {
1093 /* decide on secondary issue: move/copy fewest arcs */
1094 if (from
->nins
> to
->nouts
)
1100 if (from
->nouts
== 0) {
1101 /* was the state's only outarc */
1102 moveins(nfa
, from
, to
);
1103 freestate(nfa
, from
);
1105 copyins(nfa
, from
, to
);
1107 if (to
->nins
== 0) {
1108 /* was the state's only inarc */
1109 moveouts(nfa
, to
, from
);
1112 copyouts(nfa
, to
, from
);
1119 - cleanup - clean up NFA after optimizations
1120 ^ static VOID cleanup(struct nfa *);
1127 struct state
*nexts
;
1130 /* clear out unreachable or dead-end states */
1131 /* use pre to mark reachable, then post to mark can-reach-post */
1132 markreachable(nfa
, nfa
->pre
, (struct state
*)NULL
, nfa
->pre
);
1133 markcanreach(nfa
, nfa
->post
, nfa
->pre
, nfa
->post
);
1134 for (s
= nfa
->states
; s
!= NULL
; s
= nexts
) {
1136 if (s
->tmp
!= nfa
->post
&& !s
->flag
)
1139 assert(nfa
->post
->nins
== 0 || nfa
->post
->tmp
== nfa
->post
);
1140 cleartraverse(nfa
, nfa
->pre
);
1141 assert(nfa
->post
->nins
== 0 || nfa
->post
->tmp
== NULL
);
1142 /* the nins==0 (final unreachable) case will be caught later */
1144 /* renumber surviving states */
1146 for (s
= nfa
->states
; s
!= NULL
; s
= s
->next
)
1152 - markreachable - recursive marking of reachable states
1153 ^ static VOID markreachable(struct nfa *, struct state *, struct state *,
1157 markreachable(nfa
, s
, okay
, mark
)
1160 struct state
*okay
; /* consider only states with this mark */
1161 struct state
*mark
; /* the value to mark with */
1169 for (a
= s
->outs
; a
!= NULL
; a
= a
->outchain
)
1170 markreachable(nfa
, a
->to
, okay
, mark
);
1174 - markcanreach - recursive marking of states which can reach here
1175 ^ static VOID markcanreach(struct nfa *, struct state *, struct state *,
1179 markcanreach(nfa
, s
, okay
, mark
)
1182 struct state
*okay
; /* consider only states with this mark */
1183 struct state
*mark
; /* the value to mark with */
1191 for (a
= s
->ins
; a
!= NULL
; a
= a
->inchain
)
1192 markcanreach(nfa
, a
->from
, okay
, mark
);
1196 - analyze - ascertain potentially-useful facts about an optimized NFA
1197 ^ static long analyze(struct nfa *);
1199 static long /* re_info bits to be ORed in */
1206 if (nfa
->pre
->outs
== NULL
)
1207 return REG_UIMPOSSIBLE
;
1208 for (a
= nfa
->pre
->outs
; a
!= NULL
; a
= a
->outchain
)
1209 for (aa
= a
->to
->outs
; aa
!= NULL
; aa
= aa
->outchain
)
1210 if (aa
->to
== nfa
->post
)
1211 return REG_UEMPTYMATCH
;
1216 - compact - compact an NFA
1217 ^ static VOID compact(struct nfa *, struct cnfa *);
1235 for (s
= nfa
->states
; s
!= NULL
; s
= s
->next
) {
1237 narcs
+= 1 + s
->nouts
+ 1;
1238 /* 1 as a fake for flags, nouts for arcs, 1 as endmarker */
1241 cnfa
->states
= (struct carc
**)MALLOC(nstates
* sizeof(struct carc
*));
1242 cnfa
->arcs
= (struct carc
*)MALLOC(narcs
* sizeof(struct carc
));
1243 if (cnfa
->states
== NULL
|| cnfa
->arcs
== NULL
) {
1244 if (cnfa
->states
!= NULL
)
1246 if (cnfa
->arcs
!= NULL
)
1251 cnfa
->nstates
= nstates
;
1252 cnfa
->pre
= nfa
->pre
->no
;
1253 cnfa
->post
= nfa
->post
->no
;
1254 cnfa
->bos
[0] = nfa
->bos
[0];
1255 cnfa
->bos
[1] = nfa
->bos
[1];
1256 cnfa
->eos
[0] = nfa
->eos
[0];
1257 cnfa
->eos
[1] = nfa
->eos
[1];
1258 cnfa
->ncolors
= maxcolor(nfa
->cm
) + 1;
1262 for (s
= nfa
->states
; s
!= NULL
; s
= s
->next
) {
1263 assert((size_t)s
->no
< nstates
);
1264 cnfa
->states
[s
->no
] = ca
;
1265 ca
->co
= 0; /* clear and skip flags "arc" */
1268 for (a
= s
->outs
; a
!= NULL
; a
= a
->outchain
)
1276 assert(s
->no
!= cnfa
->pre
);
1277 ca
->co
= (color
)(cnfa
->ncolors
+ a
->co
);
1280 cnfa
->flags
|= HASLACONS
;
1286 carcsort(first
, ca
-1);
1291 assert(ca
== &cnfa
->arcs
[narcs
]);
1292 assert(cnfa
->nstates
!= 0);
1294 /* mark no-progress states */
1295 for (a
= nfa
->pre
->outs
; a
!= NULL
; a
= a
->outchain
)
1296 cnfa
->states
[a
->to
->no
]->co
= 1;
1297 cnfa
->states
[nfa
->pre
->no
]->co
= 1;
1301 - carcsort - sort compacted-NFA arcs by color
1302 * Really dumb algorithm, but if the list is long enough for that to matter,
1303 * you're in real trouble anyway.
1304 ^ static VOID carcsort(struct carc *, struct carc *);
1307 carcsort(first
, last
)
1315 if (last
- first
<= 1)
1318 for (p
= first
; p
<= last
; p
++)
1319 for (q
= p
; q
<= last
; q
++)
1320 if (p
->co
> q
->co
||
1321 (p
->co
== q
->co
&& p
->to
> q
->to
)) {
1330 - freecnfa - free a compacted NFA
1331 ^ static VOID freecnfa(struct cnfa *);
1337 assert(cnfa
->nstates
!= 0); /* not empty already */
1344 - dumpnfa - dump an NFA in human-readable form
1345 ^ static VOID dumpnfa(struct nfa *, FILE *);
1355 fprintf(f
, "pre %d, post %d", nfa
->pre
->no
, nfa
->post
->no
);
1356 if (nfa
->bos
[0] != COLORLESS
)
1357 fprintf(f
, ", bos [%ld]", (long)nfa
->bos
[0]);
1358 if (nfa
->bos
[1] != COLORLESS
)
1359 fprintf(f
, ", bol [%ld]", (long)nfa
->bos
[1]);
1360 if (nfa
->eos
[0] != COLORLESS
)
1361 fprintf(f
, ", eos [%ld]", (long)nfa
->eos
[0]);
1362 if (nfa
->eos
[1] != COLORLESS
)
1363 fprintf(f
, ", eol [%ld]", (long)nfa
->eos
[1]);
1365 for (s
= nfa
->states
; s
!= NULL
; s
= s
->next
)
1367 if (nfa
->parent
== NULL
)
1368 dumpcolors(nfa
->cm
, f
);
1373 #ifdef REG_DEBUG /* subordinates of dumpnfa */
1379 - dumpstate - dump an NFA state in human-readable form
1380 ^ static VOID dumpstate(struct state *, FILE *);
1389 fprintf(f
, "%d%s%c", s
->no
, (s
->tmp
!= NULL
) ? "T" : "",
1390 (s
->flag
) ? s
->flag
: '.');
1391 if (s
->prev
!= NULL
&& s
->prev
->next
!= s
)
1392 fprintf(f
, "\tstate chain bad\n");
1394 fprintf(f
, "\tno out arcs\n");
1398 for (a
= s
->ins
; a
!= NULL
; a
= a
->inchain
) {
1400 fprintf(f
, "\tlink from %d to %d on %d's in-chain\n",
1401 a
->from
->no
, a
->to
->no
, s
->no
);
1406 - dumparcs - dump out-arcs in human-readable form
1407 ^ static VOID dumparcs(struct state *, FILE *);
1416 assert(s
->nouts
> 0);
1417 /* printing arcs in reverse order is usually clearer */
1418 pos
= dumprarcs(s
->outs
, s
, f
, 1);
1424 - dumprarcs - dump remaining outarcs, recursively, in reverse order
1425 ^ static int dumprarcs(struct arc *, struct state *, FILE *, int);
1427 static int /* resulting print position */
1428 dumprarcs(a
, s
, f
, pos
)
1432 int pos
; /* initial print position */
1434 if (a
->outchain
!= NULL
)
1435 pos
= dumprarcs(a
->outchain
, s
, f
, pos
);
1446 - dumparc - dump one outarc in readable form, including prefixing tab
1447 ^ static VOID dumparc(struct arc *, struct state *, FILE *);
1456 struct arcbatch
*ab
;
1461 fprintf(f
, "[%ld]", (long)a
->co
);
1464 fprintf(f
, ">%ld>", (long)a
->co
);
1467 fprintf(f
, "<%ld<", (long)a
->co
);
1470 fprintf(f
, ":%ld:", (long)a
->co
);
1474 fprintf(f
, "%c%d", a
->type
, (int)a
->co
);
1479 fprintf(f
, "0x%x/0%lo", a
->type
, (long)a
->co
);
1483 fprintf(f
, "?%d?", a
->from
->no
);
1484 for (ab
= &a
->from
->oas
; ab
!= NULL
; ab
= ab
->next
) {
1485 for (aa
= &ab
->a
[0]; aa
< &ab
->a
[ABSIZE
]; aa
++)
1487 break; /* NOTE BREAK OUT */
1488 if (aa
< &ab
->a
[ABSIZE
]) /* propagate break */
1489 break; /* NOTE BREAK OUT */
1492 fprintf(f
, "?!?"); /* not in allocated space */
1494 if (a
->to
== NULL
) {
1498 fprintf(f
, "%d", a
->to
->no
);
1499 for (aa
= a
->to
->ins
; aa
!= NULL
; aa
= aa
->inchain
)
1501 break; /* NOTE BREAK OUT */
1503 fprintf(f
, "?!?"); /* missing from in-chain */
1509 #endif /* ifdef REG_DEBUG */
1512 - dumpcnfa - dump a compacted NFA in human-readable form
1513 ^ static VOID dumpcnfa(struct cnfa *, FILE *);
1523 fprintf(f
, "pre %d, post %d", cnfa
->pre
, cnfa
->post
);
1524 if (cnfa
->bos
[0] != COLORLESS
)
1525 fprintf(f
, ", bos [%ld]", (long)cnfa
->bos
[0]);
1526 if (cnfa
->bos
[1] != COLORLESS
)
1527 fprintf(f
, ", bol [%ld]", (long)cnfa
->bos
[1]);
1528 if (cnfa
->eos
[0] != COLORLESS
)
1529 fprintf(f
, ", eos [%ld]", (long)cnfa
->eos
[0]);
1530 if (cnfa
->eos
[1] != COLORLESS
)
1531 fprintf(f
, ", eol [%ld]", (long)cnfa
->eos
[1]);
1532 if (cnfa
->flags
&HASLACONS
)
1533 fprintf(f
, ", haslacons");
1535 for (st
= 0; st
< cnfa
->nstates
; st
++)
1536 dumpcstate(st
, cnfa
->states
[st
], cnfa
, f
);
1541 #ifdef REG_DEBUG /* subordinates of dumpcnfa */
1547 - dumpcstate - dump a compacted-NFA state in human-readable form
1548 ^ static VOID dumpcstate(int, struct carc *, struct cnfa *, FILE *);
1551 dumpcstate(st
, ca
, cnfa
, f
)
1560 fprintf(f
, "%d%s", st
, (ca
[0].co
) ? ":" : ".");
1562 for (i
= 1; ca
[i
].co
!= COLORLESS
; i
++) {
1563 if (ca
[i
].co
< cnfa
->ncolors
)
1564 fprintf(f
, "\t[%ld]->%d", (long)ca
[i
].co
, ca
[i
].to
);
1566 fprintf(f
, "\t:%ld:->%d", (long)ca
[i
].co
-cnfa
->ncolors
,
1574 if (i
== 1 || pos
!= 1)
1582 #endif /* ifdef REG_DEBUG */