]>
git.saurik.com Git - wxWidgets.git/blob - src/regex/regc_nfa.c
   3  * This file is #included by regcomp.c. 
   5  * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved. 
   7  * Development of this software was funded, in part, by Cray Research Inc., 
   8  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics 
   9  * Corporation, none of whom are responsible for the results.  The author 
  12  * Redistribution and use in source and binary forms -- with or without 
  13  * modification -- are permitted for any purpose, provided that 
  14  * redistributions in source form retain this entire copyright notice and 
  15  * indicate the origin and nature of any modifications. 
  17  * I'd appreciate being given credit for this package in the documentation 
  18  * of software which uses it, but that is not a requirement. 
  20  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, 
  21  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 
  22  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL 
  23  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
  24  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
  25  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
  26  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
  27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 
  28  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
  29  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
  33  * One or two things that technically ought to be in here 
  34  * are actually in color.c, thanks to some incestuous relationships in 
  38 #define NISERR()        VISERR(nfa->v) 
  39 #define NERR(e)         VERR(nfa->v, (e)) 
  43  - newnfa - set up an NFA 
  44  ^ static struct nfa *newnfa(struct vars *, struct colormap *, struct nfa *); 
  46 static struct nfa 
*             /* the NFA, or NULL */ 
  50 struct nfa 
*parent
;             /* NULL if primary NFA */ 
  54         nfa 
= (struct nfa 
*)MALLOC(sizeof(struct nfa
)); 
  64         nfa
->bos
[0] = nfa
->bos
[1] = COLORLESS
; 
  65         nfa
->eos
[0] = nfa
->eos
[1] = COLORLESS
; 
  66         nfa
->post 
= newfstate(nfa
, '@');        /* number 0 */ 
  67         nfa
->pre 
= newfstate(nfa
, '>');         /* number 1 */ 
  70         nfa
->init 
= newstate(nfa
);              /* may become invalid later */ 
  71         nfa
->final 
= newstate(nfa
); 
  76         rainbow(nfa
, nfa
->cm
, PLAIN
, COLORLESS
, nfa
->pre
, nfa
->init
); 
  77         newarc(nfa
, '^', 1, nfa
->pre
, nfa
->init
); 
  78         newarc(nfa
, '^', 0, nfa
->pre
, nfa
->init
); 
  79         rainbow(nfa
, nfa
->cm
, PLAIN
, COLORLESS
, nfa
->final
, nfa
->post
); 
  80         newarc(nfa
, '$', 1, nfa
->final
, nfa
->post
); 
  81         newarc(nfa
, '$', 0, nfa
->final
, nfa
->post
); 
  91  - freenfa - free an entire NFA 
  92  ^ static VOID freenfa(struct nfa *); 
 100         while ((s 
= nfa
->states
) != NULL
) { 
 101                 s
->nins 
= s
->nouts 
= 0;         /* don't worry about arcs */ 
 104         while ((s 
= nfa
->free
) != NULL
) { 
 106                 destroystate(nfa
, s
); 
 117  - newstate - allocate an NFA state, with zero flag value 
 118  ^ static struct state *newstate(struct nfa *); 
 120 static struct state 
*           /* NULL on error */ 
 126         if (nfa
->free 
!= NULL
) { 
 130                 s 
= (struct state 
*)MALLOC(sizeof(struct state
)); 
 140         assert(nfa
->nstates 
>= 0); 
 141         s
->no 
= nfa
->nstates
++; 
 143         if (nfa
->states 
== NULL
) 
 151         if (nfa
->slast 
!= NULL
) { 
 152                 assert(nfa
->slast
->next 
== NULL
); 
 153                 nfa
->slast
->next 
= s
; 
 155         s
->prev 
= nfa
->slast
; 
 161  - newfstate - allocate an NFA state with a specified flag value 
 162  ^ static struct state *newfstate(struct nfa *, int flag); 
 164 static struct state 
*           /* NULL on error */ 
 173                 s
->flag 
= (char)flag
; 
 178  - dropstate - delete a state's inarcs and outarcs and free it 
 179  ^ static VOID dropstate(struct nfa *, struct state *); 
 188         while ((a 
= s
->ins
) != NULL
) 
 190         while ((a 
= s
->outs
) != NULL
) 
 196  - freestate - free a state, which has no in-arcs or out-arcs 
 197  ^ static VOID freestate(struct nfa *, struct state *); 
 205         assert(s
->nins 
== 0 && s
->nouts 
== 0); 
 210                 s
->next
->prev 
= s
->prev
; 
 212                 assert(s 
== nfa
->slast
); 
 213                 nfa
->slast 
= s
->prev
; 
 216                 s
->prev
->next 
= s
->next
; 
 218                 assert(s 
== nfa
->states
); 
 219                 nfa
->states 
= s
->next
; 
 222         s
->next 
= nfa
->free
;    /* don't delete it, put it on the free list */ 
 227  - destroystate - really get rid of an already-freed state 
 228  ^ static VOID destroystate(struct nfa *, struct state *); 
 236         struct arcbatch 
*abnext
; 
 238         assert(s
->no 
== FREESTATE
); 
 239         for (ab 
= s
->oas
.next
; ab 
!= NULL
; ab 
= abnext
) { 
 250  - newarc - set up a new arc within an NFA 
 251  ^ static VOID newarc(struct nfa *, int, pcolor, struct state *,  
 255 newarc(nfa
, t
, co
, from
, to
) 
 264         assert(from 
!= NULL 
&& to 
!= NULL
); 
 266         /* check for duplicates */ 
 267         for (a 
= from
->outs
; a 
!= NULL
; a 
= a
->outchain
) 
 268                 if (a
->to 
== to 
&& a
->co 
== co 
&& a
->type 
== t
) 
 271         a 
= allocarc(nfa
, from
); 
 282          * Put the new arc on the beginning, not the end, of the chains. 
 283          * Not only is this easier, it has the very useful side effect that  
 284          * deleting the most-recently-added arc is the cheapest case rather 
 285          * than the most expensive one. 
 287         a
->inchain 
= to
->ins
; 
 289         a
->outchain 
= from
->outs
; 
 295         if (COLORED(a
) && nfa
->parent 
== NULL
) 
 296                 colorchain(nfa
->cm
, a
); 
 302  - allocarc - allocate a new out-arc within a state 
 303  ^ static struct arc *allocarc(struct nfa *, struct state *); 
 305 static struct arc 
*             /* NULL for failure */ 
 311         struct arcbatch 
*new; 
 315         if (s
->free 
== NULL 
&& s
->noas 
< ABSIZE
) { 
 316                 a 
= &s
->oas
.a
[s
->noas
]; 
 321         /* if none at hand, get more */ 
 322         if (s
->free 
== NULL
) { 
 323                 new = (struct arcbatch 
*)MALLOC(sizeof(struct arcbatch
)); 
 328                 new->next 
= s
->oas
.next
; 
 331                 for (i 
= 0; i 
< ABSIZE
; i
++) { 
 333                         new->a
[i
].freechain 
= &new->a
[i
+1]; 
 335                 new->a
[ABSIZE
-1].freechain 
= NULL
; 
 336                 s
->free 
= &new->a
[0]; 
 338         assert(s
->free 
!= NULL
); 
 341         s
->free 
= a
->freechain
; 
 346  - freearc - free an arc 
 347  ^ static VOID freearc(struct nfa *, struct arc *); 
 354         struct state 
*from 
= victim
->from
; 
 355         struct state 
*to 
= victim
->to
; 
 358         assert(victim
->type 
!= 0); 
 360         /* take it off color chain if necessary */ 
 361         if (COLORED(victim
) && nfa
->parent 
== NULL
) 
 362                 uncolorchain(nfa
->cm
, victim
); 
 364         /* take it off source's out-chain */ 
 365         assert(from 
!= NULL
); 
 366         assert(from
->outs 
!= NULL
); 
 368         if (a 
== victim
)                /* simple case:  first in chain */ 
 369                 from
->outs 
= victim
->outchain
; 
 371                 for (; a 
!= NULL 
&& a
->outchain 
!= victim
; a 
= a
->outchain
) 
 374                 a
->outchain 
= victim
->outchain
; 
 378         /* take it off target's in-chain */ 
 380         assert(to
->ins 
!= NULL
); 
 382         if (a 
== victim
)                /* simple case:  first in chain */ 
 383                 to
->ins 
= victim
->inchain
; 
 385                 for (; a 
!= NULL 
&& a
->inchain 
!= victim
; a 
= a
->inchain
) 
 388                 a
->inchain 
= victim
->inchain
; 
 392         /* clean up and place on free list */ 
 394         victim
->from 
= NULL
;            /* precautions... */ 
 396         victim
->inchain 
= NULL
; 
 397         victim
->outchain 
= NULL
; 
 398         victim
->freechain 
= from
->free
; 
 403  - findarc - find arc, if any, from given source with given type and color 
 404  * If there is more than one such arc, the result is random. 
 405  ^ static struct arc *findarc(struct state *, int, pcolor); 
 415         for (a 
= s
->outs
; a 
!= NULL
; a 
= a
->outchain
) 
 416                 if (a
->type 
== type 
&& a
->co 
== co
) 
 422  - cparc - allocate a new arc within an NFA, copying details from old one 
 423  ^ static VOID cparc(struct nfa *, struct arc *, struct state *,  
 427 cparc(nfa
, oa
, from
, to
) 
 433         newarc(nfa
, oa
->type
, oa
->co
, from
, to
); 
 437  - moveins - move all in arcs of a state to another state 
 438  * You might think this could be done better by just updating the 
 439  * existing arcs, and you would be right if it weren't for the desire 
 440  * for duplicate suppression, which makes it easier to just make new 
 441  * ones to exploit the suppression built into newarc. 
 442  ^ static VOID moveins(struct nfa *, struct state *, struct state *); 
 445 moveins(nfa
, old
, new) 
 454         while ((a 
= old
->ins
) != NULL
) { 
 455                 cparc(nfa
, a
, a
->from
, new); 
 458         assert(old
->nins 
== 0); 
 459         assert(old
->ins 
== NULL
); 
 463  - copyins - copy all in arcs of a state to another state 
 464  ^ static VOID copyins(struct nfa *, struct state *, struct state *); 
 467 copyins(nfa
, old
, new) 
 476         for (a 
= old
->ins
; a 
!= NULL
; a 
= a
->inchain
) 
 477                 cparc(nfa
, a
, a
->from
, new); 
 481  - moveouts - move all out arcs of a state to another state 
 482  ^ static VOID moveouts(struct nfa *, struct state *, struct state *); 
 485 moveouts(nfa
, old
, new) 
 494         while ((a 
= old
->outs
) != NULL
) { 
 495                 cparc(nfa
, a
, new, a
->to
); 
 501  - copyouts - copy all out arcs of a state to another state 
 502  ^ static VOID copyouts(struct nfa *, struct state *, struct state *); 
 505 copyouts(nfa
, old
, new) 
 514         for (a 
= old
->outs
; a 
!= NULL
; a 
= a
->outchain
) 
 515                 cparc(nfa
, a
, new, a
->to
); 
 519  - cloneouts - copy out arcs of a state to another state pair, modifying type 
 520  ^ static VOID cloneouts(struct nfa *, struct state *, struct state *, 
 521  ^      struct state *, int); 
 524 cloneouts(nfa
, old
, from
, to
, type
) 
 535         for (a 
= old
->outs
; a 
!= NULL
; a 
= a
->outchain
) 
 536                 newarc(nfa
, type
, a
->co
, from
, to
); 
 540  - delsub - delete a sub-NFA, updating subre pointers if necessary 
 541  * This uses a recursive traversal of the sub-NFA, marking already-seen 
 542  * states using their tmp pointer. 
 543  ^ static VOID delsub(struct nfa *, struct state *, struct state *); 
 548 struct state 
*lp
;       /* the sub-NFA goes from here... */ 
 549 struct state 
*rp
;       /* ...to here, *not* inclusive */ 
 553         rp
->tmp 
= rp
;                   /* mark end */ 
 555         deltraverse(nfa
, lp
, lp
); 
 556         assert(lp
->nouts 
== 0 && rp
->nins 
== 0);        /* did the job */ 
 557         assert(lp
->no 
!= FREESTATE 
&& rp
->no 
!= FREESTATE
);     /* no more */ 
 559         rp
->tmp 
= NULL
;                 /* unmark end */ 
 560         lp
->tmp 
= NULL
;                 /* and begin, marked by deltraverse */ 
 564  - deltraverse - the recursive heart of delsub 
 565  * This routine's basic job is to destroy all out-arcs of the state. 
 566  ^ static VOID deltraverse(struct nfa *, struct state *, struct state *); 
 569 deltraverse(nfa
, leftend
, s
) 
 571 struct state 
*leftend
; 
 578                 return;                 /* nothing to do */ 
 580                 return;                 /* already in progress */ 
 582         s
->tmp 
= s
;                     /* mark as in progress */ 
 584         while ((a 
= s
->outs
) != NULL
) { 
 586                 deltraverse(nfa
, leftend
, to
); 
 587                 assert(to
->nouts 
== 0 || to
->tmp 
!= NULL
); 
 589                 if (to
->nins 
== 0 && to
->tmp 
== NULL
) { 
 590                         assert(to
->nouts 
== 0); 
 595         assert(s
->no 
!= FREESTATE
);     /* we're still here */ 
 596         assert(s 
== leftend 
|| s
->nins 
!= 0);   /* and still reachable */ 
 597         assert(s
->nouts 
== 0);          /* but have no outarcs */ 
 599         s
->tmp 
= NULL
;                  /* we're done here */ 
 603  - dupnfa - duplicate sub-NFA 
 604  * Another recursive traversal, this time using tmp to point to duplicates 
 605  * as well as mark already-seen states.  (You knew there was a reason why 
 606  * it's a state pointer, didn't you? :-)) 
 607  ^ static VOID dupnfa(struct nfa *, struct state *, struct state *,  
 608  ^      struct state *, struct state *); 
 611 dupnfa(nfa
, start
, stop
, from
, to
) 
 613 struct state 
*start
;            /* duplicate of subNFA starting here */ 
 614 struct state 
*stop
;             /* and stopping here */ 
 615 struct state 
*from
;             /* stringing duplicate from here */ 
 616 struct state 
*to
;               /* to here */ 
 619                 newarc(nfa
, EMPTY
, 0, from
, to
); 
 624         duptraverse(nfa
, start
, from
); 
 625         /* done, except for clearing out the tmp pointers */ 
 628         cleartraverse(nfa
, start
); 
 632  - duptraverse - recursive heart of dupnfa 
 633  ^ static VOID duptraverse(struct nfa *, struct state *, struct state *); 
 636 duptraverse(nfa
, s
, stmp
) 
 639 struct state 
*stmp
;             /* s's duplicate, or NULL */ 
 644                 return;         /* already done */ 
 646         s
->tmp 
= (stmp 
== NULL
) ? newstate(nfa
) : stmp
; 
 647         if (s
->tmp 
== NULL
) { 
 652         for (a 
= s
->outs
; a 
!= NULL 
&& !NISERR(); a 
= a
->outchain
) { 
 653                 duptraverse(nfa
, a
->to
, (struct state 
*)NULL
); 
 654                 assert(a
->to
->tmp 
!= NULL
); 
 655                 cparc(nfa
, a
, s
->tmp
, a
->to
->tmp
); 
 660  - cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set 
 661  ^ static VOID cleartraverse(struct nfa *, struct state *); 
 664 cleartraverse(nfa
, s
) 
 674         for (a 
= s
->outs
; a 
!= NULL
; a 
= a
->outchain
) 
 675                 cleartraverse(nfa
, a
->to
); 
 679  - specialcolors - fill in special colors for an NFA 
 680  ^ static VOID specialcolors(struct nfa *); 
 686         /* false colors for BOS, BOL, EOS, EOL */ 
 687         if (nfa
->parent 
== NULL
) { 
 688                 nfa
->bos
[0] = pseudocolor(nfa
->cm
); 
 689                 nfa
->bos
[1] = pseudocolor(nfa
->cm
); 
 690                 nfa
->eos
[0] = pseudocolor(nfa
->cm
); 
 691                 nfa
->eos
[1] = pseudocolor(nfa
->cm
); 
 693                 assert(nfa
->parent
->bos
[0] != COLORLESS
); 
 694                 nfa
->bos
[0] = nfa
->parent
->bos
[0]; 
 695                 assert(nfa
->parent
->bos
[1] != COLORLESS
); 
 696                 nfa
->bos
[1] = nfa
->parent
->bos
[1]; 
 697                 assert(nfa
->parent
->eos
[0] != COLORLESS
); 
 698                 nfa
->eos
[0] = nfa
->parent
->eos
[0]; 
 699                 assert(nfa
->parent
->eos
[1] != COLORLESS
); 
 700                 nfa
->eos
[1] = nfa
->parent
->eos
[1]; 
 705  - optimize - optimize an NFA 
 706  ^ static long optimize(struct nfa *, FILE *); 
 708 static long                     /* re_info bits */ 
 711 FILE *f
;                        /* for debug output; NULL none */ 
 713         int verbose 
= (f 
!= NULL
) ? 1 : 0; 
 716                 fprintf(f
, "\ninitial cleanup:\n"); 
 717         cleanup(nfa
);           /* may simplify situation */ 
 721                 fprintf(f
, "\nempties:\n"); 
 722         fixempties(nfa
, f
);     /* get rid of EMPTY arcs */ 
 724                 fprintf(f
, "\nconstraints:\n"); 
 725         pullback(nfa
, f
);       /* pull back constraints backward */ 
 726         pushfwd(nfa
, f
);        /* push fwd constraints forward */ 
 728                 fprintf(f
, "\nfinal cleanup:\n"); 
 729         cleanup(nfa
);           /* final tidying */ 
 730         return analyze(nfa
);    /* and analysis */ 
 734  - pullback - pull back constraints backward to (with luck) eliminate them 
 735  ^ static VOID pullback(struct nfa *, FILE *); 
 740 FILE *f
;                        /* for debug output; NULL none */ 
 748         /* find and pull until there are no more */ 
 751                 for (s 
= nfa
->states
; s 
!= NULL 
&& !NISERR(); s 
= nexts
) { 
 753                         for (a 
= s
->outs
; a 
!= NULL 
&& !NISERR(); a 
= nexta
) { 
 755                                 if (a
->type 
== '^' || a
->type 
== BEHIND
) 
 758                                 assert(nexta 
== NULL 
|| s
->no 
!= FREESTATE
); 
 761                 if (progress 
&& f 
!= NULL
) 
 763         } while (progress 
&& !NISERR()); 
 767         for (a 
= nfa
->pre
->outs
; a 
!= NULL
; a 
= nexta
) { 
 769                 if (a
->type 
== '^') { 
 770                         assert(a
->co 
== 0 || a
->co 
== 1); 
 771                         newarc(nfa
, PLAIN
, nfa
->bos
[a
->co
], a
->from
, a
->to
); 
 778  - pull - pull a back constraint backward past its source state 
 779  * A significant property of this function is that it deletes at most 
 780  * one state -- the constraint's from state -- and only if the constraint 
 781  * was that state's last outarc. 
 782  ^ static int pull(struct nfa *, struct arc *); 
 784 static int                      /* 0 couldn't, 1 could */ 
 789         struct state 
*from 
= con
->from
; 
 790         struct state 
*to 
= con
->to
; 
 795         if (from 
== to
) {       /* circular constraint is pointless */ 
 799         if (from
->flag
)         /* can't pull back beyond start */ 
 801         if (from
->nins 
== 0) {  /* unreachable */ 
 806         /* first, clone from state if necessary to avoid other outarcs */ 
 807         if (from
->nouts 
> 1) { 
 811                 assert(to 
!= from
);             /* con is not an inarc */ 
 812                 copyins(nfa
, from
, s
);          /* duplicate inarcs */ 
 813                 cparc(nfa
, con
, s
, to
);         /* move constraint arc */ 
 818         assert(from
->nouts 
== 1); 
 820         /* propagate the constraint into the from state's inarcs */ 
 821         for (a 
= from
->ins
; a 
!= NULL
; a 
= nexta
) { 
 823                 switch (combine(con
, a
)) { 
 824                 case INCOMPATIBLE
:      /* destroy the arc */ 
 827                 case SATISFIED
:         /* no action needed */ 
 829                 case COMPATIBLE
:        /* swap the two arcs, more or less */ 
 833                         cparc(nfa
, a
, s
, to
);           /* anticipate move */ 
 834                         cparc(nfa
, con
, a
->from
, s
); 
 845         /* remaining inarcs, if any, incorporate the constraint */ 
 846         moveins(nfa
, from
, to
); 
 847         dropstate(nfa
, from
);           /* will free the constraint */ 
 852  - pushfwd - push forward constraints forward to (with luck) eliminate them 
 853  ^ static VOID pushfwd(struct nfa *, FILE *); 
 858 FILE *f
;                        /* for debug output; NULL none */ 
 866         /* find and push until there are no more */ 
 869                 for (s 
= nfa
->states
; s 
!= NULL 
&& !NISERR(); s 
= nexts
) { 
 871                         for (a 
= s
->ins
; a 
!= NULL 
&& !NISERR(); a 
= nexta
) { 
 873                                 if (a
->type 
== '$' || a
->type 
== AHEAD
) 
 876                                 assert(nexta 
== NULL 
|| s
->no 
!= FREESTATE
); 
 879                 if (progress 
&& f 
!= NULL
) 
 881         } while (progress 
&& !NISERR()); 
 885         for (a 
= nfa
->post
->ins
; a 
!= NULL
; a 
= nexta
) { 
 887                 if (a
->type 
== '$') { 
 888                         assert(a
->co 
== 0 || a
->co 
== 1); 
 889                         newarc(nfa
, PLAIN
, nfa
->eos
[a
->co
], a
->from
, a
->to
); 
 896  - push - push a forward constraint forward past its destination state 
 897  * A significant property of this function is that it deletes at most 
 898  * one state -- the constraint's to state -- and only if the constraint 
 899  * was that state's last inarc. 
 900  ^ static int push(struct nfa *, struct arc *); 
 902 static int                      /* 0 couldn't, 1 could */ 
 907         struct state 
*from 
= con
->from
; 
 908         struct state 
*to 
= con
->to
; 
 913         if (to 
== from
) {       /* circular constraint is pointless */ 
 917         if (to
->flag
)           /* can't push forward beyond end */ 
 919         if (to
->nouts 
== 0) {   /* dead end */ 
 924         /* first, clone to state if necessary to avoid other inarcs */ 
 929                 copyouts(nfa
, to
, s
);           /* duplicate outarcs */ 
 930                 cparc(nfa
, con
, from
, s
);       /* move constraint */ 
 935         assert(to
->nins 
== 1); 
 937         /* propagate the constraint into the to state's outarcs */ 
 938         for (a 
= to
->outs
; a 
!= NULL
; a 
= nexta
) { 
 940                 switch (combine(con
, a
)) { 
 941                 case INCOMPATIBLE
:      /* destroy the arc */ 
 944                 case SATISFIED
:         /* no action needed */ 
 946                 case COMPATIBLE
:        /* swap the two arcs, more or less */ 
 950                         cparc(nfa
, con
, s
, a
->to
);      /* anticipate move */ 
 951                         cparc(nfa
, a
, from
, s
); 
 962         /* remaining outarcs, if any, incorporate the constraint */ 
 963         moveouts(nfa
, to
, from
); 
 964         dropstate(nfa
, to
);             /* will free the constraint */ 
 969  - combine - constraint lands on an arc, what happens? 
 970  ^ #def INCOMPATIBLE    1       // destroys arc 
 971  ^ #def SATISFIED       2       // constraint satisfied 
 972  ^ #def COMPATIBLE      3       // compatible but not satisfied yet 
 973  ^ static int combine(struct arc *, struct arc *); 
 976 /* FIXME Required for CW 8 on Mac since it's not in limits.h */ 
 978 #define __CHAR_BIT__ 8 
 987 #       define  CA(ct,at)       (((ct)<<CHAR_BIT) | (at)) 
 989         switch (CA(con
->type
, a
->type
)) { 
 990         case CA('^', PLAIN
):            /* newlines are handled separately */ 
 994         case CA(AHEAD
, PLAIN
):          /* color constraints meet colors */ 
 995         case CA(BEHIND
, PLAIN
): 
 996                 if (con
->co 
== a
->co
) 
1000         case CA('^', '^'):              /* collision, similar constraints */ 
1002         case CA(AHEAD
, AHEAD
): 
1003         case CA(BEHIND
, BEHIND
): 
1004                 if (con
->co 
== a
->co
)           /* true duplication */ 
1006                 return INCOMPATIBLE
; 
1008         case CA('^', BEHIND
):           /* collision, dissimilar constraints */ 
1009         case CA(BEHIND
, '^'): 
1010         case CA('$', AHEAD
): 
1011         case CA(AHEAD
, '$'): 
1012                 return INCOMPATIBLE
; 
1014         case CA('^', '$'):              /* constraints passing each other */ 
1015         case CA('^', AHEAD
): 
1016         case CA(BEHIND
, '$'): 
1017         case CA(BEHIND
, AHEAD
): 
1019         case CA('$', BEHIND
): 
1020         case CA(AHEAD
, '^'): 
1021         case CA(AHEAD
, BEHIND
): 
1022         case CA('^', LACON
): 
1023         case CA(BEHIND
, LACON
): 
1024         case CA('$', LACON
): 
1025         case CA(AHEAD
, LACON
): 
1030         return INCOMPATIBLE
;            /* for benefit of blind compilers */ 
1034  - fixempties - get rid of EMPTY arcs 
1035  ^ static VOID fixempties(struct nfa *, FILE *); 
1040 FILE *f
;                        /* for debug output; NULL none */ 
1043         struct state 
*nexts
; 
1048         /* find and eliminate empties until there are no more */ 
1051                 for (s 
= nfa
->states
; s 
!= NULL 
&& !NISERR(); s 
= nexts
) { 
1053                         for (a 
= s
->outs
; a 
!= NULL 
&& !NISERR(); a 
= nexta
) { 
1054                                 nexta 
= a
->outchain
; 
1055                                 if (a
->type 
== EMPTY 
&& unempty(nfa
, a
)) 
1057                                 assert(nexta 
== NULL 
|| s
->no 
!= FREESTATE
); 
1060                 if (progress 
&& f 
!= NULL
) 
1062         } while (progress 
&& !NISERR()); 
1066  - unempty - optimize out an EMPTY arc, if possible 
1067  * Actually, as it stands this function always succeeds, but the return 
1068  * value is kept with an eye on possible future changes. 
1069  ^ static int unempty(struct nfa *, struct arc *); 
1071 static int                      /* 0 couldn't, 1 could */ 
1076         struct state 
*from 
= a
->from
; 
1077         struct state 
*to 
= a
->to
; 
1078         int usefrom
;            /* work on from, as opposed to to? */ 
1080         assert(a
->type 
== EMPTY
); 
1081         assert(from 
!= nfa
->pre 
&& to 
!= nfa
->post
); 
1083         if (from 
== to
) {               /* vacuous loop */ 
1088         /* decide which end to work on */ 
1089         usefrom 
= 1;                    /* default:  attack from */ 
1090         if (from
->nouts 
> to
->nins
) 
1092         else if (from
->nouts 
== to
->nins
) { 
1093                 /* decide on secondary issue:  move/copy fewest arcs */ 
1094                 if (from
->nins 
> to
->nouts
) 
1100                 if (from
->nouts 
== 0) { 
1101                         /* was the state's only outarc */ 
1102                         moveins(nfa
, from
, to
); 
1103                         freestate(nfa
, from
); 
1105                         copyins(nfa
, from
, to
); 
1107                 if (to
->nins 
== 0) { 
1108                         /* was the state's only inarc */ 
1109                         moveouts(nfa
, to
, from
); 
1112                         copyouts(nfa
, to
, from
); 
1119  - cleanup - clean up NFA after optimizations 
1120  ^ static VOID cleanup(struct nfa *); 
1127         struct state 
*nexts
; 
1130         /* clear out unreachable or dead-end states */ 
1131         /* use pre to mark reachable, then post to mark can-reach-post */ 
1132         markreachable(nfa
, nfa
->pre
, (struct state 
*)NULL
, nfa
->pre
); 
1133         markcanreach(nfa
, nfa
->post
, nfa
->pre
, nfa
->post
); 
1134         for (s 
= nfa
->states
; s 
!= NULL
; s 
= nexts
) { 
1136                 if (s
->tmp 
!= nfa
->post 
&& !s
->flag
) 
1139         assert(nfa
->post
->nins 
== 0 || nfa
->post
->tmp 
== nfa
->post
); 
1140         cleartraverse(nfa
, nfa
->pre
); 
1141         assert(nfa
->post
->nins 
== 0 || nfa
->post
->tmp 
== NULL
); 
1142         /* the nins==0 (final unreachable) case will be caught later */ 
1144         /* renumber surviving states */ 
1146         for (s 
= nfa
->states
; s 
!= NULL
; s 
= s
->next
) 
1152  - markreachable - recursive marking of reachable states 
1153  ^ static VOID markreachable(struct nfa *, struct state *, struct state *, 
1157 markreachable(nfa
, s
, okay
, mark
) 
1160 struct state 
*okay
;             /* consider only states with this mark */ 
1161 struct state 
*mark
;             /* the value to mark with */ 
1169         for (a 
= s
->outs
; a 
!= NULL
; a 
= a
->outchain
) 
1170                 markreachable(nfa
, a
->to
, okay
, mark
); 
1174  - markcanreach - recursive marking of states which can reach here 
1175  ^ static VOID markcanreach(struct nfa *, struct state *, struct state *, 
1179 markcanreach(nfa
, s
, okay
, mark
) 
1182 struct state 
*okay
;             /* consider only states with this mark */ 
1183 struct state 
*mark
;             /* the value to mark with */ 
1191         for (a 
= s
->ins
; a 
!= NULL
; a 
= a
->inchain
) 
1192                 markcanreach(nfa
, a
->from
, okay
, mark
); 
1196  - analyze - ascertain potentially-useful facts about an optimized NFA 
1197  ^ static long analyze(struct nfa *); 
1199 static long                     /* re_info bits to be ORed in */ 
1206         if (nfa
->pre
->outs 
== NULL
) 
1207                 return REG_UIMPOSSIBLE
; 
1208         for (a 
= nfa
->pre
->outs
; a 
!= NULL
; a 
= a
->outchain
) 
1209                 for (aa 
= a
->to
->outs
; aa 
!= NULL
; aa 
= aa
->outchain
) 
1210                         if (aa
->to 
== nfa
->post
) 
1211                                 return REG_UEMPTYMATCH
; 
1216  - compact - compact an NFA 
1217  ^ static VOID compact(struct nfa *, struct cnfa *); 
1235         for (s 
= nfa
->states
; s 
!= NULL
; s 
= s
->next
) { 
1237                 narcs 
+= 1 + s
->nouts 
+ 1; 
1238                 /* 1 as a fake for flags, nouts for arcs, 1 as endmarker */ 
1241         cnfa
->states 
= (struct carc 
**)MALLOC(nstates 
* sizeof(struct carc 
*)); 
1242         cnfa
->arcs 
= (struct carc 
*)MALLOC(narcs 
* sizeof(struct carc
)); 
1243         if (cnfa
->states 
== NULL 
|| cnfa
->arcs 
== NULL
) { 
1244                 if (cnfa
->states 
!= NULL
) 
1246                 if (cnfa
->arcs 
!= NULL
) 
1251         cnfa
->nstates 
= nstates
; 
1252         cnfa
->pre 
= nfa
->pre
->no
; 
1253         cnfa
->post 
= nfa
->post
->no
; 
1254         cnfa
->bos
[0] = nfa
->bos
[0]; 
1255         cnfa
->bos
[1] = nfa
->bos
[1]; 
1256         cnfa
->eos
[0] = nfa
->eos
[0]; 
1257         cnfa
->eos
[1] = nfa
->eos
[1]; 
1258         cnfa
->ncolors 
= maxcolor(nfa
->cm
) + 1; 
1262         for (s 
= nfa
->states
; s 
!= NULL
; s 
= s
->next
) { 
1263                 assert((size_t)s
->no 
< nstates
); 
1264                 cnfa
->states
[s
->no
] = ca
; 
1265                 ca
->co 
= 0;             /* clear and skip flags "arc" */ 
1268                 for (a 
= s
->outs
; a 
!= NULL
; a 
= a
->outchain
) 
1276                                 assert(s
->no 
!= cnfa
->pre
); 
1277                                 ca
->co 
= (color
)(cnfa
->ncolors 
+ a
->co
); 
1280                                 cnfa
->flags 
|= HASLACONS
; 
1286                 carcsort(first
, ca
-1); 
1291         assert(ca 
== &cnfa
->arcs
[narcs
]); 
1292         assert(cnfa
->nstates 
!= 0); 
1294         /* mark no-progress states */ 
1295         for (a 
= nfa
->pre
->outs
; a 
!= NULL
; a 
= a
->outchain
) 
1296                 cnfa
->states
[a
->to
->no
]->co 
= 1; 
1297         cnfa
->states
[nfa
->pre
->no
]->co 
= 1; 
1301  - carcsort - sort compacted-NFA arcs by color 
1302  * Really dumb algorithm, but if the list is long enough for that to matter, 
1303  * you're in real trouble anyway. 
1304  ^ static VOID carcsort(struct carc *, struct carc *); 
1307 carcsort(first
, last
) 
1315         if (last 
- first 
<= 1) 
1318         for (p 
= first
; p 
<= last
; p
++) 
1319                 for (q 
= p
; q 
<= last
; q
++) 
1320                         if (p
->co 
> q
->co 
|| 
1321                                         (p
->co 
== q
->co 
&& p
->to 
> q
->to
)) { 
1330  - freecnfa - free a compacted NFA 
1331  ^ static VOID freecnfa(struct cnfa *); 
1337         assert(cnfa
->nstates 
!= 0);     /* not empty already */ 
1344  - dumpnfa - dump an NFA in human-readable form 
1345  ^ static VOID dumpnfa(struct nfa *, FILE *); 
1355         fprintf(f
, "pre %d, post %d", nfa
->pre
->no
, nfa
->post
->no
); 
1356         if (nfa
->bos
[0] != COLORLESS
) 
1357                 fprintf(f
, ", bos [%ld]", (long)nfa
->bos
[0]); 
1358         if (nfa
->bos
[1] != COLORLESS
) 
1359                 fprintf(f
, ", bol [%ld]", (long)nfa
->bos
[1]); 
1360         if (nfa
->eos
[0] != COLORLESS
) 
1361                 fprintf(f
, ", eos [%ld]", (long)nfa
->eos
[0]); 
1362         if (nfa
->eos
[1] != COLORLESS
) 
1363                 fprintf(f
, ", eol [%ld]", (long)nfa
->eos
[1]); 
1365         for (s 
= nfa
->states
; s 
!= NULL
; s 
= s
->next
) 
1367         if (nfa
->parent 
== NULL
) 
1368                 dumpcolors(nfa
->cm
, f
); 
1373 #ifdef REG_DEBUG                /* subordinates of dumpnfa */ 
1379  - dumpstate - dump an NFA state in human-readable form 
1380  ^ static VOID dumpstate(struct state *, FILE *); 
1389         fprintf(f
, "%d%s%c", s
->no
, (s
->tmp 
!= NULL
) ? "T" : "", 
1390                                         (s
->flag
) ? s
->flag 
: '.'); 
1391         if (s
->prev 
!= NULL 
&& s
->prev
->next 
!= s
) 
1392                 fprintf(f
, "\tstate chain bad\n"); 
1394                 fprintf(f
, "\tno out arcs\n"); 
1398         for (a 
= s
->ins
; a 
!= NULL
; a 
= a
->inchain
) { 
1400                         fprintf(f
, "\tlink from %d to %d on %d's in-chain\n", 
1401                                         a
->from
->no
, a
->to
->no
, s
->no
); 
1406  - dumparcs - dump out-arcs in human-readable form 
1407  ^ static VOID dumparcs(struct state *, FILE *); 
1416         assert(s
->nouts 
> 0); 
1417         /* printing arcs in reverse order is usually clearer */ 
1418         pos 
= dumprarcs(s
->outs
, s
, f
, 1); 
1424  - dumprarcs - dump remaining outarcs, recursively, in reverse order 
1425  ^ static int dumprarcs(struct arc *, struct state *, FILE *, int); 
1427 static int                      /* resulting print position */ 
1428 dumprarcs(a
, s
, f
, pos
) 
1432 int pos
;                        /* initial print position */ 
1434         if (a
->outchain 
!= NULL
) 
1435                 pos 
= dumprarcs(a
->outchain
, s
, f
, pos
); 
1446  - dumparc - dump one outarc in readable form, including prefixing tab 
1447  ^ static VOID dumparc(struct arc *, struct state *, FILE *); 
1456         struct arcbatch 
*ab
; 
1461                 fprintf(f
, "[%ld]", (long)a
->co
); 
1464                 fprintf(f
, ">%ld>", (long)a
->co
); 
1467                 fprintf(f
, "<%ld<", (long)a
->co
); 
1470                 fprintf(f
, ":%ld:", (long)a
->co
); 
1474                 fprintf(f
, "%c%d", a
->type
, (int)a
->co
); 
1479                 fprintf(f
, "0x%x/0%lo", a
->type
, (long)a
->co
); 
1483                 fprintf(f
, "?%d?", a
->from
->no
); 
1484         for (ab 
= &a
->from
->oas
; ab 
!= NULL
; ab 
= ab
->next
) { 
1485                 for (aa 
= &ab
->a
[0]; aa 
< &ab
->a
[ABSIZE
]; aa
++) 
1487                                 break;          /* NOTE BREAK OUT */ 
1488                 if (aa 
< &ab
->a
[ABSIZE
])        /* propagate break */ 
1489                                 break;          /* NOTE BREAK OUT */ 
1492                 fprintf(f
, "?!?");      /* not in allocated space */ 
1494         if (a
->to 
== NULL
) { 
1498         fprintf(f
, "%d", a
->to
->no
); 
1499         for (aa 
= a
->to
->ins
; aa 
!= NULL
; aa 
= aa
->inchain
) 
1501                         break;          /* NOTE BREAK OUT */ 
1503                 fprintf(f
, "?!?");      /* missing from in-chain */ 
1509 #endif                          /* ifdef REG_DEBUG */ 
1512  - dumpcnfa - dump a compacted NFA in human-readable form 
1513  ^ static VOID dumpcnfa(struct cnfa *, FILE *); 
1523         fprintf(f
, "pre %d, post %d", cnfa
->pre
, cnfa
->post
); 
1524         if (cnfa
->bos
[0] != COLORLESS
) 
1525                 fprintf(f
, ", bos [%ld]", (long)cnfa
->bos
[0]); 
1526         if (cnfa
->bos
[1] != COLORLESS
) 
1527                 fprintf(f
, ", bol [%ld]", (long)cnfa
->bos
[1]); 
1528         if (cnfa
->eos
[0] != COLORLESS
) 
1529                 fprintf(f
, ", eos [%ld]", (long)cnfa
->eos
[0]); 
1530         if (cnfa
->eos
[1] != COLORLESS
) 
1531                 fprintf(f
, ", eol [%ld]", (long)cnfa
->eos
[1]); 
1532         if (cnfa
->flags
&HASLACONS
) 
1533                 fprintf(f
, ", haslacons"); 
1535         for (st 
= 0; st 
< cnfa
->nstates
; st
++) 
1536                 dumpcstate(st
, cnfa
->states
[st
], cnfa
, f
); 
1541 #ifdef REG_DEBUG                /* subordinates of dumpcnfa */ 
1547  - dumpcstate - dump a compacted-NFA state in human-readable form 
1548  ^ static VOID dumpcstate(int, struct carc *, struct cnfa *, FILE *); 
1551 dumpcstate(st
, ca
, cnfa
, f
) 
1560         fprintf(f
, "%d%s", st
, (ca
[0].co
) ? ":" : "."); 
1562         for (i 
= 1; ca
[i
].co 
!= COLORLESS
; i
++) { 
1563                 if (ca
[i
].co 
< cnfa
->ncolors
) 
1564                         fprintf(f
, "\t[%ld]->%d", (long)ca
[i
].co
, ca
[i
].to
); 
1566                         fprintf(f
, "\t:%ld:->%d", (long)ca
[i
].co
-cnfa
->ncolors
, 
1574         if (i 
== 1 || pos 
!= 1) 
1582 #endif                          /* ifdef REG_DEBUG */