]>
git.saurik.com Git - wxWidgets.git/blob - src/regex/engine.c
   2  * The matching engine and friends.  This file is #included by regexec.c 
   3  * after suitable #defines of a variety of macros used herein, so that 
   4  * different state representations can be used without duplicating masses 
   9 #define matcher smatcher 
  12 #define dissect sdissect 
  13 #define backref sbackref 
  20 #define matcher lmatcher 
  23 #define dissect ldissect 
  24 #define backref lbackref 
  31 /* another structure passed up and down to avoid zillions of parameters */ 
  35         regmatch_t 
*pmatch
;     /* [nsub+1] (0 element unused) */ 
  36         char *offp
;             /* offsets work from here */ 
  37         char *beginp
;           /* start of string -- virtual NUL precedes */ 
  38         char *endp
;             /* end of string -- virtual NUL here */ 
  39         char *coldp
;            /* can be no match starting before here */ 
  40         char **lastpos
;         /* [nplus+1] */ 
  42         states st
;              /* current states */ 
  43         states fresh
;           /* states for a fresh start */ 
  44         states tmp
;             /* temporary */ 
  45         states empty
;           /* empty set of states */ 
  51 #define SP(t, s, c)     print(m, t, s, c, stdout) 
  52 #define AT(t, p1, p2, s1, s2)   at(m, t, p1, p2, s1, s2) 
  53 #define NOTE(str)       { if (m->eflags®_TRACE) printf("=%s\n", (str)); } 
  55 #define SP(t, s, c)     /* nothing */ 
  56 #define AT(t, p1, p2, s1, s2)   /* nothing */ 
  57 #define NOTE(s) /* nothing */ 
  61  - matcher - the actual matching engine 
  62  == static int matcher(register struct re_guts *g, char *string, \ 
  63  ==     size_t nmatch, regmatch_t pmatch[], int eflags); 
  65 static int                      /* 0 success, REG_NOMATCH failure */ 
  66 matcher(g
, string
, nmatch
, pmatch
, eflags
) 
  67 register struct re_guts 
*g
; 
  76         register struct match 
*m 
= &mv
; 
  78         const register sopno gf 
= g
->firststate
+1;      /* +1 for OEND */ 
  79         const register sopno gl 
= g
->laststate
; 
  83         /* simplify the situation where possible */ 
  84         if (g
->cflags
®_NOSUB
) 
  86         if (eflags
®_STARTEND
) { 
  87                 start 
= string 
+ pmatch
[0].rm_so
; 
  88                 stop 
= string 
+ pmatch
[0].rm_eo
; 
  91                 stop 
= start 
+ strlen(start
); 
  96         /* prescreening; this does wonders for this rather slow code */ 
  97         if (g
->must 
!= NULL
) { 
  98                 for (dp 
= start
; dp 
< stop
; dp
++) 
  99                         if (*dp 
== g
->must
[0] && stop 
- dp 
>= g
->mlen 
&& 
 100                                 memcmp(dp
, g
->must
, (size_t)g
->mlen
) == 0) 
 102                 if (dp 
== stop
)         /* we didn't find g->must */ 
 106         /* match struct setup */ 
 121         /* this loop does only one repetition except for backrefs */ 
 123                 endp 
= fast(m
, start
, stop
, gf
, gl
); 
 124                 if (endp 
== NULL
) {             /* a miss */ 
 128                 if (nmatch 
== 0 && !g
->backrefs
) 
 129                         break;          /* no further info needed */ 
 132                 assert(m
->coldp 
!= NULL
); 
 134                         NOTE("finding start"); 
 135                         endp 
= slow(m
, m
->coldp
, stop
, gf
, gl
); 
 138                         assert(m
->coldp 
< m
->endp
); 
 141                 if (nmatch 
== 1 && !g
->backrefs
) 
 142                         break;          /* no further info needed */ 
 144                 /* oh my, he wants the subexpressions... */ 
 145                 if (m
->pmatch 
== NULL
) 
 146                         m
->pmatch 
= (regmatch_t 
*)malloc((m
->g
->nsub 
+ 1) * 
 148                 if (m
->pmatch 
== NULL
) { 
 152                 for (i 
= 1; i 
<= m
->g
->nsub
; i
++) 
 153                         m
->pmatch
[i
].rm_so 
= m
->pmatch
[i
].rm_eo 
= -1; 
 154                 if (!g
->backrefs 
&& !(m
->eflags
®_BACKR
)) { 
 156                         dp 
= dissect(m
, m
->coldp
, endp
, gf
, gl
); 
 158                         if (g
->nplus 
> 0 && m
->lastpos 
== NULL
) 
 159                                 m
->lastpos 
= (char **)malloc((g
->nplus
+1) * 
 161                         if (g
->nplus 
> 0 && m
->lastpos 
== NULL
) { 
 166                         NOTE("backref dissect"); 
 167                         dp 
= backref(m
, m
->coldp
, endp
, gf
, gl
, (sopno
)0); 
 172                 /* uh-oh... we couldn't find a subexpression-level match */ 
 173                 assert(g
->backrefs
);    /* must be back references doing it */ 
 174                 assert(g
->nplus 
== 0 || m
->lastpos 
!= NULL
); 
 176                         if (dp 
!= NULL 
|| endp 
<= m
->coldp
) 
 179                         endp 
= slow(m
, m
->coldp
, endp
-1, gf
, gl
); 
 182                         /* try it on a shorter possibility */ 
 184                         for (i 
= 1; i 
<= m
->g
->nsub
; i
++) { 
 185                                 assert(m
->pmatch
[i
].rm_so 
== -1); 
 186                                 assert(m
->pmatch
[i
].rm_eo 
== -1); 
 189                         NOTE("backoff dissect"); 
 190                         dp 
= backref(m
, m
->coldp
, endp
, gf
, gl
, (sopno
)0); 
 192                 assert(dp 
== NULL 
|| dp 
== endp
); 
 193                 if (dp 
!= NULL
)         /* found a shorter one */ 
 196                 /* despite initial appearances, there is no match here */ 
 198                 start 
= m
->coldp 
+ 1;   /* recycle starting later */ 
 199                 assert(start 
<= stop
); 
 202         /* fill in the details if requested */ 
 204                 pmatch
[0].rm_so 
= m
->coldp 
- m
->offp
; 
 205                 pmatch
[0].rm_eo 
= endp 
- m
->offp
; 
 208                 assert(m
->pmatch 
!= NULL
); 
 209                 for (i 
= 1; i 
< nmatch
; i
++) 
 211                                 pmatch
[i
] = m
->pmatch
[i
]; 
 213                                 pmatch
[i
].rm_so 
= -1; 
 214                                 pmatch
[i
].rm_eo 
= -1; 
 218         if (m
->pmatch 
!= NULL
) 
 219                 free((char *)m
->pmatch
); 
 220         if (m
->lastpos 
!= NULL
) 
 221                 free((char *)m
->lastpos
); 
 227  - dissect - figure out what matched what, no back references 
 228  == static char *dissect(register struct match *m, char *start, \ 
 229  ==     char *stop, sopno startst, sopno stopst); 
 231 static char *                   /* == stop (success) always */ 
 232 dissect(m
, start
, stop
, startst
, stopst
) 
 233 register struct match 
*m
; 
 240         register sopno ss
;      /* start sop of current subRE */ 
 241         register sopno es
;      /* end sop of current subRE */ 
 242         register char *sp
;      /* start of string matched by it */ 
 243         register char *stp
;     /* string matched by it cannot pass here */ 
 244         register char *rest
;    /* start of rest of string */ 
 245         register char *tail
;    /* string unmatched by rest of RE */ 
 246         register sopno ssub
;    /* start sop of subsubRE */ 
 247         register sopno esub
;    /* end sop of subsubRE */ 
 248         register char *ssp
;     /* start of string matched by subsubRE */ 
 249         register char *sep
;     /* end of string matched by subsubRE */ 
 250         register char *oldssp
;  /* previous ssp */ 
 253         AT("diss", start
, stop
, startst
, stopst
); 
 255         for (ss 
= startst
; ss 
< stopst
; ss 
= es
) { 
 256                 /* identify end of subRE */ 
 258                 switch (OP(m
->g
->strip
[es
])) { 
 261                         es 
+= OPND(m
->g
->strip
[es
]); 
 264                         while (OP(m
->g
->strip
[es
]) != O_CH
) 
 265                                 es 
+= OPND(m
->g
->strip
[es
]); 
 270                 /* figure out what it matched */ 
 271                 switch (OP(m
->g
->strip
[ss
])) { 
 291                 /* cases where length of match is hard to find */ 
 295                                 /* how long could this one be? */ 
 296                                 rest 
= slow(m
, sp
, stp
, ss
, es
); 
 297                                 assert(rest 
!= NULL
);   /* it did match */ 
 298                                 /* could the rest match the rest? */ 
 299                                 tail 
= slow(m
, rest
, stop
, es
, stopst
); 
 302                                 /* no -- try a shorter match for this one */ 
 304                                 assert(stp 
>= sp
);      /* it did work */ 
 308                         /* did innards match? */ 
 309                         if (slow(m
, sp
, rest
, ssub
, esub
) != NULL
) { 
 310                                 dp 
= dissect(m
, sp
, rest
, ssub
, esub
); 
 319                                 /* how long could this one be? */ 
 320                                 rest 
= slow(m
, sp
, stp
, ss
, es
); 
 321                                 assert(rest 
!= NULL
);   /* it did match */ 
 322                                 /* could the rest match the rest? */ 
 323                                 tail 
= slow(m
, rest
, stop
, es
, stopst
); 
 326                                 /* no -- try a shorter match for this one */ 
 328                                 assert(stp 
>= sp
);      /* it did work */ 
 334                         for (;;) {      /* find last match of innards */ 
 335                                 sep 
= slow(m
, ssp
, rest
, ssub
, esub
); 
 336                                 if (sep 
== NULL 
|| sep 
== ssp
) 
 337                                         break;  /* failed or matched null */ 
 338                                 oldssp 
= ssp
;   /* on to next try */ 
 342                                 /* last successful match */ 
 346                         assert(sep 
== rest
);    /* must exhaust substring */ 
 347                         assert(slow(m
, ssp
, sep
, ssub
, esub
) == rest
); 
 348                         dp 
= dissect(m
, ssp
, sep
, ssub
, esub
); 
 355                                 /* how long could this one be? */ 
 356                                 rest 
= slow(m
, sp
, stp
, ss
, es
); 
 357                                 assert(rest 
!= NULL
);   /* it did match */ 
 358                                 /* could the rest match the rest? */ 
 359                                 tail 
= slow(m
, rest
, stop
, es
, stopst
); 
 362                                 /* no -- try a shorter match for this one */ 
 364                                 assert(stp 
>= sp
);      /* it did work */ 
 367                         esub 
= ss 
+ OPND(m
->g
->strip
[ss
]) - 1; 
 368                         assert(OP(m
->g
->strip
[esub
]) == OOR1
); 
 369                         for (;;) {      /* find first matching branch */ 
 370                                 if (slow(m
, sp
, rest
, ssub
, esub
) == rest
) 
 371                                         break;  /* it matched all of it */ 
 372                                 /* that one missed, try next one */ 
 373                                 assert(OP(m
->g
->strip
[esub
]) == OOR1
); 
 375                                 assert(OP(m
->g
->strip
[esub
]) == OOR2
); 
 377                                 esub 
+= OPND(m
->g
->strip
[esub
]); 
 378                                 if (OP(m
->g
->strip
[esub
]) == OOR2
) 
 381                                         assert(OP(m
->g
->strip
[esub
]) == O_CH
); 
 383                         dp 
= dissect(m
, sp
, rest
, ssub
, esub
); 
 395                         i 
= OPND(m
->g
->strip
[ss
]); 
 396                         assert(0 < i 
&& i 
<= m
->g
->nsub
); 
 397                         m
->pmatch
[i
].rm_so 
= sp 
- m
->offp
; 
 400                         i 
= OPND(m
->g
->strip
[ss
]); 
 401                         assert(0 < i 
&& i 
<= m
->g
->nsub
); 
 402                         m
->pmatch
[i
].rm_eo 
= sp 
- m
->offp
; 
 415  - backref - figure out what matched what, figuring in back references 
 416  == static char *backref(register struct match *m, char *start, \ 
 417  ==     char *stop, sopno startst, sopno stopst, sopno lev); 
 419 static char *                   /* == stop (success) or NULL (failure) */ 
 420 backref(m
, start
, stop
, startst
, stopst
, lev
) 
 421 register struct match 
*m
; 
 426 sopno lev
;                      /* PLUS nesting level */ 
 429         register sopno ss
;      /* start sop of current subRE */ 
 430         register char *sp
;      /* start of string matched by it */ 
 431         register sopno ssub
;    /* start sop of subsubRE */ 
 432         register sopno esub
;    /* end sop of subsubRE */ 
 433         register char *ssp
;     /* start of string matched by subsubRE */ 
 438         register regoff_t offsave
; 
 441         AT("back", start
, stop
, startst
, stopst
); 
 444         /* get as far as we can with easy stuff */ 
 446         for (ss 
= startst
; !hard 
&& ss 
< stopst
; ss
++) 
 447                 switch (OP(s 
= m
->g
->strip
[ss
])) { 
 449                         if (sp 
== stop 
|| *sp
++ != (char)OPND(s
)) 
 458                         cs 
= &m
->g
->sets
[OPND(s
)]; 
 459                         if (sp 
== stop 
|| !CHIN(cs
, *sp
++)) 
 463                         if ( (sp 
== m
->beginp 
&& !(m
->eflags
®_NOTBOL
)) || 
 464                                         (sp 
< m
->endp 
&& *(sp
-1) == '\n' && 
 465                                                 (m
->g
->cflags
®_NEWLINE
)) ) 
 471                         if ( (sp 
== m
->endp 
&& !(m
->eflags
®_NOTEOL
)) || 
 472                                         (sp 
< m
->endp 
&& *sp 
== '\n' && 
 473                                                 (m
->g
->cflags
®_NEWLINE
)) ) 
 479                         if (( (sp 
== m
->beginp 
&& !(m
->eflags
®_NOTBOL
)) || 
 480                                         (sp 
< m
->endp 
&& *(sp
-1) == '\n' && 
 481                                                 (m
->g
->cflags
®_NEWLINE
)) || 
 483                                                         !ISWORD(*(sp
-1))) ) && 
 484                                         (sp 
< m
->endp 
&& ISWORD(*sp
)) ) 
 490                         if (( (sp 
== m
->endp 
&& !(m
->eflags
®_NOTEOL
)) || 
 491                                         (sp 
< m
->endp 
&& *sp 
== '\n' && 
 492                                                 (m
->g
->cflags
®_NEWLINE
)) || 
 493                                         (sp 
< m
->endp 
&& !ISWORD(*sp
)) ) && 
 494                                         (sp 
> m
->beginp 
&& ISWORD(*(sp
-1))) ) 
 501                 case OOR1
:      /* matches null but needs to skip */ 
 505                                 assert(OP(s
) == OOR2
); 
 507                         } while (OP(s 
= m
->g
->strip
[ss
]) != O_CH
); 
 508                         /* note that the ss++ gets us past the O_CH */ 
 510                 default:        /* have to make a choice */ 
 514         if (!hard
) {            /* that was it! */ 
 519         ss
--;                   /* adjust for the for's final increment */ 
 522         AT("hard", sp
, stop
, ss
, stopst
); 
 525         case OBACK_
:            /* the vilest depths */ 
 527                 assert(0 < i 
&& i 
<= m
->g
->nsub
); 
 528                 if (m
->pmatch
[i
].rm_eo 
== -1) 
 530                 assert(m
->pmatch
[i
].rm_so 
!= -1); 
 531                 len 
= m
->pmatch
[i
].rm_eo 
- m
->pmatch
[i
].rm_so
; 
 532                 assert(stop 
- m
->beginp 
>= len
); 
 534                         return(NULL
);   /* not enough left to match */ 
 535                 ssp 
= m
->offp 
+ m
->pmatch
[i
].rm_so
; 
 536                 if (memcmp(sp
, ssp
, len
) != 0) 
 538                 while (m
->g
->strip
[ss
] != SOP(O_BACK
, i
)) 
 540                 return(backref(m
, sp
+len
, stop
, ss
+1, stopst
, lev
)); 
 542         case OQUEST_
:           /* to null or not */ 
 543                 dp 
= backref(m
, sp
, stop
, ss
+1, stopst
, lev
); 
 545                         return(dp
);     /* not */ 
 546                 return(backref(m
, sp
, stop
, ss
+OPND(s
)+1, stopst
, lev
)); 
 549                 assert(m
->lastpos 
!= NULL
); 
 550                 assert(lev
+1 <= m
->g
->nplus
); 
 551                 m
->lastpos
[lev
+1] = sp
; 
 552                 return(backref(m
, sp
, stop
, ss
+1, stopst
, lev
+1)); 
 555                 if (sp 
== m
->lastpos
[lev
])      /* last pass matched null */ 
 556                         return(backref(m
, sp
, stop
, ss
+1, stopst
, lev
-1)); 
 557                 /* try another pass */ 
 558                 m
->lastpos
[lev
] = sp
; 
 559                 dp 
= backref(m
, sp
, stop
, ss
-OPND(s
)+1, stopst
, lev
); 
 561                         return(backref(m
, sp
, stop
, ss
+1, stopst
, lev
-1)); 
 565         case OCH_
:              /* find the right one, if any */ 
 567                 esub 
= ss 
+ OPND(s
) - 1; 
 568                 assert(OP(m
->g
->strip
[esub
]) == OOR1
); 
 569                 for (;;) {      /* find first matching branch */ 
 570                         dp 
= backref(m
, sp
, stop
, ssub
, esub
, lev
); 
 573                         /* that one missed, try next one */ 
 574                         if (OP(m
->g
->strip
[esub
]) == O_CH
) 
 575                                 return(NULL
);   /* there is none */ 
 577                         assert(OP(m
->g
->strip
[esub
]) == OOR2
); 
 579                         esub 
+= OPND(m
->g
->strip
[esub
]); 
 580                         if (OP(m
->g
->strip
[esub
]) == OOR2
) 
 583                                 assert(OP(m
->g
->strip
[esub
]) == O_CH
); 
 586         case OLPAREN
:           /* must undo assignment if rest fails */ 
 588                 assert(0 < i 
&& i 
<= m
->g
->nsub
); 
 589                 offsave 
= m
->pmatch
[i
].rm_so
; 
 590                 m
->pmatch
[i
].rm_so 
= sp 
- m
->offp
; 
 591                 dp 
= backref(m
, sp
, stop
, ss
+1, stopst
, lev
); 
 594                 m
->pmatch
[i
].rm_so 
= offsave
; 
 597         case ORPAREN
:           /* must undo assignment if rest fails */ 
 599                 assert(0 < i 
&& i 
<= m
->g
->nsub
); 
 600                 offsave 
= m
->pmatch
[i
].rm_eo
; 
 601                 m
->pmatch
[i
].rm_eo 
= sp 
- m
->offp
; 
 602                 dp 
= backref(m
, sp
, stop
, ss
+1, stopst
, lev
); 
 605                 m
->pmatch
[i
].rm_eo 
= offsave
; 
 616         return((char *)NULL
);   /* dummy */ 
 620  - fast - step through the string at top speed 
 621  == static char *fast(register struct match *m, char *start, \ 
 622  ==     char *stop, sopno startst, sopno stopst); 
 624 static char *                   /* where tentative match ended, or NULL */ 
 625 fast(m
, start
, stop
, startst
, stopst
) 
 626 register struct match 
*m
; 
 632         register states st 
= m
->st
; 
 633         register states fresh 
= m
->fresh
; 
 634         register states tmp 
= m
->tmp
; 
 635         register char *p 
= start
; 
 636         register int c 
= (start 
== m
->beginp
) ? OUT 
: *(start
-1); 
 637         register int lastc
;     /* previous c */ 
 640         register char *coldp
;   /* last p after which no match was underway */ 
 644         st 
= step(m
->g
, startst
, stopst
, st
, NOTHING
, st
); 
 651                 c 
= (p 
== m
->endp
) ? OUT 
: *p
; 
 655                 /* is there an EOL and/or BOL between lastc and c? */ 
 658                 if ( (lastc 
== '\n' && m
->g
->cflags
®_NEWLINE
) || 
 659                                 (lastc 
== OUT 
&& !(m
->eflags
®_NOTBOL
)) ) { 
 663                 if ( (c 
== '\n' && m
->g
->cflags
®_NEWLINE
) || 
 664                                 (c 
== OUT 
&& !(m
->eflags
®_NOTEOL
)) ) { 
 665                         flagch 
= (flagch 
== BOL
) ? BOLEOL 
: EOL
; 
 670                                 st 
= step(m
->g
, startst
, stopst
, st
, flagch
, st
); 
 674                 /* how about a word boundary? */ 
 675                 if ( (flagch 
== BOL 
|| (lastc 
!= OUT 
&& !ISWORD(lastc
))) && 
 676                                         (c 
!= OUT 
&& ISWORD(c
)) ) { 
 679                 if ( (lastc 
!= OUT 
&& ISWORD(lastc
)) && 
 680                                 (flagch 
== EOL 
|| (c 
!= OUT 
&& !ISWORD(c
))) ) { 
 683                 if (flagch 
== BOW 
|| flagch 
== EOW
) { 
 684                         st 
= step(m
->g
, startst
, stopst
, st
, flagch
, st
); 
 689                 if (ISSET(st
, stopst
) || p 
== stop
) 
 690                         break;          /* NOTE BREAK OUT */ 
 692                 /* no, we must deal with this character */ 
 696                 st 
= step(m
->g
, startst
, stopst
, tmp
, c
, st
); 
 698                 assert(EQ(step(m
->g
, startst
, stopst
, st
, NOTHING
, st
), st
)); 
 702         assert(coldp 
!= NULL
); 
 704         if (ISSET(st
, stopst
)) 
 711  - slow - step through the string more deliberately 
 712  == static char *slow(register struct match *m, char *start, \ 
 713  ==     char *stop, sopno startst, sopno stopst); 
 715 static char *                   /* where it ended */ 
 716 slow(m
, start
, stop
, startst
, stopst
) 
 717 register struct match 
*m
; 
 723         register states st 
= m
->st
; 
 724         register states empty 
= m
->empty
; 
 725         register states tmp 
= m
->tmp
; 
 726         register char *p 
= start
; 
 727         register int c 
= (start 
== m
->beginp
) ? OUT 
: *(start
-1); 
 728         register int lastc
;     /* previous c */ 
 731         register char *matchp
;  /* last p at which a match ended */ 
 733         AT("slow", start
, stop
, startst
, stopst
); 
 736         SP("sstart", st
, *p
); 
 737         st 
= step(m
->g
, startst
, stopst
, st
, NOTHING
, st
); 
 742                 c 
= (p 
== m
->endp
) ? OUT 
: *p
; 
 744                 /* is there an EOL and/or BOL between lastc and c? */ 
 747                 if ( (lastc 
== '\n' && m
->g
->cflags
®_NEWLINE
) || 
 748                                 (lastc 
== OUT 
&& !(m
->eflags
®_NOTBOL
)) ) { 
 752                 if ( (c 
== '\n' && m
->g
->cflags
®_NEWLINE
) || 
 753                                 (c 
== OUT 
&& !(m
->eflags
®_NOTEOL
)) ) { 
 754                         flagch 
= (flagch 
== BOL
) ? BOLEOL 
: EOL
; 
 759                                 st 
= step(m
->g
, startst
, stopst
, st
, flagch
, st
); 
 760                         SP("sboleol", st
, c
); 
 763                 /* how about a word boundary? */ 
 764                 if ( (flagch 
== BOL 
|| (lastc 
!= OUT 
&& !ISWORD(lastc
))) && 
 765                                         (c 
!= OUT 
&& ISWORD(c
)) ) { 
 768                 if ( (lastc 
!= OUT 
&& ISWORD(lastc
)) && 
 769                                 (flagch 
== EOL 
|| (c 
!= OUT 
&& !ISWORD(c
))) ) { 
 772                 if (flagch 
== BOW 
|| flagch 
== EOW
) { 
 773                         st 
= step(m
->g
, startst
, stopst
, st
, flagch
, st
); 
 774                         SP("sboweow", st
, c
); 
 778                 if (ISSET(st
, stopst
)) 
 780                 if (EQ(st
, empty
) || p 
== stop
) 
 781                         break;          /* NOTE BREAK OUT */ 
 783                 /* no, we must deal with this character */ 
 787                 st 
= step(m
->g
, startst
, stopst
, tmp
, c
, st
); 
 789                 assert(EQ(step(m
->g
, startst
, stopst
, st
, NOTHING
, st
), st
)); 
 798  - step - map set of states reachable before char to set reachable after 
 799  == static states step(register struct re_guts *g, sopno start, sopno stop, \ 
 800  ==     register states bef, int ch, register states aft); 
 801  == #define     BOL     (OUT+1) 
 802  == #define     EOL     (BOL+1) 
 803  == #define     BOLEOL  (BOL+2) 
 804  == #define     NOTHING (BOL+3) 
 805  == #define     BOW     (BOL+4) 
 806  == #define     EOW     (BOL+5) 
 807  == #define     CODEMAX (BOL+5)         // highest code used 
 808  == #define     NONCHAR(c)      ((c) > CHAR_MAX) 
 809  == #define     NNONCHAR        (CODEMAX-CHAR_MAX) 
 812 step(g
, start
, stop
, bef
, ch
, aft
) 
 813 register struct re_guts 
*g
; 
 814 sopno start
;                    /* start state within strip */ 
 815 sopno stop
;                     /* state after stop state within strip */ 
 816 register states bef
;            /* states reachable before */ 
 817 int ch
;                         /* character or NONCHAR code */ 
 818 register states aft
;            /* states already known reachable after */ 
 823         register onestate here
;         /* note, macros know this name */ 
 827         for (pc 
= start
, INIT(here
, pc
); pc 
!= stop
; pc
++, INC(here
)) { 
 831                         assert(pc 
== stop
-1); 
 834                         /* only characters can match */ 
 835                         assert(!NONCHAR(ch
) || ch 
!= (char)OPND(s
)); 
 836                         if (ch 
== (char)OPND(s
)) 
 840                         if (ch 
== BOL 
|| ch 
== BOLEOL
) 
 844                         if (ch 
== EOL 
|| ch 
== BOLEOL
) 
 860                         cs 
= &g
->sets
[OPND(s
)]; 
 861                         if (!NONCHAR(ch
) && CHIN(cs
, ch
)) 
 864                 case OBACK_
:            /* ignored here */ 
 868                 case OPLUS_
:            /* forward, this is just an empty */ 
 871                 case O_PLUS
:            /* both forward and back */ 
 873                         i 
= ISSETBACK(aft
, OPND(s
)); 
 874                         BACK(aft
, aft
, OPND(s
)); 
 875                         if (!i 
&& ISSETBACK(aft
, OPND(s
))) { 
 876                                 /* oho, must reconsider loop body */ 
 881                 case OQUEST_
:           /* two branches, both forward */ 
 883                         FWD(aft
, aft
, OPND(s
)); 
 885                 case O_QUEST
:           /* just an empty */ 
 888                 case OLPAREN
:           /* not significant here */ 
 892                 case OCH_
:              /* mark the first two branches */ 
 894                         assert(OP(g
->strip
[pc
+OPND(s
)]) == OOR2
); 
 895                         FWD(aft
, aft
, OPND(s
)); 
 897                 case OOR1
:              /* done a branch, find the O_CH */ 
 898                         if (ISSTATEIN(aft
, here
)) { 
 900                                                 OP(s 
= g
->strip
[pc
+look
]) != O_CH
; 
 902                                         assert(OP(s
) == OOR2
); 
 906                 case OOR2
:              /* propagate OCH_'s marking */ 
 908                         if (OP(g
->strip
[pc
+OPND(s
)]) != O_CH
) { 
 909                                 assert(OP(g
->strip
[pc
+OPND(s
)]) == OOR2
); 
 910                                 FWD(aft
, aft
, OPND(s
)); 
 913                 case O_CH
:              /* just empty */ 
 916                 default:                /* ooooops... */ 
 927  - print - print a set of states 
 929  == static void print(struct match *m, char *caption, states st, \ 
 934 print(m
, caption
, st
, ch
, d
) 
 941         register struct re_guts 
*g 
= m
->g
; 
 943         register int first 
= 1; 
 945         if (!(m
->eflags
®_TRACE
)) 
 948         fprintf(d
, "%s", caption
); 
 950                 fprintf(d
, " %s", pchar(ch
)); 
 951         for (i 
= 0; i 
< g
->nstates
; i
++) 
 953                         fprintf(d
, "%s%d", (first
) ? "\t" : ", ", i
); 
 960  - at - print current situation 
 962  == static void at(struct match *m, char *title, char *start, char *stop, \ 
 963  ==                                             sopno startst, sopno stopst); 
 967 at(m
, title
, start
, stop
, startst
, stopst
) 
 975         if (!(m
->eflags
®_TRACE
)) 
 978         printf("%s %s-", title
, pchar(*start
)); 
 979         printf("%s ", pchar(*stop
)); 
 980         printf("%ld-%ld\n", (long)startst
, (long)stopst
); 
 984 #define PCHARDONE       /* never again */ 
 986  - pchar - make a character printable 
 988  == static char *pchar(int ch); 
 991  * Is this identical to regchar() over in debug.c?  Well, yes.  But a 
 992  * duplicate here avoids having a debugging-capable regexec.o tied to 
 993  * a matching debug.o, and this is convenient.  It all disappears in 
 994  * the non-debug compilation anyway, so it doesn't matter much. 
 996 static char *                   /* -> representation */ 
1000         static char pbuf
[10]; 
1002         if (isprint(ch
) || ch 
== ' ') 
1003                 sprintf(pbuf
, "%c", ch
); 
1005                 sprintf(pbuf
, "\\%o", ch
);