]>
git.saurik.com Git - wxWidgets.git/blob - src/regex/regexec.c
   2  * re_*exec and friends - match REs 
   4  * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved. 
   6  * Development of this software was funded, in part, by Cray Research Inc., 
   7  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics 
   8  * Corporation, none of whom are responsible for the results.  The author 
  11  * Redistribution and use in source and binary forms -- with or without 
  12  * modification -- are permitted for any purpose, provided that 
  13  * redistributions in source form retain this entire copyright notice and 
  14  * indicate the origin and nature of any modifications. 
  16  * I'd appreciate being given credit for this package in the documentation 
  17  * of software which uses it, but that is not a requirement. 
  19  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, 
  20  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 
  21  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL 
  22  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
  23  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
  24  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
  25  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
  26  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 
  27  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
  28  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
  36 /* lazy-DFA representation */ 
  37 struct arcp 
{                   /* "pointer" to an outarc */ 
  42 struct sset 
{                   /* state set */ 
  43         unsigned *states
;       /* pointer to bitvector */ 
  44         unsigned hash
;          /* hash of bitvector */ 
  45 #               define  HASH(bv, nw)    (((nw) == 1) ? *(bv) : hash(bv, nw)) 
  46 #       define  HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \ 
  47                 memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0)) 
  49 #               define  STARTER         01      /* the initial state set */ 
  50 #               define  POSTSTATE       02      /* includes the goal state */ 
  51 #               define  LOCKED          04      /* locked in cache */ 
  52 #               define  NOPROGRESS      010     /* zero-progress state set */ 
  53         struct arcp ins
;        /* chain of inarcs pointing here */ 
  54         chr 
*lastseen
;          /* last entered on arrival here */ 
  55         struct sset 
**outs
;     /* outarc vector indexed by color */ 
  56         struct arcp 
*inchain
;   /* chain-pointer vector for outarcs */ 
  60         int nssets
;             /* size of cache */ 
  61         int nssused
;            /* how many entries occupied yet */ 
  62         int nstates
;            /* number of states */ 
  63         int ncolors
;            /* length of outarc and inchain vectors */ 
  64         int wordsper
;           /* length of state-set bitvectors */ 
  65         struct sset 
*ssets
;     /* state-set cache */ 
  66         unsigned *statesarea
;   /* bitvector storage */ 
  67         unsigned *work
;         /* pointer to work area within statesarea */ 
  68         struct sset 
**outsarea
; /* outarc-vector storage */ 
  69         struct arcp 
*incarea
;   /* inchain storage */ 
  72         chr 
*lastpost
;          /* location of last cache-flushed success */ 
  73         chr 
*lastnopr
;          /* location of last cache-flushed NOPROGRESS */ 
  74         struct sset 
*search
;    /* replacement-search-pointer memory */ 
  75         int cptsmalloced
;       /* were the areas individually malloced? */ 
  76         char *mallocarea
;       /* self, or master malloced area, or NULL */ 
  79 #define WORK    1               /* number of work bitvectors needed */ 
  81 /* setup for non-malloc allocation for small cases */ 
  82 #define FEWSTATES       20      /* must be less than UBITS */ 
  86         struct sset ssets
[FEWSTATES
*2]; 
  87         unsigned statesarea
[FEWSTATES
*2 + WORK
]; 
  88         struct sset 
*outsarea
[FEWSTATES
*2 * FEWCOLORS
]; 
  89         struct arcp incarea
[FEWSTATES
*2 * FEWCOLORS
]; 
  91 #define DOMALLOC        ((struct smalldfa *)NULL)       /* force malloc */ 
  95 /* internal variables, bundled for easy passing around */ 
  99         int eflags
;             /* copies of arguments */ 
 102         rm_detail_t 
*details
; 
 103         chr 
*start
;             /* start of string */ 
 104         chr 
*stop
;              /* just past end of string */ 
 105         int err
;                /* error code if any (0 none) */ 
 106         regoff_t 
*mem
;          /* memory vector for backtracking */ 
 107         struct smalldfa dfa1
; 
 108         struct smalldfa dfa2
; 
 110 #define VISERR(vv)      ((vv)->err != 0)        /* have we seen an error yet? */ 
 111 #define ISERR() VISERR(v) 
 112 #define VERR(vv,e)      (((vv)->err) ? (vv)->err : ((vv)->err = (e))) 
 113 #define ERR(e)  VERR(v, e)              /* record an error */ 
 114 #define NOERR() {if (ISERR()) return v->err;}   /* if error seen, return it */ 
 115 #define OFF(p)  ((p) - v->start) 
 116 #define LOFF(p) ((long)OFF(p)) 
 121  * forward declarations 
 123 /* =====^!^===== begin forwards =====^!^===== */ 
 124 /* automatically gathered by fwd; do not hand-edit */ 
 125 /* === regexec.c === */ 
 126 int exec 
_ANSI_ARGS_((regex_t 
*, CONST chr 
*, size_t, rm_detail_t 
*, size_t, regmatch_t 
[], int)); 
 127 static int find 
_ANSI_ARGS_((struct vars 
*, struct cnfa 
*, struct colormap 
*)); 
 128 static int cfind 
_ANSI_ARGS_((struct vars 
*, struct cnfa 
*, struct colormap 
*)); 
 129 static int cfindloop 
_ANSI_ARGS_((struct vars 
*, struct cnfa 
*, struct colormap 
*, struct dfa 
*, struct dfa 
*, chr 
**)); 
 130 static VOID zapsubs 
_ANSI_ARGS_((regmatch_t 
*, size_t)); 
 131 static VOID zapmem 
_ANSI_ARGS_((struct vars 
*, struct subre 
*)); 
 132 static VOID subset 
_ANSI_ARGS_((struct vars 
*, struct subre 
*, chr 
*, chr 
*)); 
 133 static int dissect 
_ANSI_ARGS_((struct vars 
*, struct subre 
*, chr 
*, chr 
*)); 
 134 static int condissect 
_ANSI_ARGS_((struct vars 
*, struct subre 
*, chr 
*, chr 
*)); 
 135 static int altdissect 
_ANSI_ARGS_((struct vars 
*, struct subre 
*, chr 
*, chr 
*)); 
 136 static int cdissect 
_ANSI_ARGS_((struct vars 
*, struct subre 
*, chr 
*, chr 
*)); 
 137 static int ccondissect 
_ANSI_ARGS_((struct vars 
*, struct subre 
*, chr 
*, chr 
*)); 
 138 static int crevdissect 
_ANSI_ARGS_((struct vars 
*, struct subre 
*, chr 
*, chr 
*)); 
 139 static int cbrdissect 
_ANSI_ARGS_((struct vars 
*, struct subre 
*, chr 
*, chr 
*)); 
 140 static int caltdissect 
_ANSI_ARGS_((struct vars 
*, struct subre 
*, chr 
*, chr 
*)); 
 141 /* === rege_dfa.c === */ 
 142 static chr 
*longest 
_ANSI_ARGS_((struct vars 
*, struct dfa 
*, chr 
*, chr 
*, int *)); 
 143 static chr 
*shortest 
_ANSI_ARGS_((struct vars 
*, struct dfa 
*, chr 
*, chr 
*, chr 
*, chr 
**, int *)); 
 144 static chr 
*lastcold 
_ANSI_ARGS_((struct vars 
*, struct dfa 
*)); 
 145 static struct dfa 
*newdfa 
_ANSI_ARGS_((struct vars 
*, struct cnfa 
*, struct colormap 
*, struct smalldfa 
*)); 
 146 static VOID freedfa 
_ANSI_ARGS_((struct dfa 
*)); 
 147 static unsigned hash 
_ANSI_ARGS_((unsigned *, int)); 
 148 static struct sset 
*initialize 
_ANSI_ARGS_((struct vars 
*, struct dfa 
*, chr 
*)); 
 149 static struct sset 
*miss 
_ANSI_ARGS_((struct vars 
*, struct dfa 
*, struct sset 
*, pcolor
, chr 
*, chr 
*)); 
 150 static int lacon 
_ANSI_ARGS_((struct vars 
*, struct cnfa 
*, chr 
*, pcolor
)); 
 151 static struct sset 
*getvacant 
_ANSI_ARGS_((struct vars 
*, struct dfa 
*, chr 
*, chr 
*)); 
 152 static struct sset 
*pickss 
_ANSI_ARGS_((struct vars 
*, struct dfa 
*, chr 
*, chr 
*)); 
 153 /* automatically gathered by fwd; do not hand-edit */ 
 154 /* =====^!^===== end forwards =====^!^===== */ 
 159  - exec - match regular expression 
 160  ^ int exec(regex_t *, CONST chr *, size_t, rm_detail_t *, 
 161  ^                                      size_t, regmatch_t [], int); 
 164 exec(re
, string
, len
, details
, nmatch
, pmatch
, flags
) 
 168 rm_detail_t 
*details
; 
 174         register struct vars 
*v 
= &var
; 
 179         regmatch_t mat
[LOCALMAT
]; 
 181         regoff_t mem
[LOCALMEM
]; 
 184         if (re 
== NULL 
|| string 
== NULL 
|| re
->re_magic 
!= REMAGIC
) 
 186         if (re
->re_csize 
!= sizeof(chr
)) 
 191         v
->g 
= (struct guts 
*)re
->re_guts
; 
 192         if ((v
->g
->cflags
®_EXPECT
) && details 
== NULL
) 
 194         if (v
->g
->info
®_UIMPOSSIBLE
) 
 196         backref 
= (v
->g
->info
®_UBACKREF
) ? 1 : 0; 
 198         if (v
->g
->cflags
®_NOSUB
) 
 199                 nmatch 
= 0;             /* override client */ 
 203                 if (v
->g
->nsub 
+ 1 <= LOCALMAT
) 
 206                         v
->pmatch 
= (regmatch_t 
*)MALLOC((v
->g
->nsub 
+ 1) * 
 208                 if (v
->pmatch 
== NULL
) 
 210                 v
->nmatch 
= v
->g
->nsub 
+ 1; 
 213         v
->details 
= details
; 
 214         v
->start 
= (chr 
*)string
; 
 215         v
->stop 
= (chr 
*)string 
+ len
; 
 218                 /* need retry memory */ 
 219                 assert(v
->g
->ntree 
>= 0); 
 220                 n 
= (size_t)v
->g
->ntree
; 
 224                         v
->mem 
= (regoff_t 
*)MALLOC(n
*sizeof(regoff_t
)); 
 225                 if (v
->mem 
== NULL
) { 
 226                         if (v
->pmatch 
!= pmatch 
&& v
->pmatch 
!= mat
) 
 234         assert(v
->g
->tree 
!= NULL
); 
 236                 st 
= cfind(v
, &v
->g
->tree
->cnfa
, &v
->g
->cmap
); 
 238                 st 
= find(v
, &v
->g
->tree
->cnfa
, &v
->g
->cmap
); 
 240         /* copy (portion of) match vector over if necessary */ 
 241         if (st 
== REG_OKAY 
&& v
->pmatch 
!= pmatch 
&& nmatch 
> 0) { 
 242                 zapsubs(pmatch
, nmatch
); 
 243                 n 
= (nmatch 
< v
->nmatch
) ? nmatch 
: v
->nmatch
; 
 244                 memcpy(VS(pmatch
), VS(v
->pmatch
), n
*sizeof(regmatch_t
)); 
 248         if (v
->pmatch 
!= pmatch 
&& v
->pmatch 
!= mat
) 
 250         if (v
->mem 
!= NULL 
&& v
->mem 
!= mem
) 
 256  - find - find a match for the main NFA (no-complications case) 
 257  ^ static int find(struct vars *, struct cnfa *, struct colormap *); 
 270         chr 
*open
;              /* open and close of range of possible starts */ 
 273         int shorter 
= (v
->g
->tree
->flags
&SHORTER
) ? 1 : 0; 
 275         /* first, a shot with the search RE */ 
 276         s 
= newdfa(v
, &v
->g
->search
, cm
, &v
->dfa1
); 
 277         assert(!(ISERR() && s 
!= NULL
)); 
 279         MDEBUG(("\nsearch at %ld\n", LOFF(v
->start
))); 
 281         close 
= shortest(v
, s
, v
->start
, v
->start
, v
->stop
, &cold
, (int *)NULL
); 
 284         if (v
->g
->cflags
®_EXPECT
) { 
 285                 assert(v
->details 
!= NULL
); 
 287                         v
->details
->rm_extend
.rm_so 
= OFF(cold
); 
 289                         v
->details
->rm_extend
.rm_so 
= OFF(v
->stop
); 
 290                 v
->details
->rm_extend
.rm_eo 
= OFF(v
->stop
);     /* unknown */ 
 292         if (close 
== NULL
)              /* not found */ 
 294         if (v
->nmatch 
== 0)             /* found, don't need exact location */ 
 297         /* find starting point and match */ 
 298         assert(cold 
!= NULL
); 
 301         MDEBUG(("between %ld and %ld\n", LOFF(open
), LOFF(close
))); 
 302         d 
= newdfa(v
, cnfa
, cm
, &v
->dfa1
); 
 303         assert(!(ISERR() && d 
!= NULL
)); 
 305         for (begin 
= open
; begin 
<= close
; begin
++) { 
 306                 MDEBUG(("\nfind trying at %ld\n", LOFF(begin
))); 
 308                         end 
= shortest(v
, d
, begin
, begin
, v
->stop
, 
 309                                                         (chr 
**)NULL
, &hitend
); 
 311                         end 
= longest(v
, d
, begin
, v
->stop
, &hitend
); 
 313                 if (hitend 
&& cold 
== NULL
) 
 316                         break;          /* NOTE BREAK OUT */ 
 318         assert(end 
!= NULL
);            /* search RE succeeded so loop should */ 
 321         /* and pin down details */ 
 322         assert(v
->nmatch 
> 0); 
 323         v
->pmatch
[0].rm_so 
= OFF(begin
); 
 324         v
->pmatch
[0].rm_eo 
= OFF(end
); 
 325         if (v
->g
->cflags
®_EXPECT
) { 
 327                         v
->details
->rm_extend
.rm_so 
= OFF(cold
); 
 329                         v
->details
->rm_extend
.rm_so 
= OFF(v
->stop
); 
 330                 v
->details
->rm_extend
.rm_eo 
= OFF(v
->stop
);     /* unknown */ 
 332         if (v
->nmatch 
== 1)             /* no need for submatches */ 
 336         zapsubs(v
->pmatch
, v
->nmatch
); 
 337         return dissect(v
, v
->g
->tree
, begin
, end
); 
 341  - cfind - find a match for the main NFA (with complications) 
 342  ^ static int cfind(struct vars *, struct cnfa *, struct colormap *); 
 355         s 
= newdfa(v
, &v
->g
->search
, cm
, &v
->dfa1
); 
 357         d 
= newdfa(v
, cnfa
, cm
, &v
->dfa2
); 
 364         ret 
= cfindloop(v
, cnfa
, cm
, d
, s
, &cold
); 
 369         if (v
->g
->cflags
®_EXPECT
) { 
 370                 assert(v
->details 
!= NULL
); 
 372                         v
->details
->rm_extend
.rm_so 
= OFF(cold
); 
 374                         v
->details
->rm_extend
.rm_so 
= OFF(v
->stop
); 
 375                 v
->details
->rm_extend
.rm_eo 
= OFF(v
->stop
);     /* unknown */ 
 381  - cfindloop - the heart of cfind 
 382  ^ static int cfindloop(struct vars *, struct cnfa *, struct colormap *, 
 383  ^      struct dfa *, struct dfa *, chr **); 
 386 cfindloop(v
, cnfa
, cm
, d
, s
, coldp
) 
 392 chr 
**coldp
;                    /* where to put coldstart pointer */ 
 397         chr 
*open
;              /* open and close of range of possible starts */ 
 402         int shorter 
= v
->g
->tree
->flags
&SHORTER
; 
 405         assert(d 
!= NULL 
&& s 
!= NULL
); 
 409                 MDEBUG(("\ncsearch at %ld\n", LOFF(close
))); 
 410                 close 
= shortest(v
, s
, close
, close
, v
->stop
, &cold
, (int *)NULL
); 
 412                         break;                          /* NOTE BREAK */ 
 413                 assert(cold 
!= NULL
); 
 416                 MDEBUG(("cbetween %ld and %ld\n", LOFF(open
), LOFF(close
))); 
 417                 for (begin 
= open
; begin 
<= close
; begin
++) { 
 418                         MDEBUG(("\ncfind trying at %ld\n", LOFF(begin
))); 
 423                                         end 
= shortest(v
, d
, begin
, estart
, 
 424                                                 estop
, (chr 
**)NULL
, &hitend
); 
 426                                         end 
= longest(v
, d
, begin
, estop
, 
 428                                 if (hitend 
&& cold 
== NULL
) 
 431                                         break;          /* NOTE BREAK OUT */ 
 432                                 MDEBUG(("tentative end %ld\n", LOFF(end
))); 
 433                                 zapsubs(v
->pmatch
, v
->nmatch
); 
 434                                 zapmem(v
, v
->g
->tree
); 
 435                                 er 
= cdissect(v
, v
->g
->tree
, begin
, end
); 
 436                                 if (er 
== REG_OKAY
) { 
 438                                                 v
->pmatch
[0].rm_so 
= OFF(begin
); 
 439                                                 v
->pmatch
[0].rm_eo 
= OFF(end
); 
 444                                 if (er 
!= REG_NOMATCH
) { 
 448                                 if ((shorter
) ? end 
== estop 
: end 
== begin
) { 
 449                                         /* no point in trying again */ 
 453                                 /* go around and try again */ 
 460         } while (close 
< v
->stop
); 
 467  - zapsubs - initialize the subexpression matches to "no match" 
 468  ^ static VOID zapsubs(regmatch_t *, size_t); 
 477         for (i 
= n
-1; i 
> 0; i
--) { 
 484  - zapmem - initialize the retry memory of a subtree to zeros 
 485  ^ static VOID zapmem(struct vars *, struct subre *); 
 495         assert(v
->mem 
!= NULL
); 
 496         v
->mem
[t
->retry
] = 0; 
 498                 assert(t
->subno 
> 0); 
 499                 v
->pmatch
[t
->subno
].rm_so 
= -1; 
 500                 v
->pmatch
[t
->subno
].rm_eo 
= -1; 
 505         if (t
->right 
!= NULL
) 
 510  - subset - set any subexpression relevant to a successful subre 
 511  ^ static VOID subset(struct vars *, struct subre *, chr *, chr *); 
 514 subset(v
, sub
, begin
, end
) 
 523         if ((size_t)n 
>= v
->nmatch
) 
 526         MDEBUG(("setting %d\n", n
)); 
 527         v
->pmatch
[n
].rm_so 
= OFF(begin
); 
 528         v
->pmatch
[n
].rm_eo 
= OFF(end
); 
 532  - dissect - determine subexpression matches (uncomplicated case) 
 533  ^ static int dissect(struct vars *, struct subre *, chr *, chr *); 
 535 static int                      /* regexec return code */ 
 536 dissect(v
, t
, begin
, end
) 
 539 chr 
*begin
;                     /* beginning of relevant substring */ 
 540 chr 
*end
;                       /* end of same */ 
 543         MDEBUG(("dissect %ld-%ld\n", LOFF(begin
), LOFF(end
))); 
 546         case '=':               /* terminal node */ 
 547                 assert(t
->left 
== NULL 
&& t
->right 
== NULL
); 
 548                 return REG_OKAY
;        /* no action, parent did the work */ 
 550         case '|':               /* alternation */ 
 551                 assert(t
->left 
!= NULL
); 
 552                 return altdissect(v
, t
, begin
, end
); 
 554         case 'b':               /* back ref -- shouldn't be calling us! */ 
 557         case '.':               /* concatenation */ 
 558                 assert(t
->left 
!= NULL 
&& t
->right 
!= NULL
); 
 559                 return condissect(v
, t
, begin
, end
); 
 561         case '(':               /* capturing */ 
 562                 assert(t
->left 
!= NULL 
&& t
->right 
== NULL
); 
 563                 assert(t
->subno 
> 0); 
 564                 subset(v
, t
, begin
, end
); 
 565                 return dissect(v
, t
->left
, begin
, end
); 
 574  - condissect - determine concatenation subexpression matches (uncomplicated) 
 575  ^ static int condissect(struct vars *, struct subre *, chr *, chr *); 
 577 static int                      /* regexec return code */ 
 578 condissect(v
, t
, begin
, end
) 
 581 chr 
*begin
;                     /* beginning of relevant substring */ 
 582 chr 
*end
;                       /* end of same */ 
 588         int shorter 
= (t
->left
->flags
&SHORTER
) ? 1 : 0; 
 589         chr 
*stop 
= (shorter
) ? end 
: begin
; 
 591         assert(t
->op 
== '.'); 
 592         assert(t
->left 
!= NULL 
&& t
->left
->cnfa
.nstates 
> 0); 
 593         assert(t
->right 
!= NULL 
&& t
->right
->cnfa
.nstates 
> 0); 
 595         d 
= newdfa(v
, &t
->left
->cnfa
, &v
->g
->cmap
, &v
->dfa1
); 
 597         d2 
= newdfa(v
, &t
->right
->cnfa
, &v
->g
->cmap
, &v
->dfa2
); 
 604         /* pick a tentative midpoint */ 
 606                 mid 
= shortest(v
, d
, begin
, begin
, end
, (chr 
**)NULL
, 
 609                 mid 
= longest(v
, d
, begin
, end
, (int *)NULL
); 
 615         MDEBUG(("tentative midpoint %ld\n", LOFF(mid
))); 
 617         /* iterate until satisfaction or failure */ 
 618         while (longest(v
, d2
, mid
, end
, (int *)NULL
) != end
) { 
 619                 /* that midpoint didn't work, find a new one */ 
 621                         /* all possibilities exhausted! */ 
 622                         MDEBUG(("no midpoint!\n")); 
 628                         mid 
= shortest(v
, d
, begin
, mid
+1, end
, (chr 
**)NULL
, 
 631                         mid 
= longest(v
, d
, begin
, mid
-1, (int *)NULL
); 
 633                         /* failed to find a new one! */ 
 634                         MDEBUG(("failed midpoint!\n")); 
 639                 MDEBUG(("new midpoint %ld\n", LOFF(mid
))); 
 643         MDEBUG(("successful\n")); 
 646         i 
= dissect(v
, t
->left
, begin
, mid
); 
 649         return dissect(v
, t
->right
, mid
, end
); 
 653  - altdissect - determine alternative subexpression matches (uncomplicated) 
 654  ^ static int altdissect(struct vars *, struct subre *, chr *, chr *); 
 656 static int                      /* regexec return code */ 
 657 altdissect(v
, t
, begin
, end
) 
 660 chr 
*begin
;                     /* beginning of relevant substring */ 
 661 chr 
*end
;                       /* end of same */ 
 667         assert(t
->op 
== '|'); 
 669         for (i 
= 0; t 
!= NULL
; t 
= t
->right
, i
++) { 
 670                 MDEBUG(("trying %dth\n", i
)); 
 671                 assert(t
->left 
!= NULL 
&& t
->left
->cnfa
.nstates 
> 0); 
 672                 d 
= newdfa(v
, &t
->left
->cnfa
, &v
->g
->cmap
, &v
->dfa1
); 
 675                 if (longest(v
, d
, begin
, end
, (int *)NULL
) == end
) { 
 676                         MDEBUG(("success\n")); 
 678                         return dissect(v
, t
->left
, begin
, end
); 
 682         return REG_ASSERT
;      /* none of them matched?!? */ 
 686  - cdissect - determine subexpression matches (with complications) 
 687  * The retry memory stores the offset of the trial midpoint from begin,  
 688  * plus 1 so that 0 uniquely means "clean slate". 
 689  ^ static int cdissect(struct vars *, struct subre *, chr *, chr *); 
 691 static int                      /* regexec return code */ 
 692 cdissect(v
, t
, begin
, end
) 
 695 chr 
*begin
;                     /* beginning of relevant substring */ 
 696 chr 
*end
;                       /* end of same */ 
 701         MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin
), LOFF(end
), t
->op
)); 
 704         case '=':               /* terminal node */ 
 705                 assert(t
->left 
== NULL 
&& t
->right 
== NULL
); 
 706                 return REG_OKAY
;        /* no action, parent did the work */ 
 708         case '|':               /* alternation */ 
 709                 assert(t
->left 
!= NULL
); 
 710                 return caltdissect(v
, t
, begin
, end
); 
 712         case 'b':               /* back ref -- shouldn't be calling us! */ 
 713                 assert(t
->left 
== NULL 
&& t
->right 
== NULL
); 
 714                 return cbrdissect(v
, t
, begin
, end
); 
 716         case '.':               /* concatenation */ 
 717                 assert(t
->left 
!= NULL 
&& t
->right 
!= NULL
); 
 718                 return ccondissect(v
, t
, begin
, end
); 
 720         case '(':               /* capturing */ 
 721                 assert(t
->left 
!= NULL 
&& t
->right 
== NULL
); 
 722                 assert(t
->subno 
> 0); 
 723                 er 
= cdissect(v
, t
->left
, begin
, end
); 
 725                         subset(v
, t
, begin
, end
); 
 735  - ccondissect - concatenation subexpression matches (with complications) 
 736  * The retry memory stores the offset of the trial midpoint from begin,  
 737  * plus 1 so that 0 uniquely means "clean slate". 
 738  ^ static int ccondissect(struct vars *, struct subre *, chr *, chr *); 
 740 static int                      /* regexec return code */ 
 741 ccondissect(v
, t
, begin
, end
) 
 744 chr 
*begin
;                     /* beginning of relevant substring */ 
 745 chr 
*end
;                       /* end of same */ 
 752         assert(t
->op 
== '.'); 
 753         assert(t
->left 
!= NULL 
&& t
->left
->cnfa
.nstates 
> 0); 
 754         assert(t
->right 
!= NULL 
&& t
->right
->cnfa
.nstates 
> 0); 
 756         if (t
->left
->flags
&SHORTER
)             /* reverse scan */ 
 757                 return crevdissect(v
, t
, begin
, end
); 
 759         d 
= newdfa(v
, &t
->left
->cnfa
, &v
->g
->cmap
, DOMALLOC
); 
 762         d2 
= newdfa(v
, &t
->right
->cnfa
, &v
->g
->cmap
, DOMALLOC
); 
 767         MDEBUG(("cconcat %d\n", t
->retry
)); 
 769         /* pick a tentative midpoint */ 
 770         if (v
->mem
[t
->retry
] == 0) { 
 771                 mid 
= longest(v
, d
, begin
, end
, (int *)NULL
); 
 777                 MDEBUG(("tentative midpoint %ld\n", LOFF(mid
))); 
 778                 v
->mem
[t
->retry
] = (mid 
- begin
) + 1; 
 780                 mid 
= begin 
+ (v
->mem
[t
->retry
] - 1); 
 781                 MDEBUG(("working midpoint %ld\n", LOFF(mid
))); 
 784         /* iterate until satisfaction or failure */ 
 786                 /* try this midpoint on for size */ 
 787                 er 
= cdissect(v
, t
->left
, begin
, mid
); 
 788                 if (er 
== REG_OKAY 
&& 
 789                                 longest(v
, d2
, mid
, end
, (int *)NULL
) == end 
&& 
 790                                 (er 
= cdissect(v
, t
->right
, mid
, end
)) ==  
 792                         break;                  /* NOTE BREAK OUT */ 
 793                 if (er 
!= REG_OKAY 
&& er 
!= REG_NOMATCH
) { 
 799                 /* that midpoint didn't work, find a new one */ 
 801                         /* all possibilities exhausted */ 
 802                         MDEBUG(("%d no midpoint\n", t
->retry
)); 
 807                 mid 
= longest(v
, d
, begin
, mid
-1, (int *)NULL
); 
 809                         /* failed to find a new one */ 
 810                         MDEBUG(("%d failed midpoint\n", t
->retry
)); 
 815                 MDEBUG(("%d: new midpoint %ld\n", t
->retry
, LOFF(mid
))); 
 816                 v
->mem
[t
->retry
] = (mid 
- begin
) + 1; 
 822         MDEBUG(("successful\n")); 
 829  - crevdissect - determine backref shortest-first subexpression matches 
 830  * The retry memory stores the offset of the trial midpoint from begin,  
 831  * plus 1 so that 0 uniquely means "clean slate". 
 832  ^ static int crevdissect(struct vars *, struct subre *, chr *, chr *); 
 834 static int                      /* regexec return code */ 
 835 crevdissect(v
, t
, begin
, end
) 
 838 chr 
*begin
;                     /* beginning of relevant substring */ 
 839 chr 
*end
;                       /* end of same */ 
 846         assert(t
->op 
== '.'); 
 847         assert(t
->left 
!= NULL 
&& t
->left
->cnfa
.nstates 
> 0); 
 848         assert(t
->right 
!= NULL 
&& t
->right
->cnfa
.nstates 
> 0); 
 849         assert(t
->left
->flags
&SHORTER
); 
 851         /* concatenation -- need to split the substring between parts */ 
 852         d 
= newdfa(v
, &t
->left
->cnfa
, &v
->g
->cmap
, DOMALLOC
); 
 855         d2 
= newdfa(v
, &t
->right
->cnfa
, &v
->g
->cmap
, DOMALLOC
); 
 860         MDEBUG(("crev %d\n", t
->retry
)); 
 862         /* pick a tentative midpoint */ 
 863         if (v
->mem
[t
->retry
] == 0) { 
 864                 mid 
= shortest(v
, d
, begin
, begin
, end
, (chr 
**)NULL
, (int *)NULL
); 
 870                 MDEBUG(("tentative midpoint %ld\n", LOFF(mid
))); 
 871                 v
->mem
[t
->retry
] = (mid 
- begin
) + 1; 
 873                 mid 
= begin 
+ (v
->mem
[t
->retry
] - 1); 
 874                 MDEBUG(("working midpoint %ld\n", LOFF(mid
))); 
 877         /* iterate until satisfaction or failure */ 
 879                 /* try this midpoint on for size */ 
 880                 er 
= cdissect(v
, t
->left
, begin
, mid
); 
 881                 if (er 
== REG_OKAY 
&& 
 882                                 longest(v
, d2
, mid
, end
, (int *)NULL
) == end 
&& 
 883                                 (er 
= cdissect(v
, t
->right
, mid
, end
)) ==  
 885                         break;                  /* NOTE BREAK OUT */ 
 886                 if (er 
!= REG_OKAY 
&& er 
!= REG_NOMATCH
) { 
 892                 /* that midpoint didn't work, find a new one */ 
 894                         /* all possibilities exhausted */ 
 895                         MDEBUG(("%d no midpoint\n", t
->retry
)); 
 900                 mid 
= shortest(v
, d
, begin
, mid
+1, end
, (chr 
**)NULL
, (int *)NULL
); 
 902                         /* failed to find a new one */ 
 903                         MDEBUG(("%d failed midpoint\n", t
->retry
)); 
 908                 MDEBUG(("%d: new midpoint %ld\n", t
->retry
, LOFF(mid
))); 
 909                 v
->mem
[t
->retry
] = (mid 
- begin
) + 1; 
 915         MDEBUG(("successful\n")); 
 922  - cbrdissect - determine backref subexpression matches 
 923  ^ static int cbrdissect(struct vars *, struct subre *, chr *, chr *); 
 925 static int                      /* regexec return code */ 
 926 cbrdissect(v
, t
, begin
, end
) 
 929 chr 
*begin
;                     /* beginning of relevant substring */ 
 930 chr 
*end
;                       /* end of same */ 
 942         assert(t
->op 
== 'b'); 
 944         assert((size_t)n 
< v
->nmatch
); 
 946         MDEBUG(("cbackref n%d %d{%d-%d}\n", t
->retry
, n
, min
, max
)); 
 948         if (v
->pmatch
[n
].rm_so 
== -1) 
 950         paren 
= v
->start 
+ v
->pmatch
[n
].rm_so
; 
 951         len 
= v
->pmatch
[n
].rm_eo 
- v
->pmatch
[n
].rm_so
; 
 953         /* no room to maneuver -- retries are pointless */ 
 954         if (v
->mem
[t
->retry
]) 
 956         v
->mem
[t
->retry
] = 1; 
 958         /* special-case zero-length string */ 
 965         /* and too-short string */ 
 966         assert(end 
>= begin
); 
 967         if ((size_t)(end 
- begin
) < len
) 
 971         /* count occurrences */ 
 973         for (p 
= begin
; p 
<= stop 
&& (i 
< max 
|| max 
== INFINITY
); p 
+= len
) { 
 974                 if ((*v
->g
->compare
)(paren
, p
, len
) != 0) 
 978         MDEBUG(("cbackref found %d\n", i
)); 
 980         /* and sort it out */ 
 981         if (p 
!= end
)                   /* didn't consume all of it */ 
 983         if (min 
<= i 
&& (i 
<= max 
|| max 
== INFINITY
)) 
 985         return REG_NOMATCH
;             /* out of range */ 
 989  - caltdissect - determine alternative subexpression matches (w. complications) 
 990  ^ static int caltdissect(struct vars *, struct subre *, chr *, chr *); 
 992 static int                      /* regexec return code */ 
 993 caltdissect(v
, t
, begin
, end
) 
 996 chr 
*begin
;                     /* beginning of relevant substring */ 
 997 chr 
*end
;                       /* end of same */ 
1001 #       define  UNTRIED 0       /* not yet tried at all */ 
1002 #       define  TRYING  1       /* top matched, trying submatches */ 
1003 #       define  TRIED   2       /* top didn't match or submatches exhausted */ 
1007         assert(t
->op 
== '|'); 
1008         if (v
->mem
[t
->retry
] == TRIED
) 
1009                 return caltdissect(v
, t
->right
, begin
, end
); 
1011         MDEBUG(("calt n%d\n", t
->retry
)); 
1012         assert(t
->left 
!= NULL
); 
1014         if (v
->mem
[t
->retry
] == UNTRIED
) { 
1015                 d 
= newdfa(v
, &t
->left
->cnfa
, &v
->g
->cmap
, DOMALLOC
); 
1018                 if (longest(v
, d
, begin
, end
, (int *)NULL
) != end
) { 
1020                         v
->mem
[t
->retry
] = TRIED
; 
1021                         return caltdissect(v
, t
->right
, begin
, end
); 
1024                 MDEBUG(("calt matched\n")); 
1025                 v
->mem
[t
->retry
] = TRYING
; 
1028         er 
= cdissect(v
, t
->left
, begin
, end
); 
1029         if (er 
!= REG_NOMATCH
) 
1032         v
->mem
[t
->retry
] = TRIED
; 
1033         return caltdissect(v
, t
->right
, begin
, end
); 
1038 #include "rege_dfa.c"