]>
git.saurik.com Git - wxWidgets.git/blob - src/regex/regexec.c
   2  * re_*exec and friends - match REs 
   4  * Copyright (c) 1998, 1999 Henry Spencer.      All rights reserved. 
   6  * Development of this software was funded, in part, by Cray Research Inc., 
   7  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics 
   8  * Corporation, none of whom are responsible for the results.  The author 
  11  * Redistribution and use in source and binary forms -- with or without 
  12  * modification -- are permitted for any purpose, provided that 
  13  * redistributions in source form retain this entire copyright notice and 
  14  * indicate the origin and nature of any modifications. 
  16  * I'd appreciate being given credit for this package in the documentation 
  17  * of software which uses it, but that is not a requirement. 
  19  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, 
  20  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 
  21  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL 
  22  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 
  23  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
  24  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
  25  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
  26  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 
  27  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
  28  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
  30  * $Header: /projects/cvsroot/pgsql-server/src/backend/regex/regexec.c,v 1.23 2003/08/08 21:41:56 momjian Exp $ 
  38 /* lazy-DFA representation */ 
  40 {                                                               /* "pointer" to an outarc */ 
  47         unsigned   *states
;                     /* pointer to bitvector */ 
  48         unsigned        hash
;                   /* hash of bitvector */ 
  49 #define  HASH(bv, nw)    (((nw) == 1) ? *(bv) : hash(bv, nw)) 
  50 #define  HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \ 
  51                 memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0)) 
  53 #define  STARTER         01                     /* the initial state set */ 
  54 #define  POSTSTATE       02                     /* includes the goal state */ 
  55 #define  LOCKED          04                     /* locked in cache */ 
  56 #define  NOPROGRESS  010                /* zero-progress state set */ 
  57         struct arcp ins
;                        /* chain of inarcs pointing here */ 
  58         chr                
*lastseen
;           /* last entered on arrival here */ 
  59         struct sset 
**outs
;                     /* outarc vector indexed by color */ 
  60         struct arcp 
*inchain
;           /* chain-pointer vector for outarcs */ 
  65         int                     nssets
;                 /* size of cache */ 
  66         int                     nssused
;                /* how many entries occupied yet */ 
  67         int                     nstates
;                /* number of states */ 
  68         int                     ncolors
;                /* length of outarc and inchain vectors */ 
  69         int                     wordsper
;               /* length of state-set bitvectors */ 
  70         struct sset 
*ssets
;                     /* state-set cache */ 
  71         unsigned   *statesarea
;         /* bitvector storage */ 
  72         unsigned   *work
;                       /* pointer to work area within statesarea */ 
  73         struct sset 
**outsarea
;         /* outarc-vector storage */ 
  74         struct arcp 
*incarea
;           /* inchain storage */ 
  77         chr                
*lastpost
;           /* location of last cache-flushed success */ 
  78         chr                
*lastnopr
;           /* location of last cache-flushed 
  80         struct sset 
*search
;            /* replacement-search-pointer memory */ 
  81         int                     cptsmalloced
;   /* were the areas individually malloced? */ 
  82         char       *mallocarea
;         /* self, or master malloced area, or NULL */ 
  85 #define WORK    1                               /* number of work bitvectors needed */ 
  87 /* setup for non-malloc allocation for small cases */ 
  88 #define FEWSTATES       20                      /* must be less than UBITS */ 
  93         struct sset ssets
[FEWSTATES 
* 2]; 
  94         unsigned        statesarea
[FEWSTATES 
* 2 + WORK
]; 
  95         struct sset 
*outsarea
[FEWSTATES 
* 2 * FEWCOLORS
]; 
  96         struct arcp incarea
[FEWSTATES 
* 2 * FEWCOLORS
]; 
  99 #define DOMALLOC        ((struct smalldfa *)NULL)       /* force malloc */ 
 103 /* internal variables, bundled for easy passing around */ 
 108         int                     eflags
;                 /* copies of arguments */ 
 111         rm_detail_t 
*details
; 
 112         chr                
*start
;                      /* start of string */ 
 113         chr                
*stop
;                       /* just past end of string */ 
 114         int                     err
;                    /* error code if any (0 none) */ 
 115         regoff_t   
*mem
;                        /* memory vector for backtracking */ 
 116         struct smalldfa dfa1
; 
 117         struct smalldfa dfa2
; 
 120 #define VISERR(vv)      ((vv)->err != 0)        /* have we seen an error yet? */ 
 121 #define ISERR() VISERR(v) 
 122 #define VERR(vv,e)      (((vv)->err) ? (vv)->err : ((vv)->err = (e))) 
 123 #define ERR(e)  VERR(v, e)              /* record an error */ 
 124 #define NOERR() {if (ISERR()) return v->err;}   /* if error seen, return 
 126 #define OFF(p)  ((p) - v->start) 
 127 #define LOFF(p) ((long)OFF(p)) 
 132  * forward declarations 
 134 /* === regexec.c === */ 
 135 static int      find(struct vars 
*, struct cnfa 
*, struct colormap 
*); 
 136 static int      cfind(struct vars 
*, struct cnfa 
*, struct colormap 
*); 
 137 static int      cfindloop(struct vars 
*, struct cnfa 
*, struct colormap 
*, struct dfa 
*, struct dfa 
*, chr 
**); 
 138 static void zapsubs(regmatch_t 
*, size_t); 
 139 static void zapmem(struct vars 
*, struct subre 
*); 
 140 static void subset(struct vars 
*, struct subre 
*, chr 
*, chr 
*); 
 141 static int      dissect(struct vars 
*, struct subre 
*, chr 
*, chr 
*); 
 142 static int      condissect(struct vars 
*, struct subre 
*, chr 
*, chr 
*); 
 143 static int      altdissect(struct vars 
*, struct subre 
*, chr 
*, chr 
*); 
 144 static int      cdissect(struct vars 
*, struct subre 
*, chr 
*, chr 
*); 
 145 static int      ccondissect(struct vars 
*, struct subre 
*, chr 
*, chr 
*); 
 146 static int      crevdissect(struct vars 
*, struct subre 
*, chr 
*, chr 
*); 
 147 static int      cbrdissect(struct vars 
*, struct subre 
*, chr 
*, chr 
*); 
 148 static int      caltdissect(struct vars 
*, struct subre 
*, chr 
*, chr 
*); 
 150 /* === rege_dfa.c === */ 
 151 static chr 
*longest(struct vars 
*, struct dfa 
*, chr 
*, chr 
*, int *); 
 152 static chr 
*shortest(struct vars 
*, struct dfa 
*, chr 
*, chr 
*, chr 
*, chr 
**, int *); 
 153 static chr 
*lastcold(struct vars 
*, struct dfa 
*); 
 154 static struct dfa 
*newdfa(struct vars 
*, struct cnfa 
*, struct colormap 
*, struct smalldfa 
*); 
 155 static void freedfa(struct dfa 
*); 
 156 static unsigned hash(unsigned *, int); 
 157 static struct sset 
*initialize(struct vars 
*, struct dfa 
*, chr 
*); 
 158 static struct sset 
*miss(struct vars 
*, struct dfa 
*, struct sset 
*, pcolor
, chr 
*, chr 
*); 
 159 static int      lacon(struct vars 
*, struct cnfa 
*, chr 
*, pcolor
); 
 160 static struct sset 
*getvacant(struct vars 
*, struct dfa 
*, chr 
*, chr 
*); 
 161 static struct sset 
*pickss(struct vars 
*, struct dfa 
*, chr 
*, chr 
*); 
 165  * regexec - match regular expression 
 175     return wx_regexec(re
, string
, wx_strlen(string
), &det
, nmatch
, pmatch
, flags
); 
 178 wx_regexec(regex_t 
*re
, 
 181                    rm_detail_t 
*details
, 
 187         register struct vars 
*v 
= &var
; 
 193         regmatch_t      mat
[LOCALMAT
]; 
 196         regoff_t        mem
[LOCALMEM
]; 
 199         if (re 
== NULL 
|| string 
== NULL 
|| re
->re_magic 
!= REMAGIC
) 
 201         if (re
->re_csize 
!= sizeof(chr
)) 
 206         v
->g 
= (struct guts 
*) re
->re_guts
; 
 207         if ((v
->g
->cflags 
& REG_EXPECT
) && details 
== NULL
) 
 209         if (v
->g
->info 
& REG_UIMPOSSIBLE
) 
 211         backref 
= (v
->g
->info 
& REG_UBACKREF
) ? 1 : 0; 
 213         if (v
->g
->cflags 
& REG_NOSUB
) 
 214                 nmatch 
= 0;                             /* override client */ 
 219                 if (v
->g
->nsub 
+ 1 <= LOCALMAT
) 
 222                         v
->pmatch 
= (regmatch_t 
*) MALLOC((v
->g
->nsub 
+ 1) * 
 224                 if (v
->pmatch 
== NULL
) 
 226                 v
->nmatch 
= v
->g
->nsub 
+ 1; 
 230         v
->details 
= details
; 
 231         v
->start 
= (chr 
*) string
; 
 232         v
->stop 
= (chr 
*) string 
+ len
; 
 236                 /* need retry memory */ 
 237                 assert(v
->g
->ntree 
>= 0); 
 238                 n 
= (size_t) v
->g
->ntree
; 
 242                         v
->mem 
= (regoff_t 
*) MALLOC(n 
* sizeof(regoff_t
)); 
 245                         if (v
->pmatch 
!= pmatch 
&& v
->pmatch 
!= mat
) 
 254         assert(v
->g
->tree 
!= NULL
); 
 256                 st 
= cfind(v
, &v
->g
->tree
->cnfa
, &v
->g
->cmap
); 
 258                 st 
= find(v
, &v
->g
->tree
->cnfa
, &v
->g
->cmap
); 
 260         /* copy (portion of) match vector over if necessary */ 
 261         if (st 
== REG_OKAY 
&& v
->pmatch 
!= pmatch 
&& nmatch 
> 0) 
 263                 zapsubs(pmatch
, nmatch
); 
 264                 n 
= (nmatch 
< v
->nmatch
) ? nmatch 
: v
->nmatch
; 
 265                 memcpy(VS(pmatch
), VS(v
->pmatch
), n 
* sizeof(regmatch_t
)); 
 269         if (v
->pmatch 
!= pmatch 
&& v
->pmatch 
!= mat
) 
 271         if (v
->mem 
!= NULL 
&& v
->mem 
!= mem
) 
 277  * find - find a match for the main NFA (no-complications case) 
 280 find(struct vars 
* v
, 
 282          struct colormap 
* cm
) 
 289         chr                
*open
;                       /* open and close of range of possible 
 293         int                     shorter 
= (v
->g
->tree
->flags 
& SHORTER
) ? 1 : 0; 
 295         /* first, a shot with the search RE */ 
 296         s 
= newdfa(v
, &v
->g
->search
, cm
, &v
->dfa1
); 
 297         assert(!(ISERR() && s 
!= NULL
)); 
 299         MDEBUG(("\nsearch at %ld\n", LOFF(v
->start
))); 
 301         close 
= shortest(v
, s
, v
->start
, v
->start
, v
->stop
, &cold
, (int *) NULL
); 
 304         if (v
->g
->cflags 
& REG_EXPECT
) 
 306                 assert(v
->details 
!= NULL
); 
 308                         v
->details
->rm_extend
.rm_so 
= OFF(cold
); 
 310                         v
->details
->rm_extend
.rm_so 
= OFF(v
->stop
); 
 311                 v
->details
->rm_extend
.rm_eo 
= OFF(v
->stop
);             /* unknown */ 
 313         if (close 
== NULL
)                      /* not found */ 
 315         if (v
->nmatch 
== 0)                     /* found, don't need exact location */ 
 318         /* find starting point and match */ 
 319         assert(cold 
!= NULL
); 
 322         MDEBUG(("between %ld and %ld\n", LOFF(open
), LOFF(close
))); 
 323         d 
= newdfa(v
, cnfa
, cm
, &v
->dfa1
); 
 324         assert(!(ISERR() && d 
!= NULL
)); 
 326         for (begin 
= open
; begin 
<= close
; begin
++) 
 328                 MDEBUG(("\nfind trying at %ld\n", LOFF(begin
))); 
 330                         end 
= shortest(v
, d
, begin
, begin
, v
->stop
, 
 331                                                    (chr 
**) NULL
, &hitend
); 
 333                         end 
= longest(v
, d
, begin
, v
->stop
, &hitend
); 
 335                 if (hitend 
&& cold 
== NULL
) 
 338                         break;                          /* NOTE BREAK OUT */ 
 340         assert(end 
!= NULL
);            /* search RE succeeded so loop should */ 
 343         /* and pin down details */ 
 344         assert(v
->nmatch 
> 0); 
 345         v
->pmatch
[0].rm_so 
= OFF(begin
); 
 346         v
->pmatch
[0].rm_eo 
= OFF(end
); 
 347         if (v
->g
->cflags 
& REG_EXPECT
) 
 350                         v
->details
->rm_extend
.rm_so 
= OFF(cold
); 
 352                         v
->details
->rm_extend
.rm_so 
= OFF(v
->stop
); 
 353                 v
->details
->rm_extend
.rm_eo 
= OFF(v
->stop
);             /* unknown */ 
 355         if (v
->nmatch 
== 1)                     /* no need for submatches */ 
 359         zapsubs(v
->pmatch
, v
->nmatch
); 
 360         return dissect(v
, v
->g
->tree
, begin
, end
); 
 364  * cfind - find a match for the main NFA (with complications) 
 367 cfind(struct vars 
* v
, 
 369           struct colormap 
* cm
) 
 376         s 
= newdfa(v
, &v
->g
->search
, cm
, &v
->dfa1
); 
 378         d 
= newdfa(v
, cnfa
, cm
, &v
->dfa2
); 
 386         ret 
= cfindloop(v
, cnfa
, cm
, d
, s
, &cold
); 
 391         if (v
->g
->cflags 
& REG_EXPECT
) 
 393                 assert(v
->details 
!= NULL
); 
 395                         v
->details
->rm_extend
.rm_so 
= OFF(cold
); 
 397                         v
->details
->rm_extend
.rm_so 
= OFF(v
->stop
); 
 398                 v
->details
->rm_extend
.rm_eo 
= OFF(v
->stop
);             /* unknown */ 
 404  * cfindloop - the heart of cfind 
 407 cfindloop(struct vars 
* v
, 
 409                   struct colormap 
* cm
, 
 412                   chr 
**coldp
)                  /* where to put coldstart pointer */ 
 417         chr                
*open
;                       /* open and close of range of possible 
 423         int                     shorter 
= v
->g
->tree
->flags 
& SHORTER
; 
 426         assert(d 
!= NULL 
&& s 
!= NULL
); 
 431                 MDEBUG(("\ncsearch at %ld\n", LOFF(close
))); 
 432                 close 
= shortest(v
, s
, close
, close
, v
->stop
, &cold
, (int *) NULL
); 
 434                         break;                          /* NOTE BREAK */ 
 435                 assert(cold 
!= NULL
); 
 438                 MDEBUG(("cbetween %ld and %ld\n", LOFF(open
), LOFF(close
))); 
 439                 for (begin 
= open
; begin 
<= close
; begin
++) 
 441                         MDEBUG(("\ncfind trying at %ld\n", LOFF(begin
))); 
 447                                         end 
= shortest(v
, d
, begin
, estart
, 
 448                                                                    estop
, (chr 
**) NULL
, &hitend
); 
 450                                         end 
= longest(v
, d
, begin
, estop
, 
 452                                 if (hitend 
&& cold 
== NULL
) 
 455                                         break;          /* NOTE BREAK OUT */ 
 456                                 MDEBUG(("tentative end %ld\n", LOFF(end
))); 
 457                                 zapsubs(v
->pmatch
, v
->nmatch
); 
 458                                 zapmem(v
, v
->g
->tree
); 
 459                                 er 
= cdissect(v
, v
->g
->tree
, begin
, end
); 
 464                                                 v
->pmatch
[0].rm_so 
= OFF(begin
); 
 465                                                 v
->pmatch
[0].rm_eo 
= OFF(end
); 
 470                                 if (er 
!= REG_NOMATCH
) 
 475                                 if ((shorter
) ? end 
== estop 
: end 
== begin
) 
 477                                         /* no point in trying again */ 
 481                                 /* go around and try again */ 
 488         } while (close 
< v
->stop
); 
 495  * zapsubs - initialize the subexpression matches to "no match" 
 498 zapsubs(regmatch_t 
*p
, 
 503         for (i 
= n 
- 1; i 
> 0; i
--) 
 511  * zapmem - initialize the retry memory of a subtree to zeros 
 514 zapmem(struct vars 
* v
, 
 520         assert(v
->mem 
!= NULL
); 
 521         v
->mem
[t
->retry
] = 0; 
 524                 assert(t
->subno 
> 0); 
 525                 v
->pmatch
[t
->subno
].rm_so 
= -1; 
 526                 v
->pmatch
[t
->subno
].rm_eo 
= -1; 
 531         if (t
->right 
!= NULL
) 
 536  * subset - set any subexpression relevant to a successful subre 
 539 subset(struct vars 
* v
, 
 547         if ((size_t) n 
>= v
->nmatch
) 
 550         MDEBUG(("setting %d\n", n
)); 
 551         v
->pmatch
[n
].rm_so 
= OFF(begin
); 
 552         v
->pmatch
[n
].rm_eo 
= OFF(end
); 
 556  * dissect - determine subexpression matches (uncomplicated case) 
 558 static int                                              /* regexec return code */ 
 559 dissect(struct vars 
* v
, 
 561                 chr 
*begin
,                             /* beginning of relevant substring */ 
 562                 chr 
*end
)                               /* end of same */ 
 565         MDEBUG(("dissect %ld-%ld\n", LOFF(begin
), LOFF(end
))); 
 569                 case '=':                               /* terminal node */ 
 570                         assert(t
->left 
== NULL 
&& t
->right 
== NULL
); 
 571                         return REG_OKAY
;        /* no action, parent did the work */ 
 573                 case '|':                               /* alternation */ 
 574                         assert(t
->left 
!= NULL
); 
 575                         return altdissect(v
, t
, begin
, end
); 
 577                 case 'b':                               /* back ref -- shouldn't be calling us! */ 
 580                 case '.':                               /* concatenation */ 
 581                         assert(t
->left 
!= NULL 
&& t
->right 
!= NULL
); 
 582                         return condissect(v
, t
, begin
, end
); 
 584                 case '(':                               /* capturing */ 
 585                         assert(t
->left 
!= NULL 
&& t
->right 
== NULL
); 
 586                         assert(t
->subno 
> 0); 
 587                         subset(v
, t
, begin
, end
); 
 588                         return dissect(v
, t
->left
, begin
, end
); 
 597  * condissect - determine concatenation subexpression matches (uncomplicated) 
 599 static int                                              /* regexec return code */ 
 600 condissect(struct vars 
* v
, 
 602                    chr 
*begin
,                  /* beginning of relevant substring */ 
 603                    chr 
*end
)                    /* end of same */ 
 609         int                     shorter 
= (t
->left
->flags 
& SHORTER
) ? 1 : 0; 
 610         chr                
*stop 
= (shorter
) ? end 
: begin
; 
 612         assert(t
->op 
== '.'); 
 613         assert(t
->left 
!= NULL 
&& t
->left
->cnfa
.nstates 
> 0); 
 614         assert(t
->right 
!= NULL 
&& t
->right
->cnfa
.nstates 
> 0); 
 616         d 
= newdfa(v
, &t
->left
->cnfa
, &v
->g
->cmap
, &v
->dfa1
); 
 618         d2 
= newdfa(v
, &t
->right
->cnfa
, &v
->g
->cmap
, &v
->dfa2
); 
 626         /* pick a tentative midpoint */ 
 628                 mid 
= shortest(v
, d
, begin
, begin
, end
, (chr 
**) NULL
, 
 631                 mid 
= longest(v
, d
, begin
, end
, (int *) NULL
); 
 638         MDEBUG(("tentative midpoint %ld\n", LOFF(mid
))); 
 640         /* iterate until satisfaction or failure */ 
 641         while (longest(v
, d2
, mid
, end
, (int *) NULL
) != end
) 
 643                 /* that midpoint didn't work, find a new one */ 
 646                         /* all possibilities exhausted! */ 
 647                         MDEBUG(("no midpoint!\n")); 
 653                         mid 
= shortest(v
, d
, begin
, mid 
+ 1, end
, (chr 
**) NULL
, 
 656                         mid 
= longest(v
, d
, begin
, mid 
- 1, (int *) NULL
); 
 659                         /* failed to find a new one! */ 
 660                         MDEBUG(("failed midpoint!\n")); 
 665                 MDEBUG(("new midpoint %ld\n", LOFF(mid
))); 
 669         MDEBUG(("successful\n")); 
 672         i 
= dissect(v
, t
->left
, begin
, mid
); 
 675         return dissect(v
, t
->right
, mid
, end
); 
 679  * altdissect - determine alternative subexpression matches (uncomplicated) 
 681 static int                                              /* regexec return code */ 
 682 altdissect(struct vars 
* v
, 
 684                    chr 
*begin
,                  /* beginning of relevant substring */ 
 685                    chr 
*end
)                    /* end of same */ 
 691         assert(t
->op 
== '|'); 
 693         for (i 
= 0; t 
!= NULL
; t 
= t
->right
, i
++) 
 695                 MDEBUG(("trying %dth\n", i
)); 
 696                 assert(t
->left 
!= NULL 
&& t
->left
->cnfa
.nstates 
> 0); 
 697                 d 
= newdfa(v
, &t
->left
->cnfa
, &v
->g
->cmap
, &v
->dfa1
); 
 700                 if (longest(v
, d
, begin
, end
, (int *) NULL
) == end
) 
 702                         MDEBUG(("success\n")); 
 704                         return dissect(v
, t
->left
, begin
, end
); 
 708         return REG_ASSERT
;                      /* none of them matched?!? */ 
 712  * cdissect - determine subexpression matches (with complications) 
 713  * The retry memory stores the offset of the trial midpoint from begin, 
 714  * plus 1 so that 0 uniquely means "clean slate". 
 716 static int                                              /* regexec return code */ 
 717 cdissect(struct vars 
* v
, 
 719                  chr 
*begin
,                    /* beginning of relevant substring */ 
 720                  chr 
*end
)                              /* end of same */ 
 725         MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin
), LOFF(end
), t
->op
)); 
 729                 case '=':                               /* terminal node */ 
 730                         assert(t
->left 
== NULL 
&& t
->right 
== NULL
); 
 731                         return REG_OKAY
;        /* no action, parent did the work */ 
 733                 case '|':                               /* alternation */ 
 734                         assert(t
->left 
!= NULL
); 
 735                         return caltdissect(v
, t
, begin
, end
); 
 737                 case 'b':                               /* back ref -- shouldn't be calling us! */ 
 738                         assert(t
->left 
== NULL 
&& t
->right 
== NULL
); 
 739                         return cbrdissect(v
, t
, begin
, end
); 
 741                 case '.':                               /* concatenation */ 
 742                         assert(t
->left 
!= NULL 
&& t
->right 
!= NULL
); 
 743                         return ccondissect(v
, t
, begin
, end
); 
 745                 case '(':                               /* capturing */ 
 746                         assert(t
->left 
!= NULL 
&& t
->right 
== NULL
); 
 747                         assert(t
->subno 
> 0); 
 748                         er 
= cdissect(v
, t
->left
, begin
, end
); 
 750                                 subset(v
, t
, begin
, end
); 
 760  * ccondissect - concatenation subexpression matches (with complications) 
 761  * The retry memory stores the offset of the trial midpoint from begin, 
 762  * plus 1 so that 0 uniquely means "clean slate". 
 764 static int                                              /* regexec return code */ 
 765 ccondissect(struct vars 
* v
, 
 767                         chr 
*begin
,                     /* beginning of relevant substring */ 
 768                         chr 
*end
)                       /* end of same */ 
 775         assert(t
->op 
== '.'); 
 776         assert(t
->left 
!= NULL 
&& t
->left
->cnfa
.nstates 
> 0); 
 777         assert(t
->right 
!= NULL 
&& t
->right
->cnfa
.nstates 
> 0); 
 779         if (t
->left
->flags 
& SHORTER
)           /* reverse scan */ 
 780                 return crevdissect(v
, t
, begin
, end
); 
 782         d 
= newdfa(v
, &t
->left
->cnfa
, &v
->g
->cmap
, DOMALLOC
); 
 785         d2 
= newdfa(v
, &t
->right
->cnfa
, &v
->g
->cmap
, DOMALLOC
); 
 791         MDEBUG(("cconcat %d\n", t
->retry
)); 
 793         /* pick a tentative midpoint */ 
 794         if (v
->mem
[t
->retry
] == 0) 
 796                 mid 
= longest(v
, d
, begin
, end
, (int *) NULL
); 
 803                 MDEBUG(("tentative midpoint %ld\n", LOFF(mid
))); 
 804                 v
->mem
[t
->retry
] = (mid 
- begin
) + 1; 
 808                 mid 
= begin 
+ (v
->mem
[t
->retry
] - 1); 
 809                 MDEBUG(("working midpoint %ld\n", LOFF(mid
))); 
 812         /* iterate until satisfaction or failure */ 
 815                 /* try this midpoint on for size */ 
 816                 er 
= cdissect(v
, t
->left
, begin
, mid
); 
 817                 if (er 
== REG_OKAY 
&& 
 818                         longest(v
, d2
, mid
, end
, (int *) NULL
) == end 
&& 
 819                         (er 
= cdissect(v
, t
->right
, mid
, end
)) == 
 821                         break;                          /* NOTE BREAK OUT */ 
 822                 if (er 
!= REG_OKAY 
&& er 
!= REG_NOMATCH
) 
 829                 /* that midpoint didn't work, find a new one */ 
 832                         /* all possibilities exhausted */ 
 833                         MDEBUG(("%d no midpoint\n", t
->retry
)); 
 838                 mid 
= longest(v
, d
, begin
, mid 
- 1, (int *) NULL
); 
 841                         /* failed to find a new one */ 
 842                         MDEBUG(("%d failed midpoint\n", t
->retry
)); 
 847                 MDEBUG(("%d: new midpoint %ld\n", t
->retry
, LOFF(mid
))); 
 848                 v
->mem
[t
->retry
] = (mid 
- begin
) + 1; 
 854         MDEBUG(("successful\n")); 
 861  * crevdissect - determine backref shortest-first subexpression matches 
 862  * The retry memory stores the offset of the trial midpoint from begin, 
 863  * plus 1 so that 0 uniquely means "clean slate". 
 865 static int                                              /* regexec return code */ 
 866 crevdissect(struct vars 
* v
, 
 868                         chr 
*begin
,                     /* beginning of relevant substring */ 
 869                         chr 
*end
)                       /* end of same */ 
 876         assert(t
->op 
== '.'); 
 877         assert(t
->left 
!= NULL 
&& t
->left
->cnfa
.nstates 
> 0); 
 878         assert(t
->right 
!= NULL 
&& t
->right
->cnfa
.nstates 
> 0); 
 879         assert(t
->left
->flags 
& SHORTER
); 
 881         /* concatenation -- need to split the substring between parts */ 
 882         d 
= newdfa(v
, &t
->left
->cnfa
, &v
->g
->cmap
, DOMALLOC
); 
 885         d2 
= newdfa(v
, &t
->right
->cnfa
, &v
->g
->cmap
, DOMALLOC
); 
 891         MDEBUG(("crev %d\n", t
->retry
)); 
 893         /* pick a tentative midpoint */ 
 894         if (v
->mem
[t
->retry
] == 0) 
 896                 mid 
= shortest(v
, d
, begin
, begin
, end
, (chr 
**) NULL
, (int *) NULL
); 
 903                 MDEBUG(("tentative midpoint %ld\n", LOFF(mid
))); 
 904                 v
->mem
[t
->retry
] = (mid 
- begin
) + 1; 
 908                 mid 
= begin 
+ (v
->mem
[t
->retry
] - 1); 
 909                 MDEBUG(("working midpoint %ld\n", LOFF(mid
))); 
 912         /* iterate until satisfaction or failure */ 
 915                 /* try this midpoint on for size */ 
 916                 er 
= cdissect(v
, t
->left
, begin
, mid
); 
 917                 if (er 
== REG_OKAY 
&& 
 918                         longest(v
, d2
, mid
, end
, (int *) NULL
) == end 
&& 
 919                         (er 
= cdissect(v
, t
->right
, mid
, end
)) == 
 921                         break;                          /* NOTE BREAK OUT */ 
 922                 if (er 
!= REG_OKAY 
&& er 
!= REG_NOMATCH
) 
 929                 /* that midpoint didn't work, find a new one */ 
 932                         /* all possibilities exhausted */ 
 933                         MDEBUG(("%d no midpoint\n", t
->retry
)); 
 938                 mid 
= shortest(v
, d
, begin
, mid 
+ 1, end
, (chr 
**) NULL
, (int *) NULL
); 
 941                         /* failed to find a new one */ 
 942                         MDEBUG(("%d failed midpoint\n", t
->retry
)); 
 947                 MDEBUG(("%d: new midpoint %ld\n", t
->retry
, LOFF(mid
))); 
 948                 v
->mem
[t
->retry
] = (mid 
- begin
) + 1; 
 954         MDEBUG(("successful\n")); 
 961  * cbrdissect - determine backref subexpression matches 
 963 static int                                              /* regexec return code */ 
 964 cbrdissect(struct vars 
* v
, 
 966                    chr 
*begin
,                  /* beginning of relevant substring */ 
 967                    chr 
*end
)                    /* end of same */ 
 979         assert(t
->op 
== 'b'); 
 981         assert((size_t) n 
< v
->nmatch
); 
 983         MDEBUG(("cbackref n%d %d{%d-%d}\n", t
->retry
, n
, min
, max
)); 
 985         if (v
->pmatch
[n
].rm_so 
== -1) 
 987         paren 
= v
->start 
+ v
->pmatch
[n
].rm_so
; 
 988         len 
= v
->pmatch
[n
].rm_eo 
- v
->pmatch
[n
].rm_so
; 
 990         /* no room to maneuver -- retries are pointless */ 
 991         if (v
->mem
[t
->retry
]) 
 993         v
->mem
[t
->retry
] = 1; 
 995         /* special-case zero-length string */ 
1003         /* and too-short string */ 
1004         assert(end 
>= begin
); 
1005         if ((size_t) (end 
- begin
) < len
) 
1009         /* count occurrences */ 
1011         for (p 
= begin
; p 
<= stop 
&& (i 
< max 
|| max 
== INFINITY
); p 
+= len
) 
1013                 if ((*v
->g
->compare
) (paren
, p
, len
) != 0) 
1017         MDEBUG(("cbackref found %d\n", i
)); 
1019         /* and sort it out */ 
1020         if (p 
!= end
)                           /* didn't consume all of it */ 
1022         if (min 
<= i 
&& (i 
<= max 
|| max 
== INFINITY
)) 
1024         return REG_NOMATCH
;                     /* out of range */ 
1028  * caltdissect - determine alternative subexpression matches (w. complications) 
1030 static int                                              /* regexec return code */ 
1031 caltdissect(struct vars 
* v
, 
1033                         chr 
*begin
,                     /* beginning of relevant substring */ 
1034                         chr 
*end
)                       /* end of same */ 
1039 #define  UNTRIED 0                              /* not yet tried at all */ 
1040 #define  TRYING  1                              /* top matched, trying submatches */ 
1041 #define  TRIED   2                              /* top didn't match or submatches 
1046         assert(t
->op 
== '|'); 
1047         if (v
->mem
[t
->retry
] == TRIED
) 
1048                 return caltdissect(v
, t
->right
, begin
, end
); 
1050         MDEBUG(("calt n%d\n", t
->retry
)); 
1051         assert(t
->left 
!= NULL
); 
1053         if (v
->mem
[t
->retry
] == UNTRIED
) 
1055                 d 
= newdfa(v
, &t
->left
->cnfa
, &v
->g
->cmap
, DOMALLOC
); 
1058                 if (longest(v
, d
, begin
, end
, (int *) NULL
) != end
) 
1061                         v
->mem
[t
->retry
] = TRIED
; 
1062                         return caltdissect(v
, t
->right
, begin
, end
); 
1065                 MDEBUG(("calt matched\n")); 
1066                 v
->mem
[t
->retry
] = TRYING
; 
1069         er 
= cdissect(v
, t
->left
, begin
, end
); 
1070         if (er 
!= REG_NOMATCH
) 
1073         v
->mem
[t
->retry
] = TRIED
; 
1074         return caltdissect(v
, t
->right
, begin
, end
); 
1079 #include "rege_dfa.c"