]>
git.saurik.com Git - wxWidgets.git/blob - src/regex/regexec.c
2 * re_*exec and friends - match REs
4 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
6 * Development of this software was funded, in part, by Cray Research Inc.,
7 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
8 * Corporation, none of whom are responsible for the results. The author
11 * Redistribution and use in source and binary forms -- with or without
12 * modification -- are permitted for any purpose, provided that
13 * redistributions in source form retain this entire copyright notice and
14 * indicate the origin and nature of any modifications.
16 * I'd appreciate being given credit for this package in the documentation
17 * of software which uses it, but that is not a requirement.
19 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
21 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
22 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * $Header: /projects/cvsroot/pgsql-server/src/backend/regex/regexec.c,v 1.23 2003/08/08 21:41:56 momjian Exp $
38 /* lazy-DFA representation */
40 { /* "pointer" to an outarc */
47 unsigned *states
; /* pointer to bitvector */
48 unsigned hash
; /* hash of bitvector */
49 #define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw))
50 #define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \
51 memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
53 #define STARTER 01 /* the initial state set */
54 #define POSTSTATE 02 /* includes the goal state */
55 #define LOCKED 04 /* locked in cache */
56 #define NOPROGRESS 010 /* zero-progress state set */
57 struct arcp ins
; /* chain of inarcs pointing here */
58 chr
*lastseen
; /* last entered on arrival here */
59 struct sset
**outs
; /* outarc vector indexed by color */
60 struct arcp
*inchain
; /* chain-pointer vector for outarcs */
65 int nssets
; /* size of cache */
66 int nssused
; /* how many entries occupied yet */
67 int nstates
; /* number of states */
68 int ncolors
; /* length of outarc and inchain vectors */
69 int wordsper
; /* length of state-set bitvectors */
70 struct sset
*ssets
; /* state-set cache */
71 unsigned *statesarea
; /* bitvector storage */
72 unsigned *work
; /* pointer to work area within statesarea */
73 struct sset
**outsarea
; /* outarc-vector storage */
74 struct arcp
*incarea
; /* inchain storage */
77 chr
*lastpost
; /* location of last cache-flushed success */
78 chr
*lastnopr
; /* location of last cache-flushed
80 struct sset
*search
; /* replacement-search-pointer memory */
81 int cptsmalloced
; /* were the areas individually malloced? */
82 char *mallocarea
; /* self, or master malloced area, or NULL */
85 #define WORK 1 /* number of work bitvectors needed */
87 /* setup for non-malloc allocation for small cases */
88 #define FEWSTATES 20 /* must be less than UBITS */
93 struct sset ssets
[FEWSTATES
* 2];
94 unsigned statesarea
[FEWSTATES
* 2 + WORK
];
95 struct sset
*outsarea
[FEWSTATES
* 2 * FEWCOLORS
];
96 struct arcp incarea
[FEWSTATES
* 2 * FEWCOLORS
];
99 #define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */
103 /* internal variables, bundled for easy passing around */
108 int eflags
; /* copies of arguments */
111 rm_detail_t
*details
;
112 chr
*start
; /* start of string */
113 chr
*stop
; /* just past end of string */
114 int err
; /* error code if any (0 none) */
115 regoff_t
*mem
; /* memory vector for backtracking */
116 struct smalldfa dfa1
;
117 struct smalldfa dfa2
;
120 #define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */
121 #define ISERR() VISERR(v)
122 #define VERR(vv,e) (((vv)->err) ? (vv)->err : ((vv)->err = (e)))
123 #define ERR(e) VERR(v, e) /* record an error */
124 #define NOERR() {if (ISERR()) return v->err;} /* if error seen, return
126 #define OFF(p) ((p) - v->start)
127 #define LOFF(p) ((long)OFF(p))
132 * forward declarations
134 /* === regexec.c === */
135 static int find(struct vars
*, struct cnfa
*, struct colormap
*);
136 static int cfind(struct vars
*, struct cnfa
*, struct colormap
*);
137 static int cfindloop(struct vars
*, struct cnfa
*, struct colormap
*, struct dfa
*, struct dfa
*, chr
**);
138 static void zapsubs(regmatch_t
*, size_t);
139 static void zapmem(struct vars
*, struct subre
*);
140 static void subset(struct vars
*, struct subre
*, chr
*, chr
*);
141 static int dissect(struct vars
*, struct subre
*, chr
*, chr
*);
142 static int condissect(struct vars
*, struct subre
*, chr
*, chr
*);
143 static int altdissect(struct vars
*, struct subre
*, chr
*, chr
*);
144 static int cdissect(struct vars
*, struct subre
*, chr
*, chr
*);
145 static int ccondissect(struct vars
*, struct subre
*, chr
*, chr
*);
146 static int crevdissect(struct vars
*, struct subre
*, chr
*, chr
*);
147 static int cbrdissect(struct vars
*, struct subre
*, chr
*, chr
*);
148 static int caltdissect(struct vars
*, struct subre
*, chr
*, chr
*);
150 /* === rege_dfa.c === */
151 static chr
*longest(struct vars
*, struct dfa
*, chr
*, chr
*, int *);
152 static chr
*shortest(struct vars
*, struct dfa
*, chr
*, chr
*, chr
*, chr
**, int *);
153 static chr
*lastcold(struct vars
*, struct dfa
*);
154 static struct dfa
*newdfa(struct vars
*, struct cnfa
*, struct colormap
*, struct smalldfa
*);
155 static void freedfa(struct dfa
*);
156 static unsigned hash(unsigned *, int);
157 static struct sset
*initialize(struct vars
*, struct dfa
*, chr
*);
158 static struct sset
*miss(struct vars
*, struct dfa
*, struct sset
*, pcolor
, chr
*, chr
*);
159 static int lacon(struct vars
*, struct cnfa
*, chr
*, pcolor
);
160 static struct sset
*getvacant(struct vars
*, struct dfa
*, chr
*, chr
*);
161 static struct sset
*pickss(struct vars
*, struct dfa
*, chr
*, chr
*);
165 * regexec - match regular expression
176 chr
* s2
= (chr
*) string
;
178 if (string
&& *string
)
183 nLen
= ((s2
- string
) / sizeof(chr
));
185 return wx_regexec(re
, string
, nLen
, &det
, nmatch
, pmatch
, flags
);
188 wx_regexec(regex_t
*re
,
191 rm_detail_t
*details
,
197 register struct vars
*v
= &var
;
203 regmatch_t mat
[LOCALMAT
];
206 regoff_t mem
[LOCALMEM
];
209 if (re
== NULL
|| string
== NULL
|| re
->re_magic
!= REMAGIC
)
211 if (re
->re_csize
!= sizeof(chr
))
216 v
->g
= (struct guts
*) re
->re_guts
;
217 if ((v
->g
->cflags
& REG_EXPECT
) && details
== NULL
)
219 if (v
->g
->info
& REG_UIMPOSSIBLE
)
221 backref
= (v
->g
->info
& REG_UBACKREF
) ? 1 : 0;
223 if (v
->g
->cflags
& REG_NOSUB
)
224 nmatch
= 0; /* override client */
229 if (v
->g
->nsub
+ 1 <= LOCALMAT
)
232 v
->pmatch
= (regmatch_t
*) MALLOC((v
->g
->nsub
+ 1) *
234 if (v
->pmatch
== NULL
)
236 v
->nmatch
= v
->g
->nsub
+ 1;
240 v
->details
= details
;
241 v
->start
= (chr
*) string
;
242 v
->stop
= (chr
*) string
+ len
;
246 /* need retry memory */
247 assert(v
->g
->ntree
>= 0);
248 n
= (size_t) v
->g
->ntree
;
252 v
->mem
= (regoff_t
*) MALLOC(n
* sizeof(regoff_t
));
255 if (v
->pmatch
!= pmatch
&& v
->pmatch
!= mat
)
264 assert(v
->g
->tree
!= NULL
);
266 st
= cfind(v
, &v
->g
->tree
->cnfa
, &v
->g
->cmap
);
268 st
= find(v
, &v
->g
->tree
->cnfa
, &v
->g
->cmap
);
270 /* copy (portion of) match vector over if necessary */
271 if (st
== REG_OKAY
&& v
->pmatch
!= pmatch
&& nmatch
> 0)
273 zapsubs(pmatch
, nmatch
);
274 n
= (nmatch
< v
->nmatch
) ? nmatch
: v
->nmatch
;
275 memcpy(VS(pmatch
), VS(v
->pmatch
), n
* sizeof(regmatch_t
));
279 if (v
->pmatch
!= pmatch
&& v
->pmatch
!= mat
)
281 if (v
->mem
!= NULL
&& v
->mem
!= mem
)
287 * find - find a match for the main NFA (no-complications case)
290 find(struct vars
* v
,
292 struct colormap
* cm
)
299 chr
*open
; /* open and close of range of possible
303 int shorter
= (v
->g
->tree
->flags
& SHORTER
) ? 1 : 0;
305 /* first, a shot with the search RE */
306 s
= newdfa(v
, &v
->g
->search
, cm
, &v
->dfa1
);
307 assert(!(ISERR() && s
!= NULL
));
309 MDEBUG(("\nsearch at %ld\n", LOFF(v
->start
)));
311 close
= shortest(v
, s
, v
->start
, v
->start
, v
->stop
, &cold
, (int *) NULL
);
314 if (v
->g
->cflags
& REG_EXPECT
)
316 assert(v
->details
!= NULL
);
318 v
->details
->rm_extend
.rm_so
= OFF(cold
);
320 v
->details
->rm_extend
.rm_so
= OFF(v
->stop
);
321 v
->details
->rm_extend
.rm_eo
= OFF(v
->stop
); /* unknown */
323 if (close
== NULL
) /* not found */
325 if (v
->nmatch
== 0) /* found, don't need exact location */
328 /* find starting point and match */
329 assert(cold
!= NULL
);
332 MDEBUG(("between %ld and %ld\n", LOFF(open
), LOFF(close
)));
333 d
= newdfa(v
, cnfa
, cm
, &v
->dfa1
);
334 assert(!(ISERR() && d
!= NULL
));
336 for (begin
= open
; begin
<= close
; begin
++)
338 MDEBUG(("\nfind trying at %ld\n", LOFF(begin
)));
340 end
= shortest(v
, d
, begin
, begin
, v
->stop
,
341 (chr
**) NULL
, &hitend
);
343 end
= longest(v
, d
, begin
, v
->stop
, &hitend
);
345 if (hitend
&& cold
== NULL
)
348 break; /* NOTE BREAK OUT */
350 assert(end
!= NULL
); /* search RE succeeded so loop should */
353 /* and pin down details */
354 assert(v
->nmatch
> 0);
355 v
->pmatch
[0].rm_so
= OFF(begin
);
356 v
->pmatch
[0].rm_eo
= OFF(end
);
357 if (v
->g
->cflags
& REG_EXPECT
)
360 v
->details
->rm_extend
.rm_so
= OFF(cold
);
362 v
->details
->rm_extend
.rm_so
= OFF(v
->stop
);
363 v
->details
->rm_extend
.rm_eo
= OFF(v
->stop
); /* unknown */
365 if (v
->nmatch
== 1) /* no need for submatches */
369 zapsubs(v
->pmatch
, v
->nmatch
);
370 return dissect(v
, v
->g
->tree
, begin
, end
);
374 * cfind - find a match for the main NFA (with complications)
377 cfind(struct vars
* v
,
379 struct colormap
* cm
)
386 s
= newdfa(v
, &v
->g
->search
, cm
, &v
->dfa1
);
388 d
= newdfa(v
, cnfa
, cm
, &v
->dfa2
);
396 ret
= cfindloop(v
, cnfa
, cm
, d
, s
, &cold
);
401 if (v
->g
->cflags
& REG_EXPECT
)
403 assert(v
->details
!= NULL
);
405 v
->details
->rm_extend
.rm_so
= OFF(cold
);
407 v
->details
->rm_extend
.rm_so
= OFF(v
->stop
);
408 v
->details
->rm_extend
.rm_eo
= OFF(v
->stop
); /* unknown */
414 * cfindloop - the heart of cfind
417 cfindloop(struct vars
* v
,
419 struct colormap
* cm
,
422 chr
**coldp
) /* where to put coldstart pointer */
427 chr
*open
; /* open and close of range of possible
433 int shorter
= v
->g
->tree
->flags
& SHORTER
;
436 assert(d
!= NULL
&& s
!= NULL
);
441 MDEBUG(("\ncsearch at %ld\n", LOFF(close
)));
442 close
= shortest(v
, s
, close
, close
, v
->stop
, &cold
, (int *) NULL
);
444 break; /* NOTE BREAK */
445 assert(cold
!= NULL
);
448 MDEBUG(("cbetween %ld and %ld\n", LOFF(open
), LOFF(close
)));
449 for (begin
= open
; begin
<= close
; begin
++)
451 MDEBUG(("\ncfind trying at %ld\n", LOFF(begin
)));
457 end
= shortest(v
, d
, begin
, estart
,
458 estop
, (chr
**) NULL
, &hitend
);
460 end
= longest(v
, d
, begin
, estop
,
462 if (hitend
&& cold
== NULL
)
465 break; /* NOTE BREAK OUT */
466 MDEBUG(("tentative end %ld\n", LOFF(end
)));
467 zapsubs(v
->pmatch
, v
->nmatch
);
468 zapmem(v
, v
->g
->tree
);
469 er
= cdissect(v
, v
->g
->tree
, begin
, end
);
474 v
->pmatch
[0].rm_so
= OFF(begin
);
475 v
->pmatch
[0].rm_eo
= OFF(end
);
480 if (er
!= REG_NOMATCH
)
485 if ((shorter
) ? end
== estop
: end
== begin
)
487 /* no point in trying again */
491 /* go around and try again */
498 } while (close
< v
->stop
);
505 * zapsubs - initialize the subexpression matches to "no match"
508 zapsubs(regmatch_t
*p
,
513 for (i
= n
- 1; i
> 0; i
--)
521 * zapmem - initialize the retry memory of a subtree to zeros
524 zapmem(struct vars
* v
,
530 assert(v
->mem
!= NULL
);
531 v
->mem
[t
->retry
] = 0;
534 assert(t
->subno
> 0);
535 v
->pmatch
[t
->subno
].rm_so
= -1;
536 v
->pmatch
[t
->subno
].rm_eo
= -1;
541 if (t
->right
!= NULL
)
546 * subset - set any subexpression relevant to a successful subre
549 subset(struct vars
* v
,
557 if ((size_t) n
>= v
->nmatch
)
560 MDEBUG(("setting %d\n", n
));
561 v
->pmatch
[n
].rm_so
= OFF(begin
);
562 v
->pmatch
[n
].rm_eo
= OFF(end
);
566 * dissect - determine subexpression matches (uncomplicated case)
568 static int /* regexec return code */
569 dissect(struct vars
* v
,
571 chr
*begin
, /* beginning of relevant substring */
572 chr
*end
) /* end of same */
575 MDEBUG(("dissect %ld-%ld\n", LOFF(begin
), LOFF(end
)));
579 case '=': /* terminal node */
580 assert(t
->left
== NULL
&& t
->right
== NULL
);
581 return REG_OKAY
; /* no action, parent did the work */
583 case '|': /* alternation */
584 assert(t
->left
!= NULL
);
585 return altdissect(v
, t
, begin
, end
);
587 case 'b': /* back ref -- shouldn't be calling us! */
590 case '.': /* concatenation */
591 assert(t
->left
!= NULL
&& t
->right
!= NULL
);
592 return condissect(v
, t
, begin
, end
);
594 case '(': /* capturing */
595 assert(t
->left
!= NULL
&& t
->right
== NULL
);
596 assert(t
->subno
> 0);
597 subset(v
, t
, begin
, end
);
598 return dissect(v
, t
->left
, begin
, end
);
607 * condissect - determine concatenation subexpression matches (uncomplicated)
609 static int /* regexec return code */
610 condissect(struct vars
* v
,
612 chr
*begin
, /* beginning of relevant substring */
613 chr
*end
) /* end of same */
619 int shorter
= (t
->left
->flags
& SHORTER
) ? 1 : 0;
620 chr
*stop
= (shorter
) ? end
: begin
;
622 assert(t
->op
== '.');
623 assert(t
->left
!= NULL
&& t
->left
->cnfa
.nstates
> 0);
624 assert(t
->right
!= NULL
&& t
->right
->cnfa
.nstates
> 0);
626 d
= newdfa(v
, &t
->left
->cnfa
, &v
->g
->cmap
, &v
->dfa1
);
628 d2
= newdfa(v
, &t
->right
->cnfa
, &v
->g
->cmap
, &v
->dfa2
);
636 /* pick a tentative midpoint */
638 mid
= shortest(v
, d
, begin
, begin
, end
, (chr
**) NULL
,
641 mid
= longest(v
, d
, begin
, end
, (int *) NULL
);
648 MDEBUG(("tentative midpoint %ld\n", LOFF(mid
)));
650 /* iterate until satisfaction or failure */
651 while (longest(v
, d2
, mid
, end
, (int *) NULL
) != end
)
653 /* that midpoint didn't work, find a new one */
656 /* all possibilities exhausted! */
657 MDEBUG(("no midpoint!\n"));
663 mid
= shortest(v
, d
, begin
, mid
+ 1, end
, (chr
**) NULL
,
666 mid
= longest(v
, d
, begin
, mid
- 1, (int *) NULL
);
669 /* failed to find a new one! */
670 MDEBUG(("failed midpoint!\n"));
675 MDEBUG(("new midpoint %ld\n", LOFF(mid
)));
679 MDEBUG(("successful\n"));
682 i
= dissect(v
, t
->left
, begin
, mid
);
685 return dissect(v
, t
->right
, mid
, end
);
689 * altdissect - determine alternative subexpression matches (uncomplicated)
691 static int /* regexec return code */
692 altdissect(struct vars
* v
,
694 chr
*begin
, /* beginning of relevant substring */
695 chr
*end
) /* end of same */
701 assert(t
->op
== '|');
703 for (i
= 0; t
!= NULL
; t
= t
->right
, i
++)
705 MDEBUG(("trying %dth\n", i
));
706 assert(t
->left
!= NULL
&& t
->left
->cnfa
.nstates
> 0);
707 d
= newdfa(v
, &t
->left
->cnfa
, &v
->g
->cmap
, &v
->dfa1
);
710 if (longest(v
, d
, begin
, end
, (int *) NULL
) == end
)
712 MDEBUG(("success\n"));
714 return dissect(v
, t
->left
, begin
, end
);
718 return REG_ASSERT
; /* none of them matched?!? */
722 * cdissect - determine subexpression matches (with complications)
723 * The retry memory stores the offset of the trial midpoint from begin,
724 * plus 1 so that 0 uniquely means "clean slate".
726 static int /* regexec return code */
727 cdissect(struct vars
* v
,
729 chr
*begin
, /* beginning of relevant substring */
730 chr
*end
) /* end of same */
735 MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin
), LOFF(end
), t
->op
));
739 case '=': /* terminal node */
740 assert(t
->left
== NULL
&& t
->right
== NULL
);
741 return REG_OKAY
; /* no action, parent did the work */
743 case '|': /* alternation */
744 assert(t
->left
!= NULL
);
745 return caltdissect(v
, t
, begin
, end
);
747 case 'b': /* back ref -- shouldn't be calling us! */
748 assert(t
->left
== NULL
&& t
->right
== NULL
);
749 return cbrdissect(v
, t
, begin
, end
);
751 case '.': /* concatenation */
752 assert(t
->left
!= NULL
&& t
->right
!= NULL
);
753 return ccondissect(v
, t
, begin
, end
);
755 case '(': /* capturing */
756 assert(t
->left
!= NULL
&& t
->right
== NULL
);
757 assert(t
->subno
> 0);
758 er
= cdissect(v
, t
->left
, begin
, end
);
760 subset(v
, t
, begin
, end
);
770 * ccondissect - concatenation subexpression matches (with complications)
771 * The retry memory stores the offset of the trial midpoint from begin,
772 * plus 1 so that 0 uniquely means "clean slate".
774 static int /* regexec return code */
775 ccondissect(struct vars
* v
,
777 chr
*begin
, /* beginning of relevant substring */
778 chr
*end
) /* end of same */
785 assert(t
->op
== '.');
786 assert(t
->left
!= NULL
&& t
->left
->cnfa
.nstates
> 0);
787 assert(t
->right
!= NULL
&& t
->right
->cnfa
.nstates
> 0);
789 if (t
->left
->flags
& SHORTER
) /* reverse scan */
790 return crevdissect(v
, t
, begin
, end
);
792 d
= newdfa(v
, &t
->left
->cnfa
, &v
->g
->cmap
, DOMALLOC
);
795 d2
= newdfa(v
, &t
->right
->cnfa
, &v
->g
->cmap
, DOMALLOC
);
801 MDEBUG(("cconcat %d\n", t
->retry
));
803 /* pick a tentative midpoint */
804 if (v
->mem
[t
->retry
] == 0)
806 mid
= longest(v
, d
, begin
, end
, (int *) NULL
);
813 MDEBUG(("tentative midpoint %ld\n", LOFF(mid
)));
814 v
->mem
[t
->retry
] = (mid
- begin
) + 1;
818 mid
= begin
+ (v
->mem
[t
->retry
] - 1);
819 MDEBUG(("working midpoint %ld\n", LOFF(mid
)));
822 /* iterate until satisfaction or failure */
825 /* try this midpoint on for size */
826 er
= cdissect(v
, t
->left
, begin
, mid
);
827 if (er
== REG_OKAY
&&
828 longest(v
, d2
, mid
, end
, (int *) NULL
) == end
&&
829 (er
= cdissect(v
, t
->right
, mid
, end
)) ==
831 break; /* NOTE BREAK OUT */
832 if (er
!= REG_OKAY
&& er
!= REG_NOMATCH
)
839 /* that midpoint didn't work, find a new one */
842 /* all possibilities exhausted */
843 MDEBUG(("%d no midpoint\n", t
->retry
));
848 mid
= longest(v
, d
, begin
, mid
- 1, (int *) NULL
);
851 /* failed to find a new one */
852 MDEBUG(("%d failed midpoint\n", t
->retry
));
857 MDEBUG(("%d: new midpoint %ld\n", t
->retry
, LOFF(mid
)));
858 v
->mem
[t
->retry
] = (mid
- begin
) + 1;
864 MDEBUG(("successful\n"));
871 * crevdissect - determine backref shortest-first subexpression matches
872 * The retry memory stores the offset of the trial midpoint from begin,
873 * plus 1 so that 0 uniquely means "clean slate".
875 static int /* regexec return code */
876 crevdissect(struct vars
* v
,
878 chr
*begin
, /* beginning of relevant substring */
879 chr
*end
) /* end of same */
886 assert(t
->op
== '.');
887 assert(t
->left
!= NULL
&& t
->left
->cnfa
.nstates
> 0);
888 assert(t
->right
!= NULL
&& t
->right
->cnfa
.nstates
> 0);
889 assert(t
->left
->flags
& SHORTER
);
891 /* concatenation -- need to split the substring between parts */
892 d
= newdfa(v
, &t
->left
->cnfa
, &v
->g
->cmap
, DOMALLOC
);
895 d2
= newdfa(v
, &t
->right
->cnfa
, &v
->g
->cmap
, DOMALLOC
);
901 MDEBUG(("crev %d\n", t
->retry
));
903 /* pick a tentative midpoint */
904 if (v
->mem
[t
->retry
] == 0)
906 mid
= shortest(v
, d
, begin
, begin
, end
, (chr
**) NULL
, (int *) NULL
);
913 MDEBUG(("tentative midpoint %ld\n", LOFF(mid
)));
914 v
->mem
[t
->retry
] = (mid
- begin
) + 1;
918 mid
= begin
+ (v
->mem
[t
->retry
] - 1);
919 MDEBUG(("working midpoint %ld\n", LOFF(mid
)));
922 /* iterate until satisfaction or failure */
925 /* try this midpoint on for size */
926 er
= cdissect(v
, t
->left
, begin
, mid
);
927 if (er
== REG_OKAY
&&
928 longest(v
, d2
, mid
, end
, (int *) NULL
) == end
&&
929 (er
= cdissect(v
, t
->right
, mid
, end
)) ==
931 break; /* NOTE BREAK OUT */
932 if (er
!= REG_OKAY
&& er
!= REG_NOMATCH
)
939 /* that midpoint didn't work, find a new one */
942 /* all possibilities exhausted */
943 MDEBUG(("%d no midpoint\n", t
->retry
));
948 mid
= shortest(v
, d
, begin
, mid
+ 1, end
, (chr
**) NULL
, (int *) NULL
);
951 /* failed to find a new one */
952 MDEBUG(("%d failed midpoint\n", t
->retry
));
957 MDEBUG(("%d: new midpoint %ld\n", t
->retry
, LOFF(mid
)));
958 v
->mem
[t
->retry
] = (mid
- begin
) + 1;
964 MDEBUG(("successful\n"));
971 * cbrdissect - determine backref subexpression matches
973 static int /* regexec return code */
974 cbrdissect(struct vars
* v
,
976 chr
*begin
, /* beginning of relevant substring */
977 chr
*end
) /* end of same */
989 assert(t
->op
== 'b');
991 assert((size_t) n
< v
->nmatch
);
993 MDEBUG(("cbackref n%d %d{%d-%d}\n", t
->retry
, n
, min
, max
));
995 if (v
->pmatch
[n
].rm_so
== -1)
997 paren
= v
->start
+ v
->pmatch
[n
].rm_so
;
998 len
= v
->pmatch
[n
].rm_eo
- v
->pmatch
[n
].rm_so
;
1000 /* no room to maneuver -- retries are pointless */
1001 if (v
->mem
[t
->retry
])
1003 v
->mem
[t
->retry
] = 1;
1005 /* special-case zero-length string */
1013 /* and too-short string */
1014 assert(end
>= begin
);
1015 if ((size_t) (end
- begin
) < len
)
1019 /* count occurrences */
1021 for (p
= begin
; p
<= stop
&& (i
< max
|| max
== INFINITY
); p
+= len
)
1023 if ((*v
->g
->compare
) (paren
, p
, len
) != 0)
1027 MDEBUG(("cbackref found %d\n", i
));
1029 /* and sort it out */
1030 if (p
!= end
) /* didn't consume all of it */
1032 if (min
<= i
&& (i
<= max
|| max
== INFINITY
))
1034 return REG_NOMATCH
; /* out of range */
1038 * caltdissect - determine alternative subexpression matches (w. complications)
1040 static int /* regexec return code */
1041 caltdissect(struct vars
* v
,
1043 chr
*begin
, /* beginning of relevant substring */
1044 chr
*end
) /* end of same */
1049 #define UNTRIED 0 /* not yet tried at all */
1050 #define TRYING 1 /* top matched, trying submatches */
1051 #define TRIED 2 /* top didn't match or submatches
1056 assert(t
->op
== '|');
1057 if (v
->mem
[t
->retry
] == TRIED
)
1058 return caltdissect(v
, t
->right
, begin
, end
);
1060 MDEBUG(("calt n%d\n", t
->retry
));
1061 assert(t
->left
!= NULL
);
1063 if (v
->mem
[t
->retry
] == UNTRIED
)
1065 d
= newdfa(v
, &t
->left
->cnfa
, &v
->g
->cmap
, DOMALLOC
);
1068 if (longest(v
, d
, begin
, end
, (int *) NULL
) != end
)
1071 v
->mem
[t
->retry
] = TRIED
;
1072 return caltdissect(v
, t
->right
, begin
, end
);
1075 MDEBUG(("calt matched\n"));
1076 v
->mem
[t
->retry
] = TRYING
;
1079 er
= cdissect(v
, t
->left
, begin
, end
);
1080 if (er
!= REG_NOMATCH
)
1083 v
->mem
[t
->retry
] = TRIED
;
1084 return caltdissect(v
, t
->right
, begin
, end
);
1089 #include "rege_dfa.c"