]>
git.saurik.com Git - wxWidgets.git/blob - src/regex/regexec.c
2 * re_*exec and friends - match REs
4 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
6 * Development of this software was funded, in part, by Cray Research Inc.,
7 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
8 * Corporation, none of whom are responsible for the results. The author
11 * Redistribution and use in source and binary forms -- with or without
12 * modification -- are permitted for any purpose, provided that
13 * redistributions in source form retain this entire copyright notice and
14 * indicate the origin and nature of any modifications.
16 * I'd appreciate being given credit for this package in the documentation
17 * of software which uses it, but that is not a requirement.
19 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
21 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
22 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * $Header: /projects/cvsroot/pgsql-server/src/backend/regex/regexec.c,v 1.23 2003/08/08 21:41:56 momjian Exp $
38 /* lazy-DFA representation */
40 { /* "pointer" to an outarc */
47 unsigned *states
; /* pointer to bitvector */
48 unsigned hash
; /* hash of bitvector */
49 #define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw))
50 #define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \
51 memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
53 #define STARTER 01 /* the initial state set */
54 #define POSTSTATE 02 /* includes the goal state */
55 #define LOCKED 04 /* locked in cache */
56 #define NOPROGRESS 010 /* zero-progress state set */
57 struct arcp ins
; /* chain of inarcs pointing here */
58 chr
*lastseen
; /* last entered on arrival here */
59 struct sset
**outs
; /* outarc vector indexed by color */
60 struct arcp
*inchain
; /* chain-pointer vector for outarcs */
65 int nssets
; /* size of cache */
66 int nssused
; /* how many entries occupied yet */
67 int nstates
; /* number of states */
68 int ncolors
; /* length of outarc and inchain vectors */
69 int wordsper
; /* length of state-set bitvectors */
70 struct sset
*ssets
; /* state-set cache */
71 unsigned *statesarea
; /* bitvector storage */
72 unsigned *work
; /* pointer to work area within statesarea */
73 struct sset
**outsarea
; /* outarc-vector storage */
74 struct arcp
*incarea
; /* inchain storage */
77 chr
*lastpost
; /* location of last cache-flushed success */
78 chr
*lastnopr
; /* location of last cache-flushed
80 struct sset
*search
; /* replacement-search-pointer memory */
81 int cptsmalloced
; /* were the areas individually malloced? */
82 char *mallocarea
; /* self, or master malloced area, or NULL */
85 #define WORK 1 /* number of work bitvectors needed */
87 /* setup for non-malloc allocation for small cases */
88 #define FEWSTATES 20 /* must be less than UBITS */
93 struct sset ssets
[FEWSTATES
* 2];
94 unsigned statesarea
[FEWSTATES
* 2 + WORK
];
95 struct sset
*outsarea
[FEWSTATES
* 2 * FEWCOLORS
];
96 struct arcp incarea
[FEWSTATES
* 2 * FEWCOLORS
];
99 #define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */
103 /* internal variables, bundled for easy passing around */
108 int eflags
; /* copies of arguments */
111 rm_detail_t
*details
;
112 chr
*start
; /* start of string */
113 chr
*stop
; /* just past end of string */
114 int err
; /* error code if any (0 none) */
115 regoff_t
*mem
; /* memory vector for backtracking */
116 struct smalldfa dfa1
;
117 struct smalldfa dfa2
;
120 #define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */
121 #define ISERR() VISERR(v)
122 #define VERR(vv,e) (((vv)->err) ? (vv)->err : ((vv)->err = (e)))
123 #define ERR(e) VERR(v, e) /* record an error */
124 #define NOERR() {if (ISERR()) return v->err;} /* if error seen, return
126 #define OFF(p) ((p) - v->start)
127 #define LOFF(p) ((long)OFF(p))
132 * forward declarations
134 /* === regexec.c === */
135 static int find(struct vars
*, struct cnfa
*, struct colormap
*);
136 static int cfind(struct vars
*, struct cnfa
*, struct colormap
*);
137 static int cfindloop(struct vars
*, struct cnfa
*, struct colormap
*, struct dfa
*, struct dfa
*, chr
**);
138 static void zapsubs(regmatch_t
*, size_t);
139 static void zapmem(struct vars
*, struct subre
*);
140 static void subset(struct vars
*, struct subre
*, chr
*, chr
*);
141 static int dissect(struct vars
*, struct subre
*, chr
*, chr
*);
142 static int condissect(struct vars
*, struct subre
*, chr
*, chr
*);
143 static int altdissect(struct vars
*, struct subre
*, chr
*, chr
*);
144 static int cdissect(struct vars
*, struct subre
*, chr
*, chr
*);
145 static int ccondissect(struct vars
*, struct subre
*, chr
*, chr
*);
146 static int crevdissect(struct vars
*, struct subre
*, chr
*, chr
*);
147 static int cbrdissect(struct vars
*, struct subre
*, chr
*, chr
*);
148 static int caltdissect(struct vars
*, struct subre
*, chr
*, chr
*);
150 /* === rege_dfa.c === */
151 static chr
*longest(struct vars
*, struct dfa
*, chr
*, chr
*, int *);
152 static chr
*shortest(struct vars
*, struct dfa
*, chr
*, chr
*, chr
*, chr
**, int *);
153 static chr
*lastcold(struct vars
*, struct dfa
*);
154 static struct dfa
*newdfa(struct vars
*, struct cnfa
*, struct colormap
*, struct smalldfa
*);
155 static void freedfa(struct dfa
*);
156 static unsigned hash(unsigned *, int);
157 static struct sset
*initialize(struct vars
*, struct dfa
*, chr
*);
158 static struct sset
*miss(struct vars
*, struct dfa
*, struct sset
*, pcolor
, chr
*, chr
*);
159 static int lacon(struct vars
*, struct cnfa
*, chr
*, pcolor
);
160 static struct sset
*getvacant(struct vars
*, struct dfa
*, chr
*, chr
*);
161 static struct sset
*pickss(struct vars
*, struct dfa
*, chr
*, chr
*);
165 * regexec - match regular expression
175 return wx_regexec(re
, string
, wx_strlen(string
), &det
, nmatch
, pmatch
, flags
);
178 wx_regexec(regex_t
*re
,
181 rm_detail_t
*details
,
187 register struct vars
*v
= &var
;
193 regmatch_t mat
[LOCALMAT
];
196 regoff_t mem
[LOCALMEM
];
199 if (re
== NULL
|| string
== NULL
|| re
->re_magic
!= REMAGIC
)
201 if (re
->re_csize
!= sizeof(chr
))
206 v
->g
= (struct guts
*) re
->re_guts
;
207 if ((v
->g
->cflags
& REG_EXPECT
) && details
== NULL
)
209 if (v
->g
->info
& REG_UIMPOSSIBLE
)
211 backref
= (v
->g
->info
& REG_UBACKREF
) ? 1 : 0;
213 if (v
->g
->cflags
& REG_NOSUB
)
214 nmatch
= 0; /* override client */
219 if (v
->g
->nsub
+ 1 <= LOCALMAT
)
222 v
->pmatch
= (regmatch_t
*) MALLOC((v
->g
->nsub
+ 1) *
224 if (v
->pmatch
== NULL
)
226 v
->nmatch
= v
->g
->nsub
+ 1;
230 v
->details
= details
;
231 v
->start
= (chr
*) string
;
232 v
->stop
= (chr
*) string
+ len
;
236 /* need retry memory */
237 assert(v
->g
->ntree
>= 0);
238 n
= (size_t) v
->g
->ntree
;
242 v
->mem
= (regoff_t
*) MALLOC(n
* sizeof(regoff_t
));
245 if (v
->pmatch
!= pmatch
&& v
->pmatch
!= mat
)
254 assert(v
->g
->tree
!= NULL
);
256 st
= cfind(v
, &v
->g
->tree
->cnfa
, &v
->g
->cmap
);
258 st
= find(v
, &v
->g
->tree
->cnfa
, &v
->g
->cmap
);
260 /* copy (portion of) match vector over if necessary */
261 if (st
== REG_OKAY
&& v
->pmatch
!= pmatch
&& nmatch
> 0)
263 zapsubs(pmatch
, nmatch
);
264 n
= (nmatch
< v
->nmatch
) ? nmatch
: v
->nmatch
;
265 memcpy(VS(pmatch
), VS(v
->pmatch
), n
* sizeof(regmatch_t
));
269 if (v
->pmatch
!= pmatch
&& v
->pmatch
!= mat
)
271 if (v
->mem
!= NULL
&& v
->mem
!= mem
)
277 * find - find a match for the main NFA (no-complications case)
280 find(struct vars
* v
,
282 struct colormap
* cm
)
289 chr
*open
; /* open and close of range of possible
293 int shorter
= (v
->g
->tree
->flags
& SHORTER
) ? 1 : 0;
295 /* first, a shot with the search RE */
296 s
= newdfa(v
, &v
->g
->search
, cm
, &v
->dfa1
);
297 assert(!(ISERR() && s
!= NULL
));
299 MDEBUG(("\nsearch at %ld\n", LOFF(v
->start
)));
301 close
= shortest(v
, s
, v
->start
, v
->start
, v
->stop
, &cold
, (int *) NULL
);
304 if (v
->g
->cflags
& REG_EXPECT
)
306 assert(v
->details
!= NULL
);
308 v
->details
->rm_extend
.rm_so
= OFF(cold
);
310 v
->details
->rm_extend
.rm_so
= OFF(v
->stop
);
311 v
->details
->rm_extend
.rm_eo
= OFF(v
->stop
); /* unknown */
313 if (close
== NULL
) /* not found */
315 if (v
->nmatch
== 0) /* found, don't need exact location */
318 /* find starting point and match */
319 assert(cold
!= NULL
);
322 MDEBUG(("between %ld and %ld\n", LOFF(open
), LOFF(close
)));
323 d
= newdfa(v
, cnfa
, cm
, &v
->dfa1
);
324 assert(!(ISERR() && d
!= NULL
));
326 for (begin
= open
; begin
<= close
; begin
++)
328 MDEBUG(("\nfind trying at %ld\n", LOFF(begin
)));
330 end
= shortest(v
, d
, begin
, begin
, v
->stop
,
331 (chr
**) NULL
, &hitend
);
333 end
= longest(v
, d
, begin
, v
->stop
, &hitend
);
335 if (hitend
&& cold
== NULL
)
338 break; /* NOTE BREAK OUT */
340 assert(end
!= NULL
); /* search RE succeeded so loop should */
343 /* and pin down details */
344 assert(v
->nmatch
> 0);
345 v
->pmatch
[0].rm_so
= OFF(begin
);
346 v
->pmatch
[0].rm_eo
= OFF(end
);
347 if (v
->g
->cflags
& REG_EXPECT
)
350 v
->details
->rm_extend
.rm_so
= OFF(cold
);
352 v
->details
->rm_extend
.rm_so
= OFF(v
->stop
);
353 v
->details
->rm_extend
.rm_eo
= OFF(v
->stop
); /* unknown */
355 if (v
->nmatch
== 1) /* no need for submatches */
359 zapsubs(v
->pmatch
, v
->nmatch
);
360 return dissect(v
, v
->g
->tree
, begin
, end
);
364 * cfind - find a match for the main NFA (with complications)
367 cfind(struct vars
* v
,
369 struct colormap
* cm
)
376 s
= newdfa(v
, &v
->g
->search
, cm
, &v
->dfa1
);
378 d
= newdfa(v
, cnfa
, cm
, &v
->dfa2
);
386 ret
= cfindloop(v
, cnfa
, cm
, d
, s
, &cold
);
391 if (v
->g
->cflags
& REG_EXPECT
)
393 assert(v
->details
!= NULL
);
395 v
->details
->rm_extend
.rm_so
= OFF(cold
);
397 v
->details
->rm_extend
.rm_so
= OFF(v
->stop
);
398 v
->details
->rm_extend
.rm_eo
= OFF(v
->stop
); /* unknown */
404 * cfindloop - the heart of cfind
407 cfindloop(struct vars
* v
,
409 struct colormap
* cm
,
412 chr
**coldp
) /* where to put coldstart pointer */
417 chr
*open
; /* open and close of range of possible
423 int shorter
= v
->g
->tree
->flags
& SHORTER
;
426 assert(d
!= NULL
&& s
!= NULL
);
431 MDEBUG(("\ncsearch at %ld\n", LOFF(close
)));
432 close
= shortest(v
, s
, close
, close
, v
->stop
, &cold
, (int *) NULL
);
434 break; /* NOTE BREAK */
435 assert(cold
!= NULL
);
438 MDEBUG(("cbetween %ld and %ld\n", LOFF(open
), LOFF(close
)));
439 for (begin
= open
; begin
<= close
; begin
++)
441 MDEBUG(("\ncfind trying at %ld\n", LOFF(begin
)));
447 end
= shortest(v
, d
, begin
, estart
,
448 estop
, (chr
**) NULL
, &hitend
);
450 end
= longest(v
, d
, begin
, estop
,
452 if (hitend
&& cold
== NULL
)
455 break; /* NOTE BREAK OUT */
456 MDEBUG(("tentative end %ld\n", LOFF(end
)));
457 zapsubs(v
->pmatch
, v
->nmatch
);
458 zapmem(v
, v
->g
->tree
);
459 er
= cdissect(v
, v
->g
->tree
, begin
, end
);
464 v
->pmatch
[0].rm_so
= OFF(begin
);
465 v
->pmatch
[0].rm_eo
= OFF(end
);
470 if (er
!= REG_NOMATCH
)
475 if ((shorter
) ? end
== estop
: end
== begin
)
477 /* no point in trying again */
481 /* go around and try again */
488 } while (close
< v
->stop
);
495 * zapsubs - initialize the subexpression matches to "no match"
498 zapsubs(regmatch_t
*p
,
503 for (i
= n
- 1; i
> 0; i
--)
511 * zapmem - initialize the retry memory of a subtree to zeros
514 zapmem(struct vars
* v
,
520 assert(v
->mem
!= NULL
);
521 v
->mem
[t
->retry
] = 0;
524 assert(t
->subno
> 0);
525 v
->pmatch
[t
->subno
].rm_so
= -1;
526 v
->pmatch
[t
->subno
].rm_eo
= -1;
531 if (t
->right
!= NULL
)
536 * subset - set any subexpression relevant to a successful subre
539 subset(struct vars
* v
,
547 if ((size_t) n
>= v
->nmatch
)
550 MDEBUG(("setting %d\n", n
));
551 v
->pmatch
[n
].rm_so
= OFF(begin
);
552 v
->pmatch
[n
].rm_eo
= OFF(end
);
556 * dissect - determine subexpression matches (uncomplicated case)
558 static int /* regexec return code */
559 dissect(struct vars
* v
,
561 chr
*begin
, /* beginning of relevant substring */
562 chr
*end
) /* end of same */
565 MDEBUG(("dissect %ld-%ld\n", LOFF(begin
), LOFF(end
)));
569 case '=': /* terminal node */
570 assert(t
->left
== NULL
&& t
->right
== NULL
);
571 return REG_OKAY
; /* no action, parent did the work */
573 case '|': /* alternation */
574 assert(t
->left
!= NULL
);
575 return altdissect(v
, t
, begin
, end
);
577 case 'b': /* back ref -- shouldn't be calling us! */
580 case '.': /* concatenation */
581 assert(t
->left
!= NULL
&& t
->right
!= NULL
);
582 return condissect(v
, t
, begin
, end
);
584 case '(': /* capturing */
585 assert(t
->left
!= NULL
&& t
->right
== NULL
);
586 assert(t
->subno
> 0);
587 subset(v
, t
, begin
, end
);
588 return dissect(v
, t
->left
, begin
, end
);
597 * condissect - determine concatenation subexpression matches (uncomplicated)
599 static int /* regexec return code */
600 condissect(struct vars
* v
,
602 chr
*begin
, /* beginning of relevant substring */
603 chr
*end
) /* end of same */
609 int shorter
= (t
->left
->flags
& SHORTER
) ? 1 : 0;
610 chr
*stop
= (shorter
) ? end
: begin
;
612 assert(t
->op
== '.');
613 assert(t
->left
!= NULL
&& t
->left
->cnfa
.nstates
> 0);
614 assert(t
->right
!= NULL
&& t
->right
->cnfa
.nstates
> 0);
616 d
= newdfa(v
, &t
->left
->cnfa
, &v
->g
->cmap
, &v
->dfa1
);
618 d2
= newdfa(v
, &t
->right
->cnfa
, &v
->g
->cmap
, &v
->dfa2
);
626 /* pick a tentative midpoint */
628 mid
= shortest(v
, d
, begin
, begin
, end
, (chr
**) NULL
,
631 mid
= longest(v
, d
, begin
, end
, (int *) NULL
);
638 MDEBUG(("tentative midpoint %ld\n", LOFF(mid
)));
640 /* iterate until satisfaction or failure */
641 while (longest(v
, d2
, mid
, end
, (int *) NULL
) != end
)
643 /* that midpoint didn't work, find a new one */
646 /* all possibilities exhausted! */
647 MDEBUG(("no midpoint!\n"));
653 mid
= shortest(v
, d
, begin
, mid
+ 1, end
, (chr
**) NULL
,
656 mid
= longest(v
, d
, begin
, mid
- 1, (int *) NULL
);
659 /* failed to find a new one! */
660 MDEBUG(("failed midpoint!\n"));
665 MDEBUG(("new midpoint %ld\n", LOFF(mid
)));
669 MDEBUG(("successful\n"));
672 i
= dissect(v
, t
->left
, begin
, mid
);
675 return dissect(v
, t
->right
, mid
, end
);
679 * altdissect - determine alternative subexpression matches (uncomplicated)
681 static int /* regexec return code */
682 altdissect(struct vars
* v
,
684 chr
*begin
, /* beginning of relevant substring */
685 chr
*end
) /* end of same */
691 assert(t
->op
== '|');
693 for (i
= 0; t
!= NULL
; t
= t
->right
, i
++)
695 MDEBUG(("trying %dth\n", i
));
696 assert(t
->left
!= NULL
&& t
->left
->cnfa
.nstates
> 0);
697 d
= newdfa(v
, &t
->left
->cnfa
, &v
->g
->cmap
, &v
->dfa1
);
700 if (longest(v
, d
, begin
, end
, (int *) NULL
) == end
)
702 MDEBUG(("success\n"));
704 return dissect(v
, t
->left
, begin
, end
);
708 return REG_ASSERT
; /* none of them matched?!? */
712 * cdissect - determine subexpression matches (with complications)
713 * The retry memory stores the offset of the trial midpoint from begin,
714 * plus 1 so that 0 uniquely means "clean slate".
716 static int /* regexec return code */
717 cdissect(struct vars
* v
,
719 chr
*begin
, /* beginning of relevant substring */
720 chr
*end
) /* end of same */
725 MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin
), LOFF(end
), t
->op
));
729 case '=': /* terminal node */
730 assert(t
->left
== NULL
&& t
->right
== NULL
);
731 return REG_OKAY
; /* no action, parent did the work */
733 case '|': /* alternation */
734 assert(t
->left
!= NULL
);
735 return caltdissect(v
, t
, begin
, end
);
737 case 'b': /* back ref -- shouldn't be calling us! */
738 assert(t
->left
== NULL
&& t
->right
== NULL
);
739 return cbrdissect(v
, t
, begin
, end
);
741 case '.': /* concatenation */
742 assert(t
->left
!= NULL
&& t
->right
!= NULL
);
743 return ccondissect(v
, t
, begin
, end
);
745 case '(': /* capturing */
746 assert(t
->left
!= NULL
&& t
->right
== NULL
);
747 assert(t
->subno
> 0);
748 er
= cdissect(v
, t
->left
, begin
, end
);
750 subset(v
, t
, begin
, end
);
760 * ccondissect - concatenation subexpression matches (with complications)
761 * The retry memory stores the offset of the trial midpoint from begin,
762 * plus 1 so that 0 uniquely means "clean slate".
764 static int /* regexec return code */
765 ccondissect(struct vars
* v
,
767 chr
*begin
, /* beginning of relevant substring */
768 chr
*end
) /* end of same */
775 assert(t
->op
== '.');
776 assert(t
->left
!= NULL
&& t
->left
->cnfa
.nstates
> 0);
777 assert(t
->right
!= NULL
&& t
->right
->cnfa
.nstates
> 0);
779 if (t
->left
->flags
& SHORTER
) /* reverse scan */
780 return crevdissect(v
, t
, begin
, end
);
782 d
= newdfa(v
, &t
->left
->cnfa
, &v
->g
->cmap
, DOMALLOC
);
785 d2
= newdfa(v
, &t
->right
->cnfa
, &v
->g
->cmap
, DOMALLOC
);
791 MDEBUG(("cconcat %d\n", t
->retry
));
793 /* pick a tentative midpoint */
794 if (v
->mem
[t
->retry
] == 0)
796 mid
= longest(v
, d
, begin
, end
, (int *) NULL
);
803 MDEBUG(("tentative midpoint %ld\n", LOFF(mid
)));
804 v
->mem
[t
->retry
] = (mid
- begin
) + 1;
808 mid
= begin
+ (v
->mem
[t
->retry
] - 1);
809 MDEBUG(("working midpoint %ld\n", LOFF(mid
)));
812 /* iterate until satisfaction or failure */
815 /* try this midpoint on for size */
816 er
= cdissect(v
, t
->left
, begin
, mid
);
817 if (er
== REG_OKAY
&&
818 longest(v
, d2
, mid
, end
, (int *) NULL
) == end
&&
819 (er
= cdissect(v
, t
->right
, mid
, end
)) ==
821 break; /* NOTE BREAK OUT */
822 if (er
!= REG_OKAY
&& er
!= REG_NOMATCH
)
829 /* that midpoint didn't work, find a new one */
832 /* all possibilities exhausted */
833 MDEBUG(("%d no midpoint\n", t
->retry
));
838 mid
= longest(v
, d
, begin
, mid
- 1, (int *) NULL
);
841 /* failed to find a new one */
842 MDEBUG(("%d failed midpoint\n", t
->retry
));
847 MDEBUG(("%d: new midpoint %ld\n", t
->retry
, LOFF(mid
)));
848 v
->mem
[t
->retry
] = (mid
- begin
) + 1;
854 MDEBUG(("successful\n"));
861 * crevdissect - determine backref shortest-first subexpression matches
862 * The retry memory stores the offset of the trial midpoint from begin,
863 * plus 1 so that 0 uniquely means "clean slate".
865 static int /* regexec return code */
866 crevdissect(struct vars
* v
,
868 chr
*begin
, /* beginning of relevant substring */
869 chr
*end
) /* end of same */
876 assert(t
->op
== '.');
877 assert(t
->left
!= NULL
&& t
->left
->cnfa
.nstates
> 0);
878 assert(t
->right
!= NULL
&& t
->right
->cnfa
.nstates
> 0);
879 assert(t
->left
->flags
& SHORTER
);
881 /* concatenation -- need to split the substring between parts */
882 d
= newdfa(v
, &t
->left
->cnfa
, &v
->g
->cmap
, DOMALLOC
);
885 d2
= newdfa(v
, &t
->right
->cnfa
, &v
->g
->cmap
, DOMALLOC
);
891 MDEBUG(("crev %d\n", t
->retry
));
893 /* pick a tentative midpoint */
894 if (v
->mem
[t
->retry
] == 0)
896 mid
= shortest(v
, d
, begin
, begin
, end
, (chr
**) NULL
, (int *) NULL
);
903 MDEBUG(("tentative midpoint %ld\n", LOFF(mid
)));
904 v
->mem
[t
->retry
] = (mid
- begin
) + 1;
908 mid
= begin
+ (v
->mem
[t
->retry
] - 1);
909 MDEBUG(("working midpoint %ld\n", LOFF(mid
)));
912 /* iterate until satisfaction or failure */
915 /* try this midpoint on for size */
916 er
= cdissect(v
, t
->left
, begin
, mid
);
917 if (er
== REG_OKAY
&&
918 longest(v
, d2
, mid
, end
, (int *) NULL
) == end
&&
919 (er
= cdissect(v
, t
->right
, mid
, end
)) ==
921 break; /* NOTE BREAK OUT */
922 if (er
!= REG_OKAY
&& er
!= REG_NOMATCH
)
929 /* that midpoint didn't work, find a new one */
932 /* all possibilities exhausted */
933 MDEBUG(("%d no midpoint\n", t
->retry
));
938 mid
= shortest(v
, d
, begin
, mid
+ 1, end
, (chr
**) NULL
, (int *) NULL
);
941 /* failed to find a new one */
942 MDEBUG(("%d failed midpoint\n", t
->retry
));
947 MDEBUG(("%d: new midpoint %ld\n", t
->retry
, LOFF(mid
)));
948 v
->mem
[t
->retry
] = (mid
- begin
) + 1;
954 MDEBUG(("successful\n"));
961 * cbrdissect - determine backref subexpression matches
963 static int /* regexec return code */
964 cbrdissect(struct vars
* v
,
966 chr
*begin
, /* beginning of relevant substring */
967 chr
*end
) /* end of same */
979 assert(t
->op
== 'b');
981 assert((size_t) n
< v
->nmatch
);
983 MDEBUG(("cbackref n%d %d{%d-%d}\n", t
->retry
, n
, min
, max
));
985 if (v
->pmatch
[n
].rm_so
== -1)
987 paren
= v
->start
+ v
->pmatch
[n
].rm_so
;
988 len
= v
->pmatch
[n
].rm_eo
- v
->pmatch
[n
].rm_so
;
990 /* no room to maneuver -- retries are pointless */
991 if (v
->mem
[t
->retry
])
993 v
->mem
[t
->retry
] = 1;
995 /* special-case zero-length string */
1003 /* and too-short string */
1004 assert(end
>= begin
);
1005 if ((size_t) (end
- begin
) < len
)
1009 /* count occurrences */
1011 for (p
= begin
; p
<= stop
&& (i
< max
|| max
== INFINITY
); p
+= len
)
1013 if ((*v
->g
->compare
) (paren
, p
, len
) != 0)
1017 MDEBUG(("cbackref found %d\n", i
));
1019 /* and sort it out */
1020 if (p
!= end
) /* didn't consume all of it */
1022 if (min
<= i
&& (i
<= max
|| max
== INFINITY
))
1024 return REG_NOMATCH
; /* out of range */
1028 * caltdissect - determine alternative subexpression matches (w. complications)
1030 static int /* regexec return code */
1031 caltdissect(struct vars
* v
,
1033 chr
*begin
, /* beginning of relevant substring */
1034 chr
*end
) /* end of same */
1039 #define UNTRIED 0 /* not yet tried at all */
1040 #define TRYING 1 /* top matched, trying submatches */
1041 #define TRIED 2 /* top didn't match or submatches
1046 assert(t
->op
== '|');
1047 if (v
->mem
[t
->retry
] == TRIED
)
1048 return caltdissect(v
, t
->right
, begin
, end
);
1050 MDEBUG(("calt n%d\n", t
->retry
));
1051 assert(t
->left
!= NULL
);
1053 if (v
->mem
[t
->retry
] == UNTRIED
)
1055 d
= newdfa(v
, &t
->left
->cnfa
, &v
->g
->cmap
, DOMALLOC
);
1058 if (longest(v
, d
, begin
, end
, (int *) NULL
) != end
)
1061 v
->mem
[t
->retry
] = TRIED
;
1062 return caltdissect(v
, t
->right
, begin
, end
);
1065 MDEBUG(("calt matched\n"));
1066 v
->mem
[t
->retry
] = TRYING
;
1069 er
= cdissect(v
, t
->left
, begin
, end
);
1070 if (er
!= REG_NOMATCH
)
1073 v
->mem
[t
->retry
] = TRIED
;
1074 return caltdissect(v
, t
->right
, begin
, end
);
1079 #include "rege_dfa.c"