]> git.saurik.com Git - wxWidgets.git/blob - src/regex/regexec.c
added condition for DARWIN (thanks to Steve Hartwell)
[wxWidgets.git] / src / regex / regexec.c
1 /*
2 * re_*exec and friends - match REs
3 *
4 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
5 *
6 * Development of this software was funded, in part, by Cray Research Inc.,
7 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
8 * Corporation, none of whom are responsible for the results. The author
9 * thanks all of them.
10 *
11 * Redistribution and use in source and binary forms -- with or without
12 * modification -- are permitted for any purpose, provided that
13 * redistributions in source form retain this entire copyright notice and
14 * indicate the origin and nature of any modifications.
15 *
16 * I'd appreciate being given credit for this package in the documentation
17 * of software which uses it, but that is not a requirement.
18 *
19 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
20 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
21 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
22 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 * $Header: /projects/cvsroot/pgsql-server/src/backend/regex/regexec.c,v 1.23 2003/08/08 21:41:56 momjian Exp $
31 *
32 */
33
34 #include "regguts.h"
35
36
37
38 /* lazy-DFA representation */
39 struct arcp
40 { /* "pointer" to an outarc */
41 struct sset *ss;
42 color co;
43 };
44
45 struct sset
46 { /* state set */
47 unsigned *states; /* pointer to bitvector */
48 unsigned hash; /* hash of bitvector */
49 #define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw))
50 #define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \
51 memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
52 int flags;
53 #define STARTER 01 /* the initial state set */
54 #define POSTSTATE 02 /* includes the goal state */
55 #define LOCKED 04 /* locked in cache */
56 #define NOPROGRESS 010 /* zero-progress state set */
57 struct arcp ins; /* chain of inarcs pointing here */
58 chr *lastseen; /* last entered on arrival here */
59 struct sset **outs; /* outarc vector indexed by color */
60 struct arcp *inchain; /* chain-pointer vector for outarcs */
61 };
62
63 struct dfa
64 {
65 int nssets; /* size of cache */
66 int nssused; /* how many entries occupied yet */
67 int nstates; /* number of states */
68 int ncolors; /* length of outarc and inchain vectors */
69 int wordsper; /* length of state-set bitvectors */
70 struct sset *ssets; /* state-set cache */
71 unsigned *statesarea; /* bitvector storage */
72 unsigned *work; /* pointer to work area within statesarea */
73 struct sset **outsarea; /* outarc-vector storage */
74 struct arcp *incarea; /* inchain storage */
75 struct cnfa *cnfa;
76 struct colormap *cm;
77 chr *lastpost; /* location of last cache-flushed success */
78 chr *lastnopr; /* location of last cache-flushed
79 * NOPROGRESS */
80 struct sset *search; /* replacement-search-pointer memory */
81 int cptsmalloced; /* were the areas individually malloced? */
82 char *mallocarea; /* self, or master malloced area, or NULL */
83 };
84
85 #define WORK 1 /* number of work bitvectors needed */
86
87 /* setup for non-malloc allocation for small cases */
88 #define FEWSTATES 20 /* must be less than UBITS */
89 #define FEWCOLORS 15
90 struct smalldfa
91 {
92 struct dfa dfa;
93 struct sset ssets[FEWSTATES * 2];
94 unsigned statesarea[FEWSTATES * 2 + WORK];
95 struct sset *outsarea[FEWSTATES * 2 * FEWCOLORS];
96 struct arcp incarea[FEWSTATES * 2 * FEWCOLORS];
97 };
98
99 #define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */
100
101
102
103 /* internal variables, bundled for easy passing around */
104 struct vars
105 {
106 regex_t *re;
107 struct guts *g;
108 int eflags; /* copies of arguments */
109 size_t nmatch;
110 regmatch_t *pmatch;
111 rm_detail_t *details;
112 chr *start; /* start of string */
113 chr *stop; /* just past end of string */
114 int err; /* error code if any (0 none) */
115 regoff_t *mem; /* memory vector for backtracking */
116 struct smalldfa dfa1;
117 struct smalldfa dfa2;
118 };
119
120 #define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */
121 #define ISERR() VISERR(v)
122 #define VERR(vv,e) (((vv)->err) ? (vv)->err : ((vv)->err = (e)))
123 #define ERR(e) VERR(v, e) /* record an error */
124 #define NOERR() {if (ISERR()) return v->err;} /* if error seen, return
125 * it */
126 #define OFF(p) ((p) - v->start)
127 #define LOFF(p) ((long)OFF(p))
128
129
130
131 /*
132 * forward declarations
133 */
134 /* === regexec.c === */
135 static int find(struct vars *, struct cnfa *, struct colormap *);
136 static int cfind(struct vars *, struct cnfa *, struct colormap *);
137 static int cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **);
138 static void zapsubs(regmatch_t *, size_t);
139 static void zapmem(struct vars *, struct subre *);
140 static void subset(struct vars *, struct subre *, chr *, chr *);
141 static int dissect(struct vars *, struct subre *, chr *, chr *);
142 static int condissect(struct vars *, struct subre *, chr *, chr *);
143 static int altdissect(struct vars *, struct subre *, chr *, chr *);
144 static int cdissect(struct vars *, struct subre *, chr *, chr *);
145 static int ccondissect(struct vars *, struct subre *, chr *, chr *);
146 static int crevdissect(struct vars *, struct subre *, chr *, chr *);
147 static int cbrdissect(struct vars *, struct subre *, chr *, chr *);
148 static int caltdissect(struct vars *, struct subre *, chr *, chr *);
149
150 /* === rege_dfa.c === */
151 static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *);
152 static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *);
153 static chr *lastcold(struct vars *, struct dfa *);
154 static struct dfa *newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *);
155 static void freedfa(struct dfa *);
156 static unsigned hash(unsigned *, int);
157 static struct sset *initialize(struct vars *, struct dfa *, chr *);
158 static struct sset *miss(struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *);
159 static int lacon(struct vars *, struct cnfa *, chr *, pcolor);
160 static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *);
161 static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *);
162
163
164 /*
165 * regexec - match regular expression
166 */
167 int
168 regexec(regex_t *re,
169 const chr *string,
170 size_t nmatch,
171 regmatch_t pmatch[],
172 int flags)
173 {
174 rm_detail_t det;
175 return wx_regexec(re, string, wx_strlen(string), &det, nmatch, pmatch, flags);
176 }
177 int
178 wx_regexec(regex_t *re,
179 const chr *string,
180 size_t len,
181 rm_detail_t *details,
182 size_t nmatch,
183 regmatch_t pmatch[],
184 int flags)
185 {
186 struct vars var;
187 register struct vars *v = &var;
188 int st;
189 size_t n;
190 int backref;
191
192 #define LOCALMAT 20
193 regmatch_t mat[LOCALMAT];
194
195 #define LOCALMEM 40
196 regoff_t mem[LOCALMEM];
197
198 /* sanity checks */
199 if (re == NULL || string == NULL || re->re_magic != REMAGIC)
200 return REG_INVARG;
201 if (re->re_csize != sizeof(chr))
202 return REG_MIXED;
203
204 /* setup */
205 v->re = re;
206 v->g = (struct guts *) re->re_guts;
207 if ((v->g->cflags & REG_EXPECT) && details == NULL)
208 return REG_INVARG;
209 if (v->g->info & REG_UIMPOSSIBLE)
210 return REG_NOMATCH;
211 backref = (v->g->info & REG_UBACKREF) ? 1 : 0;
212 v->eflags = flags;
213 if (v->g->cflags & REG_NOSUB)
214 nmatch = 0; /* override client */
215 v->nmatch = nmatch;
216 if (backref)
217 {
218 /* need work area */
219 if (v->g->nsub + 1 <= LOCALMAT)
220 v->pmatch = mat;
221 else
222 v->pmatch = (regmatch_t *) MALLOC((v->g->nsub + 1) *
223 sizeof(regmatch_t));
224 if (v->pmatch == NULL)
225 return REG_ESPACE;
226 v->nmatch = v->g->nsub + 1;
227 }
228 else
229 v->pmatch = pmatch;
230 v->details = details;
231 v->start = (chr *) string;
232 v->stop = (chr *) string + len;
233 v->err = 0;
234 if (backref)
235 {
236 /* need retry memory */
237 assert(v->g->ntree >= 0);
238 n = (size_t) v->g->ntree;
239 if (n <= LOCALMEM)
240 v->mem = mem;
241 else
242 v->mem = (regoff_t *) MALLOC(n * sizeof(regoff_t));
243 if (v->mem == NULL)
244 {
245 if (v->pmatch != pmatch && v->pmatch != mat)
246 FREE(v->pmatch);
247 return REG_ESPACE;
248 }
249 }
250 else
251 v->mem = NULL;
252
253 /* do it */
254 assert(v->g->tree != NULL);
255 if (backref)
256 st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
257 else
258 st = find(v, &v->g->tree->cnfa, &v->g->cmap);
259
260 /* copy (portion of) match vector over if necessary */
261 if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0)
262 {
263 zapsubs(pmatch, nmatch);
264 n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
265 memcpy(VS(pmatch), VS(v->pmatch), n * sizeof(regmatch_t));
266 }
267
268 /* clean up */
269 if (v->pmatch != pmatch && v->pmatch != mat)
270 FREE(v->pmatch);
271 if (v->mem != NULL && v->mem != mem)
272 FREE(v->mem);
273 return st;
274 }
275
276 /*
277 * find - find a match for the main NFA (no-complications case)
278 */
279 static int
280 find(struct vars * v,
281 struct cnfa * cnfa,
282 struct colormap * cm)
283 {
284 struct dfa *s;
285 struct dfa *d;
286 chr *begin;
287 chr *end = NULL;
288 chr *cold;
289 chr *open; /* open and close of range of possible
290 * starts */
291 chr *close;
292 int hitend;
293 int shorter = (v->g->tree->flags & SHORTER) ? 1 : 0;
294
295 /* first, a shot with the search RE */
296 s = newdfa(v, &v->g->search, cm, &v->dfa1);
297 assert(!(ISERR() && s != NULL));
298 NOERR();
299 MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
300 cold = NULL;
301 close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *) NULL);
302 freedfa(s);
303 NOERR();
304 if (v->g->cflags & REG_EXPECT)
305 {
306 assert(v->details != NULL);
307 if (cold != NULL)
308 v->details->rm_extend.rm_so = OFF(cold);
309 else
310 v->details->rm_extend.rm_so = OFF(v->stop);
311 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
312 }
313 if (close == NULL) /* not found */
314 return REG_NOMATCH;
315 if (v->nmatch == 0) /* found, don't need exact location */
316 return REG_OKAY;
317
318 /* find starting point and match */
319 assert(cold != NULL);
320 open = cold;
321 cold = NULL;
322 MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
323 d = newdfa(v, cnfa, cm, &v->dfa1);
324 assert(!(ISERR() && d != NULL));
325 NOERR();
326 for (begin = open; begin <= close; begin++)
327 {
328 MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
329 if (shorter)
330 end = shortest(v, d, begin, begin, v->stop,
331 (chr **) NULL, &hitend);
332 else
333 end = longest(v, d, begin, v->stop, &hitend);
334 NOERR();
335 if (hitend && cold == NULL)
336 cold = begin;
337 if (end != NULL)
338 break; /* NOTE BREAK OUT */
339 }
340 assert(end != NULL); /* search RE succeeded so loop should */
341 freedfa(d);
342
343 /* and pin down details */
344 assert(v->nmatch > 0);
345 v->pmatch[0].rm_so = OFF(begin);
346 v->pmatch[0].rm_eo = OFF(end);
347 if (v->g->cflags & REG_EXPECT)
348 {
349 if (cold != NULL)
350 v->details->rm_extend.rm_so = OFF(cold);
351 else
352 v->details->rm_extend.rm_so = OFF(v->stop);
353 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
354 }
355 if (v->nmatch == 1) /* no need for submatches */
356 return REG_OKAY;
357
358 /* submatches */
359 zapsubs(v->pmatch, v->nmatch);
360 return dissect(v, v->g->tree, begin, end);
361 }
362
363 /*
364 * cfind - find a match for the main NFA (with complications)
365 */
366 static int
367 cfind(struct vars * v,
368 struct cnfa * cnfa,
369 struct colormap * cm)
370 {
371 struct dfa *s;
372 struct dfa *d;
373 chr *cold;
374 int ret;
375
376 s = newdfa(v, &v->g->search, cm, &v->dfa1);
377 NOERR();
378 d = newdfa(v, cnfa, cm, &v->dfa2);
379 if (ISERR())
380 {
381 assert(d == NULL);
382 freedfa(s);
383 return v->err;
384 }
385
386 ret = cfindloop(v, cnfa, cm, d, s, &cold);
387
388 freedfa(d);
389 freedfa(s);
390 NOERR();
391 if (v->g->cflags & REG_EXPECT)
392 {
393 assert(v->details != NULL);
394 if (cold != NULL)
395 v->details->rm_extend.rm_so = OFF(cold);
396 else
397 v->details->rm_extend.rm_so = OFF(v->stop);
398 v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
399 }
400 return ret;
401 }
402
403 /*
404 * cfindloop - the heart of cfind
405 */
406 static int
407 cfindloop(struct vars * v,
408 struct cnfa * cnfa,
409 struct colormap * cm,
410 struct dfa * d,
411 struct dfa * s,
412 chr **coldp) /* where to put coldstart pointer */
413 {
414 chr *begin;
415 chr *end;
416 chr *cold;
417 chr *open; /* open and close of range of possible
418 * starts */
419 chr *close;
420 chr *estart;
421 chr *estop;
422 int er;
423 int shorter = v->g->tree->flags & SHORTER;
424 int hitend;
425
426 assert(d != NULL && s != NULL);
427 cold = NULL;
428 close = v->start;
429 do
430 {
431 MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
432 close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
433 if (close == NULL)
434 break; /* NOTE BREAK */
435 assert(cold != NULL);
436 open = cold;
437 cold = NULL;
438 MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
439 for (begin = open; begin <= close; begin++)
440 {
441 MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
442 estart = begin;
443 estop = v->stop;
444 for (;;)
445 {
446 if (shorter)
447 end = shortest(v, d, begin, estart,
448 estop, (chr **) NULL, &hitend);
449 else
450 end = longest(v, d, begin, estop,
451 &hitend);
452 if (hitend && cold == NULL)
453 cold = begin;
454 if (end == NULL)
455 break; /* NOTE BREAK OUT */
456 MDEBUG(("tentative end %ld\n", LOFF(end)));
457 zapsubs(v->pmatch, v->nmatch);
458 zapmem(v, v->g->tree);
459 er = cdissect(v, v->g->tree, begin, end);
460 if (er == REG_OKAY)
461 {
462 if (v->nmatch > 0)
463 {
464 v->pmatch[0].rm_so = OFF(begin);
465 v->pmatch[0].rm_eo = OFF(end);
466 }
467 *coldp = cold;
468 return REG_OKAY;
469 }
470 if (er != REG_NOMATCH)
471 {
472 ERR(er);
473 return er;
474 }
475 if ((shorter) ? end == estop : end == begin)
476 {
477 /* no point in trying again */
478 *coldp = cold;
479 return REG_NOMATCH;
480 }
481 /* go around and try again */
482 if (shorter)
483 estart = end + 1;
484 else
485 estop = end - 1;
486 }
487 }
488 } while (close < v->stop);
489
490 *coldp = cold;
491 return REG_NOMATCH;
492 }
493
494 /*
495 * zapsubs - initialize the subexpression matches to "no match"
496 */
497 static void
498 zapsubs(regmatch_t *p,
499 size_t n)
500 {
501 size_t i;
502
503 for (i = n - 1; i > 0; i--)
504 {
505 p[i].rm_so = -1;
506 p[i].rm_eo = -1;
507 }
508 }
509
510 /*
511 * zapmem - initialize the retry memory of a subtree to zeros
512 */
513 static void
514 zapmem(struct vars * v,
515 struct subre * t)
516 {
517 if (t == NULL)
518 return;
519
520 assert(v->mem != NULL);
521 v->mem[t->retry] = 0;
522 if (t->op == '(')
523 {
524 assert(t->subno > 0);
525 v->pmatch[t->subno].rm_so = -1;
526 v->pmatch[t->subno].rm_eo = -1;
527 }
528
529 if (t->left != NULL)
530 zapmem(v, t->left);
531 if (t->right != NULL)
532 zapmem(v, t->right);
533 }
534
535 /*
536 * subset - set any subexpression relevant to a successful subre
537 */
538 static void
539 subset(struct vars * v,
540 struct subre * sub,
541 chr *begin,
542 chr *end)
543 {
544 int n = sub->subno;
545
546 assert(n > 0);
547 if ((size_t) n >= v->nmatch)
548 return;
549
550 MDEBUG(("setting %d\n", n));
551 v->pmatch[n].rm_so = OFF(begin);
552 v->pmatch[n].rm_eo = OFF(end);
553 }
554
555 /*
556 * dissect - determine subexpression matches (uncomplicated case)
557 */
558 static int /* regexec return code */
559 dissect(struct vars * v,
560 struct subre * t,
561 chr *begin, /* beginning of relevant substring */
562 chr *end) /* end of same */
563 {
564 assert(t != NULL);
565 MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end)));
566
567 switch (t->op)
568 {
569 case '=': /* terminal node */
570 assert(t->left == NULL && t->right == NULL);
571 return REG_OKAY; /* no action, parent did the work */
572 break;
573 case '|': /* alternation */
574 assert(t->left != NULL);
575 return altdissect(v, t, begin, end);
576 break;
577 case 'b': /* back ref -- shouldn't be calling us! */
578 return REG_ASSERT;
579 break;
580 case '.': /* concatenation */
581 assert(t->left != NULL && t->right != NULL);
582 return condissect(v, t, begin, end);
583 break;
584 case '(': /* capturing */
585 assert(t->left != NULL && t->right == NULL);
586 assert(t->subno > 0);
587 subset(v, t, begin, end);
588 return dissect(v, t->left, begin, end);
589 break;
590 default:
591 return REG_ASSERT;
592 break;
593 }
594 }
595
596 /*
597 * condissect - determine concatenation subexpression matches (uncomplicated)
598 */
599 static int /* regexec return code */
600 condissect(struct vars * v,
601 struct subre * t,
602 chr *begin, /* beginning of relevant substring */
603 chr *end) /* end of same */
604 {
605 struct dfa *d;
606 struct dfa *d2;
607 chr *mid;
608 int i;
609 int shorter = (t->left->flags & SHORTER) ? 1 : 0;
610 chr *stop = (shorter) ? end : begin;
611
612 assert(t->op == '.');
613 assert(t->left != NULL && t->left->cnfa.nstates > 0);
614 assert(t->right != NULL && t->right->cnfa.nstates > 0);
615
616 d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
617 NOERR();
618 d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2);
619 if (ISERR())
620 {
621 assert(d2 == NULL);
622 freedfa(d);
623 return v->err;
624 }
625
626 /* pick a tentative midpoint */
627 if (shorter)
628 mid = shortest(v, d, begin, begin, end, (chr **) NULL,
629 (int *) NULL);
630 else
631 mid = longest(v, d, begin, end, (int *) NULL);
632 if (mid == NULL)
633 {
634 freedfa(d);
635 freedfa(d2);
636 return REG_ASSERT;
637 }
638 MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
639
640 /* iterate until satisfaction or failure */
641 while (longest(v, d2, mid, end, (int *) NULL) != end)
642 {
643 /* that midpoint didn't work, find a new one */
644 if (mid == stop)
645 {
646 /* all possibilities exhausted! */
647 MDEBUG(("no midpoint!\n"));
648 freedfa(d);
649 freedfa(d2);
650 return REG_ASSERT;
651 }
652 if (shorter)
653 mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL,
654 (int *) NULL);
655 else
656 mid = longest(v, d, begin, mid - 1, (int *) NULL);
657 if (mid == NULL)
658 {
659 /* failed to find a new one! */
660 MDEBUG(("failed midpoint!\n"));
661 freedfa(d);
662 freedfa(d2);
663 return REG_ASSERT;
664 }
665 MDEBUG(("new midpoint %ld\n", LOFF(mid)));
666 }
667
668 /* satisfaction */
669 MDEBUG(("successful\n"));
670 freedfa(d);
671 freedfa(d2);
672 i = dissect(v, t->left, begin, mid);
673 if (i != REG_OKAY)
674 return i;
675 return dissect(v, t->right, mid, end);
676 }
677
678 /*
679 * altdissect - determine alternative subexpression matches (uncomplicated)
680 */
681 static int /* regexec return code */
682 altdissect(struct vars * v,
683 struct subre * t,
684 chr *begin, /* beginning of relevant substring */
685 chr *end) /* end of same */
686 {
687 struct dfa *d;
688 int i;
689
690 assert(t != NULL);
691 assert(t->op == '|');
692
693 for (i = 0; t != NULL; t = t->right, i++)
694 {
695 MDEBUG(("trying %dth\n", i));
696 assert(t->left != NULL && t->left->cnfa.nstates > 0);
697 d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
698 if (ISERR())
699 return v->err;
700 if (longest(v, d, begin, end, (int *) NULL) == end)
701 {
702 MDEBUG(("success\n"));
703 freedfa(d);
704 return dissect(v, t->left, begin, end);
705 }
706 freedfa(d);
707 }
708 return REG_ASSERT; /* none of them matched?!? */
709 }
710
711 /*
712 * cdissect - determine subexpression matches (with complications)
713 * The retry memory stores the offset of the trial midpoint from begin,
714 * plus 1 so that 0 uniquely means "clean slate".
715 */
716 static int /* regexec return code */
717 cdissect(struct vars * v,
718 struct subre * t,
719 chr *begin, /* beginning of relevant substring */
720 chr *end) /* end of same */
721 {
722 int er;
723
724 assert(t != NULL);
725 MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));
726
727 switch (t->op)
728 {
729 case '=': /* terminal node */
730 assert(t->left == NULL && t->right == NULL);
731 return REG_OKAY; /* no action, parent did the work */
732 break;
733 case '|': /* alternation */
734 assert(t->left != NULL);
735 return caltdissect(v, t, begin, end);
736 break;
737 case 'b': /* back ref -- shouldn't be calling us! */
738 assert(t->left == NULL && t->right == NULL);
739 return cbrdissect(v, t, begin, end);
740 break;
741 case '.': /* concatenation */
742 assert(t->left != NULL && t->right != NULL);
743 return ccondissect(v, t, begin, end);
744 break;
745 case '(': /* capturing */
746 assert(t->left != NULL && t->right == NULL);
747 assert(t->subno > 0);
748 er = cdissect(v, t->left, begin, end);
749 if (er == REG_OKAY)
750 subset(v, t, begin, end);
751 return er;
752 break;
753 default:
754 return REG_ASSERT;
755 break;
756 }
757 }
758
759 /*
760 * ccondissect - concatenation subexpression matches (with complications)
761 * The retry memory stores the offset of the trial midpoint from begin,
762 * plus 1 so that 0 uniquely means "clean slate".
763 */
764 static int /* regexec return code */
765 ccondissect(struct vars * v,
766 struct subre * t,
767 chr *begin, /* beginning of relevant substring */
768 chr *end) /* end of same */
769 {
770 struct dfa *d;
771 struct dfa *d2;
772 chr *mid;
773 int er;
774
775 assert(t->op == '.');
776 assert(t->left != NULL && t->left->cnfa.nstates > 0);
777 assert(t->right != NULL && t->right->cnfa.nstates > 0);
778
779 if (t->left->flags & SHORTER) /* reverse scan */
780 return crevdissect(v, t, begin, end);
781
782 d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
783 if (ISERR())
784 return v->err;
785 d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
786 if (ISERR())
787 {
788 freedfa(d);
789 return v->err;
790 }
791 MDEBUG(("cconcat %d\n", t->retry));
792
793 /* pick a tentative midpoint */
794 if (v->mem[t->retry] == 0)
795 {
796 mid = longest(v, d, begin, end, (int *) NULL);
797 if (mid == NULL)
798 {
799 freedfa(d);
800 freedfa(d2);
801 return REG_NOMATCH;
802 }
803 MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
804 v->mem[t->retry] = (mid - begin) + 1;
805 }
806 else
807 {
808 mid = begin + (v->mem[t->retry] - 1);
809 MDEBUG(("working midpoint %ld\n", LOFF(mid)));
810 }
811
812 /* iterate until satisfaction or failure */
813 for (;;)
814 {
815 /* try this midpoint on for size */
816 er = cdissect(v, t->left, begin, mid);
817 if (er == REG_OKAY &&
818 longest(v, d2, mid, end, (int *) NULL) == end &&
819 (er = cdissect(v, t->right, mid, end)) ==
820 REG_OKAY)
821 break; /* NOTE BREAK OUT */
822 if (er != REG_OKAY && er != REG_NOMATCH)
823 {
824 freedfa(d);
825 freedfa(d2);
826 return er;
827 }
828
829 /* that midpoint didn't work, find a new one */
830 if (mid == begin)
831 {
832 /* all possibilities exhausted */
833 MDEBUG(("%d no midpoint\n", t->retry));
834 freedfa(d);
835 freedfa(d2);
836 return REG_NOMATCH;
837 }
838 mid = longest(v, d, begin, mid - 1, (int *) NULL);
839 if (mid == NULL)
840 {
841 /* failed to find a new one */
842 MDEBUG(("%d failed midpoint\n", t->retry));
843 freedfa(d);
844 freedfa(d2);
845 return REG_NOMATCH;
846 }
847 MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
848 v->mem[t->retry] = (mid - begin) + 1;
849 zapmem(v, t->left);
850 zapmem(v, t->right);
851 }
852
853 /* satisfaction */
854 MDEBUG(("successful\n"));
855 freedfa(d);
856 freedfa(d2);
857 return REG_OKAY;
858 }
859
860 /*
861 * crevdissect - determine backref shortest-first subexpression matches
862 * The retry memory stores the offset of the trial midpoint from begin,
863 * plus 1 so that 0 uniquely means "clean slate".
864 */
865 static int /* regexec return code */
866 crevdissect(struct vars * v,
867 struct subre * t,
868 chr *begin, /* beginning of relevant substring */
869 chr *end) /* end of same */
870 {
871 struct dfa *d;
872 struct dfa *d2;
873 chr *mid;
874 int er;
875
876 assert(t->op == '.');
877 assert(t->left != NULL && t->left->cnfa.nstates > 0);
878 assert(t->right != NULL && t->right->cnfa.nstates > 0);
879 assert(t->left->flags & SHORTER);
880
881 /* concatenation -- need to split the substring between parts */
882 d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
883 if (ISERR())
884 return v->err;
885 d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
886 if (ISERR())
887 {
888 freedfa(d);
889 return v->err;
890 }
891 MDEBUG(("crev %d\n", t->retry));
892
893 /* pick a tentative midpoint */
894 if (v->mem[t->retry] == 0)
895 {
896 mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
897 if (mid == NULL)
898 {
899 freedfa(d);
900 freedfa(d2);
901 return REG_NOMATCH;
902 }
903 MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
904 v->mem[t->retry] = (mid - begin) + 1;
905 }
906 else
907 {
908 mid = begin + (v->mem[t->retry] - 1);
909 MDEBUG(("working midpoint %ld\n", LOFF(mid)));
910 }
911
912 /* iterate until satisfaction or failure */
913 for (;;)
914 {
915 /* try this midpoint on for size */
916 er = cdissect(v, t->left, begin, mid);
917 if (er == REG_OKAY &&
918 longest(v, d2, mid, end, (int *) NULL) == end &&
919 (er = cdissect(v, t->right, mid, end)) ==
920 REG_OKAY)
921 break; /* NOTE BREAK OUT */
922 if (er != REG_OKAY && er != REG_NOMATCH)
923 {
924 freedfa(d);
925 freedfa(d2);
926 return er;
927 }
928
929 /* that midpoint didn't work, find a new one */
930 if (mid == end)
931 {
932 /* all possibilities exhausted */
933 MDEBUG(("%d no midpoint\n", t->retry));
934 freedfa(d);
935 freedfa(d2);
936 return REG_NOMATCH;
937 }
938 mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
939 if (mid == NULL)
940 {
941 /* failed to find a new one */
942 MDEBUG(("%d failed midpoint\n", t->retry));
943 freedfa(d);
944 freedfa(d2);
945 return REG_NOMATCH;
946 }
947 MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
948 v->mem[t->retry] = (mid - begin) + 1;
949 zapmem(v, t->left);
950 zapmem(v, t->right);
951 }
952
953 /* satisfaction */
954 MDEBUG(("successful\n"));
955 freedfa(d);
956 freedfa(d2);
957 return REG_OKAY;
958 }
959
960 /*
961 * cbrdissect - determine backref subexpression matches
962 */
963 static int /* regexec return code */
964 cbrdissect(struct vars * v,
965 struct subre * t,
966 chr *begin, /* beginning of relevant substring */
967 chr *end) /* end of same */
968 {
969 int i;
970 int n = t->subno;
971 size_t len;
972 chr *paren;
973 chr *p;
974 chr *stop;
975 int min = t->min;
976 int max = t->max;
977
978 assert(t != NULL);
979 assert(t->op == 'b');
980 assert(n >= 0);
981 assert((size_t) n < v->nmatch);
982
983 MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max));
984
985 if (v->pmatch[n].rm_so == -1)
986 return REG_NOMATCH;
987 paren = v->start + v->pmatch[n].rm_so;
988 len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
989
990 /* no room to maneuver -- retries are pointless */
991 if (v->mem[t->retry])
992 return REG_NOMATCH;
993 v->mem[t->retry] = 1;
994
995 /* special-case zero-length string */
996 if (len == 0)
997 {
998 if (begin == end)
999 return REG_OKAY;
1000 return REG_NOMATCH;
1001 }
1002
1003 /* and too-short string */
1004 assert(end >= begin);
1005 if ((size_t) (end - begin) < len)
1006 return REG_NOMATCH;
1007 stop = end - len;
1008
1009 /* count occurrences */
1010 i = 0;
1011 for (p = begin; p <= stop && (i < max || max == INFINITY); p += len)
1012 {
1013 if ((*v->g->compare) (paren, p, len) != 0)
1014 break;
1015 i++;
1016 }
1017 MDEBUG(("cbackref found %d\n", i));
1018
1019 /* and sort it out */
1020 if (p != end) /* didn't consume all of it */
1021 return REG_NOMATCH;
1022 if (min <= i && (i <= max || max == INFINITY))
1023 return REG_OKAY;
1024 return REG_NOMATCH; /* out of range */
1025 }
1026
1027 /*
1028 * caltdissect - determine alternative subexpression matches (w. complications)
1029 */
1030 static int /* regexec return code */
1031 caltdissect(struct vars * v,
1032 struct subre * t,
1033 chr *begin, /* beginning of relevant substring */
1034 chr *end) /* end of same */
1035 {
1036 struct dfa *d;
1037 int er;
1038
1039 #define UNTRIED 0 /* not yet tried at all */
1040 #define TRYING 1 /* top matched, trying submatches */
1041 #define TRIED 2 /* top didn't match or submatches
1042 * exhausted */
1043
1044 if (t == NULL)
1045 return REG_NOMATCH;
1046 assert(t->op == '|');
1047 if (v->mem[t->retry] == TRIED)
1048 return caltdissect(v, t->right, begin, end);
1049
1050 MDEBUG(("calt n%d\n", t->retry));
1051 assert(t->left != NULL);
1052
1053 if (v->mem[t->retry] == UNTRIED)
1054 {
1055 d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
1056 if (ISERR())
1057 return v->err;
1058 if (longest(v, d, begin, end, (int *) NULL) != end)
1059 {
1060 freedfa(d);
1061 v->mem[t->retry] = TRIED;
1062 return caltdissect(v, t->right, begin, end);
1063 }
1064 freedfa(d);
1065 MDEBUG(("calt matched\n"));
1066 v->mem[t->retry] = TRYING;
1067 }
1068
1069 er = cdissect(v, t->left, begin, end);
1070 if (er != REG_NOMATCH)
1071 return er;
1072
1073 v->mem[t->retry] = TRIED;
1074 return caltdissect(v, t->right, begin, end);
1075 }
1076
1077
1078
1079 #include "rege_dfa.c"