]>
Commit | Line | Data |
---|---|---|
9385eb3d A |
1 | /*- |
2 | * Copyright (c) 1992, 1993, 1994 Henry Spencer. | |
e9ce8d39 A |
3 | * Copyright (c) 1992, 1993, 1994 |
4 | * The Regents of the University of California. All rights reserved. | |
5 | * | |
6 | * This code is derived from software contributed to Berkeley by | |
7 | * Henry Spencer. | |
8 | * | |
9 | * Redistribution and use in source and binary forms, with or without | |
10 | * modification, are permitted provided that the following conditions | |
11 | * are met: | |
12 | * 1. Redistributions of source code must retain the above copyright | |
13 | * notice, this list of conditions and the following disclaimer. | |
14 | * 2. Redistributions in binary form must reproduce the above copyright | |
15 | * notice, this list of conditions and the following disclaimer in the | |
16 | * documentation and/or other materials provided with the distribution. | |
e9ce8d39 A |
17 | * 4. Neither the name of the University nor the names of its contributors |
18 | * may be used to endorse or promote products derived from this software | |
19 | * without specific prior written permission. | |
20 | * | |
21 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
24 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
27 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
30 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
31 | * SUCH DAMAGE. | |
9385eb3d A |
32 | * |
33 | * @(#)engine.c 8.5 (Berkeley) 3/20/94 | |
e9ce8d39 A |
34 | */ |
35 | ||
9385eb3d | 36 | #include <sys/cdefs.h> |
1f2f436a | 37 | __FBSDID("$FreeBSD: src/lib/libc/regex/engine.c,v 1.23 2009/09/16 06:32:23 dds Exp $"); |
9385eb3d | 38 | |
e9ce8d39 A |
39 | /* |
40 | * The matching engine and friends. This file is #included by regexec.c | |
41 | * after suitable #defines of a variety of macros used herein, so that | |
42 | * different state representations can be used without duplicating masses | |
43 | * of code. | |
44 | */ | |
45 | ||
46 | #ifdef SNAMES | |
47 | #define matcher smatcher | |
48 | #define fast sfast | |
49 | #define slow sslow | |
50 | #define dissect sdissect | |
51 | #define backref sbackref | |
52 | #define step sstep | |
53 | #define print sprint | |
54 | #define at sat | |
55 | #define match smat | |
56 | #endif | |
57 | #ifdef LNAMES | |
58 | #define matcher lmatcher | |
59 | #define fast lfast | |
60 | #define slow lslow | |
61 | #define dissect ldissect | |
62 | #define backref lbackref | |
63 | #define step lstep | |
64 | #define print lprint | |
65 | #define at lat | |
66 | #define match lmat | |
67 | #endif | |
3d9156a7 A |
68 | #ifdef MNAMES |
69 | #define matcher mmatcher | |
70 | #define fast mfast | |
71 | #define slow mslow | |
72 | #define dissect mdissect | |
73 | #define backref mbackref | |
74 | #define step mstep | |
75 | #define print mprint | |
76 | #define at mat | |
77 | #define match mmat | |
78 | #endif | |
e9ce8d39 A |
79 | |
80 | /* another structure passed up and down to avoid zillions of parameters */ | |
81 | struct match { | |
82 | struct re_guts *g; | |
83 | int eflags; | |
84 | regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ | |
1f2f436a A |
85 | const char *offp; /* offsets work from here */ |
86 | const char *beginp; /* start of string -- virtual NUL precedes */ | |
87 | const char *endp; /* end of string -- virtual NUL here */ | |
88 | const char *coldp; /* can be no match starting before here */ | |
89 | const char **lastpos; /* [nplus+1] */ | |
e9ce8d39 A |
90 | STATEVARS; |
91 | states st; /* current states */ | |
92 | states fresh; /* states for a fresh start */ | |
93 | states tmp; /* temporary */ | |
94 | states empty; /* empty set of states */ | |
3d9156a7 | 95 | mbstate_t mbs; /* multibyte conversion state */ |
e9ce8d39 A |
96 | }; |
97 | ||
98 | /* ========= begin header generated by ./mkh ========= */ | |
99 | #ifdef __cplusplus | |
100 | extern "C" { | |
101 | #endif | |
102 | ||
103 | /* === engine.c === */ | |
1f2f436a A |
104 | static int matcher(struct re_guts *g, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags); |
105 | static const char *dissect(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst); | |
106 | static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev, int); | |
107 | static const char *fast(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst); | |
108 | static const char *slow(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst); | |
3d9156a7 | 109 | static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft); |
1f2f436a | 110 | #define MAX_RECURSION 100 |
3d9156a7 A |
111 | #define BOL (OUT-1) |
112 | #define EOL (BOL-1) | |
113 | #define BOLEOL (BOL-2) | |
114 | #define NOTHING (BOL-3) | |
115 | #define BOW (BOL-4) | |
116 | #define EOW (BOL-5) | |
117 | #define BADCHAR (BOL-6) | |
118 | #define NONCHAR(c) ((c) <= OUT) | |
e9ce8d39 | 119 | #ifdef REDEBUG |
1f2f436a | 120 | static void print(struct match *m, const char *caption, states st, int ch, FILE *d); |
e9ce8d39 A |
121 | #endif |
122 | #ifdef REDEBUG | |
1f2f436a | 123 | static void at(struct match *m, const char *title, const char *start, const char *stop, sopno startst, sopno stopst); |
e9ce8d39 A |
124 | #endif |
125 | #ifdef REDEBUG | |
1f2f436a | 126 | static const char *pchar(int ch); |
e9ce8d39 A |
127 | #endif |
128 | ||
129 | #ifdef __cplusplus | |
130 | } | |
131 | #endif | |
132 | /* ========= end header generated by ./mkh ========= */ | |
133 | ||
134 | #ifdef REDEBUG | |
135 | #define SP(t, s, c) print(m, t, s, c, stdout) | |
136 | #define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) | |
137 | #define NOTE(str) { if (m->eflags®_TRACE) printf("=%s\n", (str)); } | |
138 | #else | |
139 | #define SP(t, s, c) /* nothing */ | |
140 | #define AT(t, p1, p2, s1, s2) /* nothing */ | |
141 | #define NOTE(s) /* nothing */ | |
142 | #endif | |
143 | ||
144 | /* | |
145 | - matcher - the actual matching engine | |
1f2f436a | 146 | == static int matcher(struct re_guts *g, const char *string, \ |
e9ce8d39 A |
147 | == size_t nmatch, regmatch_t pmatch[], int eflags); |
148 | */ | |
149 | static int /* 0 success, REG_NOMATCH failure */ | |
1f2f436a A |
150 | matcher(struct re_guts *g, |
151 | const char *string, | |
152 | size_t nmatch, | |
153 | regmatch_t pmatch[], | |
154 | int eflags) | |
e9ce8d39 | 155 | { |
1f2f436a | 156 | const char *endp; |
9385eb3d | 157 | int i; |
e9ce8d39 | 158 | struct match mv; |
9385eb3d | 159 | struct match *m = &mv; |
1f2f436a | 160 | const char *dp; |
9385eb3d A |
161 | const sopno gf = g->firststate+1; /* +1 for OEND */ |
162 | const sopno gl = g->laststate; | |
1f2f436a A |
163 | const char *start; |
164 | const char *stop; | |
9385eb3d | 165 | /* Boyer-Moore algorithms variables */ |
1f2f436a | 166 | const char *pp; |
9385eb3d | 167 | int cj, mj; |
1f2f436a A |
168 | const char *mustfirst; |
169 | const char *mustlast; | |
9385eb3d A |
170 | int *matchjump; |
171 | int *charjump; | |
e9ce8d39 A |
172 | |
173 | /* simplify the situation where possible */ | |
174 | if (g->cflags®_NOSUB) | |
175 | nmatch = 0; | |
176 | if (eflags®_STARTEND) { | |
177 | start = string + pmatch[0].rm_so; | |
178 | stop = string + pmatch[0].rm_eo; | |
179 | } else { | |
180 | start = string; | |
181 | stop = start + strlen(start); | |
182 | } | |
183 | if (stop < start) | |
184 | return(REG_INVARG); | |
185 | ||
186 | /* prescreening; this does wonders for this rather slow code */ | |
187 | if (g->must != NULL) { | |
9385eb3d A |
188 | if (g->charjump != NULL && g->matchjump != NULL) { |
189 | mustfirst = g->must; | |
190 | mustlast = g->must + g->mlen - 1; | |
191 | charjump = g->charjump; | |
192 | matchjump = g->matchjump; | |
193 | pp = mustlast; | |
194 | for (dp = start+g->mlen-1; dp < stop;) { | |
195 | /* Fast skip non-matches */ | |
196 | while (dp < stop && charjump[(int)*dp]) | |
197 | dp += charjump[(int)*dp]; | |
198 | ||
199 | if (dp >= stop) | |
200 | break; | |
201 | ||
202 | /* Greedy matcher */ | |
203 | /* We depend on not being used for | |
204 | * for strings of length 1 | |
205 | */ | |
206 | while (*--dp == *--pp && pp != mustfirst); | |
207 | ||
208 | if (*dp == *pp) | |
209 | break; | |
210 | ||
211 | /* Jump to next possible match */ | |
212 | mj = matchjump[pp - mustfirst]; | |
213 | cj = charjump[(int)*dp]; | |
214 | dp += (cj < mj ? mj : cj); | |
215 | pp = mustlast; | |
216 | } | |
217 | if (pp != mustfirst) | |
218 | return(REG_NOMATCH); | |
219 | } else { | |
220 | for (dp = start; dp < stop; dp++) | |
221 | if (*dp == g->must[0] && | |
222 | stop - dp >= g->mlen && | |
223 | memcmp(dp, g->must, (size_t)g->mlen) == 0) | |
224 | break; | |
225 | if (dp == stop) /* we didn't find g->must */ | |
226 | return(REG_NOMATCH); | |
227 | } | |
e9ce8d39 A |
228 | } |
229 | ||
230 | /* match struct setup */ | |
231 | m->g = g; | |
232 | m->eflags = eflags; | |
233 | m->pmatch = NULL; | |
234 | m->lastpos = NULL; | |
235 | m->offp = string; | |
236 | m->beginp = start; | |
237 | m->endp = stop; | |
238 | STATESETUP(m, 4); | |
239 | SETUP(m->st); | |
240 | SETUP(m->fresh); | |
241 | SETUP(m->tmp); | |
242 | SETUP(m->empty); | |
243 | CLEAR(m->empty); | |
3d9156a7 | 244 | ZAPSTATE(&m->mbs); |
e9ce8d39 | 245 | |
9385eb3d A |
246 | /* Adjust start according to moffset, to speed things up */ |
247 | if (g->moffset > -1) | |
248 | start = ((dp - g->moffset) < start) ? start : dp - g->moffset; | |
249 | ||
1f2f436a A |
250 | SP("mloop", m->st, *start); |
251 | ||
e9ce8d39 A |
252 | /* this loop does only one repetition except for backrefs */ |
253 | for (;;) { | |
254 | endp = fast(m, start, stop, gf, gl); | |
255 | if (endp == NULL) { /* a miss */ | |
1f2f436a A |
256 | if (m->pmatch != NULL) |
257 | free((char *)m->pmatch); | |
258 | if (m->lastpos != NULL) | |
259 | free((char *)m->lastpos); | |
e9ce8d39 A |
260 | STATETEARDOWN(m); |
261 | return(REG_NOMATCH); | |
262 | } | |
263 | if (nmatch == 0 && !g->backrefs) | |
264 | break; /* no further info needed */ | |
265 | ||
266 | /* where? */ | |
267 | assert(m->coldp != NULL); | |
268 | for (;;) { | |
269 | NOTE("finding start"); | |
270 | endp = slow(m, m->coldp, stop, gf, gl); | |
271 | if (endp != NULL) | |
272 | break; | |
273 | assert(m->coldp < m->endp); | |
3d9156a7 A |
274 | m->coldp += XMBRTOWC(NULL, m->coldp, |
275 | m->endp - m->coldp, &m->mbs, 0); | |
e9ce8d39 A |
276 | } |
277 | if (nmatch == 1 && !g->backrefs) | |
278 | break; /* no further info needed */ | |
279 | ||
280 | /* oh my, he wants the subexpressions... */ | |
281 | if (m->pmatch == NULL) | |
282 | m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * | |
283 | sizeof(regmatch_t)); | |
284 | if (m->pmatch == NULL) { | |
285 | STATETEARDOWN(m); | |
286 | return(REG_ESPACE); | |
287 | } | |
288 | for (i = 1; i <= m->g->nsub; i++) | |
289 | m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; | |
290 | if (!g->backrefs && !(m->eflags®_BACKR)) { | |
291 | NOTE("dissecting"); | |
292 | dp = dissect(m, m->coldp, endp, gf, gl); | |
293 | } else { | |
294 | if (g->nplus > 0 && m->lastpos == NULL) | |
1f2f436a A |
295 | m->lastpos = malloc((g->nplus+1) * |
296 | sizeof(const char *)); | |
e9ce8d39 A |
297 | if (g->nplus > 0 && m->lastpos == NULL) { |
298 | free(m->pmatch); | |
299 | STATETEARDOWN(m); | |
300 | return(REG_ESPACE); | |
301 | } | |
302 | NOTE("backref dissect"); | |
1f2f436a | 303 | dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); |
e9ce8d39 A |
304 | } |
305 | if (dp != NULL) | |
306 | break; | |
307 | ||
308 | /* uh-oh... we couldn't find a subexpression-level match */ | |
309 | assert(g->backrefs); /* must be back references doing it */ | |
310 | assert(g->nplus == 0 || m->lastpos != NULL); | |
311 | for (;;) { | |
312 | if (dp != NULL || endp <= m->coldp) | |
313 | break; /* defeat */ | |
314 | NOTE("backoff"); | |
315 | endp = slow(m, m->coldp, endp-1, gf, gl); | |
316 | if (endp == NULL) | |
317 | break; /* defeat */ | |
318 | /* try it on a shorter possibility */ | |
319 | #ifndef NDEBUG | |
320 | for (i = 1; i <= m->g->nsub; i++) { | |
321 | assert(m->pmatch[i].rm_so == -1); | |
322 | assert(m->pmatch[i].rm_eo == -1); | |
323 | } | |
324 | #endif | |
325 | NOTE("backoff dissect"); | |
1f2f436a | 326 | dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); |
e9ce8d39 A |
327 | } |
328 | assert(dp == NULL || dp == endp); | |
329 | if (dp != NULL) /* found a shorter one */ | |
330 | break; | |
331 | ||
332 | /* despite initial appearances, there is no match here */ | |
333 | NOTE("false alarm"); | |
3d9156a7 A |
334 | /* recycle starting later */ |
335 | start = m->coldp + XMBRTOWC(NULL, m->coldp, | |
1f2f436a | 336 | stop - m->coldp, &m->mbs, 0); |
e9ce8d39 A |
337 | assert(start <= stop); |
338 | } | |
339 | ||
340 | /* fill in the details if requested */ | |
341 | if (nmatch > 0) { | |
342 | pmatch[0].rm_so = m->coldp - m->offp; | |
343 | pmatch[0].rm_eo = endp - m->offp; | |
344 | } | |
345 | if (nmatch > 1) { | |
346 | assert(m->pmatch != NULL); | |
347 | for (i = 1; i < nmatch; i++) | |
348 | if (i <= m->g->nsub) | |
349 | pmatch[i] = m->pmatch[i]; | |
350 | else { | |
351 | pmatch[i].rm_so = -1; | |
352 | pmatch[i].rm_eo = -1; | |
353 | } | |
354 | } | |
355 | ||
356 | if (m->pmatch != NULL) | |
357 | free((char *)m->pmatch); | |
358 | if (m->lastpos != NULL) | |
359 | free((char *)m->lastpos); | |
360 | STATETEARDOWN(m); | |
361 | return(0); | |
362 | } | |
363 | ||
364 | /* | |
365 | - dissect - figure out what matched what, no back references | |
1f2f436a A |
366 | == static const char *dissect(struct match *m, const char *start, \ |
367 | == const char *stop, sopno startst, sopno stopst); | |
e9ce8d39 | 368 | */ |
1f2f436a A |
369 | static const char * /* == stop (success) always */ |
370 | dissect(struct match *m, | |
371 | const char *start, | |
372 | const char *stop, | |
373 | sopno startst, | |
374 | sopno stopst) | |
e9ce8d39 | 375 | { |
9385eb3d A |
376 | int i; |
377 | sopno ss; /* start sop of current subRE */ | |
378 | sopno es; /* end sop of current subRE */ | |
1f2f436a A |
379 | const char *sp; /* start of string matched by it */ |
380 | const char *stp; /* string matched by it cannot pass here */ | |
381 | const char *rest; /* start of rest of string */ | |
382 | const char *tail; /* string unmatched by rest of RE */ | |
9385eb3d A |
383 | sopno ssub; /* start sop of subsubRE */ |
384 | sopno esub; /* end sop of subsubRE */ | |
1f2f436a A |
385 | const char *ssp; /* start of string matched by subsubRE */ |
386 | const char *sep; /* end of string matched by subsubRE */ | |
387 | const char *oldssp; /* previous ssp */ | |
388 | const char *dp; | |
e9ce8d39 A |
389 | |
390 | AT("diss", start, stop, startst, stopst); | |
391 | sp = start; | |
392 | for (ss = startst; ss < stopst; ss = es) { | |
393 | /* identify end of subRE */ | |
394 | es = ss; | |
395 | switch (OP(m->g->strip[es])) { | |
396 | case OPLUS_: | |
397 | case OQUEST_: | |
398 | es += OPND(m->g->strip[es]); | |
399 | break; | |
400 | case OCH_: | |
401 | while (OP(m->g->strip[es]) != O_CH) | |
402 | es += OPND(m->g->strip[es]); | |
403 | break; | |
404 | } | |
405 | es++; | |
406 | ||
407 | /* figure out what it matched */ | |
408 | switch (OP(m->g->strip[ss])) { | |
409 | case OEND: | |
410 | assert(nope); | |
411 | break; | |
412 | case OCHAR: | |
3d9156a7 | 413 | sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0); |
e9ce8d39 A |
414 | break; |
415 | case OBOL: | |
416 | case OEOL: | |
417 | case OBOW: | |
418 | case OEOW: | |
419 | break; | |
420 | case OANY: | |
421 | case OANYOF: | |
3d9156a7 | 422 | sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0); |
e9ce8d39 A |
423 | break; |
424 | case OBACK_: | |
425 | case O_BACK: | |
426 | assert(nope); | |
427 | break; | |
428 | /* cases where length of match is hard to find */ | |
429 | case OQUEST_: | |
430 | stp = stop; | |
431 | for (;;) { | |
432 | /* how long could this one be? */ | |
433 | rest = slow(m, sp, stp, ss, es); | |
434 | assert(rest != NULL); /* it did match */ | |
435 | /* could the rest match the rest? */ | |
436 | tail = slow(m, rest, stop, es, stopst); | |
437 | if (tail == stop) | |
438 | break; /* yes! */ | |
439 | /* no -- try a shorter match for this one */ | |
440 | stp = rest - 1; | |
441 | assert(stp >= sp); /* it did work */ | |
442 | } | |
443 | ssub = ss + 1; | |
444 | esub = es - 1; | |
445 | /* did innards match? */ | |
446 | if (slow(m, sp, rest, ssub, esub) != NULL) { | |
447 | dp = dissect(m, sp, rest, ssub, esub); | |
448 | assert(dp == rest); | |
449 | } else /* no */ | |
450 | assert(sp == rest); | |
451 | sp = rest; | |
452 | break; | |
453 | case OPLUS_: | |
454 | stp = stop; | |
455 | for (;;) { | |
456 | /* how long could this one be? */ | |
457 | rest = slow(m, sp, stp, ss, es); | |
458 | assert(rest != NULL); /* it did match */ | |
459 | /* could the rest match the rest? */ | |
460 | tail = slow(m, rest, stop, es, stopst); | |
461 | if (tail == stop) | |
462 | break; /* yes! */ | |
463 | /* no -- try a shorter match for this one */ | |
464 | stp = rest - 1; | |
465 | assert(stp >= sp); /* it did work */ | |
466 | } | |
467 | ssub = ss + 1; | |
468 | esub = es - 1; | |
469 | ssp = sp; | |
470 | oldssp = ssp; | |
471 | for (;;) { /* find last match of innards */ | |
472 | sep = slow(m, ssp, rest, ssub, esub); | |
473 | if (sep == NULL || sep == ssp) | |
474 | break; /* failed or matched null */ | |
475 | oldssp = ssp; /* on to next try */ | |
476 | ssp = sep; | |
477 | } | |
478 | if (sep == NULL) { | |
479 | /* last successful match */ | |
480 | sep = ssp; | |
481 | ssp = oldssp; | |
482 | } | |
483 | assert(sep == rest); /* must exhaust substring */ | |
484 | assert(slow(m, ssp, sep, ssub, esub) == rest); | |
485 | dp = dissect(m, ssp, sep, ssub, esub); | |
486 | assert(dp == sep); | |
487 | sp = rest; | |
488 | break; | |
489 | case OCH_: | |
490 | stp = stop; | |
491 | for (;;) { | |
492 | /* how long could this one be? */ | |
493 | rest = slow(m, sp, stp, ss, es); | |
494 | assert(rest != NULL); /* it did match */ | |
495 | /* could the rest match the rest? */ | |
496 | tail = slow(m, rest, stop, es, stopst); | |
497 | if (tail == stop) | |
498 | break; /* yes! */ | |
499 | /* no -- try a shorter match for this one */ | |
500 | stp = rest - 1; | |
501 | assert(stp >= sp); /* it did work */ | |
502 | } | |
503 | ssub = ss + 1; | |
504 | esub = ss + OPND(m->g->strip[ss]) - 1; | |
505 | assert(OP(m->g->strip[esub]) == OOR1); | |
506 | for (;;) { /* find first matching branch */ | |
507 | if (slow(m, sp, rest, ssub, esub) == rest) | |
508 | break; /* it matched all of it */ | |
509 | /* that one missed, try next one */ | |
510 | assert(OP(m->g->strip[esub]) == OOR1); | |
511 | esub++; | |
512 | assert(OP(m->g->strip[esub]) == OOR2); | |
513 | ssub = esub + 1; | |
514 | esub += OPND(m->g->strip[esub]); | |
515 | if (OP(m->g->strip[esub]) == OOR2) | |
516 | esub--; | |
517 | else | |
518 | assert(OP(m->g->strip[esub]) == O_CH); | |
519 | } | |
520 | dp = dissect(m, sp, rest, ssub, esub); | |
521 | assert(dp == rest); | |
522 | sp = rest; | |
523 | break; | |
524 | case O_PLUS: | |
525 | case O_QUEST: | |
526 | case OOR1: | |
527 | case OOR2: | |
528 | case O_CH: | |
529 | assert(nope); | |
530 | break; | |
531 | case OLPAREN: | |
532 | i = OPND(m->g->strip[ss]); | |
533 | assert(0 < i && i <= m->g->nsub); | |
534 | m->pmatch[i].rm_so = sp - m->offp; | |
535 | break; | |
536 | case ORPAREN: | |
537 | i = OPND(m->g->strip[ss]); | |
538 | assert(0 < i && i <= m->g->nsub); | |
539 | m->pmatch[i].rm_eo = sp - m->offp; | |
540 | break; | |
541 | default: /* uh oh */ | |
542 | assert(nope); | |
543 | break; | |
544 | } | |
545 | } | |
546 | ||
547 | assert(sp == stop); | |
548 | return(sp); | |
549 | } | |
550 | ||
551 | /* | |
552 | - backref - figure out what matched what, figuring in back references | |
1f2f436a A |
553 | == static const char *backref(struct match *m, const char *start, \ |
554 | == const char *stop, sopno startst, sopno stopst, sopno lev); | |
e9ce8d39 | 555 | */ |
1f2f436a A |
556 | static const char * /* == stop (success) or NULL (failure) */ |
557 | backref(struct match *m, | |
558 | const char *start, | |
559 | const char *stop, | |
560 | sopno startst, | |
561 | sopno stopst, | |
562 | sopno lev, /* PLUS nesting level */ | |
563 | int rec) | |
e9ce8d39 | 564 | { |
9385eb3d A |
565 | int i; |
566 | sopno ss; /* start sop of current subRE */ | |
1f2f436a | 567 | const char *sp; /* start of string matched by it */ |
9385eb3d A |
568 | sopno ssub; /* start sop of subsubRE */ |
569 | sopno esub; /* end sop of subsubRE */ | |
1f2f436a A |
570 | const char *ssp; /* start of string matched by subsubRE */ |
571 | const char *dp; | |
9385eb3d A |
572 | size_t len; |
573 | int hard; | |
574 | sop s; | |
575 | regoff_t offsave; | |
576 | cset *cs; | |
3d9156a7 | 577 | wint_t wc; |
e9ce8d39 A |
578 | |
579 | AT("back", start, stop, startst, stopst); | |
580 | sp = start; | |
581 | ||
582 | /* get as far as we can with easy stuff */ | |
583 | hard = 0; | |
584 | for (ss = startst; !hard && ss < stopst; ss++) | |
585 | switch (OP(s = m->g->strip[ss])) { | |
586 | case OCHAR: | |
3d9156a7 A |
587 | if (sp == stop) |
588 | return(NULL); | |
589 | sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR); | |
590 | if (wc != OPND(s)) | |
e9ce8d39 A |
591 | return(NULL); |
592 | break; | |
593 | case OANY: | |
594 | if (sp == stop) | |
595 | return(NULL); | |
3d9156a7 A |
596 | sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR); |
597 | if (wc == BADCHAR) | |
598 | return (NULL); | |
e9ce8d39 A |
599 | break; |
600 | case OANYOF: | |
3d9156a7 A |
601 | if (sp == stop) |
602 | return (NULL); | |
e9ce8d39 | 603 | cs = &m->g->sets[OPND(s)]; |
3d9156a7 A |
604 | sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR); |
605 | if (wc == BADCHAR || !CHIN(cs, wc)) | |
e9ce8d39 A |
606 | return(NULL); |
607 | break; | |
608 | case OBOL: | |
609 | if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || | |
610 | (sp < m->endp && *(sp-1) == '\n' && | |
611 | (m->g->cflags®_NEWLINE)) ) | |
612 | { /* yes */ } | |
613 | else | |
614 | return(NULL); | |
615 | break; | |
616 | case OEOL: | |
617 | if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || | |
618 | (sp < m->endp && *sp == '\n' && | |
619 | (m->g->cflags®_NEWLINE)) ) | |
620 | { /* yes */ } | |
621 | else | |
622 | return(NULL); | |
623 | break; | |
624 | case OBOW: | |
625 | if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) || | |
626 | (sp < m->endp && *(sp-1) == '\n' && | |
627 | (m->g->cflags®_NEWLINE)) || | |
628 | (sp > m->beginp && | |
629 | !ISWORD(*(sp-1))) ) && | |
630 | (sp < m->endp && ISWORD(*sp)) ) | |
631 | { /* yes */ } | |
632 | else | |
633 | return(NULL); | |
634 | break; | |
635 | case OEOW: | |
636 | if (( (sp == m->endp && !(m->eflags®_NOTEOL)) || | |
637 | (sp < m->endp && *sp == '\n' && | |
638 | (m->g->cflags®_NEWLINE)) || | |
639 | (sp < m->endp && !ISWORD(*sp)) ) && | |
640 | (sp > m->beginp && ISWORD(*(sp-1))) ) | |
641 | { /* yes */ } | |
642 | else | |
643 | return(NULL); | |
644 | break; | |
645 | case O_QUEST: | |
646 | break; | |
647 | case OOR1: /* matches null but needs to skip */ | |
648 | ss++; | |
649 | s = m->g->strip[ss]; | |
650 | do { | |
651 | assert(OP(s) == OOR2); | |
652 | ss += OPND(s); | |
653 | } while (OP(s = m->g->strip[ss]) != O_CH); | |
654 | /* note that the ss++ gets us past the O_CH */ | |
655 | break; | |
656 | default: /* have to make a choice */ | |
657 | hard = 1; | |
658 | break; | |
659 | } | |
660 | if (!hard) { /* that was it! */ | |
661 | if (sp != stop) | |
662 | return(NULL); | |
663 | return(sp); | |
664 | } | |
665 | ss--; /* adjust for the for's final increment */ | |
666 | ||
667 | /* the hard stuff */ | |
668 | AT("hard", sp, stop, ss, stopst); | |
669 | s = m->g->strip[ss]; | |
670 | switch (OP(s)) { | |
671 | case OBACK_: /* the vilest depths */ | |
672 | i = OPND(s); | |
673 | assert(0 < i && i <= m->g->nsub); | |
674 | if (m->pmatch[i].rm_eo == -1) | |
675 | return(NULL); | |
676 | assert(m->pmatch[i].rm_so != -1); | |
677 | len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; | |
1f2f436a A |
678 | if (len == 0 && rec++ > MAX_RECURSION) |
679 | return(NULL); | |
e9ce8d39 A |
680 | assert(stop - m->beginp >= len); |
681 | if (sp > stop - len) | |
682 | return(NULL); /* not enough left to match */ | |
683 | ssp = m->offp + m->pmatch[i].rm_so; | |
684 | if (memcmp(sp, ssp, len) != 0) | |
685 | return(NULL); | |
686 | while (m->g->strip[ss] != SOP(O_BACK, i)) | |
687 | ss++; | |
1f2f436a | 688 | return(backref(m, sp+len, stop, ss+1, stopst, lev, rec)); |
e9ce8d39 A |
689 | break; |
690 | case OQUEST_: /* to null or not */ | |
1f2f436a | 691 | dp = backref(m, sp, stop, ss+1, stopst, lev, rec); |
e9ce8d39 A |
692 | if (dp != NULL) |
693 | return(dp); /* not */ | |
1f2f436a | 694 | return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec)); |
e9ce8d39 A |
695 | break; |
696 | case OPLUS_: | |
697 | assert(m->lastpos != NULL); | |
698 | assert(lev+1 <= m->g->nplus); | |
699 | m->lastpos[lev+1] = sp; | |
1f2f436a | 700 | return(backref(m, sp, stop, ss+1, stopst, lev+1, rec)); |
e9ce8d39 A |
701 | break; |
702 | case O_PLUS: | |
703 | if (sp == m->lastpos[lev]) /* last pass matched null */ | |
1f2f436a | 704 | return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); |
e9ce8d39 A |
705 | /* try another pass */ |
706 | m->lastpos[lev] = sp; | |
1f2f436a | 707 | dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec); |
e9ce8d39 | 708 | if (dp == NULL) |
1f2f436a | 709 | return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); |
e9ce8d39 A |
710 | else |
711 | return(dp); | |
712 | break; | |
713 | case OCH_: /* find the right one, if any */ | |
714 | ssub = ss + 1; | |
715 | esub = ss + OPND(s) - 1; | |
716 | assert(OP(m->g->strip[esub]) == OOR1); | |
717 | for (;;) { /* find first matching branch */ | |
1f2f436a | 718 | dp = backref(m, sp, stop, ssub, esub, lev, rec); |
e9ce8d39 A |
719 | if (dp != NULL) |
720 | return(dp); | |
721 | /* that one missed, try next one */ | |
722 | if (OP(m->g->strip[esub]) == O_CH) | |
723 | return(NULL); /* there is none */ | |
724 | esub++; | |
725 | assert(OP(m->g->strip[esub]) == OOR2); | |
726 | ssub = esub + 1; | |
727 | esub += OPND(m->g->strip[esub]); | |
728 | if (OP(m->g->strip[esub]) == OOR2) | |
729 | esub--; | |
730 | else | |
731 | assert(OP(m->g->strip[esub]) == O_CH); | |
732 | } | |
733 | break; | |
734 | case OLPAREN: /* must undo assignment if rest fails */ | |
735 | i = OPND(s); | |
736 | assert(0 < i && i <= m->g->nsub); | |
737 | offsave = m->pmatch[i].rm_so; | |
738 | m->pmatch[i].rm_so = sp - m->offp; | |
1f2f436a | 739 | dp = backref(m, sp, stop, ss+1, stopst, lev, rec); |
e9ce8d39 A |
740 | if (dp != NULL) |
741 | return(dp); | |
742 | m->pmatch[i].rm_so = offsave; | |
743 | return(NULL); | |
744 | break; | |
745 | case ORPAREN: /* must undo assignment if rest fails */ | |
746 | i = OPND(s); | |
747 | assert(0 < i && i <= m->g->nsub); | |
748 | offsave = m->pmatch[i].rm_eo; | |
749 | m->pmatch[i].rm_eo = sp - m->offp; | |
1f2f436a | 750 | dp = backref(m, sp, stop, ss+1, stopst, lev, rec); |
e9ce8d39 A |
751 | if (dp != NULL) |
752 | return(dp); | |
753 | m->pmatch[i].rm_eo = offsave; | |
754 | return(NULL); | |
755 | break; | |
756 | default: /* uh oh */ | |
757 | assert(nope); | |
758 | break; | |
759 | } | |
760 | ||
761 | /* "can't happen" */ | |
762 | assert(nope); | |
763 | /* NOTREACHED */ | |
9385eb3d | 764 | return "shut up gcc"; |
e9ce8d39 A |
765 | } |
766 | ||
767 | /* | |
768 | - fast - step through the string at top speed | |
1f2f436a A |
769 | == static const char *fast(struct match *m, const char *start, \ |
770 | == const char *stop, sopno startst, sopno stopst); | |
e9ce8d39 | 771 | */ |
1f2f436a A |
772 | static const char * /* where tentative match ended, or NULL */ |
773 | fast( struct match *m, | |
774 | const char *start, | |
775 | const char *stop, | |
776 | sopno startst, | |
777 | sopno stopst) | |
e9ce8d39 | 778 | { |
9385eb3d A |
779 | states st = m->st; |
780 | states fresh = m->fresh; | |
781 | states tmp = m->tmp; | |
1f2f436a | 782 | const char *p = start; |
3d9156a7 A |
783 | wint_t c; |
784 | wint_t lastc; /* previous c */ | |
785 | wint_t flagch; | |
9385eb3d | 786 | int i; |
1f2f436a | 787 | const char *coldp; /* last p after which no match was underway */ |
3d9156a7 | 788 | size_t clen; |
e9ce8d39 A |
789 | |
790 | CLEAR(st); | |
791 | SET1(st, startst); | |
1f2f436a | 792 | SP("fast", st, *p); |
e9ce8d39 A |
793 | st = step(m->g, startst, stopst, st, NOTHING, st); |
794 | ASSIGN(fresh, st); | |
795 | SP("start", st, *p); | |
796 | coldp = NULL; | |
3d9156a7 A |
797 | if (start == m->beginp) |
798 | c = OUT; | |
799 | else { | |
800 | /* | |
801 | * XXX Wrong if the previous character was multi-byte. | |
802 | * Newline never is (in encodings supported by FreeBSD), | |
803 | * so this only breaks the ISWORD tests below. | |
804 | */ | |
805 | c = (uch)*(start - 1); | |
806 | } | |
e9ce8d39 A |
807 | for (;;) { |
808 | /* next character */ | |
809 | lastc = c; | |
1f2f436a A |
810 | if (p == m->endp) { |
811 | clen = 0; | |
3d9156a7 | 812 | c = OUT; |
1f2f436a | 813 | } else |
3d9156a7 | 814 | clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR); |
e9ce8d39 A |
815 | if (EQ(st, fresh)) |
816 | coldp = p; | |
817 | ||
818 | /* is there an EOL and/or BOL between lastc and c? */ | |
819 | flagch = '\0'; | |
820 | i = 0; | |
821 | if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || | |
822 | (lastc == OUT && !(m->eflags®_NOTBOL)) ) { | |
823 | flagch = BOL; | |
824 | i = m->g->nbol; | |
825 | } | |
826 | if ( (c == '\n' && m->g->cflags®_NEWLINE) || | |
827 | (c == OUT && !(m->eflags®_NOTEOL)) ) { | |
828 | flagch = (flagch == BOL) ? BOLEOL : EOL; | |
829 | i += m->g->neol; | |
830 | } | |
831 | if (i != 0) { | |
832 | for (; i > 0; i--) | |
833 | st = step(m->g, startst, stopst, st, flagch, st); | |
834 | SP("boleol", st, c); | |
835 | } | |
836 | ||
837 | /* how about a word boundary? */ | |
838 | if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && | |
839 | (c != OUT && ISWORD(c)) ) { | |
840 | flagch = BOW; | |
841 | } | |
842 | if ( (lastc != OUT && ISWORD(lastc)) && | |
843 | (flagch == EOL || (c != OUT && !ISWORD(c))) ) { | |
844 | flagch = EOW; | |
845 | } | |
846 | if (flagch == BOW || flagch == EOW) { | |
847 | st = step(m->g, startst, stopst, st, flagch, st); | |
848 | SP("boweow", st, c); | |
849 | } | |
850 | ||
851 | /* are we done? */ | |
1f2f436a | 852 | if (ISSET(st, stopst) || p == stop || clen > stop - p) |
e9ce8d39 A |
853 | break; /* NOTE BREAK OUT */ |
854 | ||
855 | /* no, we must deal with this character */ | |
856 | ASSIGN(tmp, st); | |
857 | ASSIGN(st, fresh); | |
858 | assert(c != OUT); | |
859 | st = step(m->g, startst, stopst, tmp, c, st); | |
860 | SP("aft", st, c); | |
861 | assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); | |
3d9156a7 | 862 | p += clen; |
e9ce8d39 A |
863 | } |
864 | ||
865 | assert(coldp != NULL); | |
866 | m->coldp = coldp; | |
867 | if (ISSET(st, stopst)) | |
1f2f436a | 868 | return(p+XMBRTOWC(NULL, p, stop - p, &m->mbs, 0)); |
e9ce8d39 A |
869 | else |
870 | return(NULL); | |
871 | } | |
872 | ||
873 | /* | |
874 | - slow - step through the string more deliberately | |
1f2f436a A |
875 | == static const char *slow(struct match *m, const char *start, \ |
876 | == const char *stop, sopno startst, sopno stopst); | |
e9ce8d39 | 877 | */ |
1f2f436a A |
878 | static const char * /* where it ended */ |
879 | slow( struct match *m, | |
880 | const char *start, | |
881 | const char *stop, | |
882 | sopno startst, | |
883 | sopno stopst) | |
e9ce8d39 | 884 | { |
9385eb3d A |
885 | states st = m->st; |
886 | states empty = m->empty; | |
887 | states tmp = m->tmp; | |
1f2f436a | 888 | const char *p = start; |
3d9156a7 A |
889 | wint_t c; |
890 | wint_t lastc; /* previous c */ | |
891 | wint_t flagch; | |
9385eb3d | 892 | int i; |
1f2f436a | 893 | const char *matchp; /* last p at which a match ended */ |
3d9156a7 | 894 | size_t clen; |
e9ce8d39 A |
895 | |
896 | AT("slow", start, stop, startst, stopst); | |
897 | CLEAR(st); | |
898 | SET1(st, startst); | |
899 | SP("sstart", st, *p); | |
900 | st = step(m->g, startst, stopst, st, NOTHING, st); | |
901 | matchp = NULL; | |
3d9156a7 A |
902 | if (start == m->beginp) |
903 | c = OUT; | |
904 | else { | |
905 | /* | |
906 | * XXX Wrong if the previous character was multi-byte. | |
907 | * Newline never is (in encodings supported by FreeBSD), | |
908 | * so this only breaks the ISWORD tests below. | |
909 | */ | |
910 | c = (uch)*(start - 1); | |
911 | } | |
e9ce8d39 A |
912 | for (;;) { |
913 | /* next character */ | |
914 | lastc = c; | |
3d9156a7 A |
915 | if (p == m->endp) { |
916 | c = OUT; | |
917 | clen = 0; | |
918 | } else | |
919 | clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR); | |
e9ce8d39 A |
920 | |
921 | /* is there an EOL and/or BOL between lastc and c? */ | |
922 | flagch = '\0'; | |
923 | i = 0; | |
924 | if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || | |
925 | (lastc == OUT && !(m->eflags®_NOTBOL)) ) { | |
926 | flagch = BOL; | |
927 | i = m->g->nbol; | |
928 | } | |
929 | if ( (c == '\n' && m->g->cflags®_NEWLINE) || | |
930 | (c == OUT && !(m->eflags®_NOTEOL)) ) { | |
931 | flagch = (flagch == BOL) ? BOLEOL : EOL; | |
932 | i += m->g->neol; | |
933 | } | |
934 | if (i != 0) { | |
935 | for (; i > 0; i--) | |
936 | st = step(m->g, startst, stopst, st, flagch, st); | |
937 | SP("sboleol", st, c); | |
938 | } | |
939 | ||
940 | /* how about a word boundary? */ | |
941 | if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && | |
942 | (c != OUT && ISWORD(c)) ) { | |
943 | flagch = BOW; | |
944 | } | |
945 | if ( (lastc != OUT && ISWORD(lastc)) && | |
946 | (flagch == EOL || (c != OUT && !ISWORD(c))) ) { | |
947 | flagch = EOW; | |
948 | } | |
949 | if (flagch == BOW || flagch == EOW) { | |
950 | st = step(m->g, startst, stopst, st, flagch, st); | |
951 | SP("sboweow", st, c); | |
952 | } | |
953 | ||
954 | /* are we done? */ | |
955 | if (ISSET(st, stopst)) | |
956 | matchp = p; | |
1f2f436a | 957 | if (EQ(st, empty) || p == stop || clen > stop - p) |
e9ce8d39 A |
958 | break; /* NOTE BREAK OUT */ |
959 | ||
960 | /* no, we must deal with this character */ | |
961 | ASSIGN(tmp, st); | |
962 | ASSIGN(st, empty); | |
963 | assert(c != OUT); | |
964 | st = step(m->g, startst, stopst, tmp, c, st); | |
965 | SP("saft", st, c); | |
966 | assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); | |
3d9156a7 | 967 | p += clen; |
e9ce8d39 A |
968 | } |
969 | ||
970 | return(matchp); | |
971 | } | |
972 | ||
973 | ||
974 | /* | |
975 | - step - map set of states reachable before char to set reachable after | |
9385eb3d A |
976 | == static states step(struct re_guts *g, sopno start, sopno stop, \ |
977 | == states bef, int ch, states aft); | |
3d9156a7 A |
978 | == #define BOL (OUT-1) |
979 | == #define EOL (BOL-1) | |
980 | == #define BOLEOL (BOL-2) | |
981 | == #define NOTHING (BOL-3) | |
982 | == #define BOW (BOL-4) | |
983 | == #define EOW (BOL-5) | |
984 | == #define BADCHAR (BOL-6) | |
985 | == #define NONCHAR(c) ((c) <= OUT) | |
e9ce8d39 A |
986 | */ |
987 | static states | |
1f2f436a A |
988 | step(struct re_guts *g, |
989 | sopno start, /* start state within strip */ | |
990 | sopno stop, /* state after stop state within strip */ | |
991 | states bef, /* states reachable before */ | |
992 | wint_t ch, /* character or NONCHAR code */ | |
993 | states aft) /* states already known reachable after */ | |
e9ce8d39 | 994 | { |
9385eb3d A |
995 | cset *cs; |
996 | sop s; | |
997 | sopno pc; | |
998 | onestate here; /* note, macros know this name */ | |
999 | sopno look; | |
1000 | int i; | |
e9ce8d39 A |
1001 | |
1002 | for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { | |
1003 | s = g->strip[pc]; | |
1004 | switch (OP(s)) { | |
1005 | case OEND: | |
1006 | assert(pc == stop-1); | |
1007 | break; | |
1008 | case OCHAR: | |
1009 | /* only characters can match */ | |
3d9156a7 A |
1010 | assert(!NONCHAR(ch) || ch != OPND(s)); |
1011 | if (ch == OPND(s)) | |
e9ce8d39 A |
1012 | FWD(aft, bef, 1); |
1013 | break; | |
1014 | case OBOL: | |
1015 | if (ch == BOL || ch == BOLEOL) | |
1016 | FWD(aft, bef, 1); | |
1017 | break; | |
1018 | case OEOL: | |
1019 | if (ch == EOL || ch == BOLEOL) | |
1020 | FWD(aft, bef, 1); | |
1021 | break; | |
1022 | case OBOW: | |
1023 | if (ch == BOW) | |
1024 | FWD(aft, bef, 1); | |
1025 | break; | |
1026 | case OEOW: | |
1027 | if (ch == EOW) | |
1028 | FWD(aft, bef, 1); | |
1029 | break; | |
1030 | case OANY: | |
1031 | if (!NONCHAR(ch)) | |
1032 | FWD(aft, bef, 1); | |
1033 | break; | |
1034 | case OANYOF: | |
1035 | cs = &g->sets[OPND(s)]; | |
1036 | if (!NONCHAR(ch) && CHIN(cs, ch)) | |
1037 | FWD(aft, bef, 1); | |
1038 | break; | |
1039 | case OBACK_: /* ignored here */ | |
1040 | case O_BACK: | |
1041 | FWD(aft, aft, 1); | |
1042 | break; | |
1043 | case OPLUS_: /* forward, this is just an empty */ | |
1044 | FWD(aft, aft, 1); | |
1045 | break; | |
1046 | case O_PLUS: /* both forward and back */ | |
1047 | FWD(aft, aft, 1); | |
1048 | i = ISSETBACK(aft, OPND(s)); | |
1049 | BACK(aft, aft, OPND(s)); | |
1050 | if (!i && ISSETBACK(aft, OPND(s))) { | |
1051 | /* oho, must reconsider loop body */ | |
1052 | pc -= OPND(s) + 1; | |
1053 | INIT(here, pc); | |
1054 | } | |
1055 | break; | |
1056 | case OQUEST_: /* two branches, both forward */ | |
1057 | FWD(aft, aft, 1); | |
1058 | FWD(aft, aft, OPND(s)); | |
1059 | break; | |
1060 | case O_QUEST: /* just an empty */ | |
1061 | FWD(aft, aft, 1); | |
1062 | break; | |
1063 | case OLPAREN: /* not significant here */ | |
1064 | case ORPAREN: | |
1065 | FWD(aft, aft, 1); | |
1066 | break; | |
1067 | case OCH_: /* mark the first two branches */ | |
1068 | FWD(aft, aft, 1); | |
1069 | assert(OP(g->strip[pc+OPND(s)]) == OOR2); | |
1070 | FWD(aft, aft, OPND(s)); | |
1071 | break; | |
1072 | case OOR1: /* done a branch, find the O_CH */ | |
1073 | if (ISSTATEIN(aft, here)) { | |
1074 | for (look = 1; | |
1075 | OP(s = g->strip[pc+look]) != O_CH; | |
1076 | look += OPND(s)) | |
1077 | assert(OP(s) == OOR2); | |
1f2f436a | 1078 | FWD(aft, aft, look + 1); |
e9ce8d39 A |
1079 | } |
1080 | break; | |
1081 | case OOR2: /* propagate OCH_'s marking */ | |
1082 | FWD(aft, aft, 1); | |
1083 | if (OP(g->strip[pc+OPND(s)]) != O_CH) { | |
1084 | assert(OP(g->strip[pc+OPND(s)]) == OOR2); | |
1085 | FWD(aft, aft, OPND(s)); | |
1086 | } | |
1087 | break; | |
1088 | case O_CH: /* just empty */ | |
1089 | FWD(aft, aft, 1); | |
1090 | break; | |
1091 | default: /* ooooops... */ | |
1092 | assert(nope); | |
1093 | break; | |
1094 | } | |
1095 | } | |
1096 | ||
1097 | return(aft); | |
1098 | } | |
1099 | ||
1100 | #ifdef REDEBUG | |
1101 | /* | |
1102 | - print - print a set of states | |
1103 | == #ifdef REDEBUG | |
1f2f436a | 1104 | == static void print(struct match *m, const char *caption, states st, \ |
e9ce8d39 A |
1105 | == int ch, FILE *d); |
1106 | == #endif | |
1107 | */ | |
1108 | static void | |
1f2f436a A |
1109 | print(struct match *m, |
1110 | const char *caption, | |
1111 | states st, | |
1112 | int ch, | |
1113 | FILE *d) | |
e9ce8d39 | 1114 | { |
9385eb3d A |
1115 | struct re_guts *g = m->g; |
1116 | int i; | |
1117 | int first = 1; | |
e9ce8d39 A |
1118 | |
1119 | if (!(m->eflags®_TRACE)) | |
1120 | return; | |
1121 | ||
1122 | fprintf(d, "%s", caption); | |
1123 | if (ch != '\0') | |
1124 | fprintf(d, " %s", pchar(ch)); | |
1125 | for (i = 0; i < g->nstates; i++) | |
1126 | if (ISSET(st, i)) { | |
1127 | fprintf(d, "%s%d", (first) ? "\t" : ", ", i); | |
1128 | first = 0; | |
1129 | } | |
1130 | fprintf(d, "\n"); | |
1131 | } | |
1132 | ||
9385eb3d | 1133 | /* |
e9ce8d39 A |
1134 | - at - print current situation |
1135 | == #ifdef REDEBUG | |
1f2f436a A |
1136 | == static void at(struct match *m, const char *title, const char *start, \ |
1137 | == const char *stop, sopno startst, sopno stopst); | |
e9ce8d39 A |
1138 | == #endif |
1139 | */ | |
1140 | static void | |
1f2f436a A |
1141 | at( struct match *m, |
1142 | const char *title, | |
1143 | const char *start, | |
1144 | const char *stop, | |
1145 | sopno startst, | |
1146 | sopno stopst) | |
e9ce8d39 A |
1147 | { |
1148 | if (!(m->eflags®_TRACE)) | |
1149 | return; | |
1150 | ||
1151 | printf("%s %s-", title, pchar(*start)); | |
1152 | printf("%s ", pchar(*stop)); | |
1153 | printf("%ld-%ld\n", (long)startst, (long)stopst); | |
1154 | } | |
1155 | ||
1156 | #ifndef PCHARDONE | |
1157 | #define PCHARDONE /* never again */ | |
1158 | /* | |
1159 | - pchar - make a character printable | |
1160 | == #ifdef REDEBUG | |
1f2f436a | 1161 | == static const char *pchar(int ch); |
e9ce8d39 A |
1162 | == #endif |
1163 | * | |
1164 | * Is this identical to regchar() over in debug.c? Well, yes. But a | |
1165 | * duplicate here avoids having a debugging-capable regexec.o tied to | |
1166 | * a matching debug.o, and this is convenient. It all disappears in | |
1167 | * the non-debug compilation anyway, so it doesn't matter much. | |
1168 | */ | |
1f2f436a A |
1169 | static const char * /* -> representation */ |
1170 | pchar(int ch) | |
e9ce8d39 A |
1171 | { |
1172 | static char pbuf[10]; | |
1173 | ||
9385eb3d | 1174 | if (isprint((uch)ch) || ch == ' ') |
e9ce8d39 A |
1175 | sprintf(pbuf, "%c", ch); |
1176 | else | |
1177 | sprintf(pbuf, "\\%o", ch); | |
1178 | return(pbuf); | |
1179 | } | |
1180 | #endif | |
1181 | #endif | |
1182 | ||
1183 | #undef matcher | |
1184 | #undef fast | |
1185 | #undef slow | |
1186 | #undef dissect | |
1187 | #undef backref | |
1188 | #undef step | |
1189 | #undef print | |
1190 | #undef at | |
1191 | #undef match |