]> git.saurik.com Git - apple/libc.git/blob - regex/TRE/lib/tre-match-backtrack.c
Libc-1439.100.3.tar.gz
[apple/libc.git] / regex / TRE / lib / tre-match-backtrack.c
1 /*
2 tre-match-backtrack.c - TRE backtracking regex matching engine
3
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
6
7 */
8
9 /*
10 This matcher is for regexps that use back referencing. Regexp matching
11 with back referencing is an NP-complete problem on the number of back
12 references. The easiest way to match them is to use a backtracking
13 routine which basically goes through all possible paths in the TNFA
14 and chooses the one which results in the best (leftmost and longest)
15 match. This can be spectacularly expensive and may run out of stack
16 space, but there really is no better known generic algorithm. Quoting
17 Henry Spencer from comp.compilers:
18 <URL: http://compilers.iecc.com/comparch/article/93-03-102>
19
20 POSIX.2 REs require longest match, which is really exciting to
21 implement since the obsolete ("basic") variant also includes
22 \<digit>. I haven't found a better way of tackling this than doing
23 a preliminary match using a DFA (or simulation) on a modified RE
24 that just replicates subREs for \<digit>, and then doing a
25 backtracking match to determine whether the subRE matches were
26 right. This can be rather slow, but I console myself with the
27 thought that people who use \<digit> deserve very slow execution.
28 (Pun unintentional but very appropriate.)
29
30 */
31
32
33 #ifdef HAVE_CONFIG_H
34 #include <config.h>
35 #endif /* HAVE_CONFIG_H */
36
37 /* Unset TRE_USE_ALLOCA to avoid using the stack to hold all the state
38 info while running */
39 #undef TRE_USE_ALLOCA
40
41 #ifdef TRE_USE_ALLOCA
42 /* AIX requires this to be the first thing in the file. */
43 #ifndef __GNUC__
44 # if HAVE_ALLOCA_H
45 # include <alloca.h>
46 # else
47 # ifdef _AIX
48 #pragma alloca
49 # else
50 # ifndef alloca /* predefined by HP cc +Olibcalls */
51 char *alloca ();
52 # endif
53 # endif
54 # endif
55 #endif
56 #endif /* TRE_USE_ALLOCA */
57
58 #include <assert.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #ifdef HAVE_WCHAR_H
62 #include <wchar.h>
63 #endif /* HAVE_WCHAR_H */
64 #ifdef HAVE_WCTYPE_H
65 #include <wctype.h>
66 #endif /* HAVE_WCTYPE_H */
67 #ifndef TRE_WCHAR
68 #include <ctype.h>
69 #endif /* !TRE_WCHAR */
70 #ifdef HAVE_MALLOC_H
71 #include <malloc.h>
72 #endif /* HAVE_MALLOC_H */
73
74 #include "tre-internal.h"
75 #include "tre-mem.h"
76 #include "tre-match-utils.h"
77 #include "tre.h"
78 #include "xmalloc.h"
79
80 typedef struct {
81 int pos;
82 unsigned int pos_add_next;
83 const char *str_byte;
84 #ifdef TRE_WCHAR
85 const wchar_t *str_wide;
86 #endif /* TRE_WCHAR */
87 tre_tnfa_transition_t *state;
88 int state_id;
89 int next_c;
90 tre_tag_t *tags;
91 #ifdef TRE_MBSTATE
92 mbstate_t mbstate;
93 #endif /* TRE_MBSTATE */
94 } tre_backtrack_item_t;
95
96 typedef struct tre_backtrack_struct {
97 tre_backtrack_item_t item;
98 struct tre_backtrack_struct *prev;
99 struct tre_backtrack_struct *next;
100 } *tre_backtrack_t;
101
102 #ifdef TRE_WCHAR
103 #define BT_STACK_WIDE_IN(_str_wide) stack->item.str_wide = (_str_wide)
104 #define BT_STACK_WIDE_OUT (str_wide) = stack->item.str_wide
105 #else /* !TRE_WCHAR */
106 #define BT_STACK_WIDE_IN(_str_wide)
107 #define BT_STACK_WIDE_OUT
108 #endif /* !TRE_WCHAR */
109
110 #ifdef TRE_MBSTATE
111 #define BT_STACK_MBSTATE_IN stack->item.mbstate = (mbstate)
112 #define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate
113 #else /* !TRE_MBSTATE */
114 #define BT_STACK_MBSTATE_IN
115 #define BT_STACK_MBSTATE_OUT
116 #endif /* !TRE_MBSTATE */
117
118
119 #ifdef TRE_USE_ALLOCA
120 #define tre_bt_mem_new tre_mem_newa
121 #define tre_bt_mem_alloc tre_mem_alloca
122 #define tre_bt_mem_destroy(obj) do { } while (0)
123 #else /* !TRE_USE_ALLOCA */
124 #define tre_bt_mem_new tre_mem_new
125 #define tre_bt_mem_alloc tre_mem_alloc
126 #define tre_bt_mem_destroy tre_mem_destroy
127 #endif /* !TRE_USE_ALLOCA */
128
129
130 #define BT_STACK_PUSH(_pos, _pos_add_next, _str_byte, _str_wide, _state, _state_id, _next_c, _tags, _mbstate) \
131 do \
132 { \
133 if (!stack->next) \
134 { \
135 tre_backtrack_t s; \
136 s = tre_bt_mem_alloc(mem, sizeof(*s)); \
137 if (!s) \
138 { \
139 tre_bt_mem_destroy(mem); \
140 if (tags) \
141 xfree(tags); \
142 if (pmatch) \
143 xfree(pmatch); \
144 if (states_seen) \
145 xfree(states_seen); \
146 return REG_ESPACE; \
147 } \
148 s->prev = stack; \
149 s->next = NULL; \
150 s->item.tags = tre_bt_mem_alloc(mem, \
151 num_tags * sizeof(*tags)); \
152 if (!s->item.tags) \
153 { \
154 tre_bt_mem_destroy(mem); \
155 if (tags) \
156 xfree(tags); \
157 if (pmatch) \
158 xfree(pmatch); \
159 if (states_seen) \
160 xfree(states_seen); \
161 return REG_ESPACE; \
162 } \
163 stack->next = s; \
164 stack = s; \
165 } \
166 else \
167 stack = stack->next; \
168 stack->item.pos = (_pos); \
169 stack->item.pos_add_next = (_pos_add_next); \
170 stack->item.str_byte = (_str_byte); \
171 BT_STACK_WIDE_IN(_str_wide); \
172 stack->item.state = (_state); \
173 stack->item.state_id = (_state_id); \
174 stack->item.next_c = (_next_c); \
175 memcpy(stack->item.tags, (_tags), num_tags * sizeof(*(_tags))); \
176 BT_STACK_MBSTATE_IN; \
177 } \
178 while (/*CONSTCOND*/0)
179
180 #ifdef TRE_STR_USER
181 #define BT_STACK_POP() \
182 do \
183 { \
184 assert(stack->prev); \
185 pos = stack->item.pos; \
186 pos_add_next = stack->item.pos_add_next; \
187 if (type == STR_USER) \
188 str_source->rewind(pos + pos_add_next, str_source->context); \
189 str_byte = stack->item.str_byte; \
190 BT_STACK_WIDE_OUT; \
191 state = stack->item.state; \
192 next_c = stack->item.next_c; \
193 memcpy(tags, stack->item.tags, num_tags * sizeof(*tags)); \
194 BT_STACK_MBSTATE_OUT; \
195 stack = stack->prev; \
196 } \
197 while (/*CONSTCOND*/0)
198 #else /* !TRE_STR_USER */
199 #define BT_STACK_POP() \
200 do \
201 { \
202 assert(stack->prev); \
203 pos = stack->item.pos; \
204 pos_add_next = stack->item.pos_add_next; \
205 str_byte = stack->item.str_byte; \
206 BT_STACK_WIDE_OUT; \
207 state = stack->item.state; \
208 next_c = stack->item.next_c; \
209 memcpy(tags, stack->item.tags, num_tags * sizeof(*tags)); \
210 BT_STACK_MBSTATE_OUT; \
211 stack = stack->prev; \
212 } \
213 while (/*CONSTCOND*/0)
214 #endif /* !TRE_STR_USER */
215
216 #undef MIN
217 #define MIN(a, b) ((a) <= (b) ? (a) : (b))
218
219 reg_errcode_t
220 tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
221 int len, tre_str_type_t type, tre_tag_t *match_tags,
222 int eflags, int *match_end_ofs)
223 {
224 /* State variables required by GET_NEXT_WCHAR. */
225 tre_char_t prev_c = 0, next_c = 0;
226 const char *str_byte = string;
227 int pos = 0;
228 unsigned int pos_add_next = 1;
229 #ifdef TRE_WCHAR
230 const wchar_t *str_wide = string;
231 #ifdef TRE_MBSTATE
232 mbstate_t mbstate;
233 #endif /* TRE_MBSTATE */
234 #endif /* TRE_WCHAR */
235 int reg_notbol = eflags & REG_NOTBOL;
236 int reg_noteol = eflags & REG_NOTEOL;
237 int reg_newline = tnfa->cflags & REG_NEWLINE;
238 #ifdef TRE_STR_USER
239 int str_user_end = 0;
240 #endif /* TRE_STR_USER */
241 int i;
242
243 /* These are used to remember the necessary values of the above
244 variables to return to the position where the current search
245 started from. */
246 int next_c_start;
247 const char *str_byte_start;
248 int pos_start = -1;
249 #ifdef TRE_WCHAR
250 const wchar_t *str_wide_start;
251 #endif /* TRE_WCHAR */
252 #ifdef TRE_MBSTATE
253 mbstate_t mbstate_start;
254 #endif /* TRE_MBSTATE */
255
256 /* End offset of best match so far, or -1 if no match found yet. */
257 int match_eo = -1;
258 /* Tag arrays. */
259 int *next_tags;
260 tre_tag_t *tags = NULL;
261 /* Current TNFA state. */
262 tre_tnfa_transition_t *state;
263 int *states_seen = NULL;
264
265 /* Memory allocator to for allocating the backtracking stack. */
266 tre_mem_t mem = tre_bt_mem_new();
267
268 /* The backtracking stack. */
269 tre_backtrack_t stack;
270
271 tre_tnfa_transition_t *trans_i;
272 regmatch_t *pmatch = NULL;
273 reg_errcode_t ret;
274
275 int num_tags = tnfa->num_tags;
276 int touch = 1;
277 char *buf = NULL;
278 int tbytes;
279
280 #ifdef TRE_MBSTATE
281 memset(&mbstate, '\0', sizeof(mbstate));
282 #endif /* TRE_MBSTATE */
283
284 if (!mem)
285 return REG_ESPACE;
286 stack = tre_bt_mem_alloc(mem, sizeof(*stack));
287 if (!stack)
288 {
289 ret = REG_ESPACE;
290 goto error_exit;
291 }
292 stack->prev = NULL;
293 stack->next = NULL;
294
295 DPRINT(("tnfa_execute_backtrack, input type %d\n", type));
296 DPRINT(("len = %d\n", len));
297
298 {
299 int pbytes, sbytes, total_bytes;
300 char *tmp_buf;
301 /* Compute the length of the block we need. */
302 tbytes = sizeof(*tags) * num_tags;
303 pbytes = sizeof(*pmatch) * tnfa->num_submatches;
304 sbytes = sizeof(*states_seen) * tnfa->num_states;
305 total_bytes =
306 (sizeof(long) - 1) * 2 /* for alignment paddings */
307 + tbytes + pbytes + sbytes;
308
309 DPRINT(("tre_tnfa_run_backtrack, allocate %d bytes\n", total_bytes));
310 /* Allocate the memory. */
311 #ifdef TRE_USE_ALLOCA
312 buf = alloca(total_bytes);
313 #else /* !TRE_USE_ALLOCA */
314 buf = xmalloc((unsigned)total_bytes);
315 #endif /* !TRE_USE_ALLOCA */
316 if (buf == NULL)
317 return REG_ESPACE;
318
319 /* Get the various pointers within tmp_buf (properly aligned). */
320 tags = (void *)buf;
321 tmp_buf = buf + tbytes;
322 tmp_buf += ALIGN(tmp_buf, long);
323 pmatch = (void *)tmp_buf;
324 tmp_buf += pbytes;
325 tmp_buf += ALIGN(tmp_buf, long);
326 states_seen = (void *)tmp_buf;
327 }
328
329 retry:
330 {
331 memset(tags, 0, num_tags * sizeof(*tags));
332 if (match_tags)
333 memset(match_tags, 0, num_tags * sizeof(*match_tags));
334 for (i = 0; i < tnfa->num_states; i++)
335 states_seen[i] = 0;
336 }
337
338 state = NULL;
339 pos = pos_start;
340 #ifdef TRE_STR_USER
341 if (type == STR_USER)
342 str_source->rewind(pos + pos_add_next, str_source->context);
343 #endif /* TRE_STR_USER */
344 GET_NEXT_WCHAR();
345 pos_start = pos;
346 next_c_start = next_c;
347 str_byte_start = str_byte;
348 #ifdef TRE_WCHAR
349 str_wide_start = str_wide;
350 #endif /* TRE_WCHAR */
351 #ifdef TRE_MBSTATE
352 mbstate_start = mbstate;
353 #endif /* TRE_MBSTATE */
354
355 /* Handle initial states. */
356 next_tags = NULL;
357 for (trans_i = tnfa->initial; trans_i->state; trans_i++)
358 {
359 DPRINT(("> init %p, prev_c %lc\n", trans_i->state, (tre_cint_t)prev_c));
360 if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions))
361 {
362 DPRINT(("assert failed\n"));
363 continue;
364 }
365 if (state == NULL)
366 {
367 /* Start from this state. */
368 state = trans_i->state;
369 next_tags = trans_i->tags;
370 }
371 else
372 {
373 /* Backtrack to this state. */
374 DPRINT(("saving state %d for backtracking\n", trans_i->state_id));
375 BT_STACK_PUSH(pos, pos_add_next, str_byte, str_wide, trans_i->state,
376 trans_i->state_id, next_c, tags, mbstate);
377 {
378 int *tmp = trans_i->tags;
379 if (tmp)
380 {
381 while (*tmp >= 0)
382 tre_tag_set(stack->item.tags, *tmp++, pos, touch);
383 touch++;
384 }
385 }
386 }
387 }
388
389 if (next_tags)
390 {
391 for (; *next_tags >= 0; next_tags++)
392 tre_tag_set(tags, *next_tags, pos, touch);
393 touch++;
394 }
395
396
397 DPRINT(("entering match loop, pos %d, str_byte %p\n", pos, str_byte));
398 DPRINT(("pos:chr/code | state and tags\n"));
399 DPRINT(("-------------+------------------------------------------------\n"));
400
401 if (state == NULL)
402 goto backtrack;
403
404 while (/*CONSTCOND*/1)
405 {
406 tre_tnfa_transition_t *next_state;
407 int empty_br_match;
408
409 DPRINT(("start loop\n"));
410
411 if (match_eo >= 0 && tnfa->num_minimals)
412 {
413 int skip = 0;
414 #ifdef TRE_DEBUG
415 DPRINT(("Checking minimal conditions: match_eo=%d match_tags=",
416 match_eo));
417 tre_print_tags(match_tags, tnfa->num_tags);
418 DPRINT(("\n"));
419 #endif /* TRE_DEBUG */
420 for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2)
421 {
422 int end = tnfa->minimal_tags[i];
423 int start = tnfa->minimal_tags[i + 1];
424 DPRINT((" Minimal start %d, end %d\n", start, end));
425 if (tre_minimal_tag_order(start, end, match_tags, tags) > 0)
426 {
427 skip = 1;
428 break;
429 }
430 }
431 if (!skip)
432 {
433 #ifdef TRE_DEBUG
434 DPRINT((" Keeping tags="));
435 tre_print_tags(tags, tnfa->num_tags);
436 DPRINT(("\n"));
437 #endif /* TRE_DEBUG */
438 }
439 else
440 {
441 #ifdef TRE_DEBUG
442 DPRINT((" Throwing out tags="));
443 tre_print_tags(tags, tnfa->num_tags);
444 DPRINT(("\n"));
445 #endif /* TRE_DEBUG */
446 goto backtrack;
447 }
448 }
449
450 if (state == tnfa->final)
451 {
452 DPRINT((" match found, match_eo=%d pos=%d\n", match_eo, pos));
453
454 if (match_eo >= 0 && tnfa->num_minimals)
455 {
456 int compare = 0;
457 #ifdef TRE_DEBUG
458 DPRINT(("Checking minimal conditions: match_eo=%d "
459 "match_tags=", match_eo));
460 tre_print_tags(match_tags, tnfa->num_tags);
461 DPRINT(("\n"));
462 #endif /* TRE_DEBUG */
463 for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2)
464 {
465 int end = tnfa->minimal_tags[i];
466 int start = tnfa->minimal_tags[i + 1];
467 DPRINT((" Minimal start %d, end %d\n", start, end));
468 if ((compare = tre_minimal_tag_order(start, end,
469 match_tags, tags)) != 0)
470 break;
471 }
472 if (compare > 0)
473 {
474 #ifdef TRE_DEBUG
475 DPRINT((" Throwing out new match, tags="));
476 tre_print_tags(tags, tnfa->num_tags);
477 DPRINT(("\n"));
478 #endif /* TRE_DEBUG */
479 goto backtrack;
480 }
481 else if (compare < 0)
482 {
483 #ifdef TRE_DEBUG
484 DPRINT((" Throwing out old match, tags="));
485 tre_print_tags(match_tags, tnfa->num_tags);
486 DPRINT(("\n"));
487 #endif /* TRE_DEBUG */
488 match_eo = -1;
489 }
490 }
491
492 if (match_eo < pos
493 || (match_eo == pos
494 && match_tags
495 && tre_tag_order(tnfa->num_tags, tnfa->tag_directions,
496 tags, match_tags)))
497 {
498 /* This match wins the previous match. */
499 #ifdef TRE_DEBUG
500 DPRINT((" win previous tags="));
501 tre_print_tags(tags, tnfa->num_tags);
502 DPRINT(("\n"));
503 #endif /* TRE_DEBUG */
504 match_eo = pos;
505 if (match_tags)
506 memcpy(match_tags, tags, num_tags * sizeof(*tags));
507 }
508 /* Our TNFAs never have transitions leaving from the final state,
509 so we jump right to backtracking. */
510 goto backtrack;
511 }
512
513 #ifdef TRE_DEBUG
514 DPRINT(("%3d:%2lc/%05d | %p ", pos, (tre_cint_t)next_c, (int)next_c,
515 state));
516 tre_print_tags(tags, tnfa->num_tags);
517 DPRINT(("\n"));
518 #endif /* TRE_DEBUG */
519
520 /* Go to the next character in the input string. */
521 empty_br_match = 0;
522 trans_i = state;
523 if (trans_i->state && trans_i->assertions & ASSERT_BACKREF)
524 {
525 /* This is a back reference state. All transitions leaving from
526 this state have the same back reference "assertion". Instead
527 of reading the next character, we match the back reference. */
528 int so, eo, bt = trans_i->u.backref;
529 int bt_len;
530 int result;
531
532 DPRINT((" should match back reference %d\n", bt));
533 /* Get the substring we need to match against. Remember to
534 turn off REG_NOSUB temporarily. */
535 ret = tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & ~REG_NOSUB,
536 tnfa, tags, pos);
537 if (ret != REG_OK) goto error_exit;
538 so = pmatch[bt].rm_so;
539 eo = pmatch[bt].rm_eo;
540 bt_len = eo - so;
541
542 #ifdef TRE_DEBUG
543 {
544 int slen;
545 if (len < 0)
546 slen = bt_len;
547 else
548 slen = MIN(bt_len, len - pos);
549
550 if (type == STR_BYTE)
551 {
552 DPRINT((" substring (len %d) is [%d, %d]: '%.*s'\n",
553 bt_len, so, eo, bt_len, (char*)string + so));
554 DPRINT((" current string is '%.*s'\n", slen, str_byte - 1));
555 }
556 #ifdef TRE_WCHAR
557 else if (type == STR_WIDE)
558 {
559 DPRINT((" substring (len %d) is [%d, %d]: '%.*" STRF "'\n",
560 bt_len, so, eo, bt_len, (wchar_t*)string + so));
561 DPRINT((" current string is '%.*" STRF "'\n",
562 slen, str_wide - 1));
563 }
564 #endif /* TRE_WCHAR */
565 }
566 #endif
567
568 if (so < 0)
569 {
570 result = 1; /* Back reference of nomatch doesn't match */
571 }
572 else if (len < 0)
573 {
574 #ifdef TRE_STR_USER
575 if (type == STR_USER)
576 result = str_source->compare((unsigned)so, (unsigned)pos,
577 (unsigned)bt_len,
578 str_source->context);
579 else
580 #endif /* TRE_STR_USER */
581 #ifdef TRE_WCHAR
582 if (type == STR_WIDE)
583 result = wcsncmp((const wchar_t*)string + so, str_wide - 1,
584 (size_t)bt_len);
585 else
586 #endif /* TRE_WCHAR */
587 result = strncmp((const char*)string + so, str_byte - 1,
588 (size_t)bt_len);
589 }
590 else if (len - pos < bt_len)
591 result = 1;
592 #ifdef TRE_WCHAR
593 else if (type == STR_WIDE)
594 result = wmemcmp((const wchar_t*)string + so, str_wide - 1,
595 (size_t)bt_len);
596 #endif /* TRE_WCHAR */
597 else
598 result = memcmp((const char*)string + so, str_byte - 1,
599 (size_t)bt_len);
600
601 if (result == 0)
602 {
603 /* Back reference matched. Check for infinite loop. */
604 if (bt_len == 0)
605 empty_br_match = 1;
606 if (empty_br_match && states_seen[trans_i->state_id])
607 {
608 DPRINT((" avoid loop\n"));
609 goto backtrack;
610 }
611
612 states_seen[trans_i->state_id] = empty_br_match;
613
614 /* Advance in input string and resync `prev_c', `next_c'
615 and pos. */
616 DPRINT((" back reference matched\n"));
617 str_byte += bt_len - 1;
618 #ifdef TRE_WCHAR
619 str_wide += bt_len - 1;
620 #endif /* TRE_WCHAR */
621 pos += bt_len - 1;
622 GET_NEXT_WCHAR();
623 DPRINT((" pos now %d\n", pos));
624 }
625 else
626 {
627 DPRINT((" back reference did not match\n"));
628 goto backtrack;
629 }
630 }
631 else
632 {
633 /* Check for end of string. */
634 if (len < 0)
635 {
636 #ifdef TRE_STR_USER
637 if (type == STR_USER)
638 {
639 if (str_user_end)
640 goto backtrack;
641 }
642 else
643 #endif /* TRE_STR_USER */
644 if (next_c == L'\0')
645 goto backtrack;
646 }
647 else
648 {
649 if (pos >= len)
650 goto backtrack;
651 }
652
653 /* Read the next character. */
654 GET_NEXT_WCHAR();
655 }
656
657 next_state = NULL;
658 for (trans_i = state; trans_i->state; trans_i++)
659 {
660 DPRINT((" transition %d-%d (%c-%c) %d to %d\n",
661 trans_i->code_min, trans_i->code_max,
662 trans_i->code_min, trans_i->code_max,
663 trans_i->assertions, trans_i->state_id));
664 if (trans_i->code_min <= (tre_cint_t)prev_c
665 && trans_i->code_max >= (tre_cint_t)prev_c)
666 {
667 if (trans_i->assertions
668 && (CHECK_ASSERTIONS(trans_i->assertions)
669 || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
670 {
671 DPRINT((" assertion failed\n"));
672 continue;
673 }
674
675 if (next_state == NULL)
676 {
677 /* First matching transition. */
678 DPRINT((" Next state is %d\n", trans_i->state_id));
679 next_state = trans_i->state;
680 next_tags = trans_i->tags;
681 }
682 else
683 {
684 /* Second matching transition. We may need to backtrack here
685 to take this transition instead of the first one, so we
686 push this transition in the backtracking stack so we can
687 jump back here if needed. */
688 DPRINT((" saving state %d for backtracking\n",
689 trans_i->state_id));
690 BT_STACK_PUSH(pos, pos_add_next, str_byte, str_wide,
691 trans_i->state, trans_i->state_id, next_c,
692 tags, mbstate);
693 {
694 int *tmp;
695 for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++)
696 tre_tag_set(stack->item.tags, *tmp, pos, touch);
697 touch++;
698 }
699 #if 0 /* XXX - it's important not to look at all transitions here to keep
700 the stack small! */
701 break;
702 #endif
703 }
704 }
705 }
706
707 if (next_state != NULL)
708 {
709 /* Matching transitions were found. Take the first one. */
710 state = next_state;
711
712 /* Update the tag values. */
713 if (next_tags)
714 {
715 while (*next_tags >= 0)
716 tre_tag_set(tags, *next_tags++, pos, touch);
717 touch++;
718 }
719 }
720 else
721 {
722 backtrack:
723 /* A matching transition was not found. Try to backtrack. */
724 if (stack->prev)
725 {
726 DPRINT((" backtracking\n"));
727 if (stack->item.state->assertions & ASSERT_BACKREF)
728 {
729 DPRINT((" states_seen[%d] = 0\n",
730 stack->item.state_id));
731 states_seen[stack->item.state_id] = 0;
732 }
733
734 BT_STACK_POP();
735 }
736 else if (match_eo < 0)
737 {
738 /* Try starting from a later position in the input string. */
739 /* Check for end of string. */
740 if (pos == pos_start)
741 {
742 if (len < 0)
743 {
744 if (next_c == L'\0')
745 {
746 DPRINT(("end of string.\n"));
747 break;
748 }
749 }
750 else
751 {
752 if (pos >= len)
753 {
754 DPRINT(("end of string.\n"));
755 break;
756 }
757 }
758 }
759 DPRINT(("restarting from next start position\n"));
760 next_c = next_c_start;
761 #ifdef TRE_MBSTATE
762 mbstate = mbstate_start;
763 #endif /* TRE_MBSTATE */
764 str_byte = str_byte_start;
765 #ifdef TRE_WCHAR
766 str_wide = str_wide_start;
767 #endif /* TRE_WCHAR */
768 goto retry;
769 }
770 else
771 {
772 DPRINT(("finished\n"));
773 break;
774 }
775 }
776 }
777
778 ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
779 *match_end_ofs = match_eo;
780
781 error_exit:
782 tre_bt_mem_destroy(mem);
783 #ifndef TRE_USE_ALLOCA
784 if (buf)
785 xfree(buf);
786 #endif /* !TRE_USE_ALLOCA */
787
788 return ret;
789 }