]> git.saurik.com Git - apple/libc.git/blob - regex/TRE/lib/tre-compile.c
Libc-1439.100.3.tar.gz
[apple/libc.git] / regex / TRE / lib / tre-compile.c
1 /*
2 tre-compile.c - TRE regex compiler
3
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
6
7 */
8
9 /*
10 TODO:
11 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
12 function calls.
13 */
14
15
16 #ifdef HAVE_CONFIG_H
17 #include <config.h>
18 #endif /* HAVE_CONFIG_H */
19 #include <stdio.h>
20 #include <assert.h>
21 #include <string.h>
22 #include <limits.h>
23
24 #include "tre-internal.h"
25 #include "tre-mem.h"
26 #include "tre-stack.h"
27 #include "tre-ast.h"
28 #include "tre-parse.h"
29 #include "tre-compile.h"
30 #include "tre.h"
31 #include "tre-last-matched.h"
32 #include "xmalloc.h"
33
34 #pragma clang diagnostic push
35 #pragma clang diagnostic ignored "-Wunreachable-code"
36
37 /*
38 The bit_ffs() macro in bitstring.h is flawed. Replace it with a working one.
39 */
40 #undef bit_ffs
41 #define bit_ffs(name, nbits, value) { \
42 register bitstr_t *_name = name; \
43 register int _byte, _nbits = nbits; \
44 register int _stopbyte = _bit_byte(_nbits), _value = -1; \
45 for (_byte = 0; _byte <= _stopbyte; ++_byte) \
46 if (_name[_byte]) { \
47 _value = _byte << 3; \
48 for (_stopbyte = _name[_byte]; !(_stopbyte&0x1); \
49 ++_value, _stopbyte >>= 1); \
50 break; \
51 } \
52 *(value) = _value; \
53 }
54
55 /*
56 Algorithms to setup tags so that submatch addressing can be done.
57 */
58
59
60 #ifdef TRE_DEBUG
61 static const char *tag_dir_str[] = {
62 "minimize",
63 "maximize",
64 "left-maximize"
65 };
66
67 static const char _indent[] = " ";
68
69 static void
70 print_indent(int indent)
71 {
72 while (indent-- > 0)
73 DPRINT((_indent));
74 }
75
76 static void print_last_matched_pre(tre_last_matched_pre_t *lm, int indent,
77 int num_tags);
78 static void
79 print_last_match_branch_pre(tre_last_matched_branch_pre_t *branch, int indent,
80 int num_tags)
81 {
82 tre_last_matched_pre_t *u = branch->last_matched;
83 int n_last_matched = 0;
84
85 while (u)
86 {
87 n_last_matched++;
88 u = u->next;
89 }
90
91 print_indent(indent);
92 DPRINT(("BRANCH: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
93 branch->tot_branches, branch->tot_last_matched, branch->tot_tags));
94 print_indent(indent);
95 DPRINT(("..n_last_matched=%d last_matched=%d\n", branch->n_last_matched,
96 n_last_matched));
97 if (branch->n_last_matched != n_last_matched)
98 DPRINT(("*** mismatch between n_last_matched and unions ***\n"));
99 if (branch->cmp_tag > 0)
100 {
101 int i;
102 const char *sep = " tags=";
103 print_indent(indent);
104 DPRINT(("..cmp_tag=%d n_tags=%d", branch->cmp_tag, branch->n_tags));
105 for (i = 0; i < num_tags; i++)
106 if (bit_test(branch->tags, i))
107 {
108 DPRINT(("%s%d", sep, i));
109 sep = ",";
110 }
111 DPRINT(("\n"));
112 }
113
114 u = branch->last_matched;
115 indent++;
116 while (u)
117 {
118 print_last_matched_pre(u, indent, num_tags);
119 u = u->next;
120 }
121 }
122
123 static void
124 print_last_matched_pre(tre_last_matched_pre_t *lm, int indent, int num_tags)
125 {
126 tre_last_matched_branch_pre_t *b = lm->branches;
127 int n_branches = 0;
128
129 while (b)
130 {
131 n_branches++;
132 b = b->next;
133 }
134
135 print_indent(indent);
136 DPRINT(("LAST_MATCHED: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
137 lm->tot_branches, lm->tot_last_matched, lm->tot_tags));
138 print_indent(indent);
139 DPRINT(("..start_tag=%d n_branches=%d branches=%d\n", lm->start_tag,
140 lm->n_branches, n_branches));
141 if (lm->n_branches != n_branches)
142 DPRINT(("*** mismatch between n and branches ***\n"));
143
144 b = lm->branches;
145 indent++;
146 while (b)
147 {
148 print_last_match_branch_pre(b, indent, num_tags);
149 b = b->next;
150 }
151 }
152
153 static void print_last_matched(tre_last_matched_t *lm, int indent);
154 static void
155 print_last_match_branch(tre_last_matched_branch_t *branch, int indent)
156 {
157 tre_last_matched_t *u;
158 int i;
159
160 print_indent(indent);
161 DPRINT(("BRANCH: n_last_matched=%d\n", branch->n_last_matched));
162 if (branch->cmp_tag > 0)
163 {
164 print_indent(indent);
165 DPRINT(("..cmp_tag=%d n_tags=%d", branch->cmp_tag, branch->n_tags));
166 if (branch->n_tags > 0)
167 {
168 const char *sep = " tags=";
169 for (i = 0; i < branch->n_tags; i++)
170 {
171 DPRINT(("%s%d", sep, branch->tags[i]));
172 sep = ",";
173 }
174 }
175 DPRINT(("\n"));
176 }
177
178 u = branch->last_matched;
179 indent++;
180 for (i = branch->n_last_matched; i > 0; i--, u++)
181 print_last_matched(u, indent);
182 }
183
184 static void
185 print_last_matched(tre_last_matched_t *lm, int indent)
186 {
187 int i;
188 tre_last_matched_branch_t *b;
189
190 print_indent(indent);
191 DPRINT(("LAST_MATCHED: n_branches=%d start_tag=%d\n", lm->n_branches,
192 lm->start_tag));
193
194 b = lm->branches;
195 indent++;
196 for (i = lm->n_branches; i > 0; i--, b++)
197 print_last_match_branch(b, indent);
198 }
199 #endif /* TRE_DEBUG */
200
201
202 /* Merge the tre_last_matched_branch_pre_t of src into dst, creating a new
203 one if needed. If tag_id > 0, add that tag as well (a negative tag_id will
204 create an unset tre_last_matched_branch_pre_t. */
205 static reg_errcode_t
206 tre_merge_branches(tre_mem_t mem, tre_ast_node_t *dst, tre_ast_node_t *src,
207 int tag_id, int num_tags)
208 {
209 tre_last_matched_branch_pre_t *db = dst->last_matched_branch;
210 tre_last_matched_branch_pre_t *sb = (src ? src->last_matched_branch : NULL);
211
212 if (db)
213 {
214 if (sb)
215 {
216 bitstr_t *l = db->tags;
217 bitstr_t *r = sb->tags;
218 int i = bitstr_size(num_tags);
219
220 while(i-- > 0)
221 *l++ |= *r++;
222 /* db and sb are the info from two parallel sub-trees, so the tags
223 must be mutually exclusive, and we can just add their numbers */
224 db->n_tags += sb->n_tags;
225 db->tot_tags += sb->tot_tags;
226 if (db->last_matched)
227 {
228 if (sb->last_matched)
229 {
230 tre_last_matched_pre_t *u = db->last_matched;
231
232 while(u->next)
233 u = u->next;
234 u->next = sb->last_matched;
235 db->n_last_matched += sb->n_last_matched;
236 db->tot_branches += sb->tot_branches;
237 db->tot_last_matched += sb->tot_last_matched;
238 }
239 }
240 else if (sb->last_matched)
241 {
242 db->last_matched = sb->last_matched;
243 db->n_last_matched = sb->n_last_matched;
244 db->tot_branches = sb->tot_branches;
245 db->tot_last_matched = sb->tot_last_matched;
246 }
247 }
248 }
249 else
250 db = sb;
251
252 if (tag_id != 0)
253 {
254 if (!db)
255 {
256 db = tre_mem_calloc(mem, sizeof(tre_last_matched_branch_pre_t)
257 + bitstr_size(num_tags));
258 if (db == NULL)
259 return REG_ESPACE;
260 db->tot_branches = 1;
261 }
262 if (tag_id > 0)
263 {
264 /* tag_id is a new tag, and shouldn't exist in db's tags,
265 so we can always increment n_tags */
266 bit_set(db->tags, tag_id);
267 db->n_tags++;
268 db->tot_tags++;
269 }
270 }
271 dst->last_matched_branch = db;
272 return REG_OK;
273 }
274
275
276 /* Inserts a catenation node to the root of the tree given in `node'.
277 As the left child a new tag with number `tag_id' to `node' is added,
278 and the right child is the old root. */
279 static reg_errcode_t
280 tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
281 {
282 tre_catenation_t *c;
283
284 DPRINT(("add_tag_left: tag %d\n", tag_id));
285
286 c = tre_mem_alloc(mem, sizeof(*c));
287 if (c == NULL)
288 return REG_ESPACE;
289 c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
290 if (c->left == NULL)
291 return REG_ESPACE;
292 c->right = tre_mem_calloc(mem, sizeof(tre_ast_node_t));
293 if (c->right == NULL)
294 return REG_ESPACE;
295
296 c->right->obj = node->obj;
297 c->right->type = node->type;
298 c->right->last_matched_branch = node->last_matched_branch;
299 c->right->nullable = -1;
300 c->right->submatch_id = -1;
301 node->obj = c;
302 node->type = CATENATION;
303 node->original = c->right;
304 return REG_OK;
305 }
306
307 /* Inserts a catenation node to the root of the tree given in `node'.
308 As the right child a new tag with number `tag_id' to `node' is added,
309 and the left child is the old root. */
310 static reg_errcode_t
311 tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
312 {
313 tre_catenation_t *c;
314
315 DPRINT(("tre_add_tag_right: tag %d\n", tag_id));
316
317 c = tre_mem_alloc(mem, sizeof(*c));
318 if (c == NULL)
319 return REG_ESPACE;
320 c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
321 if (c->right == NULL)
322 return REG_ESPACE;
323 c->left = tre_mem_calloc(mem, sizeof(tre_ast_node_t));
324 if (c->left == NULL)
325 return REG_ESPACE;
326
327 c->left->obj = node->obj;
328 c->left->type = node->type;
329 c->left->last_matched_branch = node->last_matched_branch;
330 c->left->nullable = -1;
331 c->left->submatch_id = -1;
332 node->obj = c;
333 node->type = CATENATION;
334 node->original = c->left;
335 return REG_OK;
336 }
337
338 typedef enum {
339 ADDTAGS_RECURSE,
340 ADDTAGS_RECURSE_NOT_TOP_UNION,
341 ADDTAGS_AFTER_ITERATION,
342 ADDTAGS_AFTER_UNION_LEFT,
343 ADDTAGS_AFTER_UNION_RIGHT,
344 ADDTAGS_AFTER_CAT_LEFT,
345 ADDTAGS_AFTER_CAT_RIGHT,
346 ADDTAGS_SET_SUBMATCH_END,
347 ADDTAGS_UNION_RECURSE,
348 ADDTAGS_UNION_RIGHT_RECURSE,
349 ADDTAGS_AFTER_UNION_TOP,
350 } tre_addtags_symbol_t;
351
352 enum {
353 COPY_LAST_MATCHED_BRANCH,
354 COPY_LAST_MATCHED_BRANCH_NEXT,
355 COPY_LAST_MATCHED,
356 COPY_LAST_MATCHED_NEXT,
357 };
358
359
360 #define REGSET_UNSET ((unsigned)-1)
361
362 /* Go through `regset' and set submatch data for submatches that are
363 using this tag. */
364 static void
365 tre_purge_regset(unsigned *regset, tre_tnfa_t *tnfa, int tag)
366 {
367 int i;
368
369 for (i = 0; regset[i] != REGSET_UNSET; i++)
370 {
371 int id = regset[i] / 2;
372 int start = !(regset[i] % 2);
373 if (id >= SUBMATCH_ID_INVISIBLE_START)
374 continue;
375 DPRINT((" Using tag %d for %s offset of "
376 "submatch %d\n", tag,
377 start ? "start" : "end", id));
378 if (start)
379 tnfa->submatch_data[id].so_tag = tag;
380 else
381 tnfa->submatch_data[id].eo_tag = tag;
382 }
383 regset[0] = -1;
384 }
385
386
387 #define REGSET_HAS_STARTS 0x1
388 #define REGSET_HAS_ENDS 0x2
389
390
391 /* Adds tags to appropriate locations in the parse tree in `tree', so that
392 subexpressions marked for submatch addressing can be traced. */
393 static reg_errcode_t
394 tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
395 tre_tnfa_t *tnfa)
396 {
397 reg_errcode_t status = REG_OK;
398 tre_addtags_symbol_t symbol;
399 tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
400 int bottom = tre_stack_num_objects(stack);
401 /* True for first pass (counting number of needed tags) */
402 int first_pass = (mem == NULL || tnfa == NULL);
403 unsigned *regset, *orig_regset;
404 int regset_contains = 0;
405 int num_tags = 0; /* Total number of tags. */
406 int num_minimals = 0; /* Number of special minimal tags. */
407 int tag = 0; /* The tag that is to be added next. */
408 int next_tag = 1; /* Next tag to use after this one. */
409 int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
410 int *reorder_tags = NULL; /* Tag reorder array: a pair for each reorder,
411 * the first is the tag to reorder, the second
412 * is the tag after which the first is reordered */
413 int *rtp; /* Pointer used to fill in reorder_tags and
414 * tag_order */
415 int *to_reorder; /* Transform array converting sequential order to
416 * that specified by reorder_tags */
417 int id;
418
419 tre_tag_direction_t direction = TRE_TAG_LEFT_MAXIMIZE;
420 if (!first_pass)
421 {
422 DPRINT(("Initializing direction to %s\n", tag_dir_str[direction]));
423 tnfa->end_tag = 0;
424 tnfa->minimal_tags[0] = -1;
425 }
426
427 regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches
428 + tnfa->num_submatches_invisible + 1) * 2));
429 if (regset == NULL)
430 {
431 status = REG_ESPACE;
432 goto error_regset;
433 }
434 regset[0] = REGSET_UNSET;
435 orig_regset = regset;
436
437 if (!first_pass)
438 {
439 /* Allocate all memory for reorder_tags, tag_order, to_seq_order and
440 * to_reorder in one batch (assuming all are the same type) */
441 rtp = reorder_tags = xmalloc(sizeof(*reorder_tags) *
442 ((2 * tnfa->num_reorder_tags + 1) +
443 tnfa->num_tags));
444 if (reorder_tags == NULL)
445 {
446 status = REG_ESPACE;
447 goto error_reorder_tags;
448 }
449 to_reorder = reorder_tags + (2 * tnfa->num_reorder_tags + 1);
450 }
451
452 STACK_PUSH(stack, voidptr, node);
453 STACK_PUSH(stack, int, ADDTAGS_RECURSE);
454
455 while (tre_stack_num_objects(stack) > bottom)
456 {
457 if (status != REG_OK)
458 break;
459
460 symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
461 switch (symbol)
462 {
463 int top_union;
464
465 case ADDTAGS_SET_SUBMATCH_END:
466 {
467 int i;
468
469 id = tre_stack_pop_int(stack);
470 node = tre_stack_pop_voidptr(stack);
471 /* Add end of this submatch to regset. */
472 for (i = 0; regset[i] != REGSET_UNSET; i++);
473 regset[i] = id * 2 + 1;
474 regset[i + 1] = -1;
475 regset_contains |= REGSET_HAS_ENDS;
476
477 /* Always put a tag after a minimal iterator. */
478 if (minimal_tag >= 0)
479 {
480 if (first_pass)
481 {
482 node->num_tags++;
483 DPRINT((" ADDTAGS_SET_SUBMATCH_END: node->num_tags = %d\n",
484 node->num_tags));
485 }
486 else
487 {
488 int i;
489 status = tre_merge_branches(mem, node, NULL, tag,
490 tnfa->num_tags);
491 if (status != REG_OK)
492 break;
493 status = tre_add_tag_right(mem, node, tag);
494 if (status != REG_OK)
495 break;
496 tnfa->tag_directions[tag] = TRE_TAG_MINIMIZE;
497 DPRINT(("Setting t%d direction to %s\n", tag,
498 tag_dir_str[tnfa->tag_directions[tag]]));
499 DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
500 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
501 tnfa->minimal_tags[i] = tag;
502 tnfa->minimal_tags[i + 1] = minimal_tag;
503 tnfa->minimal_tags[i + 2] = -1;
504
505 DPRINT((" Minimal end: t%d reordered to "
506 "after t%d\n", tag, minimal_tag));
507 /* Append to tag_order, move "tag" after
508 * "minimal_tag" */
509 *rtp++ = tag;
510 *rtp++ = minimal_tag;
511
512 num_minimals++;
513 tre_purge_regset(regset, tnfa, tag);
514 }
515
516 minimal_tag = -1;
517 DPRINT((" ADDTAGS_SET_SUBMATCH_END num_tags++ tag=%d\n", tag));
518 regset[0] = REGSET_UNSET;
519 regset_contains = 0;
520 tag = next_tag;
521 num_tags++;
522 next_tag++;
523 }
524 break;
525 }
526
527 case ADDTAGS_RECURSE_NOT_TOP_UNION:
528 /* Like ADDTAGS_RECURSE, except that top_union is set to zero,
529 * indicating that if a union is being processed, it is not the
530 * top-most of a series */
531 top_union = 0;
532 goto do_addtags_recurse;
533
534 case ADDTAGS_RECURSE:
535 /* Setting top_union to 1 means that if a union is begin processed,
536 * it is the top-most of a series, and should recurse through the
537 * series to set the left_tag and right_tag values */
538 top_union = 1;
539
540 do_addtags_recurse:
541 node = tre_stack_pop_voidptr(stack);
542
543 id = node->submatch_id;
544 if (id >= 0)
545 {
546 int i;
547
548
549 /* Add start of this submatch to regset. */
550 for (i = 0; regset[i] != REGSET_UNSET; i++);
551 regset[i] = id * 2;
552 regset[i + 1] = -1;
553 regset_contains |= REGSET_HAS_STARTS;
554
555 /* Add end of this submatch to regset after processing this
556 node. */
557 STACK_PUSH(stack, voidptr, node);
558 STACK_PUSHX(stack, int, id);
559 STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
560 }
561
562 switch (node->type)
563 {
564 case LITERAL:
565 {
566 tre_literal_t *lit = node->obj;
567
568 if (!IS_SPECIAL(lit) || IS_BACKREF(lit) || IS_EMPTY(lit) || IS_ASSERTION(lit))
569 {
570 DPRINT(("Literal %d-%d\n",
571 (int)lit->code_min, (int)lit->code_max));
572 if (regset_contains)
573 {
574 /* Regset is not empty, so add a tag before the
575 literal or backref. */
576 if (first_pass)
577 {
578 DPRINT((" ADDTAGS_RECURSE:LITERAL node->num_tags = 1\n"));
579 node->num_tags = 1;
580 }
581 else
582 {
583 status = tre_merge_branches(mem, node, NULL, tag,
584 tnfa->num_tags);
585 if (status != REG_OK)
586 break;
587 status = tre_add_tag_left(mem, node, tag);
588 if (status != REG_OK)
589 break;
590 if (regset_contains == REGSET_HAS_STARTS)
591 tnfa->tag_directions[tag] = TRE_TAG_LEFT_MAXIMIZE;
592 else
593 tnfa->tag_directions[tag] = direction;
594 DPRINT(("Setting t%d direction to %s\n", tag,
595 tag_dir_str[tnfa->tag_directions[tag]]));
596 tre_purge_regset(regset, tnfa, tag);
597
598 if (IS_BACKREF(lit))
599 {
600 int b = lit->code_max;
601 int t = tnfa->submatch_data[b].so_tag;
602 /* Fail if the referenced submatch hasn't been
603 * completed yet */
604 if (tnfa->submatch_data[b].eo_tag < 0)
605 {
606 status = REG_ESUBREG;
607 break;
608 }
609 if (t < tag)
610 {
611 DPRINT((" Backref %d start: "
612 "t%d reordered to before t%d\n",
613 b, tag, t));
614 if(t > 0)
615 t--;
616 /* Append to tag_order, move "tag" after
617 * "t" */
618 *rtp++ = tag;
619 *rtp++ = t;
620 }
621 #if TRE_DEBUG
622 else
623 DPRINT((" Backref %d start: "
624 "(t%d already before t%d)\n",
625 b, tag, t));
626 #endif /* TRE_DEBUG */
627 }
628 }
629
630 DPRINT((" ADDTAGS_RECURSE:LITERAL num_tags++ tag=%d\n",
631 tag));
632 regset[0] = REGSET_UNSET;
633 regset_contains = 0;
634 tag = next_tag;
635 num_tags++;
636 next_tag++;
637 }
638 }
639 else
640 {
641 assert(!IS_TAG(lit));
642 }
643 break;
644 }
645 case CATENATION:
646 {
647 tre_catenation_t *cat = node->obj;
648 tre_ast_node_t *left = cat->left;
649 tre_ast_node_t *right = cat->right;
650 int reserved_tag = -1;
651 DPRINT(("Catenation, next_tag = %d\n", next_tag));
652
653
654 /* After processing right child. */
655 STACK_PUSHX(stack, voidptr, node);
656 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
657
658 /* Process right child. */
659 STACK_PUSHX(stack, voidptr, right);
660 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
661
662 /* After processing left child. */
663 STACK_PUSHX(stack, int, next_tag + left->num_tags);
664 DPRINT((" Pushing %d for after left\n",
665 next_tag + left->num_tags));
666 if (left->num_tags > 0 && right->num_tags > 0)
667 {
668 /* Reserve the next tag to the right child. */
669 DPRINT((" ADDTAGS_RECURSE:CATENATION num_tags++ "
670 "Reserving next_tag %d to right child\n",
671 next_tag));
672 reserved_tag = next_tag;
673 next_tag++;
674 }
675 STACK_PUSHX(stack, int, reserved_tag);
676 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
677
678 /* Process left child. */
679 STACK_PUSHX(stack, voidptr, left);
680 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
681
682 }
683 break;
684 case ITERATION:
685 {
686 tre_iteration_t *iter = node->obj;
687 DPRINT(("Iteration\n"));
688
689 if (first_pass)
690 STACK_PUSHX(stack, int, regset_contains != 0);
691 STACK_PUSHX(stack, int, tag);
692 STACK_PUSHX(stack, voidptr, node);
693 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
694
695 STACK_PUSHX(stack, voidptr, iter->arg);
696 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
697
698 /* Regset is not empty, so add a tag here (this always happens
699 because iterators always get submatch id, even if in the
700 invisible range) */
701 if (regset_contains)
702 {
703 if (!first_pass)
704 {
705 status = tre_merge_branches(mem, node, NULL, tag,
706 tnfa->num_tags);
707 if (status != REG_OK)
708 break;
709 status = tre_add_tag_left(mem, node, tag);
710 if (status != REG_OK)
711 break;
712 if (regset_contains == REGSET_HAS_STARTS && tag != 0)
713 tnfa->tag_directions[tag] = iter->minimal ?
714 TRE_TAG_MINIMIZE :
715 TRE_TAG_LEFT_MAXIMIZE;
716 else
717 tnfa->tag_directions[tag] = direction;
718 DPRINT(("Setting t%d direction to %s\n", tag,
719 tag_dir_str[tnfa->tag_directions[tag]]));
720 tre_purge_regset(regset, tnfa, tag);
721 }
722
723 DPRINT((" ADDTAGS_RECURSE:ITERATION num_tags++ tag=%d\n",
724 tag));
725 regset[0] = REGSET_UNSET;
726 regset_contains = 0;
727 tag = next_tag;
728 num_tags++;
729 next_tag++;
730 }
731 direction = TRE_TAG_LEFT_MAXIMIZE;
732 DPRINT((" Setting direction to %s\n", tag_dir_str[direction]));
733 }
734 break;
735 case UNION:
736 {
737 tre_union_t *uni;
738 tre_ast_node_t *left;
739 tre_ast_node_t *right;
740 int front_tag = -1;
741
742 DPRINT(("Union\n"));
743
744 if (regset_contains)
745 {
746 DPRINT((" UNION num_tags++ tag=%d\n", tag));
747 front_tag = tag;
748 tag = next_tag;
749 num_tags++;
750 next_tag++;
751 }
752
753 /* For the top union, walk the tree of consecutive unions,
754 * setting the left_tag and right_tag values in increasing
755 * order (left to right priority) */
756 if (top_union &&
757 (node->num_submatches -
758 (node->submatch_id >= 0 &&
759 node->submatch_id < SUBMATCH_ID_INVISIBLE_START)) > 0)
760 {
761 tre_ast_node_t *n;
762 int last = tre_stack_num_objects(stack);
763
764 STACK_PUSH(stack, voidptr, node);
765 STACK_PUSH(stack, int, ADDTAGS_UNION_RECURSE);
766
767 while (tre_stack_num_objects(stack) > last)
768 {
769 symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
770 switch (symbol)
771 {
772 case ADDTAGS_UNION_RECURSE:
773 n = tre_stack_pop_voidptr(stack);
774 uni = n->obj;
775 left = uni->left;
776
777 /* Since the top union has num_submatches > 0,
778 * we set all the consecutive union's
779 * make_branches to 1 to force the generation
780 * of end tags for each union branch. */
781 n->make_branches = 1;
782
783 STACK_PUSH(stack, voidptr, n);
784 STACK_PUSH(stack, int,
785 ADDTAGS_UNION_RIGHT_RECURSE);
786
787 if (left->type == UNION)
788 {
789 STACK_PUSH(stack, voidptr, left);
790 STACK_PUSH(stack, int,
791 ADDTAGS_UNION_RECURSE);
792 }
793 else
794 {
795 DPRINT((" ADDTAGS_UNION_RECURSE "
796 "num_tags++ tag=%d\n", tag));
797 uni->left_tag = tag;
798 tag = next_tag;
799 num_tags++;
800 next_tag++;
801 }
802 break;
803
804 case ADDTAGS_UNION_RIGHT_RECURSE:
805 n = tre_stack_pop_voidptr(stack);
806 uni = n->obj;
807 right = uni->right;
808
809 if (right->type == UNION)
810 {
811 STACK_PUSH(stack, voidptr, right);
812 STACK_PUSH(stack, int,
813 ADDTAGS_UNION_RECURSE);
814 }
815 else
816 {
817 DPRINT((" ADDTAGS_UNION_RIGHT_RECURSE "
818 "num_tags++ tag=%d\n", tag));
819 uni->right_tag = tag;
820 tag = next_tag;
821 num_tags++;
822 next_tag++;
823 }
824
825 break;
826
827 default:
828 assert(0);
829 break;
830
831 } /* end switch(symbol) */
832 } /* end while(tre_stack_num_objects(stack) > last */
833 if (!first_pass)
834 {
835 STACK_PUSHX(stack, int, front_tag);
836 STACK_PUSHX(stack, voidptr, node);
837 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_TOP);
838 }
839 } /* end if (top_union && ...) */
840
841 uni = node->obj;
842 left = uni->left;
843 right = uni->right;
844
845 /* After processing right child. */
846 STACK_PUSHX(stack, voidptr, regset);
847 STACK_PUSHX(stack, int, regset_contains != 0);
848 STACK_PUSHX(stack, voidptr, node);
849 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
850
851 /* Process right child. */
852 STACK_PUSHX(stack, voidptr, right);
853 STACK_PUSHX(stack, int, ADDTAGS_RECURSE_NOT_TOP_UNION);
854
855 /* After processing left child. */
856 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
857
858 /* Process left child. */
859 STACK_PUSHX(stack, voidptr, left);
860 STACK_PUSHX(stack, int, ADDTAGS_RECURSE_NOT_TOP_UNION);
861
862 /* Regset is not empty, so add a tag here. */
863 if (regset_contains)
864 {
865 if (!first_pass)
866 {
867 status = tre_merge_branches(mem, node, NULL, front_tag,
868 tnfa->num_tags);
869 if (status != REG_OK)
870 break;
871 status = tre_add_tag_left(mem, node, front_tag);
872 if (status != REG_OK)
873 break;
874 if (regset_contains == REGSET_HAS_STARTS)
875 tnfa->tag_directions[front_tag] = TRE_TAG_LEFT_MAXIMIZE;
876 else
877 tnfa->tag_directions[front_tag] = direction;
878 DPRINT(("Setting t%d direction to %s\n", front_tag,
879 tag_dir_str[tnfa->tag_directions[front_tag]]));
880 tre_purge_regset(regset, tnfa, front_tag);
881 }
882
883 regset[0] = REGSET_UNSET;
884 regset_contains = 0;
885 }
886
887 break;
888 }
889 } /* end switch (node->type) */
890
891 break; /* end case: ADDTAGS_RECURSE */
892
893 case ADDTAGS_AFTER_ITERATION:
894 {
895 tre_iteration_t *iter;
896 tre_ast_node_t *orig;
897 int enter_tag;
898
899 node = tre_stack_pop_voidptr(stack);
900 orig = node->original ? node->original : node;
901 iter = (tre_iteration_t *)orig->obj;
902 enter_tag = tre_stack_pop_int(stack);
903 if (iter->minimal)
904 minimal_tag = enter_tag;
905
906 DPRINT(("After iteration\n"));
907 if (first_pass)
908 {
909 node->num_tags = iter->arg->num_tags + tre_stack_pop_int(stack);
910 DPRINT((" ADDTAGS_AFTER_ITERATION: node->num_tags = %d\n",
911 node->num_tags));
912 }
913 else
914 {
915 /* node->last_matched_branch will have the start tag (the tag
916 just *before* the iteration). iter->arg->last_matched_branch
917 will have the tag(s) inside the iteration, the ones that
918 may need to be reset if the iteration doesn't match. So
919 before we merge iter->arg into node, we need to set up
920 a new tre_last_matched_t and tre_last_matched_branch_t,
921 using any of the inside tags as cmp_tag (we choose the first
922 tag found by bit_ffs). If there are no inside tags, we
923 don't bother creating the extra structures. */
924 tre_last_matched_branch_pre_t *b =
925 iter->arg->last_matched_branch;
926
927 if (b && b->n_tags > 0)
928 {
929 tre_last_matched_pre_t *u;
930
931 bit_ffs(b->tags, num_tags, &b->cmp_tag);
932 DPRINT((" ADDTAGS_AFTER_ITERATION: n_tags=%d "
933 "cmp_tag = %d\n", b->n_tags, b->cmp_tag));
934
935 u = tre_mem_calloc(mem, sizeof(tre_last_matched_pre_t) +
936 sizeof(tre_last_matched_branch_pre_t)
937 + bitstr_size(tnfa->num_tags));
938 if (!u)
939 {
940 status = REG_ESPACE;
941 break;
942 }
943 u->branches = b;
944 u->n_branches = 1;
945 u->start_tag = b->cmp_tag;
946 u->tot_branches = b->tot_branches;
947 u->tot_last_matched = 1 + b->tot_last_matched;
948 u->tot_tags = b->tot_tags;
949
950 b = (tre_last_matched_branch_pre_t *)(u + 1);
951 b->last_matched = u;
952 b->n_last_matched = 1;
953 b->tot_branches = 1 + u->tot_branches;
954 b->tot_last_matched = u->tot_last_matched;
955 b->tot_tags = u->tot_tags;
956
957 iter->arg->last_matched_branch = b;
958 }
959 status = tre_merge_branches(mem, node, iter->arg, 0,
960 tnfa->num_tags);
961 if (status != REG_OK)
962 break;
963
964 if (iter->minimal)
965 {
966 /* Add a union with a left EMPTY literal and the right
967 being iter->arg. This should force the tags inside
968 the minimal iteration to prefer being unset */
969 if (iter->min == 0 && iter->max <= 1)
970 {
971 tre_ast_node_t *u, *e;
972
973 e = tre_ast_new_literal(mem, EMPTY, -1, -1);
974 if (e == NULL)
975 {
976 status = REG_ESPACE;
977 break;
978 }
979 u = tre_ast_new_union(mem, e, iter->arg);
980 if (u == NULL)
981 {
982 status = REG_ESPACE;
983 break;
984 }
985 iter->arg = u;
986 }
987
988 direction = TRE_TAG_MINIMIZE;
989 }
990 else
991 direction = TRE_TAG_MAXIMIZE;
992 DPRINT((" Setting direction to %s\n", tag_dir_str[direction]));
993 }
994 break;
995 }
996
997 case ADDTAGS_AFTER_CAT_LEFT:
998 {
999 int new_tag = tre_stack_pop_int(stack);
1000 next_tag = tre_stack_pop_int(stack);
1001 DPRINT(("After cat left, tag = %d, next_tag = %d\n",
1002 tag, next_tag));
1003 if (new_tag >= 0)
1004 {
1005 DPRINT((" Setting tag to %d\n", new_tag));
1006 tag = new_tag;
1007 }
1008 break;
1009 }
1010
1011 case ADDTAGS_AFTER_CAT_RIGHT:
1012 {
1013 tre_catenation_t *cat;
1014
1015 DPRINT(("After cat right\n"));
1016 node = tre_stack_pop_voidptr(stack);
1017 cat = node->obj;
1018 if (first_pass)
1019 {
1020 node->num_tags = cat->left->num_tags + cat->right->num_tags;
1021 DPRINT((" ADDTAGS_AFTER_CAT_RIGHT: node->num_tags = %d\n",
1022 node->num_tags));
1023 }
1024 else
1025 {
1026 status = tre_merge_branches(mem, cat->left, cat->right, 0,
1027 tnfa->num_tags);
1028 if (status != REG_OK)
1029 break;
1030 status = tre_merge_branches(mem, node, cat->left, 0,
1031 tnfa->num_tags);
1032 }
1033 break;
1034 }
1035
1036 case ADDTAGS_AFTER_UNION_LEFT:
1037 DPRINT(("After union left\n"));
1038 /* Lift the bottom of the `regset' array so that when processing
1039 the right operand the items currently in the array are
1040 invisible. The original bottom was saved at ADDTAGS_UNION and
1041 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1042 while (*regset != REGSET_UNSET)
1043 regset++;
1044 regset_contains = 0;
1045 break;
1046
1047 case ADDTAGS_AFTER_UNION_RIGHT:
1048 {
1049 int added_tags;
1050 tre_ast_node_t *orig;
1051 tre_union_t *uni;
1052 /* Note: node may not be a UNION, but a CATENATION with a left
1053 * tag. So that is why we pass the original node->obj on the
1054 * stack, to get the union's true values. */
1055
1056 DPRINT(("After union right\n"));
1057 node = tre_stack_pop_voidptr(stack);
1058 orig = node->original ? node->original : node;
1059 uni = (tre_union_t *)orig->obj;
1060 added_tags = tre_stack_pop_int(stack);
1061 if (first_pass)
1062 {
1063 node->num_tags = uni->left->num_tags + uni->right->num_tags
1064 + added_tags;
1065 if (uni->left_tag > 0)
1066 node->num_tags++;
1067 if (uni->right_tag > 0)
1068 node->num_tags++;
1069 DPRINT((" ADDTAGS_AFTER_UNION_RIGHT: node->num_tags = %d\n",
1070 node->num_tags));
1071 }
1072 regset = tre_stack_pop_voidptr(stack);
1073
1074 /* Add tags after both children, the left child gets a smaller
1075 tag than the right child. This guarantees that we prefer
1076 the left child over the right child. */
1077 /* XXX - This is not always necessary (if the children have
1078 tags which must be seen for every match of that child). */
1079 if (!first_pass && node->make_branches)
1080 {
1081 tre_last_matched_branch_pre_t *lb =
1082 uni->left->last_matched_branch;
1083 tre_last_matched_branch_pre_t *rb =
1084 uni->right->last_matched_branch;
1085 tre_last_matched_pre_t *lu =
1086 uni->left->last_matched_in_progress;
1087 tre_last_matched_pre_t *ru =
1088 uni->right->last_matched_in_progress;
1089 tre_last_matched_pre_t *u;
1090 /* We don't need to call tre_merge_branches because these
1091 * tags don't participate in submatch ranges, so don't need
1092 * to be recorded. But we do set the cmp_tag entry of the
1093 * tre_last_matched_branch_pre_t, so we might call
1094 * tre_merge_branches if we need to create an empty
1095 * tre_last_matched_branch_pre_t. */
1096 if (uni->left_tag > 0)
1097 {
1098 DPRINT(("Setting t%d direction to maximize\n",
1099 uni->left_tag));
1100 status = tre_add_tag_right(mem, uni->left, uni->left_tag);
1101 if (status != REG_OK)
1102 break;
1103 tnfa->tag_directions[uni->left_tag] = TRE_TAG_MAXIMIZE;
1104 if (!lb)
1105 {
1106 status = tre_merge_branches(mem, uni->left, NULL, -1,
1107 tnfa->num_tags);
1108 if (status != REG_OK)
1109 break;
1110 lb = uni->left->last_matched_branch;
1111 }
1112 lb->cmp_tag = uni->left_tag;
1113 }
1114 if (uni->right_tag > 0)
1115 {
1116 DPRINT(("Setting t%d direction to maximize\n",
1117 uni->right_tag));
1118 status = tre_add_tag_right(mem, uni->right, uni->right_tag);
1119 if (status != REG_OK)
1120 break;
1121 tnfa->tag_directions[uni->right_tag] = TRE_TAG_MAXIMIZE;
1122 if (!rb)
1123 {
1124 status = tre_merge_branches(mem, uni->right, NULL, -1,
1125 tnfa->num_tags);
1126 if (status != REG_OK)
1127 break;
1128 rb = uni->right->last_matched_branch;
1129 }
1130 rb->cmp_tag = uni->right_tag;
1131 }
1132 /* Now merge the tre_last_matched_branch_pre_t into a
1133 tre_last_matched_pre_t */
1134 if (lu == NULL)
1135 {
1136 if (ru == NULL)
1137 {
1138 /* Create a new tre_last_matched_pre_t */
1139 u = tre_mem_calloc(mem, sizeof(tre_last_matched_pre_t));
1140 if (!u)
1141 {
1142 status = REG_ESPACE;
1143 break;
1144 }
1145 u->tot_last_matched = 1;
1146
1147 if (lb)
1148 {
1149 u->branches = lb;
1150 u->n_branches = 1;
1151 u->tot_branches += lb->tot_branches;
1152 u->tot_last_matched += lb->tot_last_matched;
1153 u->tot_tags += lb->tot_tags;
1154 if (rb)
1155 {
1156 lb->next = rb;
1157 u->n_branches++;
1158 u->tot_branches += rb->tot_branches;
1159 u->tot_last_matched += rb->tot_last_matched;
1160 u->tot_tags += rb->tot_tags;
1161 }
1162 }
1163 else if (rb)
1164 {
1165 u->branches = rb;
1166 u->n_branches = 1;
1167 u->tot_branches += rb->tot_branches;
1168 u->tot_last_matched += rb->tot_last_matched;
1169 u->tot_tags += rb->tot_tags;
1170 }
1171 }
1172 else
1173 {
1174 /* Use ru, and add lb */
1175 u = ru;
1176 if (lb)
1177 {
1178 lb->next = u->branches;
1179 u->branches = lb;
1180 u->n_branches++;
1181 u->tot_branches += lb->tot_branches;
1182 u->tot_last_matched += lb->tot_last_matched;
1183 u->tot_tags += lb->tot_tags;
1184 }
1185 }
1186 }
1187 else if (ru == NULL)
1188 {
1189 /* Use lu, and add rb */
1190 u = lu;
1191 if (rb)
1192 {
1193 rb->next = u->branches;
1194 u->branches = rb;
1195 u->n_branches++;
1196 u->tot_branches += rb->tot_branches;
1197 u->tot_last_matched += rb->tot_last_matched;
1198 u->tot_tags += rb->tot_tags;
1199 }
1200 }
1201 else
1202 {
1203 /* Merge lu and ru into lu */
1204 if (lu->branches)
1205 {
1206 if (ru->branches)
1207 {
1208 tre_last_matched_branch_pre_t *b = lu->branches;
1209 while (b->next) b = b->next;
1210 b->next = ru->branches;
1211 lu->n_branches += ru->n_branches;
1212 }
1213 }
1214 else if (ru->branches)
1215 {
1216 lu->branches = ru->branches;
1217 lu->n_branches = ru->n_branches;
1218 }
1219 lu->tot_branches += ru->tot_branches;
1220 lu->tot_last_matched += ru->tot_last_matched - 1;
1221 lu->tot_tags += ru->tot_tags;
1222 u = lu;
1223 }
1224 node->last_matched_in_progress = u;
1225 }
1226 direction = TRE_TAG_MAXIMIZE;
1227 break;
1228 }
1229
1230 case ADDTAGS_AFTER_UNION_TOP: /* only called when not first_pass */
1231 {
1232 tre_last_matched_branch_pre_t *b;
1233 tre_last_matched_pre_t *u;
1234 int start_tag;
1235
1236 DPRINT(("After union top\n"));
1237 node = tre_stack_pop_voidptr(stack);
1238 start_tag = tre_stack_pop_int(stack);
1239 b = tre_mem_calloc(mem, sizeof(tre_last_matched_branch_pre_t)
1240 + bitstr_size(tnfa->num_tags));
1241 if (!b)
1242 {
1243 status = REG_ESPACE;
1244 break;
1245 }
1246
1247 u = node->last_matched_in_progress;
1248 u->start_tag = start_tag;
1249 b->tot_branches = 1 + u->tot_branches;
1250 b->tot_last_matched = u->tot_last_matched;
1251 b->tot_tags = u->tot_tags;
1252 b->last_matched = u;
1253 b->n_last_matched = 1;
1254 node->last_matched_branch = b;
1255 node->last_matched_in_progress = NULL;
1256 break;
1257 }
1258
1259 default:
1260 assert(0);
1261 break;
1262
1263 } /* end switch(symbol) */
1264 } /* end while(tre_stack_num_objects(stack) > bottom) */
1265
1266 if (status != REG_OK)
1267 {
1268 DPRINT(("Error during %s pass\n", first_pass ? "first" : "second"));
1269 goto error_post_compile;
1270 }
1271
1272 if (!first_pass)
1273 {
1274 int i;
1275 if (num_tags != tnfa->num_tags)
1276 {
1277 DPRINT(("num_tags(%d) != tnfa->num_tags(%d)\n", num_tags,
1278 tnfa->num_tags));
1279 status = REG_BADPAT;
1280 goto error_post_compile;
1281 }
1282
1283 tre_purge_regset(regset, tnfa, tag);
1284 DPRINT(("Setting t%d to %s\n", num_tags,
1285 tag_dir_str[direction]));
1286 tnfa->tag_directions[num_tags] = direction;
1287
1288 if (rtp > reorder_tags + 2 * tnfa->num_reorder_tags)
1289 {
1290 DPRINT(("Processed %d reorder tags instead of %d\n",
1291 (int)(rtp - reorder_tags) / 2, tnfa->num_reorder_tags));
1292 status = REG_BADPAT;
1293 goto error_post_compile;
1294 }
1295 *rtp = -1;
1296 #if TRE_DEBUG
1297 if (reorder_tags[0] >= 0)
1298 {
1299 DPRINT(("reorder_tags:\n"));
1300 for (rtp = reorder_tags; *rtp >= 0;)
1301 {
1302 DPRINT(("%d after ", *rtp++));
1303 DPRINT(("%d\n", *rtp++));
1304 }
1305 }
1306 else
1307 DPRINT(("No reorder_tags\n"));
1308 #endif /* TRE_DEBUG */
1309
1310 /* Initialize to_reorder */
1311 for (i = 0; i < num_tags; i++)
1312 to_reorder[i] = i;
1313 /* Use to_seq_order to convert reorder_tags values, and use those to
1314 * reorder to_reorder */
1315 for (rtp = reorder_tags; *rtp >= 0;)
1316 {
1317 int j, high, low;
1318 int ti = *rtp++;
1319
1320 /* Skip reordering the final tag */
1321 if (ti >= num_tags)
1322 {
1323 DPRINT(("Skipping reorder of %d\n", ti));
1324 rtp++;
1325 continue;
1326 }
1327 /* The number of the tag to reorder */
1328 high = to_reorder[ti];
1329 /* Reorder after this tag */
1330 low = to_reorder[*rtp++];
1331
1332 DPRINT(("ti=%d high=%d low=%d\n", ti, high, low));
1333 if (low > high)
1334 {
1335 DPRINT(("Tag %d already before %d\n", high, low));
1336 continue;
1337 }
1338 for (j = 0; j < num_tags; j++)
1339 if (to_reorder[j] > low && to_reorder[j] < high)
1340 to_reorder[j]++;
1341 to_reorder[ti] = low + 1;
1342 #ifdef TRE_DEBUG
1343 DPRINT(("to_reorder=("));
1344 for (j = 0; j < num_tags; j++)
1345 {
1346 DPRINT(("%d", to_reorder[j]));
1347 if (j < num_tags - 1)
1348 DPRINT((","));
1349 }
1350 DPRINT((")\n"));
1351 #endif /* TRE_DEBUG */
1352 }
1353 /* Determine if reordering in really necessary */
1354 {
1355 int need_reorder = 0;
1356 for (i = 0; i < num_tags; i++)
1357 if(to_reorder[i] != i)
1358 {
1359 need_reorder = 1;
1360 break;
1361 }
1362 /* If need_reorder is not set, free reorder_tags, and set to NULL,
1363 * indicating no reordering is needed */
1364 if (!need_reorder)
1365 {
1366 DPRINT(("Don't need to reorder\n"));
1367 xfree(reorder_tags);
1368 reorder_tags = NULL;
1369 }
1370 }
1371 }
1372
1373 if (reorder_tags)
1374 {
1375 int i;
1376 tre_tag_direction_t *new_tag_directions;
1377 #if TRE_DEBUG
1378 DPRINT(("to_reorder:"));
1379 for (i = 0; i < num_tags; i++)
1380 DPRINT((" %d->%d", i, to_reorder[i]));
1381 DPRINT(("\n"));
1382 #endif /* TRE_DEBUG */
1383
1384 DPRINT(("Reordering submatch_data\n"));
1385 for (i = 0; i < (int)tnfa->num_submatches; i++)
1386 {
1387 #if TRE_DEBUG
1388 int so = tnfa->submatch_data[i].so_tag;
1389 int eo = tnfa->submatch_data[i].eo_tag;
1390 #endif /* TRE_DEBUG */
1391 tnfa->submatch_data[i].so_tag =
1392 to_reorder[tnfa->submatch_data[i].so_tag];
1393 tnfa->submatch_data[i].eo_tag =
1394 tnfa->submatch_data[i].eo_tag < num_tags ?
1395 to_reorder[tnfa->submatch_data[i].eo_tag] :
1396 tnfa->submatch_data[i].eo_tag;
1397 DPRINT(("pmatch[%d]: {%d, %d}->{%d, %d}\n", i, so, eo,
1398 tnfa->submatch_data[i].so_tag,
1399 tnfa->submatch_data[i].eo_tag));
1400 }
1401
1402 DPRINT(("Reordering tag_directions\n"));
1403 /* We only allocate num_tags directions and reorder them. The
1404 * num_tags-th direction (end tag) is left unchanged. */
1405 new_tag_directions = xmalloc(sizeof(*new_tag_directions) * num_tags);
1406 if (new_tag_directions == NULL)
1407 {
1408 status = REG_ESPACE;
1409 goto error_post_compile;
1410 }
1411 for (i = 0; i < num_tags; i++)
1412 {
1413 new_tag_directions[to_reorder[i]] = tnfa->tag_directions[i];
1414 }
1415 #if TRE_DEBUG
1416 for (i = 0; i < num_tags; i++)
1417 {
1418 DPRINT(("t%d %s->%s\n", i,
1419 tag_dir_str[tnfa->tag_directions[i]],
1420 tag_dir_str[new_tag_directions[i]]));
1421 }
1422 DPRINT(("t%d %s->%s\n", num_tags,
1423 tag_dir_str[tnfa->tag_directions[num_tags]],
1424 tag_dir_str[tnfa->tag_directions[num_tags]]));
1425 #endif /* TRE_DEBUG */
1426 memcpy(tnfa->tag_directions, new_tag_directions, sizeof(*new_tag_directions) * num_tags);
1427 xfree(new_tag_directions);
1428
1429 DPRINT(("Reordering minimal_tags\n"));
1430 for (i = 0; tnfa->minimal_tags[i] >= 0; i++)
1431 tnfa->minimal_tags[i] = tnfa->minimal_tags[i] < num_tags ?
1432 to_reorder[tnfa->minimal_tags[i]] :
1433 tnfa->minimal_tags[i];
1434
1435 DPRINT(("Reordering AST tags\n"));
1436 STACK_PUSH(stack, voidptr, tree);
1437 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1438 {
1439 node = tre_stack_pop_voidptr(stack);
1440
1441 switch (node->type)
1442 {
1443 case LITERAL:
1444 {
1445 tre_literal_t *lit = (tre_literal_t *)node->obj;
1446 if (IS_TAG(lit))
1447 lit->code_max = to_reorder[lit->code_max];
1448 break;
1449 }
1450
1451 case UNION:
1452 {
1453 tre_union_t *uni = (tre_union_t *)node->obj;
1454 STACK_PUSHX(stack, voidptr, uni->right);
1455 STACK_PUSHX(stack, voidptr, uni->left);
1456 break;
1457 }
1458
1459 case CATENATION:
1460 {
1461 tre_catenation_t *cat = (tre_catenation_t *)node->obj;
1462 STACK_PUSHX(stack, voidptr, cat->right);
1463 STACK_PUSHX(stack, voidptr, cat->left);
1464 break;
1465 }
1466
1467 case ITERATION:
1468 {
1469 tre_iteration_t *iter = (tre_iteration_t *)node->obj;
1470 STACK_PUSHX(stack, voidptr, iter->arg);
1471 break;
1472 }
1473
1474 default:
1475 assert(0);
1476 break;
1477 }
1478 }
1479 if (status != REG_OK)
1480 {
1481 DPRINT(("Error while reordering tags\n"));
1482 goto error_post_compile;
1483 }
1484 }
1485
1486
1487 if (!first_pass)
1488 {
1489 if (tree->last_matched_branch)
1490 {
1491 tre_last_matched_branch_t *buf, *b, *bb;
1492 tre_last_matched_branch_pre_t *bp;
1493 tre_last_matched_t *u, *uu;
1494 tre_last_matched_pre_t *up;
1495 int *t;
1496 int i;
1497 #ifdef TRE_DEBUG
1498 tre_last_matched_branch_t *_b;
1499 tre_last_matched_t *_u;
1500 int *_t;
1501
1502 DPRINT(("last_match_branch_pre:\n"));
1503 print_last_match_branch_pre(tree->last_matched_branch, 0, num_tags);
1504 #endif /* TRE_DEBUG */
1505 buf = (tre_last_matched_branch_t *)xcalloc(1,
1506 tree->last_matched_branch->tot_branches
1507 * sizeof(tre_last_matched_branch_t) +
1508 tree->last_matched_branch->tot_last_matched
1509 * sizeof(tre_last_matched_t) +
1510 tree->last_matched_branch->tot_tags *
1511 sizeof(int));
1512 if (!buf)
1513 {
1514 status = REG_ESPACE;
1515 goto error_post_compile;
1516 }
1517
1518 b = buf;
1519 u = (tre_last_matched_t *)(b +
1520 tree->last_matched_branch->tot_branches);
1521 t = (int *)(u + tree->last_matched_branch->tot_last_matched);
1522 #ifdef TRE_DEBUG
1523 _b = b;
1524 _u = u;
1525 _t = t;
1526 #endif /* TRE_DEBUG */
1527 DPRINT(("Copying info_pre to info\n"));
1528 STACK_PUSH(stack, voidptr, tree->last_matched_branch);
1529 STACK_PUSH(stack, int, 1);
1530 STACK_PUSH(stack, int, COPY_LAST_MATCHED_BRANCH);
1531
1532 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1533 {
1534 switch (tre_stack_pop_int(stack))
1535 {
1536 case COPY_LAST_MATCHED_BRANCH:
1537 i = tre_stack_pop_int(stack);
1538 /* The tre_last_matched_branch_pre_t * is still on the
1539 stack */
1540 STACK_PUSHX(stack, voidptr, b);
1541 STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH_NEXT);
1542 b += i;
1543 break;
1544
1545 case COPY_LAST_MATCHED_BRANCH_NEXT:
1546 bb = tre_stack_pop_voidptr(stack);
1547 bp = tre_stack_pop_voidptr(stack);
1548 bb->n_last_matched = bp->n_last_matched;
1549 bb->cmp_tag = bp->cmp_tag;
1550 if (bp->n_tags > 0)
1551 {
1552 int n;
1553 n = bb->n_tags = bp->n_tags;
1554 bb->tags = t;
1555 for (i = 0; i < num_tags; i++)
1556 if (bit_test(bp->tags, i))
1557 {
1558 *t++ = i;
1559 if (--n <= 0)
1560 break;
1561 }
1562 }
1563 if (bp->next)
1564 {
1565 STACK_PUSHX(stack, voidptr, bp->next);
1566 STACK_PUSHX(stack, voidptr, bb + 1);
1567 STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH_NEXT);
1568 }
1569 if (bp->n_last_matched > 0)
1570 {
1571 bb->last_matched = u;
1572 STACK_PUSHX(stack, voidptr, bp->last_matched);
1573 STACK_PUSHX(stack, int, bp->n_last_matched);
1574 STACK_PUSHX(stack, int, COPY_LAST_MATCHED);
1575 }
1576 break;
1577
1578 case COPY_LAST_MATCHED:
1579 i = tre_stack_pop_int(stack);
1580 /* The tre_last_matched_pre_t * is still on the stack */
1581 STACK_PUSHX(stack, voidptr, u);
1582 STACK_PUSHX(stack, int, COPY_LAST_MATCHED_NEXT);
1583 u += i;
1584 break;
1585
1586 case COPY_LAST_MATCHED_NEXT:
1587 uu = tre_stack_pop_voidptr(stack);
1588 up = tre_stack_pop_voidptr(stack);
1589 uu->n_branches = up->n_branches;
1590 uu->branches = b;
1591 uu->start_tag = up->start_tag;
1592 if (up->next)
1593 {
1594 STACK_PUSHX(stack, voidptr, up->next);
1595 STACK_PUSHX(stack, voidptr, uu + 1);
1596 STACK_PUSHX(stack, int, COPY_LAST_MATCHED_NEXT);
1597 }
1598 STACK_PUSHX(stack, voidptr, up->branches);
1599 STACK_PUSHX(stack, int, up->n_branches);
1600 STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH);
1601 break;
1602 }
1603 }
1604 if (status != REG_OK)
1605 goto error_post_compile;
1606 #ifdef TRE_DEBUG
1607 DPRINT(("last_matched_branch:\n"));
1608 print_last_match_branch(buf, 0);
1609 if (b != _b + tree->last_matched_branch->tot_branches)
1610 DPRINT(("b/%p != _b + tree->last_matched_branch->tot_branches/%p\n",
1611 b, _b + tree->last_matched_branch->tot_branches));
1612 if (u != _u + tree->last_matched_branch->tot_last_matched)
1613 DPRINT(("u/%p != _u + "
1614 "tree->last_matched_branch->tot_last_matched/%p\n",
1615 u, _u + tree->last_matched_branch->tot_last_matched));
1616 if (t != _t + tree->last_matched_branch->tot_tags)
1617 DPRINT(("t/%p != _t + tree->last_matched_branch->tot_tags/%p\n",
1618 t, _t + tree->last_matched_branch->tot_tags));
1619 #endif /* TRE_DEBUG */
1620 tnfa->last_matched_branch = buf;
1621 }
1622 #ifdef TRE_DEBUG
1623 else
1624 DPRINT(("No last_match_branch_pre\n"));
1625 #endif /* TRE_DEBUG */
1626 }
1627
1628 DPRINT(("tre_add_tags: %s complete. Number of tags %d.\n",
1629 first_pass? "First pass" : "Second pass", num_tags));
1630 #ifdef TRE_DEBUG
1631 tre_ast_print(tree);
1632 #endif /* TRE_DEBUG */
1633 DPRINT(("tre_add_tags: tree->num_tags=%d num_tags=%d\n", tree->num_tags,
1634 num_tags));
1635 assert(tree->num_tags == num_tags);
1636 tnfa->end_tag = num_tags;
1637 tnfa->num_tags = num_tags;
1638 tnfa->num_minimals = num_minimals;
1639 error_post_compile:
1640 xfree(reorder_tags);
1641 error_reorder_tags:
1642 xfree(orig_regset);
1643 error_regset:
1644 return status;
1645 }
1646
1647
1648
1649 /*
1650 AST to TNFA compilation routines.
1651 */
1652
1653 typedef enum {
1654 COPY_RECURSE,
1655 COPY_SET_RESULT_PTR
1656 } tre_copyast_symbol_t;
1657
1658 /* Flags for tre_copy_ast(). */
1659 #define COPY_REMOVE_TAGS 1
1660 #define COPY_MAXIMIZE_FIRST_TAG 2
1661
1662 static reg_errcode_t
1663 tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1664 int flags, int *pos_add, tre_tag_direction_t *tag_directions,
1665 tre_ast_node_t **copy, int *max_pos)
1666 {
1667 reg_errcode_t status = REG_OK;
1668 int bottom = tre_stack_num_objects(stack);
1669 int num_copied = 0;
1670 int first_tag = 1;
1671 tre_ast_node_t **result = copy;
1672 tre_copyast_symbol_t symbol;
1673
1674 STACK_PUSH(stack, voidptr, ast);
1675 STACK_PUSH(stack, int, COPY_RECURSE);
1676
1677 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1678 {
1679 tre_ast_node_t *node;
1680 if (status != REG_OK)
1681 break;
1682
1683 symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
1684 switch (symbol)
1685 {
1686 case COPY_SET_RESULT_PTR:
1687 result = tre_stack_pop_voidptr(stack);
1688 break;
1689 case COPY_RECURSE:
1690 node = tre_stack_pop_voidptr(stack);
1691 switch (node->type)
1692 {
1693 case LITERAL:
1694 {
1695 tre_literal_t *lit = node->obj;
1696 int pos = lit->position;
1697 int min = lit->code_min;
1698 int max = lit->code_max;
1699 tre_bracket_match_list_t *list = !IS_SPECIAL(lit) ?
1700 lit->u.bracket_match_list :
1701 NULL;
1702 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1703 {
1704 /* XXX - e.g. [ab] has only one position but two
1705 nodes, so we are creating holes in the state space
1706 here. Not fatal, just wastes memory. */
1707 pos += *pos_add;
1708 num_copied++;
1709 }
1710 else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
1711 {
1712 /* Change this tag to empty. */
1713 min = EMPTY;
1714 max = pos = -1;
1715 }
1716 else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
1717 && first_tag)
1718 {
1719 /* Maximize the first tag. */
1720 if (tag_directions[max] == TRE_TAG_LEFT_MAXIMIZE)
1721 tag_directions[max] = TRE_TAG_MAXIMIZE;
1722 first_tag = 0;
1723 }
1724 *result = tre_ast_new_literal(mem, min, max, pos);
1725 if (*result == NULL)
1726 status = REG_ESPACE;
1727
1728 if (pos > *max_pos)
1729 *max_pos = pos;
1730
1731 if (!IS_SPECIAL(lit))
1732 ((tre_literal_t *)(*result)->obj)->u.bracket_match_list
1733 = list;
1734 break;
1735 }
1736 case UNION:
1737 {
1738 tre_union_t *uni = node->obj;
1739 tre_union_t *tmp;
1740 *result = tre_ast_new_union(mem, uni->left, uni->right);
1741 if (*result == NULL)
1742 {
1743 status = REG_ESPACE;
1744 break;
1745 }
1746 tmp = (*result)->obj;
1747 result = &tmp->left;
1748 STACK_PUSHX(stack, voidptr, uni->right);
1749 STACK_PUSHX(stack, int, COPY_RECURSE);
1750 STACK_PUSHX(stack, voidptr, &tmp->right);
1751 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1752 STACK_PUSHX(stack, voidptr, uni->left);
1753 STACK_PUSHX(stack, int, COPY_RECURSE);
1754 break;
1755 }
1756 case CATENATION:
1757 {
1758 tre_catenation_t *cat = node->obj;
1759 tre_catenation_t *tmp;
1760 *result = tre_ast_new_catenation(mem, cat->left, cat->right);
1761 if (*result == NULL)
1762 {
1763 status = REG_ESPACE;
1764 break;
1765 }
1766 tmp = (*result)->obj;
1767 tmp->left = NULL;
1768 tmp->right = NULL;
1769 result = &tmp->left;
1770
1771 STACK_PUSHX(stack, voidptr, cat->right);
1772 STACK_PUSHX(stack, int, COPY_RECURSE);
1773 STACK_PUSHX(stack, voidptr, &tmp->right);
1774 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1775 STACK_PUSHX(stack, voidptr, cat->left);
1776 STACK_PUSHX(stack, int, COPY_RECURSE);
1777 break;
1778 }
1779 case ITERATION:
1780 {
1781 tre_iteration_t *iter = node->obj;
1782 STACK_PUSHX(stack, voidptr, iter->arg);
1783 STACK_PUSHX(stack, int, COPY_RECURSE);
1784 *result = tre_ast_new_iter(mem, iter->arg, iter->min,
1785 iter->max, iter->minimal);
1786 if (*result == NULL)
1787 {
1788 status = REG_ESPACE;
1789 break;
1790 }
1791 iter = (*result)->obj;
1792 result = &iter->arg;
1793 break;
1794 }
1795 default:
1796 assert(0);
1797 break;
1798 }
1799 break;
1800 }
1801 }
1802 *pos_add += num_copied;
1803 return status;
1804 }
1805
1806 typedef enum {
1807 EXPAND_RECURSE,
1808 EXPAND_AFTER_ITER
1809 } tre_expand_ast_symbol_t;
1810
1811 /* Expands each iteration node that has a finite nonzero minimum or maximum
1812 iteration count to a catenated sequence of copies of the node. */
1813 static reg_errcode_t
1814 tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1815 int *position, tre_tag_direction_t *tag_directions,
1816 int __unused *max_depth)
1817 {
1818 reg_errcode_t status = REG_OK;
1819 int bottom = tre_stack_num_objects(stack);
1820 int pos_add = 0;
1821 int pos_add_total = 0;
1822 int max_pos = 0;
1823 #ifdef TRE_APPROX
1824 /* Current approximate matching parameters. */
1825 int params[TRE_PARAM_LAST];
1826 /* Approximate parameter nesting level. */
1827 int params_depth = 0;
1828 #endif /* TRE_APPROX */
1829 int iter_depth = 0;
1830 #ifdef TRE_APPROX
1831 int i;
1832 #endif /* TRE_APPROX */
1833
1834 #ifdef TRE_APPROX
1835 for (i = 0; i < TRE_PARAM_LAST; i++)
1836 params[i] = TRE_PARAM_DEFAULT;
1837 #endif /* TRE_APPROX */
1838
1839 STACK_PUSHR(stack, voidptr, ast);
1840 STACK_PUSHR(stack, int, EXPAND_RECURSE);
1841 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1842 {
1843 tre_ast_node_t *node;
1844 tre_expand_ast_symbol_t symbol;
1845
1846 if (status != REG_OK)
1847 break;
1848
1849 DPRINT(("pos_add %d\n", pos_add));
1850
1851 symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
1852 node = tre_stack_pop_voidptr(stack);
1853 switch (symbol)
1854 {
1855 case EXPAND_RECURSE:
1856 switch (node->type)
1857 {
1858 case LITERAL:
1859 {
1860 tre_literal_t *lit= node->obj;
1861 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1862 {
1863 lit->position += pos_add;
1864 if (lit->position > max_pos)
1865 max_pos = lit->position;
1866 }
1867 break;
1868 }
1869 case UNION:
1870 {
1871 tre_union_t *uni = node->obj;
1872 STACK_PUSHX(stack, voidptr, uni->right);
1873 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1874 STACK_PUSHX(stack, voidptr, uni->left);
1875 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1876 break;
1877 }
1878 case CATENATION:
1879 {
1880 tre_catenation_t *cat = node->obj;
1881 STACK_PUSHX(stack, voidptr, cat->right);
1882 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1883 STACK_PUSHX(stack, voidptr, cat->left);
1884 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1885 break;
1886 }
1887 case ITERATION:
1888 {
1889 tre_iteration_t *iter = node->obj;
1890 STACK_PUSHX(stack, int, pos_add);
1891 STACK_PUSHX(stack, voidptr, node);
1892 STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
1893 STACK_PUSHX(stack, voidptr, iter->arg);
1894 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1895 /* If we are going to expand this node at EXPAND_AFTER_ITER
1896 then don't increase the `pos' fields of the nodes now, it
1897 will get done when expanding. */
1898 if (iter->min > 1 || iter->max > 1)
1899 pos_add = 0;
1900 iter_depth++;
1901 DPRINT(("iter\n"));
1902 break;
1903 }
1904 default:
1905 assert(0);
1906 break;
1907 }
1908 break;
1909 case EXPAND_AFTER_ITER:
1910 {
1911 tre_iteration_t *iter = node->obj;
1912 int pos_add_last;
1913 pos_add = tre_stack_pop_int(stack);
1914 pos_add_last = pos_add;
1915 /* Originally (in tre_parse_bound), if min == 0 && max == 0, we
1916 immediate replace the whole iteration with EMPTY. This
1917 unfortunately drops any submatches, and messes up setting the
1918 pmatch values (we can get tags of -1, and tag values in the
1919 billions). So we left it there and replace with EMPTY here. */
1920 if (iter->min == 0 && iter->max == 0)
1921 {
1922 tre_ast_node_t *empty = tre_ast_new_literal(mem, EMPTY, -1, -1);
1923 if (empty == NULL)
1924 return REG_ESPACE;
1925 node->obj = empty->obj;
1926 node->type = empty->type;
1927 }
1928 else if (iter->min > 1 || iter->max > 1)
1929 {
1930 tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
1931 int j;
1932 int pos_add_save = pos_add;
1933
1934 /* Create a catenated sequence of copies of the node. */
1935 for (j = 0; j < iter->min; j++)
1936 {
1937 tre_ast_node_t *copy;
1938 /* Remove tags from all but the last copy. */
1939 int flags = ((j + 1 < iter->min)
1940 ? COPY_REMOVE_TAGS
1941 : COPY_MAXIMIZE_FIRST_TAG);
1942 DPRINT((" pos_add %d\n", pos_add));
1943 pos_add_save = pos_add;
1944 status = tre_copy_ast(mem, stack, iter->arg, flags,
1945 &pos_add, tag_directions, &copy,
1946 &max_pos);
1947 if (status != REG_OK)
1948 return status;
1949 if (seq1 != NULL)
1950 seq1 = tre_ast_new_catenation(mem, seq1, copy);
1951 else
1952 seq1 = copy;
1953 if (seq1 == NULL)
1954 return REG_ESPACE;
1955 }
1956
1957 if (iter->max == -1)
1958 {
1959 /* No upper limit. */
1960 pos_add_save = pos_add;
1961 status = tre_copy_ast(mem, stack, iter->arg, 0,
1962 &pos_add, NULL, &seq2, &max_pos);
1963 if (status != REG_OK)
1964 return status;
1965 seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
1966 if (seq2 == NULL)
1967 return REG_ESPACE;
1968 }
1969 else
1970 {
1971 for (j = iter->min; j < iter->max; j++)
1972 {
1973 tre_ast_node_t *tmp, *copy;
1974 pos_add_save = pos_add;
1975 status = tre_copy_ast(mem, stack, iter->arg, 0,
1976 &pos_add, NULL, &copy, &max_pos);
1977 if (status != REG_OK)
1978 return status;
1979 if (seq2 != NULL)
1980 seq2 = tre_ast_new_catenation(mem, copy, seq2);
1981 else
1982 seq2 = copy;
1983 if (seq2 == NULL)
1984 return REG_ESPACE;
1985 tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
1986 if (tmp == NULL)
1987 return REG_ESPACE;
1988 seq2 = tre_ast_new_union(mem, tmp, seq2);
1989 if (seq2 == NULL)
1990 return REG_ESPACE;
1991 }
1992 }
1993
1994 pos_add = pos_add_save;
1995 if (seq1 == NULL)
1996 seq1 = seq2;
1997 else if (seq2 != NULL)
1998 seq1 = tre_ast_new_catenation(mem, seq1, seq2);
1999 if (seq1 == NULL)
2000 return REG_ESPACE;
2001 node->obj = seq1->obj;
2002 node->type = seq1->type;
2003 }
2004
2005 iter_depth--;
2006 pos_add_total += pos_add - pos_add_last;
2007 if (iter_depth == 0)
2008 pos_add = pos_add_total;
2009
2010 #ifdef TRE_APPROX
2011 /* If approximate parameters are specified, surround the result
2012 with two parameter setting nodes. The one on the left sets
2013 the specified parameters, and the one on the right restores
2014 the old parameters. */
2015 if (iter->params)
2016 {
2017 tre_ast_node_t *tmp_l, *tmp_r, *tmp_node, *node_copy;
2018 int *old_params;
2019
2020 tmp_l = tre_ast_new_literal(mem, PARAMETER, 0, -1);
2021 if (!tmp_l)
2022 return REG_ESPACE;
2023 ((tre_literal_t *)tmp_l->obj)->u.params = iter->params;
2024 iter->params[TRE_PARAM_DEPTH] = params_depth + 1;
2025 tmp_r = tre_ast_new_literal(mem, PARAMETER, 0, -1);
2026 if (!tmp_r)
2027 return REG_ESPACE;
2028 old_params = tre_mem_alloc(mem, sizeof(*old_params)
2029 * TRE_PARAM_LAST);
2030 if (!old_params)
2031 return REG_ESPACE;
2032 for (i = 0; i < TRE_PARAM_LAST; i++)
2033 old_params[i] = params[i];
2034 ((tre_literal_t *)tmp_r->obj)->u.params = old_params;
2035 old_params[TRE_PARAM_DEPTH] = params_depth;
2036 /* XXX - this is the only place where ast_new_node is
2037 needed -- should be moved inside AST module. */
2038 node_copy = tre_ast_new_node(mem, ITERATION,
2039 sizeof(tre_iteration_t));
2040 if (!node_copy)
2041 return REG_ESPACE;
2042 node_copy->obj = node->obj;
2043 tmp_node = tre_ast_new_catenation(mem, tmp_l, node_copy);
2044 if (!tmp_node)
2045 return REG_ESPACE;
2046 tmp_node = tre_ast_new_catenation(mem, tmp_node, tmp_r);
2047 if (!tmp_node)
2048 return REG_ESPACE;
2049 /* Replace the contents of `node' with `tmp_node'. */
2050 memcpy(node, tmp_node, sizeof(*node));
2051 node->obj = tmp_node->obj;
2052 node->type = tmp_node->type;
2053 params_depth++;
2054 if (params_depth > *max_depth)
2055 *max_depth = params_depth;
2056 }
2057 #endif /* TRE_APPROX */
2058 break;
2059 }
2060 default:
2061 assert(0);
2062 break;
2063 }
2064 }
2065
2066 *position += pos_add_total;
2067
2068 /* `max_pos' should never be larger than `*position' if the above
2069 code works, but just an extra safeguard let's make sure
2070 `*position' is set large enough so enough memory will be
2071 allocated for the transition table. */
2072 if (max_pos > *position)
2073 *position = max_pos;
2074
2075 #ifdef TRE_DEBUG
2076 DPRINT(("Expanded AST:\n"));
2077 tre_ast_print(ast);
2078 DPRINT(("*position %d, max_pos %d\n", *position, max_pos));
2079 #endif
2080
2081 return status;
2082 }
2083
2084 static tre_pos_and_tags_t *
2085 tre_set_empty(tre_mem_t mem)
2086 {
2087 tre_pos_and_tags_t *new_set;
2088
2089 new_set = tre_mem_calloc(mem, sizeof(*new_set));
2090 if (new_set == NULL)
2091 return NULL;
2092
2093 new_set[0].position = -1;
2094 new_set[0].code_min = -1;
2095 new_set[0].code_max = -1;
2096
2097 return new_set;
2098 }
2099
2100 static tre_pos_and_tags_t *
2101 tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
2102 tre_bracket_match_list_t *bracket_match_list, int backref)
2103 {
2104 tre_pos_and_tags_t *new_set;
2105
2106 new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
2107 if (new_set == NULL)
2108 return NULL;
2109
2110 new_set[0].position = position;
2111 new_set[0].code_min = code_min;
2112 new_set[0].code_max = code_max;
2113 new_set[0].bracket_match_list = bracket_match_list;
2114 new_set[0].backref = backref;
2115 new_set[1].position = -1;
2116 new_set[1].code_min = -1;
2117 new_set[1].code_max = -1;
2118
2119 return new_set;
2120 }
2121
2122 static tre_pos_and_tags_t *
2123 tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
2124 int *tags, int assertions, int *params)
2125 {
2126 int s1, s2, i, j;
2127 tre_pos_and_tags_t *new_set;
2128 int *new_tags;
2129 int num_tags;
2130
2131 for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
2132 for (s1 = 0; set1[s1].position >= 0; s1++);
2133 for (s2 = 0; set2[s2].position >= 0; s2++);
2134 new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
2135 if (!new_set )
2136 return NULL;
2137
2138 for (s1 = 0; set1[s1].position >= 0; s1++)
2139 {
2140 new_set[s1].position = set1[s1].position;
2141 new_set[s1].code_min = set1[s1].code_min;
2142 new_set[s1].code_max = set1[s1].code_max;
2143 new_set[s1].assertions = set1[s1].assertions | assertions;
2144 new_set[s1].bracket_match_list = set1[s1].bracket_match_list;
2145 new_set[s1].backref = set1[s1].backref;
2146 if (set1[s1].tags == NULL && tags == NULL)
2147 new_set[s1].tags = NULL;
2148 else
2149 {
2150 for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
2151 new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
2152 * (i + num_tags + 1)));
2153 if (new_tags == NULL)
2154 return NULL;
2155 for (j = 0; j < i; j++)
2156 new_tags[j] = set1[s1].tags[j];
2157 for (i = 0; i < num_tags; i++)
2158 new_tags[j + i] = tags[i];
2159 new_tags[j + i] = -1;
2160 new_set[s1].tags = new_tags;
2161 }
2162 if (set1[s1].params)
2163 new_set[s1].params = set1[s1].params;
2164 if (params)
2165 {
2166 if (!new_set[s1].params)
2167 new_set[s1].params = params;
2168 else
2169 {
2170 new_set[s1].params = tre_mem_alloc(mem, sizeof(*params) *
2171 TRE_PARAM_LAST);
2172 if (!new_set[s1].params)
2173 return NULL;
2174 for (i = 0; i < TRE_PARAM_LAST; i++)
2175 if (params[i] != TRE_PARAM_UNSET)
2176 new_set[s1].params[i] = params[i];
2177 }
2178 }
2179 }
2180
2181 for (s2 = 0; set2[s2].position >= 0; s2++)
2182 {
2183 new_set[s1 + s2].position = set2[s2].position;
2184 new_set[s1 + s2].code_min = set2[s2].code_min;
2185 new_set[s1 + s2].code_max = set2[s2].code_max;
2186 /* XXX - why not | assertions here as well? */
2187 new_set[s1 + s2].assertions = set2[s2].assertions;
2188 new_set[s1 + s2].bracket_match_list = set2[s2].bracket_match_list;
2189 new_set[s1 + s2].backref = set2[s2].backref;
2190 if (set2[s2].tags == NULL)
2191 new_set[s1 + s2].tags = NULL;
2192 else
2193 {
2194 for (i = 0; set2[s2].tags[i] >= 0; i++);
2195 new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
2196 if (new_tags == NULL)
2197 return NULL;
2198 for (j = 0; j < i; j++)
2199 new_tags[j] = set2[s2].tags[j];
2200 new_tags[j] = -1;
2201 new_set[s1 + s2].tags = new_tags;
2202 }
2203 if (set2[s2].params)
2204 new_set[s1 + s2].params = set2[s2].params;
2205 if (params)
2206 {
2207 if (!new_set[s1 + s2].params)
2208 new_set[s1 + s2].params = params;
2209 else
2210 {
2211 new_set[s1 + s2].params = tre_mem_alloc(mem, sizeof(*params) *
2212 TRE_PARAM_LAST);
2213 if (!new_set[s1 + s2].params)
2214 return NULL;
2215 for (i = 0; i < TRE_PARAM_LAST; i++)
2216 if (params[i] != TRE_PARAM_UNSET)
2217 new_set[s1 + s2].params[i] = params[i];
2218 }
2219 }
2220 }
2221 new_set[s1 + s2].position = -1;
2222 return new_set;
2223 }
2224
2225 /* Finds the empty path through `node' which is the one that should be
2226 taken according to POSIX.2 rules, and adds the tags on that path to
2227 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
2228 set to the number of tags seen on the path. */
2229 static reg_errcode_t
2230 tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
2231 int *assertions, int *params, int *num_tags_seen,
2232 int *params_seen)
2233 {
2234 tre_literal_t *lit;
2235 tre_union_t *uni;
2236 tre_catenation_t *cat;
2237 tre_iteration_t *iter;
2238 int i;
2239 int bottom = tre_stack_num_objects(stack);
2240 reg_errcode_t status = REG_OK;
2241 if (num_tags_seen)
2242 *num_tags_seen = 0;
2243 if (params_seen)
2244 *params_seen = 0;
2245
2246 status = tre_stack_push_voidptr(stack, node);
2247
2248 /* Walk through the tree recursively. */
2249 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
2250 {
2251 node = tre_stack_pop_voidptr(stack);
2252
2253 switch (node->type)
2254 {
2255 case LITERAL:
2256 lit = (tre_literal_t *)node->obj;
2257 switch (lit->code_min)
2258 {
2259 case TAG:
2260 if (lit->code_max >= 0)
2261 {
2262 if (tags != NULL)
2263 {
2264 /* Add the tag to `tags'. */
2265 for (i = 0; tags[i] >= 0; i++)
2266 if (tags[i] == lit->code_max)
2267 break;
2268 if (tags[i] < 0)
2269 {
2270 tags[i] = lit->code_max;
2271 tags[i + 1] = -1;
2272 }
2273 }
2274 if (num_tags_seen)
2275 (*num_tags_seen)++;
2276 }
2277 break;
2278 case ASSERTION:
2279 assert(lit->code_max >= 1
2280 || lit->code_max <= ASSERT_LAST);
2281 if (assertions != NULL)
2282 *assertions |= lit->code_max;
2283 break;
2284 case PARAMETER:
2285 if (params != NULL)
2286 for (i = 0; i < TRE_PARAM_LAST; i++)
2287 params[i] = lit->u.params[i];
2288 if (params_seen != NULL)
2289 *params_seen = 1;
2290 break;
2291 case EMPTY:
2292 break;
2293 default:
2294 assert(0);
2295 break;
2296 }
2297 break;
2298
2299 case UNION:
2300 /* Subexpressions starting earlier take priority over ones
2301 starting later, so we prefer the left subexpression over the
2302 right subexpression. */
2303 uni = (tre_union_t *)node->obj;
2304 if (uni->left->nullable)
2305 STACK_PUSHX(stack, voidptr, uni->left)
2306 else if (uni->right->nullable)
2307 STACK_PUSHX(stack, voidptr, uni->right)
2308 else
2309 assert(0);
2310 break;
2311
2312 case CATENATION:
2313 /* The path must go through both children. */
2314 cat = (tre_catenation_t *)node->obj;
2315 assert(cat->left->nullable);
2316 assert(cat->right->nullable);
2317 STACK_PUSHX(stack, voidptr, cat->left);
2318 STACK_PUSHX(stack, voidptr, cat->right);
2319 break;
2320
2321 case ITERATION:
2322 /* A match with an empty string is preferred over no match at
2323 all, so we go through the argument if possible. */
2324 iter = (tre_iteration_t *)node->obj;
2325 if (iter->arg->nullable)
2326 STACK_PUSHX(stack, voidptr, iter->arg);
2327 break;
2328
2329 default:
2330 assert(0);
2331 break;
2332 }
2333 }
2334
2335 return status;
2336 }
2337
2338
2339 typedef enum {
2340 NFL_RECURSE,
2341 NFL_POST_UNION,
2342 NFL_POST_CATENATION,
2343 NFL_POST_ITERATION
2344 } tre_nfl_stack_symbol_t;
2345
2346
2347 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2348 the nodes of the AST `tree'. */
2349 static reg_errcode_t
2350 tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
2351 {
2352 int bottom = tre_stack_num_objects(stack);
2353
2354 STACK_PUSHR(stack, voidptr, tree);
2355 STACK_PUSHR(stack, int, NFL_RECURSE);
2356
2357 while (tre_stack_num_objects(stack) > bottom)
2358 {
2359 tre_nfl_stack_symbol_t symbol;
2360 tre_ast_node_t *node;
2361
2362 symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2363 node = tre_stack_pop_voidptr(stack);
2364 switch (symbol)
2365 {
2366 case NFL_RECURSE:
2367 switch (node->type)
2368 {
2369 case LITERAL:
2370 {
2371 tre_literal_t *lit = (tre_literal_t *)node->obj;
2372 if (IS_BACKREF(lit))
2373 {
2374 /* Back references: nullable = false, firstpos = {i},
2375 lastpos = {i}. */
2376 node->nullable = 0;
2377 node->firstpos = tre_set_one(mem, lit->position, 0,
2378 TRE_CHAR_MAX, NULL, -1);
2379 if (!node->firstpos)
2380 return REG_ESPACE;
2381 node->lastpos = tre_set_one(mem, lit->position, 0,
2382 TRE_CHAR_MAX, NULL,
2383 (int)lit->code_max);
2384 if (!node->lastpos)
2385 return REG_ESPACE;
2386 }
2387 else if (lit->code_min < 0)
2388 {
2389 /* Tags, empty strings, params, and zero width assertions:
2390 nullable = true, firstpos = {}, and lastpos = {}. */
2391 node->nullable = 1;
2392 node->firstpos = tre_set_empty(mem);
2393 if (!node->firstpos)
2394 return REG_ESPACE;
2395 node->lastpos = tre_set_empty(mem);
2396 if (!node->lastpos)
2397 return REG_ESPACE;
2398 }
2399 else
2400 {
2401 /* Literal at position i: nullable = false, firstpos = {i},
2402 lastpos = {i}. */
2403 node->nullable = 0;
2404 node->firstpos =
2405 tre_set_one(mem, lit->position, (int)lit->code_min,
2406 (int)lit->code_max, NULL, -1);
2407 if (!node->firstpos)
2408 return REG_ESPACE;
2409 node->lastpos = tre_set_one(mem, lit->position,
2410 (int)lit->code_min,
2411 (int)lit->code_max,
2412 lit->u.bracket_match_list,
2413 -1);
2414 if (!node->lastpos)
2415 return REG_ESPACE;
2416 }
2417 break;
2418 }
2419
2420 case UNION:
2421 /* Compute the attributes for the two subtrees, and after that
2422 for this node. */
2423 STACK_PUSHR(stack, voidptr, node);
2424 STACK_PUSHR(stack, int, NFL_POST_UNION);
2425 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
2426 STACK_PUSHR(stack, int, NFL_RECURSE);
2427 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
2428 STACK_PUSHR(stack, int, NFL_RECURSE);
2429 break;
2430
2431 case CATENATION:
2432 /* Compute the attributes for the two subtrees, and after that
2433 for this node. */
2434 STACK_PUSHR(stack, voidptr, node);
2435 STACK_PUSHR(stack, int, NFL_POST_CATENATION);
2436 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
2437 STACK_PUSHR(stack, int, NFL_RECURSE);
2438 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
2439 STACK_PUSHR(stack, int, NFL_RECURSE);
2440 break;
2441
2442 case ITERATION:
2443 /* Compute the attributes for the subtree, and after that for
2444 this node. */
2445 STACK_PUSHR(stack, voidptr, node);
2446 STACK_PUSHR(stack, int, NFL_POST_ITERATION);
2447 STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
2448 STACK_PUSHR(stack, int, NFL_RECURSE);
2449 break;
2450 }
2451 break; /* end case: NFL_RECURSE */
2452
2453 case NFL_POST_UNION:
2454 {
2455 tre_union_t *uni = (tre_union_t *)node->obj;
2456 node->nullable = uni->left->nullable || uni->right->nullable;
2457 node->firstpos = tre_set_union(mem, uni->left->firstpos,
2458 uni->right->firstpos, NULL, 0, NULL);
2459 if (!node->firstpos)
2460 return REG_ESPACE;
2461 node->lastpos = tre_set_union(mem, uni->left->lastpos,
2462 uni->right->lastpos, NULL, 0, NULL);
2463 if (!node->lastpos)
2464 return REG_ESPACE;
2465 break;
2466 }
2467
2468 case NFL_POST_ITERATION:
2469 {
2470 int num_tags, *tags, assertions, params_seen;
2471 int *params;
2472 reg_errcode_t status;
2473 tre_iteration_t *iter = (tre_iteration_t *)node->obj;
2474
2475 /* From Ville Laurikari's original 2001 Master's thesis, the
2476 firstpos(n) and lastpos(n) of an iteration is just the
2477 corresponding values of the iteration's argument. Unfortunately,
2478 this isn't sufficient for the following BRE:
2479
2480 \(a*\)*b\(\1\) matched against ab
2481
2482 The backreference wants to force the first subexpression to
2483 be the empty string, but there is no transition for this. So
2484 we need to modify the lastpos(n) of an iteration to be the
2485 equivalent of that of catentation. Using the same notation as
2486 in the thesis, lastpos(n) is redefined as:
2487
2488 if nullable(c1) then
2489 lastpos(c1) U
2490 addtags(lastpos(c1),
2491 emptymatch(c1))
2492 else
2493 lastpos(c1)
2494
2495 where c1 is the argument node. firstpos(n) remains the same. */
2496
2497 /* Compute lastpos. */
2498 if (iter->min == 0 || iter->arg->nullable)
2499 {
2500 node->nullable = 1;
2501 if (iter->arg->nullable)
2502 {
2503 /* The arg matches the empty string. Make a first pass
2504 with tre_match_empty() to get the number of tags and
2505 parameters. */
2506 status = tre_match_empty(stack, iter->arg,
2507 NULL, NULL, NULL, &num_tags,
2508 &params_seen);
2509 if (status != REG_OK)
2510 return status;
2511 /* Allocate arrays for the tags and parameters. */
2512 tags = xmalloc(sizeof(int) * (num_tags + 1));
2513 if (!tags)
2514 return REG_ESPACE;
2515 tags[0] = -1;
2516 assertions = 0;
2517 params = NULL;
2518 if (params_seen)
2519 {
2520 params = tre_mem_alloc(mem, sizeof(*params)
2521 * TRE_PARAM_LAST);
2522 if (!params)
2523 {
2524 xfree(tags);
2525 return REG_ESPACE;
2526 }
2527 }
2528 /* Second pass with tre_mach_empty() to get the list of
2529 tags and parameters. */
2530 status = tre_match_empty(stack, iter->arg, tags,
2531 &assertions, params, NULL, NULL);
2532 if (status != REG_OK)
2533 {
2534 xfree(tags);
2535 return status;
2536 }
2537 node->lastpos =
2538 tre_set_union(mem, iter->arg->lastpos, iter->arg->lastpos,
2539 tags, assertions, params);
2540 xfree(tags);
2541 if (!node->lastpos)
2542 return REG_ESPACE;
2543 }
2544 else
2545 node->lastpos = iter->arg->lastpos;
2546 }
2547 else
2548 {
2549 node->nullable = 0;
2550 node->lastpos = iter->arg->lastpos;
2551 }
2552 node->firstpos = iter->arg->firstpos;
2553 break;
2554 }
2555
2556 case NFL_POST_CATENATION:
2557 {
2558 int num_tags, *tags, assertions, params_seen;
2559 int *params;
2560 reg_errcode_t status;
2561 tre_catenation_t *cat = node->obj;
2562 node->nullable = cat->left->nullable && cat->right->nullable;
2563
2564 /* Compute firstpos. */
2565 if (cat->left->nullable)
2566 {
2567 /* The left side matches the empty string. Make a first pass
2568 with tre_match_empty() to get the number of tags and
2569 parameters. */
2570 status = tre_match_empty(stack, cat->left,
2571 NULL, NULL, NULL, &num_tags,
2572 &params_seen);
2573 if (status != REG_OK)
2574 return status;
2575 /* Allocate arrays for the tags and parameters. */
2576 tags = xmalloc(sizeof(*tags) * (num_tags + 1));
2577 if (!tags)
2578 return REG_ESPACE;
2579 tags[0] = -1;
2580 assertions = 0;
2581 params = NULL;
2582 if (params_seen)
2583 {
2584 params = tre_mem_alloc(mem, sizeof(*params)
2585 * TRE_PARAM_LAST);
2586 if (!params)
2587 {
2588 xfree(tags);
2589 return REG_ESPACE;
2590 }
2591 }
2592 /* Second pass with tre_mach_empty() to get the list of
2593 tags and parameters. */
2594 status = tre_match_empty(stack, cat->left, tags,
2595 &assertions, params, NULL, NULL);
2596 if (status != REG_OK)
2597 {
2598 xfree(tags);
2599 return status;
2600 }
2601 node->firstpos =
2602 tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
2603 tags, assertions, params);
2604 xfree(tags);
2605 if (!node->firstpos)
2606 return REG_ESPACE;
2607 }
2608 else
2609 {
2610 node->firstpos = cat->left->firstpos;
2611 }
2612
2613 /* Compute lastpos. */
2614 if (cat->right->nullable)
2615 {
2616 /* The right side matches the empty string. Make a first pass
2617 with tre_match_empty() to get the number of tags and
2618 parameters. */
2619 status = tre_match_empty(stack, cat->right,
2620 NULL, NULL, NULL, &num_tags,
2621 &params_seen);
2622 if (status != REG_OK)
2623 return status;
2624 /* Allocate arrays for the tags and parameters. */
2625 tags = xmalloc(sizeof(int) * (num_tags + 1));
2626 if (!tags)
2627 return REG_ESPACE;
2628 tags[0] = -1;
2629 assertions = 0;
2630 params = NULL;
2631 if (params_seen)
2632 {
2633 params = tre_mem_alloc(mem, sizeof(*params)
2634 * TRE_PARAM_LAST);
2635 if (!params)
2636 {
2637 xfree(tags);
2638 return REG_ESPACE;
2639 }
2640 }
2641 /* Second pass with tre_mach_empty() to get the list of
2642 tags and parameters. */
2643 status = tre_match_empty(stack, cat->right, tags,
2644 &assertions, params, NULL, NULL);
2645 if (status != REG_OK)
2646 {
2647 xfree(tags);
2648 return status;
2649 }
2650 node->lastpos =
2651 tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2652 tags, assertions, params);
2653 xfree(tags);
2654 if (!node->lastpos)
2655 return REG_ESPACE;
2656 }
2657 else
2658 {
2659 node->lastpos = cat->right->lastpos;
2660 }
2661 break;
2662 }
2663
2664 default:
2665 assert(0);
2666 break;
2667 }
2668 }
2669
2670 return REG_OK;
2671 }
2672
2673
2674 /* Adds a transition from each position in `p1' to each position in `p2'. */
2675 static reg_errcode_t
2676 tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
2677 tre_tnfa_transition_t *transitions,
2678 int *counts, int *offs)
2679 {
2680 tre_pos_and_tags_t *orig_p2 = p2;
2681 tre_tnfa_transition_t *trans;
2682 int i, j, k, l, dup, prev_p2_pos;
2683
2684 if (transitions != NULL)
2685 while (p1->position >= 0)
2686 {
2687 p2 = orig_p2;
2688 prev_p2_pos = -1;
2689 while (p2->position >= 0)
2690 {
2691 /* Optimization: if this position was already handled, skip it. */
2692 if (p2->position == prev_p2_pos)
2693 {
2694 p2++;
2695 continue;
2696 }
2697 prev_p2_pos = p2->position;
2698 /* Set `trans' to point to the next unused transition from
2699 position `p1->position'. */
2700 trans = transitions + offs[p1->position];
2701 while (trans->state != NULL)
2702 {
2703 #if 0
2704 /* If we find a previous transition from `p1->position' to
2705 `p2->position', it is overwritten. This can happen only
2706 if there are nested loops in the regexp, like in "((a)*)*".
2707 In POSIX.2 repetition using the outer loop is always
2708 preferred over using the inner loop. Therefore the
2709 transition for the inner loop is useless and can be thrown
2710 away. */
2711 /* XXX - The same position is used for all nodes in a bracket
2712 expression, so this optimization cannot be used (it will
2713 break bracket expressions) unless I figure out a way to
2714 detect it here. */
2715 if (trans->state_id == p2->position)
2716 {
2717 DPRINT(("*"));
2718 break;
2719 }
2720 #endif
2721 trans++;
2722 }
2723
2724 if (trans->state == NULL)
2725 (trans + 1)->state = NULL;
2726 /* Use the character ranges, assertions, etc. from `p1' for
2727 the transition from `p1' to `p2'. */
2728 trans->code_min = p1->code_min;
2729 trans->code_max = p1->code_max;
2730 trans->state = transitions + offs[p2->position];
2731 trans->state_id = p2->position;
2732 trans->assertions = p1->assertions | p2->assertions
2733 | (p1->bracket_match_list != NULL ? ASSERT_BRACKET_MATCH : 0);
2734 if (p1->backref >= 0)
2735 {
2736 assert((trans->assertions & ASSERT_BRACKET_MATCH) == 0);
2737 assert(p2->backref < 0);
2738 trans->u.backref = p1->backref;
2739 trans->assertions |= ASSERT_BACKREF;
2740 }
2741 if (p1->bracket_match_list != NULL)
2742 {
2743 trans->u.bracket_match_list =
2744 xmalloc(SIZEOF_BRACKET_MATCH_LIST(p1->bracket_match_list));
2745 if (trans->u.bracket_match_list == NULL)
2746 return REG_ESPACE;
2747 memcpy(trans->u.bracket_match_list, p1->bracket_match_list,
2748 SIZEOF_BRACKET_MATCH_LIST(p1->bracket_match_list));
2749 }
2750
2751 /* Find out how many tags this transition has. */
2752 i = 0;
2753 if (p1->tags != NULL)
2754 while(p1->tags[i] >= 0)
2755 i++;
2756 j = 0;
2757 if (p2->tags != NULL)
2758 while(p2->tags[j] >= 0)
2759 j++;
2760
2761 /* If we are overwriting a transition, free the old tag array. */
2762 if (trans->tags != NULL)
2763 xfree(trans->tags);
2764 trans->tags = NULL;
2765
2766 /* If there were any tags, allocate an array and fill it. */
2767 if (i + j > 0)
2768 {
2769 trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
2770 if (!trans->tags)
2771 return REG_ESPACE;
2772 i = 0;
2773 if (p1->tags != NULL)
2774 while(p1->tags[i] >= 0)
2775 {
2776 trans->tags[i] = p1->tags[i];
2777 i++;
2778 }
2779 l = i;
2780 j = 0;
2781 if (p2->tags != NULL)
2782 while (p2->tags[j] >= 0)
2783 {
2784 /* Don't add duplicates. */
2785 dup = 0;
2786 for (k = 0; k < i; k++)
2787 if (trans->tags[k] == p2->tags[j])
2788 {
2789 dup = 1;
2790 break;
2791 }
2792 if (!dup)
2793 trans->tags[l++] = p2->tags[j];
2794 j++;
2795 }
2796 trans->tags[l] = -1;
2797 }
2798
2799 /* Set the parameter array. If both `p2' and `p1' have same
2800 parameters, the values in `p2' override those in `p1'. */
2801 if (p1->params || p2->params)
2802 {
2803 if (!trans->params)
2804 trans->params = xmalloc(sizeof(*trans->params)
2805 * TRE_PARAM_LAST);
2806 if (!trans->params)
2807 return REG_ESPACE;
2808 for (i = 0; i < TRE_PARAM_LAST; i++)
2809 {
2810 trans->params[i] = TRE_PARAM_UNSET;
2811 if (p1->params && p1->params[i] != TRE_PARAM_UNSET)
2812 trans->params[i] = p1->params[i];
2813 if (p2->params && p2->params[i] != TRE_PARAM_UNSET)
2814 trans->params[i] = p2->params[i];
2815 }
2816 }
2817 else
2818 {
2819 if (trans->params)
2820 xfree(trans->params);
2821 trans->params = NULL;
2822 }
2823
2824
2825 #ifdef TRE_DEBUG
2826 {
2827 int *tags;
2828
2829 DPRINT((" %2d -> %2d on %3d", p1->position, p2->position,
2830 p1->code_min));
2831 if (p1->code_max != p1->code_min)
2832 DPRINT(("-%3d", p1->code_max));
2833 tags = trans->tags;
2834 if (tags)
2835 {
2836 DPRINT((", tags ["));
2837 while (*tags >= 0)
2838 {
2839 DPRINT(("%d", *tags));
2840 tags++;
2841 if (*tags >= 0)
2842 DPRINT((","));
2843 }
2844 DPRINT(("]"));
2845 }
2846 if (trans->assertions)
2847 DPRINT((", assert %d", trans->assertions));
2848 if (trans->assertions & ASSERT_BACKREF)
2849 DPRINT((", backref %d", trans->u.backref));
2850 else if (trans->assertions & ASSERT_BRACKET_MATCH)
2851 DPRINT((", bracket_match_list %p",
2852 trans->u.bracket_match_list));
2853 if (trans->params)
2854 {
2855 DPRINT((", "));
2856 tre_print_params(trans->params);
2857 }
2858 DPRINT(("\n"));
2859 }
2860 #endif /* TRE_DEBUG */
2861 p2++;
2862 }
2863 p1++;
2864 }
2865 else
2866 /* Compute a maximum limit for the number of transitions leaving
2867 from each state. */
2868 while (p1->position >= 0)
2869 {
2870 p2 = orig_p2;
2871 while (p2->position >= 0)
2872 {
2873 counts[p1->position]++;
2874 p2++;
2875 }
2876 p1++;
2877 }
2878 return REG_OK;
2879 }
2880
2881 /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
2882 labelled with one character range (there are no transitions on empty
2883 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
2884 the regexp. */
2885 static reg_errcode_t
2886 tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
2887 int *counts, int *offs)
2888 {
2889 tre_union_t *uni;
2890 tre_catenation_t *cat;
2891 tre_iteration_t *iter;
2892 reg_errcode_t errcode = REG_OK;
2893
2894 /* XXX - recurse using a stack!. */
2895 switch (node->type)
2896 {
2897 case LITERAL:
2898 break;
2899 case UNION:
2900 uni = (tre_union_t *)node->obj;
2901 errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
2902 if (errcode != REG_OK)
2903 return errcode;
2904 errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
2905 break;
2906
2907 case CATENATION:
2908 cat = (tre_catenation_t *)node->obj;
2909 /* Add a transition from each position in cat->left->lastpos
2910 to each position in cat->right->firstpos. */
2911 errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
2912 transitions, counts, offs);
2913 if (errcode != REG_OK)
2914 return errcode;
2915 errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
2916 if (errcode != REG_OK)
2917 return errcode;
2918 errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
2919 break;
2920
2921 case ITERATION:
2922 iter = (tre_iteration_t *)node->obj;
2923 assert(iter->max == -1 || iter->max == 1);
2924
2925 if (iter->max == -1)
2926 {
2927 assert(iter->min == 0 || iter->min == 1);
2928 /* Add a transition from each last position in the iterated
2929 expression to each first position. */
2930 errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
2931 transitions, counts, offs);
2932 if (errcode != REG_OK)
2933 return errcode;
2934 }
2935 errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
2936 break;
2937 }
2938 return errcode;
2939 }
2940
2941
2942 #define ERROR_EXIT(err) \
2943 do \
2944 { \
2945 errcode = err; \
2946 if (/*CONSTCOND*/1) \
2947 goto error_exit; \
2948 } \
2949 while (/*CONSTCOND*/0)
2950
2951
2952 int
2953 tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags,
2954 locale_t loc)
2955 {
2956 tre_stack_t *stack;
2957 tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
2958 tre_pos_and_tags_t *p;
2959 int *counts = NULL, *offs = NULL;
2960 int i, add = 0;
2961 tre_tnfa_transition_t *transitions, *initial;
2962 tre_tnfa_t *tnfa = NULL;
2963 tre_submatch_data_t *submatch_data = NULL;
2964 tre_tag_direction_t *tag_directions = NULL;
2965 reg_errcode_t errcode;
2966 tre_mem_t mem;
2967
2968 /* Parse context. */
2969 tre_parse_ctx_t parse_ctx;
2970
2971 /* Allocate a stack used throughout the compilation process for various
2972 purposes. */
2973 stack = tre_stack_new(512, 10240, 128);
2974 if (!stack)
2975 return REG_ESPACE;
2976 /* Allocate a fast memory allocator. */
2977 mem = tre_mem_new();
2978 if (!mem)
2979 {
2980 tre_stack_destroy(stack);
2981 return REG_ESPACE;
2982 }
2983
2984 /* Parse the regexp. */
2985 memset(&parse_ctx, 0, sizeof(parse_ctx));
2986 parse_ctx.mem = mem;
2987 parse_ctx.stack = stack;
2988 parse_ctx.re = regex;
2989 parse_ctx.len = n;
2990 /* Only allow REG_UNGREEDY to be set if both REG_ENHANCED and REG_EXTENDED
2991 are also set */
2992 if ((cflags & (REG_ENHANCED | REG_EXTENDED)) != (REG_ENHANCED | REG_EXTENDED))
2993 cflags &= ~REG_UNGREEDY;
2994 parse_ctx.cflags = cflags;
2995 parse_ctx.max_backref = -1;
2996 parse_ctx.loc = loc;
2997 parse_ctx.submatch_id_invisible = SUBMATCH_ID_INVISIBLE_START;
2998
2999 DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex));
3000 errcode = tre_parse(&parse_ctx);
3001 if (errcode != REG_OK)
3002 ERROR_EXIT(errcode);
3003 preg->re_nsub = parse_ctx.submatch_id - 1;
3004 tree = parse_ctx.result;
3005
3006 /* Back references and approximate matching cannot currently be used
3007 in the same regexp. */
3008 if (parse_ctx.max_backref >= 0 && parse_ctx.have_approx)
3009 ERROR_EXIT(REG_BADPAT);
3010
3011 #ifdef TRE_DEBUG
3012 tre_ast_print(tree);
3013 #endif /* TRE_DEBUG */
3014
3015 /* Referring to nonexistent subexpressions is illegal. */
3016 if (parse_ctx.max_backref > (int)preg->re_nsub)
3017 ERROR_EXIT(REG_ESUBREG);
3018
3019 /* Allocate the TNFA struct. */
3020 tnfa = xcalloc(1, sizeof(tre_tnfa_t));
3021 if (tnfa == NULL)
3022 ERROR_EXIT(REG_ESPACE);
3023 tnfa->have_backrefs = parse_ctx.max_backref >= 0;
3024 tnfa->have_approx = parse_ctx.have_approx;
3025 tnfa->num_submatches = parse_ctx.submatch_id;
3026 tnfa->num_submatches_invisible = parse_ctx.submatch_id_invisible
3027 - SUBMATCH_ID_INVISIBLE_START;
3028 tnfa->num_reorder_tags = parse_ctx.num_reorder_tags;
3029 tnfa->loc = parse_ctx.loc;
3030
3031 /* Set up tags for submatch addressing. If REG_NOSUB is set and the
3032 regexp does not have back references, this can be skipped. */
3033 if (tnfa->num_reorder_tags > 0 || !(cflags & REG_NOSUB))
3034 {
3035 DPRINT(("tre_compile: setting up tags\n"));
3036
3037 /* Figure out how many tags we will need. */
3038 errcode = tre_add_tags(NULL, stack, tree, tnfa);
3039 if (errcode != REG_OK)
3040 ERROR_EXIT(errcode);
3041 #ifdef TRE_DEBUG
3042 tre_ast_print(tree);
3043 #endif /* TRE_DEBUG */
3044
3045 if (tnfa->num_tags > 0)
3046 {
3047 tag_directions = xmalloc(sizeof(*tag_directions)
3048 * (tnfa->num_tags + 1));
3049 if (tag_directions == NULL)
3050 ERROR_EXIT(REG_ESPACE);
3051 tnfa->tag_directions = tag_directions;
3052 memset(tag_directions, -1,
3053 sizeof(*tag_directions) * (tnfa->num_tags + 1));
3054 }
3055 tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 3,
3056 sizeof(*tnfa->minimal_tags));
3057 if (tnfa->minimal_tags == NULL)
3058 ERROR_EXIT(REG_ESPACE);
3059
3060 submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
3061 sizeof(*submatch_data));
3062 if (submatch_data == NULL)
3063 ERROR_EXIT(REG_ESPACE);
3064 /* Set the eo_tag value to -1 to indicate that that corresponding
3065 * submatch has not be completed yet */
3066 for (i = 0; i < parse_ctx.submatch_id; i++)
3067 {
3068 submatch_data[i].eo_tag = -1;
3069 }
3070 tnfa->submatch_data = submatch_data;
3071
3072 errcode = tre_add_tags(mem, stack, tree, tnfa);
3073 if (errcode != REG_OK)
3074 ERROR_EXIT(errcode);
3075
3076 #ifdef TRE_DEBUG
3077 for (i = 0; i < parse_ctx.submatch_id; i++)
3078 DPRINT(("pmatch[%d] = {t%d, t%d}\n",
3079 i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
3080 for (i = 0; i <= tnfa->num_tags; i++)
3081 DPRINT(("t%d is %s\n", i, tag_dir_str[tag_directions[i]]));
3082 #endif /* TRE_DEBUG */
3083 }
3084
3085 /* Expand iteration nodes. */
3086 errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
3087 tag_directions, &tnfa->params_depth);
3088 if (errcode != REG_OK)
3089 ERROR_EXIT(errcode);
3090
3091 /* Add a dummy node for the final state.
3092 XXX - For certain patterns this dummy node can be optimized away,
3093 for example "a*" or "ab*". Figure out a simple way to detect
3094 this possibility. */
3095 tmp_ast_l = tree;
3096 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
3097 if (tmp_ast_r == NULL)
3098 ERROR_EXIT(REG_ESPACE);
3099
3100 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
3101 if (tree == NULL)
3102 ERROR_EXIT(REG_ESPACE);
3103
3104 #ifdef TRE_DEBUG
3105 tre_ast_print(tree);
3106 DPRINT(("Number of states: %d\n", parse_ctx.position));
3107 if (submatch_data)
3108 for (i = 0; i < parse_ctx.submatch_id; i++)
3109 DPRINT(("pmatch[%d] = {t%d, t%d}\n",
3110 i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
3111 if (tag_directions)
3112 for (i = 0; i <= tnfa->num_tags; i++)
3113 DPRINT(("t%d is %s\n", i, tag_dir_str[tag_directions[i]]));
3114 #endif /* TRE_DEBUG */
3115
3116 errcode = tre_compute_nfl(mem, stack, tree);
3117 if (errcode != REG_OK)
3118 ERROR_EXIT(errcode);
3119
3120 counts = xmalloc(sizeof(int) * parse_ctx.position);
3121 if (counts == NULL)
3122 ERROR_EXIT(REG_ESPACE);
3123
3124 offs = xmalloc(sizeof(int) * parse_ctx.position);
3125 if (offs == NULL)
3126 ERROR_EXIT(REG_ESPACE);
3127
3128 for (i = 0; i < parse_ctx.position; i++)
3129 counts[i] = 0;
3130 tre_ast_to_tnfa(tree, NULL, counts, NULL);
3131
3132 add = 0;
3133 for (i = 0; i < parse_ctx.position; i++)
3134 {
3135 offs[i] = add;
3136 add += counts[i] + 1;
3137 counts[i] = 0;
3138 }
3139 transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
3140 if (transitions == NULL)
3141 ERROR_EXIT(REG_ESPACE);
3142 tnfa->transitions = transitions;
3143 tnfa->num_transitions = add;
3144
3145 DPRINT(("Converting to TNFA:\n"));
3146 errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
3147 if (errcode != REG_OK)
3148 ERROR_EXIT(errcode);
3149
3150 #ifdef USE_FIRSTPOS_CHARS /* not defined */
3151 /* If in eight bit mode, compute a table of characters that can be the
3152 first character of a match. */
3153 tnfa->first_char = -1;
3154 if (TRE_MB_CUR_MAX_L(tnfa->loc) == 1 && !tmp_ast_l->nullable)
3155 {
3156 int count = 0;
3157 tre_cint_t k;
3158 DPRINT(("Characters that can start a match:"));
3159 tnfa->firstpos_chars = xcalloc(256, sizeof(char));
3160 if (tnfa->firstpos_chars == NULL)
3161 ERROR_EXIT(REG_ESPACE);
3162 for (p = tree->firstpos; p->position >= 0; p++)
3163 {
3164 tre_tnfa_transition_t *j = transitions + offs[p->position];
3165 while (j->state != NULL)
3166 {
3167 for (k = j->code_min; k <= j->code_max && k < 256; k++)
3168 {
3169 DPRINT((" %d", k));
3170 tnfa->firstpos_chars[k] = 1;
3171 count++;
3172 }
3173 j++;
3174 }
3175 }
3176 DPRINT(("\n"));
3177 #define TRE_OPTIMIZE_FIRST_CHAR 1
3178 #if TRE_OPTIMIZE_FIRST_CHAR
3179 if (count == 1)
3180 {
3181 for (k = 0; k < 256; k++)
3182 if (tnfa->firstpos_chars[k])
3183 {
3184 DPRINT(("first char must be %d\n", k));
3185 tnfa->first_char = k;
3186 xfree(tnfa->firstpos_chars);
3187 tnfa->firstpos_chars = NULL;
3188 break;
3189 }
3190 }
3191 #endif
3192
3193 }
3194 else
3195 tnfa->firstpos_chars = NULL;
3196 #else /* !USE_FIRSTPOS_CHARS */
3197
3198 /* Set first_char only if there is only one character that can be the
3199 first character of a match */
3200 tnfa->first_char = -1;
3201 if (!tmp_ast_l->nullable)
3202 {
3203 int scanning = 1;
3204 for (p = tree->firstpos; scanning && p->position >= 0; p++)
3205 {
3206 tre_tnfa_transition_t *j = transitions + offs[p->position];
3207 while (j->state != NULL)
3208 {
3209 if (j->code_min <= j->code_max)
3210 {
3211 if (j->code_max != j->code_min || j->code_min == -1 || tnfa->first_char != -1)
3212 {
3213 tnfa->first_char = -1;
3214 scanning = 0;
3215 break;
3216 }
3217 tnfa->first_char = j->code_min;
3218 }
3219 j++;
3220 }
3221 }
3222 #ifdef TRE_DEBUG
3223 if (tnfa->first_char >= 0)
3224 DPRINT(("first char must be %d\n", tnfa->first_char));
3225 #endif /* TRE_DEBUG */
3226 }
3227 #endif /* !USE_FIRSTPOS_CHARS */
3228
3229 p = tree->firstpos;
3230 i = 0;
3231 while (p->position >= 0)
3232 {
3233 i++;
3234
3235 #ifdef TRE_DEBUG
3236 {
3237 int *tags;
3238 DPRINT(("initial: %d", p->position));
3239 tags = p->tags;
3240 if (tags != NULL)
3241 {
3242 if (*tags >= 0)
3243 DPRINT(("/"));
3244 while (*tags >= 0)
3245 {
3246 DPRINT(("%d", *tags));
3247 tags++;
3248 if (*tags >= 0)
3249 DPRINT((","));
3250 }
3251 }
3252 DPRINT((", assert %d", p->assertions));
3253 if (p->params)
3254 {
3255 DPRINT((", "));
3256 tre_print_params(p->params);
3257 }
3258 DPRINT(("\n"));
3259 }
3260 #endif /* TRE_DEBUG */
3261
3262 p++;
3263 }
3264
3265 initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
3266 if (initial == NULL)
3267 ERROR_EXIT(REG_ESPACE);
3268 tnfa->initial = initial;
3269
3270 i = 0;
3271 for (p = tree->firstpos; p->position >= 0; p++)
3272 {
3273 initial[i].state = transitions + offs[p->position];
3274 initial[i].state_id = p->position;
3275 initial[i].tags = NULL;
3276 /* Copy the arrays p->tags, and p->params, they are allocated
3277 from a tre_mem object. */
3278 if (p->tags)
3279 {
3280 int j;
3281 for (j = 0; p->tags[j] >= 0; j++);
3282 initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
3283 if (!initial[i].tags)
3284 ERROR_EXIT(REG_ESPACE);
3285 memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
3286 }
3287 initial[i].params = NULL;
3288 if (p->params)
3289 {
3290 initial[i].params = xmalloc(sizeof(*p->params) * TRE_PARAM_LAST);
3291 if (!initial[i].params)
3292 ERROR_EXIT(REG_ESPACE);
3293 memcpy(initial[i].params, p->params,
3294 sizeof(*p->params) * TRE_PARAM_LAST);
3295 }
3296 initial[i].assertions = p->assertions;
3297 i++;
3298 }
3299 initial[i].state = NULL;
3300
3301 tnfa->num_transitions = add;
3302 tnfa->final = transitions + offs[tree->lastpos[0].position];
3303 tnfa->num_states = parse_ctx.position;
3304 tnfa->cflags = cflags;
3305
3306 DPRINT(("final state %d (%p)\n", tree->lastpos[0].position,
3307 (void *)tnfa->final));
3308
3309 tre_mem_destroy(mem);
3310 tre_stack_destroy(stack);
3311 xfree(counts);
3312 xfree(offs);
3313
3314 #ifdef TRE_USE_SYSTEM_REGEX_H
3315 preg->re_magic = RE_MAGIC;
3316 #endif /* TRE_USE_SYSTEM_REGEX_H */
3317 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
3318 #ifdef __LIBC__
3319 /* In Libc, we need to retain the locale. Outside Libc, we already called
3320 duplocale() which does the retaining. */
3321 XL_RETAIN(tnfa->loc);
3322 #endif /* __LIBC__ */
3323 return REG_OK;
3324
3325 error_exit:
3326 /* Free everything that was allocated and return the error code. */
3327 tre_mem_destroy(mem);
3328 if (stack != NULL)
3329 tre_stack_destroy(stack);
3330 if (counts != NULL)
3331 xfree(counts);
3332 if (offs != NULL)
3333 xfree(offs);
3334
3335 /* Set tnfa into preg, so that calling tre_free() will free the contents
3336 of tnfa. But in Libc, NULL out the loc field since we never retained
3337 the locale. Outside Libc, we let tre_free() call freelocale(). */
3338 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
3339 #ifdef __LIBC__
3340 if(tnfa) tnfa->loc = NULL;
3341 #endif /* __LIBC__ */
3342
3343 tre_free(preg);
3344 return errcode;
3345 }
3346
3347
3348
3349
3350 void
3351 tre_free(regex_t *preg)
3352 {
3353 tre_tnfa_t *tnfa;
3354 unsigned int i;
3355 tre_tnfa_transition_t *trans;
3356
3357 #ifdef TRE_USE_SYSTEM_REGEX_H
3358 preg->re_magic = 0;
3359 #endif /* TRE_USE_SYSTEM_REGEX_H */
3360 tnfa = (void *)preg->TRE_REGEX_T_FIELD;
3361 if (!tnfa)
3362 return;
3363 preg->TRE_REGEX_T_FIELD = NULL;
3364
3365 for (i = 0; i < tnfa->num_transitions; i++)
3366 if (tnfa->transitions[i].state)
3367 {
3368 if (tnfa->transitions[i].tags)
3369 xfree(tnfa->transitions[i].tags);
3370 if (tnfa->transitions[i].assertions & ASSERT_BRACKET_MATCH)
3371 xfree(tnfa->transitions[i].u.bracket_match_list);
3372 if (tnfa->transitions[i].params)
3373 xfree(tnfa->transitions[i].params);
3374 }
3375 if (tnfa->transitions)
3376 xfree(tnfa->transitions);
3377
3378 if (tnfa->initial)
3379 {
3380 for (trans = tnfa->initial; trans->state; trans++)
3381 {
3382 if (trans->tags)
3383 xfree(trans->tags);
3384 if (trans->params)
3385 xfree(trans->params);
3386 }
3387 xfree(tnfa->initial);
3388 }
3389
3390 if (tnfa->submatch_data)
3391 {
3392 xfree(tnfa->submatch_data);
3393 }
3394
3395 if (tnfa->tag_directions)
3396 xfree(tnfa->tag_directions);
3397 #ifdef USE_FIRSTPOS_CHARS /* not defined */
3398 if (tnfa->firstpos_chars)
3399 xfree(tnfa->firstpos_chars);
3400 #endif /* USE_FIRSTPOS_CHARS */
3401 if (tnfa->minimal_tags)
3402 xfree(tnfa->minimal_tags);
3403
3404 if (tnfa->loc)
3405 #ifdef __LIBC__
3406 XL_RELEASE(tnfa->loc);
3407 #else /* !__LIBC__ */
3408 freelocale(tnfa->loc);
3409 #endif /* !__LIBC__ */
3410
3411 if (tnfa->last_matched_branch)
3412 xfree(tnfa->last_matched_branch);
3413
3414 xfree(tnfa);
3415 }
3416
3417 #ifndef __LIBC__
3418 char *
3419 tre_version(void)
3420 {
3421 static char str[256];
3422 char *version;
3423
3424 if (str[0] == 0)
3425 {
3426 (void) tre_config(TRE_CONFIG_VERSION, &version);
3427 (void) snprintf(str, sizeof(str), "TRE %s (BSD)", version);
3428 }
3429 return str;
3430 }
3431
3432 int
3433 tre_config(int query, void *result)
3434 {
3435 int *int_result = result;
3436 const char **string_result = result;
3437
3438 switch (query)
3439 {
3440 case TRE_CONFIG_APPROX:
3441 #ifdef TRE_APPROX
3442 *int_result = 1;
3443 #else /* !TRE_APPROX */
3444 *int_result = 0;
3445 #endif /* !TRE_APPROX */
3446 return REG_OK;
3447
3448 case TRE_CONFIG_WCHAR:
3449 #ifdef TRE_WCHAR
3450 *int_result = 1;
3451 #else /* !TRE_WCHAR */
3452 *int_result = 0;
3453 #endif /* !TRE_WCHAR */
3454 return REG_OK;
3455
3456 case TRE_CONFIG_MULTIBYTE:
3457 #ifdef TRE_MULTIBYTE
3458 *int_result = 1;
3459 #else /* !TRE_MULTIBYTE */
3460 *int_result = 0;
3461 #endif /* !TRE_MULTIBYTE */
3462 return REG_OK;
3463
3464 case TRE_CONFIG_SYSTEM_ABI:
3465 #ifdef TRE_CONFIG_SYSTEM_ABI
3466 *int_result = 1;
3467 #else /* !TRE_CONFIG_SYSTEM_ABI */
3468 *int_result = 0;
3469 #endif /* !TRE_CONFIG_SYSTEM_ABI */
3470 return REG_OK;
3471
3472 case TRE_CONFIG_VERSION:
3473 *string_result = TRE_VERSION;
3474 return REG_OK;
3475 }
3476
3477 return REG_NOMATCH;
3478 }
3479 #endif /* !__LIBC__ */
3480
3481 #pragma clang diagnostic push
3482 /* EOF */