]> git.saurik.com Git - apple/libc.git/blob - regex/TRE/lib/tre-compile.c
Libc-1081.1.3.tar.gz
[apple/libc.git] / regex / TRE / lib / tre-compile.c
1 /*
2 tre-compile.c - TRE regex compiler
3
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
6
7 */
8
9 /*
10 TODO:
11 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
12 function calls.
13 */
14
15
16 #ifdef HAVE_CONFIG_H
17 #include <config.h>
18 #endif /* HAVE_CONFIG_H */
19 #include <stdio.h>
20 #include <assert.h>
21 #include <string.h>
22 #include <limits.h>
23
24 #include "tre-internal.h"
25 #include "tre-mem.h"
26 #include "tre-stack.h"
27 #include "tre-ast.h"
28 #include "tre-parse.h"
29 #include "tre-compile.h"
30 #include "tre.h"
31 #include "tre-last-matched.h"
32 #include "xmalloc.h"
33
/*
  The bit_ffs() macro in bitstring.h is flawed.  Replace it with a working one.

  bit_ffs(name, nbits, value) scans the bit string `name' one byte at a time,
  from byte 0 through byte _bit_byte(nbits) inclusive, and stores in `*value'
  the index of the lowest set bit in the first non-zero byte, or -1 when every
  scanned byte is zero.

  NOTE(review): the last byte scanned is _bit_byte(nbits), not
  _bit_byte(nbits - 1), so the scan may look at one byte beyond the first
  `nbits' bits and may report an index >= nbits.  That range is kept exactly
  as it was; confirm whether any caller depends on it before tightening it.

  The body is wrapped in do { } while (0) so that the macro expands to a single
  statement (usable in an unbraced if/else), and the `name' and `nbits'
  arguments are parenthesized.
*/
#undef bit_ffs
#define bit_ffs(name, nbits, value) do { \
    bitstr_t *_name = (name); \
    int _byte, _nbits = (nbits); \
    int _stopbyte = _bit_byte(_nbits), _value = -1; \
    for (_byte = 0; _byte <= _stopbyte; ++_byte) \
      if (_name[_byte]) { \
        _value = _byte << 3; \
        /* _stopbyte is reused as scratch: shift until bit 0 is set. */ \
        for (_stopbyte = _name[_byte]; !(_stopbyte & 0x1); \
             ++_value, _stopbyte >>= 1) \
          ; \
        break; \
      } \
    *(value) = _value; \
  } while (0)
51
52 /*
53 Algorithms to setup tags so that submatch addressing can be done.
54 */
55
56
57 #ifdef TRE_DEBUG
/* Printable names used in debug output; the table is indexed with a
   tre_tag_direction_t value (see the DPRINT calls that pass
   tag_dir_str[direction]).  Both the strings and the table itself are
   read-only. */
static const char *const tag_dir_str[] = {
  "minimize",
  "maximize",
  "left-maximize"
};

/* Text emitted once per indentation level by print_indent(). */
static const char _indent[] = " ";
65
66 static void
67 print_indent(int indent)
68 {
69 while (indent-- > 0)
70 DPRINT((_indent));
71 }
72
73 static void print_last_matched_pre(tre_last_matched_pre_t *lm, int indent,
74 int num_tags);
75 static void
76 print_last_match_branch_pre(tre_last_matched_branch_pre_t *branch, int indent,
77 int num_tags)
78 {
79 tre_last_matched_pre_t *u = branch->last_matched;
80 int n_last_matched = 0;
81
82 while (u)
83 {
84 n_last_matched++;
85 u = u->next;
86 }
87
88 print_indent(indent);
89 DPRINT(("BRANCH: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
90 branch->tot_branches, branch->tot_last_matched, branch->tot_tags));
91 print_indent(indent);
92 DPRINT(("..n_last_matched=%d last_matched=%d\n", branch->n_last_matched,
93 n_last_matched));
94 if (branch->n_last_matched != n_last_matched)
95 DPRINT(("*** mismatch between n_last_matched and unions ***\n"));
96 if (branch->cmp_tag > 0)
97 {
98 int i;
99 const char *sep = " tags=";
100 print_indent(indent);
101 DPRINT(("..cmp_tag=%d n_tags=%d", branch->cmp_tag, branch->n_tags));
102 for (i = 0; i < num_tags; i++)
103 if (bit_test(branch->tags, i))
104 {
105 DPRINT(("%s%d", sep, i));
106 sep = ",";
107 }
108 DPRINT(("\n"));
109 }
110
111 u = branch->last_matched;
112 indent++;
113 while (u)
114 {
115 print_last_matched_pre(u, indent, num_tags);
116 u = u->next;
117 }
118 }
119
120 static void
121 print_last_matched_pre(tre_last_matched_pre_t *lm, int indent, int num_tags)
122 {
123 tre_last_matched_branch_pre_t *b = lm->branches;
124 int n_branches = 0;
125
126 while (b)
127 {
128 n_branches++;
129 b = b->next;
130 }
131
132 print_indent(indent);
133 DPRINT(("LAST_MATCHED: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
134 lm->tot_branches, lm->tot_last_matched, lm->tot_tags));
135 print_indent(indent);
136 DPRINT(("..start_tag=%d n_branches=%d branches=%d\n", lm->start_tag,
137 lm->n_branches, n_branches));
138 if (lm->n_branches != n_branches)
139 DPRINT(("*** mismatch between n and branches ***\n"));
140
141 b = lm->branches;
142 indent++;
143 while (b)
144 {
145 print_last_match_branch_pre(b, indent, num_tags);
146 b = b->next;
147 }
148 }
149
150 static void print_last_matched(tre_last_matched_t *lm, int indent);
151 static void
152 print_last_match_branch(tre_last_matched_branch_t *branch, int indent)
153 {
154 tre_last_matched_t *u;
155 int i;
156
157 print_indent(indent);
158 DPRINT(("BRANCH: n_last_matched=%d\n", branch->n_last_matched));
159 if (branch->cmp_tag > 0)
160 {
161 print_indent(indent);
162 DPRINT(("..cmp_tag=%d n_tags=%d", branch->cmp_tag, branch->n_tags));
163 if (branch->n_tags > 0)
164 {
165 const char *sep = " tags=";
166 for (i = 0; i < branch->n_tags; i++)
167 {
168 DPRINT(("%s%d", sep, branch->tags[i]));
169 sep = ",";
170 }
171 }
172 DPRINT(("\n"));
173 }
174
175 u = branch->last_matched;
176 indent++;
177 for (i = branch->n_last_matched; i > 0; i--, u++)
178 print_last_matched(u, indent);
179 }
180
181 static void
182 print_last_matched(tre_last_matched_t *lm, int indent)
183 {
184 int i;
185 tre_last_matched_branch_t *b;
186
187 print_indent(indent);
188 DPRINT(("LAST_MATCHED: n_branches=%d start_tag=%d\n", lm->n_branches,
189 lm->start_tag));
190
191 b = lm->branches;
192 indent++;
193 for (i = lm->n_branches; i > 0; i--, b++)
194 print_last_match_branch(b, indent);
195 }
196 #endif /* TRE_DEBUG */
197
198
199 /* Merge the tre_last_matched_branch_pre_t of src into dst, creating a new
200 one if needed. If tag_id > 0, add that tag as well (a negative tag_id will
201 create an unset tre_last_matched_branch_pre_t. */
202 static reg_errcode_t
203 tre_merge_branches(tre_mem_t mem, tre_ast_node_t *dst, tre_ast_node_t *src,
204 int tag_id, int num_tags)
205 {
206 tre_last_matched_branch_pre_t *db = dst->last_matched_branch;
207 tre_last_matched_branch_pre_t *sb = (src ? src->last_matched_branch : NULL);
208
209 if (db)
210 {
211 if (sb)
212 {
213 bitstr_t *l = db->tags;
214 bitstr_t *r = sb->tags;
215 int i = bitstr_size(num_tags);
216
217 while(i-- > 0)
218 *l++ |= *r++;
219 /* db and sb are the info from two parallel sub-trees, so the tags
220 must be mutually exclusive, and we can just add their numbers */
221 db->n_tags += sb->n_tags;
222 db->tot_tags += sb->tot_tags;
223 if (db->last_matched)
224 {
225 if (sb->last_matched)
226 {
227 tre_last_matched_pre_t *u = db->last_matched;
228
229 while(u->next)
230 u = u->next;
231 u->next = sb->last_matched;
232 db->n_last_matched += sb->n_last_matched;
233 db->tot_branches += sb->tot_branches;
234 db->tot_last_matched += sb->tot_last_matched;
235 }
236 }
237 else if (sb->last_matched)
238 {
239 db->last_matched = sb->last_matched;
240 db->n_last_matched = sb->n_last_matched;
241 db->tot_branches = sb->tot_branches;
242 db->tot_last_matched = sb->tot_last_matched;
243 }
244 }
245 }
246 else
247 db = sb;
248
249 if (tag_id != 0)
250 {
251 if (!db)
252 {
253 db = tre_mem_calloc(mem, sizeof(tre_last_matched_branch_pre_t)
254 + bitstr_size(num_tags));
255 if (db == NULL)
256 return REG_ESPACE;
257 db->tot_branches = 1;
258 }
259 if (tag_id > 0)
260 {
261 /* tag_id is a new tag, and shouldn't exist in db's tags,
262 so we can always increment n_tags */
263 bit_set(db->tags, tag_id);
264 db->n_tags++;
265 db->tot_tags++;
266 }
267 }
268 dst->last_matched_branch = db;
269 return REG_OK;
270 }
271
272
273 /* Inserts a catenation node to the root of the tree given in `node'.
274 As the left child a new tag with number `tag_id' to `node' is added,
275 and the right child is the old root. */
276 static reg_errcode_t
277 tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
278 {
279 tre_catenation_t *c;
280
281 DPRINT(("add_tag_left: tag %d\n", tag_id));
282
283 c = tre_mem_alloc(mem, sizeof(*c));
284 if (c == NULL)
285 return REG_ESPACE;
286 c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
287 if (c->left == NULL)
288 return REG_ESPACE;
289 c->right = tre_mem_calloc(mem, sizeof(tre_ast_node_t));
290 if (c->right == NULL)
291 return REG_ESPACE;
292
293 c->right->obj = node->obj;
294 c->right->type = node->type;
295 c->right->last_matched_branch = node->last_matched_branch;
296 c->right->nullable = -1;
297 c->right->submatch_id = -1;
298 node->obj = c;
299 node->type = CATENATION;
300 node->original = c->right;
301 return REG_OK;
302 }
303
304 /* Inserts a catenation node to the root of the tree given in `node'.
305 As the right child a new tag with number `tag_id' to `node' is added,
306 and the left child is the old root. */
307 static reg_errcode_t
308 tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
309 {
310 tre_catenation_t *c;
311
312 DPRINT(("tre_add_tag_right: tag %d\n", tag_id));
313
314 c = tre_mem_alloc(mem, sizeof(*c));
315 if (c == NULL)
316 return REG_ESPACE;
317 c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
318 if (c->right == NULL)
319 return REG_ESPACE;
320 c->left = tre_mem_calloc(mem, sizeof(tre_ast_node_t));
321 if (c->left == NULL)
322 return REG_ESPACE;
323
324 c->left->obj = node->obj;
325 c->left->type = node->type;
326 c->left->last_matched_branch = node->last_matched_branch;
327 c->left->nullable = -1;
328 c->left->submatch_id = -1;
329 node->obj = c;
330 node->type = CATENATION;
331 node->original = c->left;
332 return REG_OK;
333 }
334
/* Work-item symbols that tre_add_tags() pushes on its stack (as ints) and
   pops again to drive its explicit-stack walk over the parse tree. */
typedef enum {
  ADDTAGS_RECURSE = 0,               /* process a node; a union here is the
                                        top-most of a series */
  ADDTAGS_RECURSE_NOT_TOP_UNION = 1, /* same, but a union here is not the
                                        top-most of a series */
  ADDTAGS_AFTER_ITERATION = 2,
  ADDTAGS_AFTER_UNION_LEFT = 3,
  ADDTAGS_AFTER_UNION_RIGHT = 4,
  ADDTAGS_AFTER_CAT_LEFT = 5,
  ADDTAGS_AFTER_CAT_RIGHT = 6,
  ADDTAGS_SET_SUBMATCH_END = 7,
  ADDTAGS_UNION_RECURSE = 8,
  ADDTAGS_UNION_RIGHT_RECURSE = 9,
  ADDTAGS_AFTER_UNION_TOP = 10       /* only pushed when not in first pass */
} tre_addtags_symbol_t;
348
/* Symbols for a copy operation on the last-matched structures.
   NOTE(review): their users are not in the visible part of this file --
   presumably a stack-driven copy routine further down; confirm there. */
enum {
  COPY_LAST_MATCHED_BRANCH = 0,
  COPY_LAST_MATCHED_BRANCH_NEXT = 1,
  COPY_LAST_MATCHED = 2,
  COPY_LAST_MATCHED_NEXT = 3
};
355
356
357 #define REGSET_UNSET ((unsigned)-1)
358
359 /* Go through `regset' and set submatch data for submatches that are
360 using this tag. */
361 static void
362 tre_purge_regset(unsigned *regset, tre_tnfa_t *tnfa, int tag)
363 {
364 int i;
365
366 for (i = 0; regset[i] != REGSET_UNSET; i++)
367 {
368 int id = regset[i] / 2;
369 int start = !(regset[i] % 2);
370 if (id >= SUBMATCH_ID_INVISIBLE_START)
371 continue;
372 DPRINT((" Using tag %d for %s offset of "
373 "submatch %d\n", tag,
374 start ? "start" : "end", id));
375 if (start)
376 tnfa->submatch_data[id].so_tag = tag;
377 else
378 tnfa->submatch_data[id].eo_tag = tag;
379 }
380 regset[0] = -1;
381 }
382
383
384 #define REGSET_HAS_STARTS 0x1
385 #define REGSET_HAS_ENDS 0x2
386
387
388 /* Adds tags to appropriate locations in the parse tree in `tree', so that
389 subexpressions marked for submatch addressing can be traced. */
390 static reg_errcode_t
391 tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
392 tre_tnfa_t *tnfa)
393 {
394 reg_errcode_t status = REG_OK;
395 tre_addtags_symbol_t symbol;
396 tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
397 int bottom = tre_stack_num_objects(stack);
398 /* True for first pass (counting number of needed tags) */
399 int first_pass = (mem == NULL || tnfa == NULL);
400 unsigned *regset, *orig_regset;
401 int regset_contains = 0;
402 int num_tags = 0; /* Total number of tags. */
403 int num_minimals = 0; /* Number of special minimal tags. */
404 int tag = 0; /* The tag that is to be added next. */
405 int next_tag = 1; /* Next tag to use after this one. */
406 int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
407 int *reorder_tags = NULL; /* Tag reorder array: a pair for each reorder,
408 * the first is the tag to reorder, the second
409 * is the tag after which the first is reordered */
410 int *rtp; /* Pointer used to fill in reorder_tags and
411 * tag_order */
412 int *to_reorder; /* Transform array converting sequential order to
413 * that specified by reorder_tags */
414 int id;
415
416 tre_tag_direction_t direction = TRE_TAG_LEFT_MAXIMIZE;
417 if (!first_pass)
418 {
419 DPRINT(("Initializing direction to %s\n", tag_dir_str[direction]));
420 tnfa->end_tag = 0;
421 tnfa->minimal_tags[0] = -1;
422 }
423
424 regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches
425 + tnfa->num_submatches_invisible + 1) * 2));
426 if (regset == NULL)
427 {
428 status = REG_ESPACE;
429 goto error_regset;
430 }
431 regset[0] = REGSET_UNSET;
432 orig_regset = regset;
433
434 if (!first_pass)
435 {
436 /* Allocate all memory for reorder_tags, tag_order, to_seq_order and
437 * to_reorder in one batch (assuming all are the same type) */
438 rtp = reorder_tags = xmalloc(sizeof(*reorder_tags) *
439 ((2 * tnfa->num_reorder_tags + 1) +
440 tnfa->num_tags));
441 if (reorder_tags == NULL)
442 {
443 status = REG_ESPACE;
444 goto error_reorder_tags;
445 }
446 to_reorder = reorder_tags + (2 * tnfa->num_reorder_tags + 1);
447 }
448
449 STACK_PUSH(stack, voidptr, node);
450 STACK_PUSH(stack, int, ADDTAGS_RECURSE);
451
452 while (tre_stack_num_objects(stack) > bottom)
453 {
454 if (status != REG_OK)
455 break;
456
457 symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
458 switch (symbol)
459 {
460 int top_union;
461
462 case ADDTAGS_SET_SUBMATCH_END:
463 {
464 int i;
465
466 id = tre_stack_pop_int(stack);
467 node = tre_stack_pop_voidptr(stack);
468 /* Add end of this submatch to regset. */
469 for (i = 0; regset[i] != REGSET_UNSET; i++);
470 regset[i] = id * 2 + 1;
471 regset[i + 1] = -1;
472 regset_contains |= REGSET_HAS_ENDS;
473
474 /* Always put a tag after a minimal iterator. */
475 if (minimal_tag >= 0)
476 {
477 if (first_pass)
478 {
479 node->num_tags++;
480 DPRINT((" ADDTAGS_SET_SUBMATCH_END: node->num_tags = %d\n",
481 node->num_tags));
482 }
483 else
484 {
485 int i;
486 status = tre_merge_branches(mem, node, NULL, tag,
487 tnfa->num_tags);
488 if (status != REG_OK)
489 break;
490 status = tre_add_tag_right(mem, node, tag);
491 if (status != REG_OK)
492 break;
493 tnfa->tag_directions[tag] = TRE_TAG_MINIMIZE;
494 DPRINT(("Setting t%d direction to %s\n", tag,
495 tag_dir_str[tnfa->tag_directions[tag]]));
496 DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
497 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
498 tnfa->minimal_tags[i] = tag;
499 tnfa->minimal_tags[i + 1] = minimal_tag;
500 tnfa->minimal_tags[i + 2] = -1;
501
502 DPRINT((" Minimal end: t%d reordered to "
503 "after t%d\n", tag, minimal_tag));
504 /* Append to tag_order, move "tag" after
505 * "minimal_tag" */
506 *rtp++ = tag;
507 *rtp++ = minimal_tag;
508
509 num_minimals++;
510 tre_purge_regset(regset, tnfa, tag);
511 }
512
513 minimal_tag = -1;
514 DPRINT((" ADDTAGS_SET_SUBMATCH_END num_tags++ tag=%d\n", tag));
515 regset[0] = REGSET_UNSET;
516 regset_contains = 0;
517 tag = next_tag;
518 num_tags++;
519 next_tag++;
520 }
521 break;
522 }
523
524 case ADDTAGS_RECURSE_NOT_TOP_UNION:
525 /* Like ADDTAGS_RECURSE, except that top_union is set to zero,
526 * indicating that if a union is being processed, it is not the
527 * top-most of a series */
528 top_union = 0;
529 goto do_addtags_recurse;
530
531 case ADDTAGS_RECURSE:
532 /* Setting top_union to 1 means that if a union is begin processed,
533 * it is the top-most of a series, and should recurse through the
534 * series to set the left_tag and right_tag values */
535 top_union = 1;
536
537 do_addtags_recurse:
538 node = tre_stack_pop_voidptr(stack);
539
540 id = node->submatch_id;
541 if (id >= 0)
542 {
543 int i;
544
545
546 /* Add start of this submatch to regset. */
547 for (i = 0; regset[i] != REGSET_UNSET; i++);
548 regset[i] = id * 2;
549 regset[i + 1] = -1;
550 regset_contains |= REGSET_HAS_STARTS;
551
552 /* Add end of this submatch to regset after processing this
553 node. */
554 STACK_PUSH(stack, voidptr, node);
555 STACK_PUSHX(stack, int, id);
556 STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
557 }
558
559 switch (node->type)
560 {
561 case LITERAL:
562 {
563 tre_literal_t *lit = node->obj;
564
565 if (!IS_SPECIAL(lit) || IS_BACKREF(lit) || IS_EMPTY(lit) || IS_ASSERTION(lit))
566 {
567 DPRINT(("Literal %d-%d\n",
568 (int)lit->code_min, (int)lit->code_max));
569 if (regset_contains)
570 {
571 /* Regset is not empty, so add a tag before the
572 literal or backref. */
573 if (first_pass)
574 {
575 DPRINT((" ADDTAGS_RECURSE:LITERAL node->num_tags = 1\n"));
576 node->num_tags = 1;
577 }
578 else
579 {
580 status = tre_merge_branches(mem, node, NULL, tag,
581 tnfa->num_tags);
582 if (status != REG_OK)
583 break;
584 status = tre_add_tag_left(mem, node, tag);
585 if (status != REG_OK)
586 break;
587 if (regset_contains == REGSET_HAS_STARTS)
588 tnfa->tag_directions[tag] = TRE_TAG_LEFT_MAXIMIZE;
589 else
590 tnfa->tag_directions[tag] = direction;
591 DPRINT(("Setting t%d direction to %s\n", tag,
592 tag_dir_str[tnfa->tag_directions[tag]]));
593 tre_purge_regset(regset, tnfa, tag);
594
595 if (IS_BACKREF(lit))
596 {
597 int b = lit->code_max;
598 int t = tnfa->submatch_data[b].so_tag;
599 /* Fail if the referenced submatch hasn't been
600 * completed yet */
601 if (tnfa->submatch_data[b].eo_tag < 0)
602 {
603 status = REG_ESUBREG;
604 break;
605 }
606 if (t < tag)
607 {
608 DPRINT((" Backref %d start: "
609 "t%d reordered to before t%d\n",
610 b, tag, t));
611 if(t > 0)
612 t--;
613 /* Append to tag_order, move "tag" after
614 * "t" */
615 *rtp++ = tag;
616 *rtp++ = t;
617 }
618 #if TRE_DEBUG
619 else
620 DPRINT((" Backref %d start: "
621 "(t%d already before t%d)\n",
622 b, tag, t));
623 #endif /* TRE_DEBUG */
624 }
625 }
626
627 DPRINT((" ADDTAGS_RECURSE:LITERAL num_tags++ tag=%d\n",
628 tag));
629 regset[0] = REGSET_UNSET;
630 regset_contains = 0;
631 tag = next_tag;
632 num_tags++;
633 next_tag++;
634 }
635 }
636 else
637 {
638 assert(!IS_TAG(lit));
639 }
640 break;
641 }
642 case CATENATION:
643 {
644 tre_catenation_t *cat = node->obj;
645 tre_ast_node_t *left = cat->left;
646 tre_ast_node_t *right = cat->right;
647 int reserved_tag = -1;
648 DPRINT(("Catenation, next_tag = %d\n", next_tag));
649
650
651 /* After processing right child. */
652 STACK_PUSHX(stack, voidptr, node);
653 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
654
655 /* Process right child. */
656 STACK_PUSHX(stack, voidptr, right);
657 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
658
659 /* After processing left child. */
660 STACK_PUSHX(stack, int, next_tag + left->num_tags);
661 DPRINT((" Pushing %d for after left\n",
662 next_tag + left->num_tags));
663 if (left->num_tags > 0 && right->num_tags > 0)
664 {
665 /* Reserve the next tag to the right child. */
666 DPRINT((" ADDTAGS_RECURSE:CATENATION num_tags++ "
667 "Reserving next_tag %d to right child\n",
668 next_tag));
669 reserved_tag = next_tag;
670 next_tag++;
671 }
672 STACK_PUSHX(stack, int, reserved_tag);
673 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
674
675 /* Process left child. */
676 STACK_PUSHX(stack, voidptr, left);
677 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
678
679 }
680 break;
681 case ITERATION:
682 {
683 tre_iteration_t *iter = node->obj;
684 DPRINT(("Iteration\n"));
685
686 if (first_pass)
687 STACK_PUSHX(stack, int, regset_contains != 0);
688 STACK_PUSHX(stack, int, tag);
689 STACK_PUSHX(stack, voidptr, node);
690 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
691
692 STACK_PUSHX(stack, voidptr, iter->arg);
693 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
694
695 /* Regset is not empty, so add a tag here (this always happens
696 because iterators always get submatch id, even if in the
697 invisible range) */
698 if (regset_contains)
699 {
700 if (!first_pass)
701 {
702 status = tre_merge_branches(mem, node, NULL, tag,
703 tnfa->num_tags);
704 if (status != REG_OK)
705 break;
706 status = tre_add_tag_left(mem, node, tag);
707 if (status != REG_OK)
708 break;
709 if (regset_contains == REGSET_HAS_STARTS && tag != 0)
710 tnfa->tag_directions[tag] = iter->minimal ?
711 TRE_TAG_MINIMIZE :
712 TRE_TAG_LEFT_MAXIMIZE;
713 else
714 tnfa->tag_directions[tag] = direction;
715 DPRINT(("Setting t%d direction to %s\n", tag,
716 tag_dir_str[tnfa->tag_directions[tag]]));
717 tre_purge_regset(regset, tnfa, tag);
718 }
719
720 DPRINT((" ADDTAGS_RECURSE:ITERATION num_tags++ tag=%d\n",
721 tag));
722 regset[0] = REGSET_UNSET;
723 regset_contains = 0;
724 tag = next_tag;
725 num_tags++;
726 next_tag++;
727 }
728 direction = TRE_TAG_LEFT_MAXIMIZE;
729 DPRINT((" Setting direction to %s\n", tag_dir_str[direction]));
730 }
731 break;
732 case UNION:
733 {
734 tre_union_t *uni;
735 tre_ast_node_t *left;
736 tre_ast_node_t *right;
737 int front_tag = -1;
738
739 DPRINT(("Union\n"));
740
741 if (regset_contains)
742 {
743 DPRINT((" UNION num_tags++ tag=%d\n", tag));
744 front_tag = tag;
745 tag = next_tag;
746 num_tags++;
747 next_tag++;
748 }
749
750 /* For the top union, walk the tree of consecutive unions,
751 * setting the left_tag and right_tag values in increasing
752 * order (left to right priority) */
753 if (top_union &&
754 (node->num_submatches -
755 (node->submatch_id >= 0 &&
756 node->submatch_id < SUBMATCH_ID_INVISIBLE_START)) > 0)
757 {
758 tre_ast_node_t *n;
759 int last = tre_stack_num_objects(stack);
760
761 STACK_PUSH(stack, voidptr, node);
762 STACK_PUSH(stack, int, ADDTAGS_UNION_RECURSE);
763
764 while (tre_stack_num_objects(stack) > last)
765 {
766 symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
767 switch (symbol)
768 {
769 case ADDTAGS_UNION_RECURSE:
770 n = tre_stack_pop_voidptr(stack);
771 uni = n->obj;
772 left = uni->left;
773
774 /* Since the top union has num_submatches > 0,
775 * we set all the consecutive union's
776 * make_branches to 1 to force the generation
777 * of end tags for each union branch. */
778 n->make_branches = 1;
779
780 STACK_PUSH(stack, voidptr, n);
781 STACK_PUSH(stack, int,
782 ADDTAGS_UNION_RIGHT_RECURSE);
783
784 if (left->type == UNION)
785 {
786 STACK_PUSH(stack, voidptr, left);
787 STACK_PUSH(stack, int,
788 ADDTAGS_UNION_RECURSE);
789 }
790 else
791 {
792 DPRINT((" ADDTAGS_UNION_RECURSE "
793 "num_tags++ tag=%d\n", tag));
794 uni->left_tag = tag;
795 tag = next_tag;
796 num_tags++;
797 next_tag++;
798 }
799 break;
800
801 case ADDTAGS_UNION_RIGHT_RECURSE:
802 n = tre_stack_pop_voidptr(stack);
803 uni = n->obj;
804 right = uni->right;
805
806 if (right->type == UNION)
807 {
808 STACK_PUSH(stack, voidptr, right);
809 STACK_PUSH(stack, int,
810 ADDTAGS_UNION_RECURSE);
811 }
812 else
813 {
814 DPRINT((" ADDTAGS_UNION_RIGHT_RECURSE "
815 "num_tags++ tag=%d\n", tag));
816 uni->right_tag = tag;
817 tag = next_tag;
818 num_tags++;
819 next_tag++;
820 }
821
822 break;
823
824 default:
825 assert(0);
826 break;
827
828 } /* end switch(symbol) */
829 } /* end while(tre_stack_num_objects(stack) > last */
830 if (!first_pass)
831 {
832 STACK_PUSHX(stack, int, front_tag);
833 STACK_PUSHX(stack, voidptr, node);
834 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_TOP);
835 }
836 } /* end if (top_union && ...) */
837
838 uni = node->obj;
839 left = uni->left;
840 right = uni->right;
841
842 /* After processing right child. */
843 STACK_PUSHX(stack, voidptr, regset);
844 STACK_PUSHX(stack, int, regset_contains != 0);
845 STACK_PUSHX(stack, voidptr, node);
846 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
847
848 /* Process right child. */
849 STACK_PUSHX(stack, voidptr, right);
850 STACK_PUSHX(stack, int, ADDTAGS_RECURSE_NOT_TOP_UNION);
851
852 /* After processing left child. */
853 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
854
855 /* Process left child. */
856 STACK_PUSHX(stack, voidptr, left);
857 STACK_PUSHX(stack, int, ADDTAGS_RECURSE_NOT_TOP_UNION);
858
859 /* Regset is not empty, so add a tag here. */
860 if (regset_contains)
861 {
862 if (!first_pass)
863 {
864 status = tre_merge_branches(mem, node, NULL, front_tag,
865 tnfa->num_tags);
866 if (status != REG_OK)
867 break;
868 status = tre_add_tag_left(mem, node, front_tag);
869 if (status != REG_OK)
870 break;
871 if (regset_contains == REGSET_HAS_STARTS)
872 tnfa->tag_directions[front_tag] = TRE_TAG_LEFT_MAXIMIZE;
873 else
874 tnfa->tag_directions[front_tag] = direction;
875 DPRINT(("Setting t%d direction to %s\n", front_tag,
876 tag_dir_str[tnfa->tag_directions[front_tag]]));
877 tre_purge_regset(regset, tnfa, front_tag);
878 }
879
880 regset[0] = REGSET_UNSET;
881 regset_contains = 0;
882 }
883
884 break;
885 }
886 } /* end switch (node->type) */
887
888 break; /* end case: ADDTAGS_RECURSE */
889
890 case ADDTAGS_AFTER_ITERATION:
891 {
892 tre_iteration_t *iter;
893 tre_ast_node_t *orig;
894 int enter_tag;
895
896 node = tre_stack_pop_voidptr(stack);
897 orig = node->original ? node->original : node;
898 iter = (tre_iteration_t *)orig->obj;
899 enter_tag = tre_stack_pop_int(stack);
900 if (iter->minimal)
901 minimal_tag = enter_tag;
902
903 DPRINT(("After iteration\n"));
904 if (first_pass)
905 {
906 node->num_tags = iter->arg->num_tags + tre_stack_pop_int(stack);
907 DPRINT((" ADDTAGS_AFTER_ITERATION: node->num_tags = %d\n",
908 node->num_tags));
909 }
910 else
911 {
912 /* node->last_matched_branch will have the start tag (the tag
913 just *before* the iteration). iter->arg->last_matched_branch
914 will have the tag(s) inside the iteration, the ones that
915 may need to be reset if the iteration doesn't match. So
916 before we merge iter->arg into node, we need to set up
917 a new tre_last_matched_t and tre_last_matched_branch_t,
918 using any of the inside tags as cmp_tag (we choose the first
919 tag found by bit_ffs). If there are no inside tags, we
920 don't bother creating the extra structures. */
921 tre_last_matched_branch_pre_t *b =
922 iter->arg->last_matched_branch;
923
924 if (b && b->n_tags > 0)
925 {
926 tre_last_matched_pre_t *u;
927
928 bit_ffs(b->tags, num_tags, &b->cmp_tag);
929 DPRINT((" ADDTAGS_AFTER_ITERATION: n_tags=%d "
930 "cmp_tag = %d\n", b->n_tags, b->cmp_tag));
931
932 u = tre_mem_calloc(mem, sizeof(tre_last_matched_pre_t) +
933 sizeof(tre_last_matched_branch_pre_t)
934 + bitstr_size(tnfa->num_tags));
935 if (!u)
936 {
937 status = REG_ESPACE;
938 break;
939 }
940 u->branches = b;
941 u->n_branches = 1;
942 u->start_tag = b->cmp_tag;
943 u->tot_branches = b->tot_branches;
944 u->tot_last_matched = 1 + b->tot_last_matched;
945 u->tot_tags = b->tot_tags;
946
947 b = (tre_last_matched_branch_pre_t *)(u + 1);
948 b->last_matched = u;
949 b->n_last_matched = 1;
950 b->tot_branches = 1 + u->tot_branches;
951 b->tot_last_matched = u->tot_last_matched;
952 b->tot_tags = u->tot_tags;
953
954 iter->arg->last_matched_branch = b;
955 }
956 status = tre_merge_branches(mem, node, iter->arg, 0,
957 tnfa->num_tags);
958 if (status != REG_OK)
959 break;
960
961 if (iter->minimal)
962 {
963 /* Add a union with a left EMPTY literal and the right
964 being iter->arg. This should force the tags inside
965 the minimal iteration to prefer being unset */
966 if (iter->min == 0 && iter->max <= 1)
967 {
968 tre_ast_node_t *u, *e;
969
970 e = tre_ast_new_literal(mem, EMPTY, -1, -1);
971 if (e == NULL)
972 {
973 status = REG_ESPACE;
974 break;
975 }
976 u = tre_ast_new_union(mem, e, iter->arg);
977 if (u == NULL)
978 {
979 status = REG_ESPACE;
980 break;
981 }
982 iter->arg = u;
983 }
984
985 direction = TRE_TAG_MINIMIZE;
986 }
987 else
988 direction = TRE_TAG_MAXIMIZE;
989 DPRINT((" Setting direction to %s\n", tag_dir_str[direction]));
990 }
991 break;
992 }
993
994 case ADDTAGS_AFTER_CAT_LEFT:
995 {
996 int new_tag = tre_stack_pop_int(stack);
997 next_tag = tre_stack_pop_int(stack);
998 DPRINT(("After cat left, tag = %d, next_tag = %d\n",
999 tag, next_tag));
1000 if (new_tag >= 0)
1001 {
1002 DPRINT((" Setting tag to %d\n", new_tag));
1003 tag = new_tag;
1004 }
1005 break;
1006 }
1007
1008 case ADDTAGS_AFTER_CAT_RIGHT:
1009 {
1010 tre_catenation_t *cat;
1011
1012 DPRINT(("After cat right\n"));
1013 node = tre_stack_pop_voidptr(stack);
1014 cat = node->obj;
1015 if (first_pass)
1016 {
1017 node->num_tags = cat->left->num_tags + cat->right->num_tags;
1018 DPRINT((" ADDTAGS_AFTER_CAT_RIGHT: node->num_tags = %d\n",
1019 node->num_tags));
1020 }
1021 else
1022 {
1023 status = tre_merge_branches(mem, cat->left, cat->right, 0,
1024 tnfa->num_tags);
1025 if (status != REG_OK)
1026 break;
1027 status = tre_merge_branches(mem, node, cat->left, 0,
1028 tnfa->num_tags);
1029 }
1030 break;
1031 }
1032
1033 case ADDTAGS_AFTER_UNION_LEFT:
1034 DPRINT(("After union left\n"));
1035 /* Lift the bottom of the `regset' array so that when processing
1036 the right operand the items currently in the array are
1037 invisible. The original bottom was saved at ADDTAGS_UNION and
1038 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1039 while (*regset != REGSET_UNSET)
1040 regset++;
1041 regset_contains = 0;
1042 break;
1043
1044 case ADDTAGS_AFTER_UNION_RIGHT:
1045 {
1046 int added_tags;
1047 tre_ast_node_t *orig;
1048 tre_union_t *uni;
1049 /* Note: node may not be a UNION, but a CATENATION with a left
1050 * tag. So that is why we pass the original node->obj on the
1051 * stack, to get the union's true values. */
1052
1053 DPRINT(("After union right\n"));
1054 node = tre_stack_pop_voidptr(stack);
1055 orig = node->original ? node->original : node;
1056 uni = (tre_union_t *)orig->obj;
1057 added_tags = tre_stack_pop_int(stack);
1058 if (first_pass)
1059 {
1060 node->num_tags = uni->left->num_tags + uni->right->num_tags
1061 + added_tags;
1062 if (uni->left_tag > 0)
1063 node->num_tags++;
1064 if (uni->right_tag > 0)
1065 node->num_tags++;
1066 DPRINT((" ADDTAGS_AFTER_UNION_RIGHT: node->num_tags = %d\n",
1067 node->num_tags));
1068 }
1069 regset = tre_stack_pop_voidptr(stack);
1070
1071 /* Add tags after both children, the left child gets a smaller
1072 tag than the right child. This guarantees that we prefer
1073 the left child over the right child. */
1074 /* XXX - This is not always necessary (if the children have
1075 tags which must be seen for every match of that child). */
1076 if (!first_pass && node->make_branches)
1077 {
1078 tre_last_matched_branch_pre_t *lb =
1079 uni->left->last_matched_branch;
1080 tre_last_matched_branch_pre_t *rb =
1081 uni->right->last_matched_branch;
1082 tre_last_matched_pre_t *lu =
1083 uni->left->last_matched_in_progress;
1084 tre_last_matched_pre_t *ru =
1085 uni->right->last_matched_in_progress;
1086 tre_last_matched_pre_t *u;
1087 /* We don't need to call tre_merge_branches because these
1088 * tags don't participate in submatch ranges, so don't need
1089 * to be recorded. But we do set the cmp_tag entry of the
1090 * tre_last_matched_branch_pre_t, so we might call
1091 * tre_merge_branches if we need to create an empty
1092 * tre_last_matched_branch_pre_t. */
1093 if (uni->left_tag > 0)
1094 {
1095 DPRINT(("Setting t%d direction to maximize\n",
1096 uni->left_tag));
1097 status = tre_add_tag_right(mem, uni->left, uni->left_tag);
1098 if (status != REG_OK)
1099 break;
1100 tnfa->tag_directions[uni->left_tag] = TRE_TAG_MAXIMIZE;
1101 if (!lb)
1102 {
1103 status = tre_merge_branches(mem, uni->left, NULL, -1,
1104 tnfa->num_tags);
1105 if (status != REG_OK)
1106 break;
1107 lb = uni->left->last_matched_branch;
1108 }
1109 lb->cmp_tag = uni->left_tag;
1110 }
1111 if (uni->right_tag > 0)
1112 {
1113 DPRINT(("Setting t%d direction to maximize\n",
1114 uni->right_tag));
1115 status = tre_add_tag_right(mem, uni->right, uni->right_tag);
1116 if (status != REG_OK)
1117 break;
1118 tnfa->tag_directions[uni->right_tag] = TRE_TAG_MAXIMIZE;
1119 if (!rb)
1120 {
1121 status = tre_merge_branches(mem, uni->right, NULL, -1,
1122 tnfa->num_tags);
1123 if (status != REG_OK)
1124 break;
1125 rb = uni->right->last_matched_branch;
1126 }
1127 rb->cmp_tag = uni->right_tag;
1128 }
1129 /* Now merge the tre_last_matched_branch_pre_t into a
1130 tre_last_matched_pre_t */
1131 if (lu == NULL)
1132 {
1133 if (ru == NULL)
1134 {
1135 /* Create a new tre_last_matched_pre_t */
1136 u = tre_mem_calloc(mem, sizeof(tre_last_matched_pre_t));
1137 if (!u)
1138 {
1139 status = REG_ESPACE;
1140 break;
1141 }
1142 u->tot_last_matched = 1;
1143
1144 if (lb)
1145 {
1146 u->branches = lb;
1147 u->n_branches = 1;
1148 u->tot_branches += lb->tot_branches;
1149 u->tot_last_matched += lb->tot_last_matched;
1150 u->tot_tags += lb->tot_tags;
1151 if (rb)
1152 {
1153 lb->next = rb;
1154 u->n_branches++;
1155 u->tot_branches += rb->tot_branches;
1156 u->tot_last_matched += rb->tot_last_matched;
1157 u->tot_tags += rb->tot_tags;
1158 }
1159 }
1160 else if (rb)
1161 {
1162 u->branches = rb;
1163 u->n_branches = 1;
1164 u->tot_branches += rb->tot_branches;
1165 u->tot_last_matched += rb->tot_last_matched;
1166 u->tot_tags += rb->tot_tags;
1167 }
1168 }
1169 else
1170 {
1171 /* Use ru, and add lb */
1172 u = ru;
1173 if (lb)
1174 {
1175 lb->next = u->branches;
1176 u->branches = lb;
1177 u->n_branches++;
1178 u->tot_branches += lb->tot_branches;
1179 u->tot_last_matched += lb->tot_last_matched;
1180 u->tot_tags += lb->tot_tags;
1181 }
1182 }
1183 }
1184 else if (ru == NULL)
1185 {
1186 /* Use lu, and add rb */
1187 u = lu;
1188 if (rb)
1189 {
1190 rb->next = u->branches;
1191 u->branches = rb;
1192 u->n_branches++;
1193 u->tot_branches += rb->tot_branches;
1194 u->tot_last_matched += rb->tot_last_matched;
1195 u->tot_tags += rb->tot_tags;
1196 }
1197 }
1198 else
1199 {
1200 /* Merge lu and ru into lu */
1201 if (lu->branches)
1202 {
1203 if (ru->branches)
1204 {
1205 tre_last_matched_branch_pre_t *b = lu->branches;
1206 while (b->next) b = b->next;
1207 b->next = ru->branches;
1208 lu->n_branches += ru->n_branches;
1209 }
1210 }
1211 else if (ru->branches)
1212 {
1213 lu->branches = ru->branches;
1214 lu->n_branches = ru->n_branches;
1215 }
1216 lu->tot_branches += ru->tot_branches;
1217 lu->tot_last_matched += ru->tot_last_matched - 1;
1218 lu->tot_tags += ru->tot_tags;
1219 u = lu;
1220 }
1221 node->last_matched_in_progress = u;
1222 }
1223 direction = TRE_TAG_MAXIMIZE;
1224 break;
1225 }
1226
1227 case ADDTAGS_AFTER_UNION_TOP: /* only called when not first_pass */
1228 {
1229 tre_last_matched_branch_pre_t *b;
1230 tre_last_matched_pre_t *u;
1231 int start_tag;
1232
1233 DPRINT(("After union top\n"));
1234 node = tre_stack_pop_voidptr(stack);
1235 start_tag = tre_stack_pop_int(stack);
1236 b = tre_mem_calloc(mem, sizeof(tre_last_matched_branch_pre_t)
1237 + bitstr_size(tnfa->num_tags));
1238 if (!b)
1239 {
1240 status = REG_ESPACE;
1241 break;
1242 }
1243
1244 u = node->last_matched_in_progress;
1245 u->start_tag = start_tag;
1246 b->tot_branches = 1 + u->tot_branches;
1247 b->tot_last_matched = u->tot_last_matched;
1248 b->tot_tags = u->tot_tags;
1249 b->last_matched = u;
1250 b->n_last_matched = 1;
1251 node->last_matched_branch = b;
1252 node->last_matched_in_progress = NULL;
1253 break;
1254 }
1255
1256 default:
1257 assert(0);
1258 break;
1259
1260 } /* end switch(symbol) */
1261 } /* end while(tre_stack_num_objects(stack) > bottom) */
1262
1263 if (status != REG_OK)
1264 {
1265 DPRINT(("Error during %s pass\n", first_pass ? "first" : "second"));
1266 goto error_post_compile;
1267 }
1268
1269 if (!first_pass)
1270 {
1271 int i;
1272 if (num_tags != tnfa->num_tags)
1273 {
1274 DPRINT(("num_tags(%d) != tnfa->num_tags(%d)\n", num_tags,
1275 tnfa->num_tags));
1276 status = REG_BADPAT;
1277 goto error_post_compile;
1278 }
1279
1280 tre_purge_regset(regset, tnfa, tag);
1281 DPRINT(("Setting t%d to %s\n", num_tags,
1282 tag_dir_str[direction]));
1283 tnfa->tag_directions[num_tags] = direction;
1284
1285 if (rtp > reorder_tags + 2 * tnfa->num_reorder_tags)
1286 {
1287 DPRINT(("Processed %d reorder tags instead of %d\n",
1288 (int)(rtp - reorder_tags) / 2, tnfa->num_reorder_tags));
1289 status = REG_BADPAT;
1290 goto error_post_compile;
1291 }
1292 *rtp = -1;
1293 #if TRE_DEBUG
1294 if (reorder_tags[0] >= 0)
1295 {
1296 DPRINT(("reorder_tags:\n"));
1297 for (rtp = reorder_tags; *rtp >= 0;)
1298 {
1299 DPRINT(("%d after ", *rtp++));
1300 DPRINT(("%d\n", *rtp++));
1301 }
1302 }
1303 else
1304 DPRINT(("No reorder_tags\n"));
1305 #endif /* TRE_DEBUG */
1306
1307 /* Initialize to_reorder */
1308 for (i = 0; i < num_tags; i++)
1309 to_reorder[i] = i;
1310 /* Use to_seq_order to convert reorder_tags values, and use those to
1311 * reorder to_reorder */
1312 for (rtp = reorder_tags; *rtp >= 0;)
1313 {
1314 int j, high, low;
1315 int ti = *rtp++;
1316
1317 /* Skip reordering the final tag */
1318 if (ti >= num_tags)
1319 {
1320 DPRINT(("Skipping reorder of %d\n", ti));
1321 rtp++;
1322 continue;
1323 }
1324 /* The number of the tag to reorder */
1325 high = to_reorder[ti];
1326 /* Reorder after this tag */
1327 low = to_reorder[*rtp++];
1328
1329 DPRINT(("ti=%d high=%d low=%d\n", ti, high, low));
1330 if (low > high)
1331 {
1332 DPRINT(("Tag %d already before %d\n", high, low));
1333 continue;
1334 }
1335 for (j = 0; j < num_tags; j++)
1336 if (to_reorder[j] > low && to_reorder[j] < high)
1337 to_reorder[j]++;
1338 to_reorder[ti] = low + 1;
1339 #ifdef TRE_DEBUG
1340 DPRINT(("to_reorder=("));
1341 for (j = 0; j < num_tags; j++)
1342 {
1343 DPRINT(("%d", to_reorder[j]));
1344 if (j < num_tags - 1)
1345 DPRINT((","));
1346 }
1347 DPRINT((")\n"));
1348 #endif /* TRE_DEBUG */
1349 }
1350 /* Determine if reordering is really necessary */
1351 {
1352 int need_reorder = 0;
1353 for (i = 0; i < num_tags; i++)
1354 if(to_reorder[i] != i)
1355 {
1356 need_reorder = 1;
1357 break;
1358 }
1359 /* If need_reorder is not set, free reorder_tags, and set to NULL,
1360 * indicating no reordering is needed */
1361 if (!need_reorder)
1362 {
1363 DPRINT(("Don't need to reorder\n"));
1364 xfree(reorder_tags);
1365 reorder_tags = NULL;
1366 }
1367 }
1368 }
1369
1370 if (reorder_tags)
1371 {
1372 int i;
1373 tre_tag_direction_t *new_tag_directions;
1374 #if TRE_DEBUG
1375 DPRINT(("to_reorder:"));
1376 for (i = 0; i < num_tags; i++)
1377 DPRINT((" %d->%d", i, to_reorder[i]));
1378 DPRINT(("\n"));
1379 #endif /* TRE_DEBUG */
1380
1381 DPRINT(("Reordering submatch_data\n"));
1382 for (i = 0; i < (int)tnfa->num_submatches; i++)
1383 {
1384 #if TRE_DEBUG
1385 int so = tnfa->submatch_data[i].so_tag;
1386 int eo = tnfa->submatch_data[i].eo_tag;
1387 #endif /* TRE_DEBUG */
1388 tnfa->submatch_data[i].so_tag =
1389 to_reorder[tnfa->submatch_data[i].so_tag];
1390 tnfa->submatch_data[i].eo_tag =
1391 tnfa->submatch_data[i].eo_tag < num_tags ?
1392 to_reorder[tnfa->submatch_data[i].eo_tag] :
1393 tnfa->submatch_data[i].eo_tag;
1394 DPRINT(("pmatch[%d]: {%d, %d}->{%d, %d}\n", i, so, eo,
1395 tnfa->submatch_data[i].so_tag,
1396 tnfa->submatch_data[i].eo_tag));
1397 }
1398
1399 DPRINT(("Reordering tag_directions\n"));
1400 /* We only allocate num_tags directions and reorder them. The
1401 * num_tags-th direction (end tag) is left unchanged. */
1402 new_tag_directions = xmalloc(sizeof(*new_tag_directions) * num_tags);
1403 if (new_tag_directions == NULL)
1404 {
1405 status = REG_ESPACE;
1406 goto error_post_compile;
1407 }
1408 for (i = 0; i < num_tags; i++)
1409 {
1410 new_tag_directions[to_reorder[i]] = tnfa->tag_directions[i];
1411 }
1412 #if TRE_DEBUG
1413 for (i = 0; i < num_tags; i++)
1414 {
1415 DPRINT(("t%d %s->%s\n", i,
1416 tag_dir_str[tnfa->tag_directions[i]],
1417 tag_dir_str[new_tag_directions[i]]));
1418 }
1419 DPRINT(("t%d %s->%s\n", num_tags,
1420 tag_dir_str[tnfa->tag_directions[num_tags]],
1421 tag_dir_str[tnfa->tag_directions[num_tags]]));
1422 #endif /* TRE_DEBUG */
1423 memcpy(tnfa->tag_directions, new_tag_directions, sizeof(*new_tag_directions) * num_tags);
1424 xfree(new_tag_directions);
1425
1426 DPRINT(("Reordering minimal_tags\n"));
1427 for (i = 0; tnfa->minimal_tags[i] >= 0; i++)
1428 tnfa->minimal_tags[i] = tnfa->minimal_tags[i] < num_tags ?
1429 to_reorder[tnfa->minimal_tags[i]] :
1430 tnfa->minimal_tags[i];
1431
1432 DPRINT(("Reordering AST tags\n"));
1433 STACK_PUSH(stack, voidptr, tree);
1434 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1435 {
1436 node = tre_stack_pop_voidptr(stack);
1437
1438 switch (node->type)
1439 {
1440 case LITERAL:
1441 {
1442 tre_literal_t *lit = (tre_literal_t *)node->obj;
1443 if (IS_TAG(lit))
1444 lit->code_max = to_reorder[lit->code_max];
1445 break;
1446 }
1447
1448 case UNION:
1449 {
1450 tre_union_t *uni = (tre_union_t *)node->obj;
1451 STACK_PUSHX(stack, voidptr, uni->right);
1452 STACK_PUSHX(stack, voidptr, uni->left);
1453 break;
1454 }
1455
1456 case CATENATION:
1457 {
1458 tre_catenation_t *cat = (tre_catenation_t *)node->obj;
1459 STACK_PUSHX(stack, voidptr, cat->right);
1460 STACK_PUSHX(stack, voidptr, cat->left);
1461 break;
1462 }
1463
1464 case ITERATION:
1465 {
1466 tre_iteration_t *iter = (tre_iteration_t *)node->obj;
1467 STACK_PUSHX(stack, voidptr, iter->arg);
1468 break;
1469 }
1470
1471 default:
1472 assert(0);
1473 break;
1474 }
1475 }
1476 if (status != REG_OK)
1477 {
1478 DPRINT(("Error while reordering tags\n"));
1479 goto error_post_compile;
1480 }
1481 }
1482
1483
1484 if (!first_pass)
1485 {
1486 if (tree->last_matched_branch)
1487 {
1488 tre_last_matched_branch_t *buf, *b, *bb;
1489 tre_last_matched_branch_pre_t *bp;
1490 tre_last_matched_t *u, *uu;
1491 tre_last_matched_pre_t *up;
1492 int *t;
1493 int i;
1494 #ifdef TRE_DEBUG
1495 tre_last_matched_branch_t *_b;
1496 tre_last_matched_t *_u;
1497 int *_t;
1498
1499 DPRINT(("last_match_branch_pre:\n"));
1500 print_last_match_branch_pre(tree->last_matched_branch, 0, num_tags);
1501 #endif /* TRE_DEBUG */
1502 buf = (tre_last_matched_branch_t *)xcalloc(1,
1503 tree->last_matched_branch->tot_branches
1504 * sizeof(tre_last_matched_branch_t) +
1505 tree->last_matched_branch->tot_last_matched
1506 * sizeof(tre_last_matched_t) +
1507 tree->last_matched_branch->tot_tags *
1508 sizeof(int));
1509 if (!buf)
1510 {
1511 status = REG_ESPACE;
1512 goto error_post_compile;
1513 }
1514
1515 b = buf;
1516 u = (tre_last_matched_t *)(b +
1517 tree->last_matched_branch->tot_branches);
1518 t = (int *)(u + tree->last_matched_branch->tot_last_matched);
1519 #ifdef TRE_DEBUG
1520 _b = b;
1521 _u = u;
1522 _t = t;
1523 #endif /* TRE_DEBUG */
1524 DPRINT(("Copying info_pre to info\n"));
1525 STACK_PUSH(stack, voidptr, tree->last_matched_branch);
1526 STACK_PUSH(stack, int, 1);
1527 STACK_PUSH(stack, int, COPY_LAST_MATCHED_BRANCH);
1528
1529 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1530 {
1531 switch (tre_stack_pop_int(stack))
1532 {
1533 case COPY_LAST_MATCHED_BRANCH:
1534 i = tre_stack_pop_int(stack);
1535 /* The tre_last_matched_branch_pre_t * is still on the
1536 stack */
1537 STACK_PUSHX(stack, voidptr, b);
1538 STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH_NEXT);
1539 b += i;
1540 break;
1541
1542 case COPY_LAST_MATCHED_BRANCH_NEXT:
1543 bb = tre_stack_pop_voidptr(stack);
1544 bp = tre_stack_pop_voidptr(stack);
1545 bb->n_last_matched = bp->n_last_matched;
1546 bb->cmp_tag = bp->cmp_tag;
1547 if (bp->n_tags > 0)
1548 {
1549 int n;
1550 n = bb->n_tags = bp->n_tags;
1551 bb->tags = t;
1552 for (i = 0; i < num_tags; i++)
1553 if (bit_test(bp->tags, i))
1554 {
1555 *t++ = i;
1556 if (--n <= 0)
1557 break;
1558 }
1559 }
1560 if (bp->next)
1561 {
1562 STACK_PUSHX(stack, voidptr, bp->next);
1563 STACK_PUSHX(stack, voidptr, bb + 1);
1564 STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH_NEXT);
1565 }
1566 if (bp->n_last_matched > 0)
1567 {
1568 bb->last_matched = u;
1569 STACK_PUSHX(stack, voidptr, bp->last_matched);
1570 STACK_PUSHX(stack, int, bp->n_last_matched);
1571 STACK_PUSHX(stack, int, COPY_LAST_MATCHED);
1572 }
1573 break;
1574
1575 case COPY_LAST_MATCHED:
1576 i = tre_stack_pop_int(stack);
1577 /* The tre_last_matched_pre_t * is still on the stack */
1578 STACK_PUSHX(stack, voidptr, u);
1579 STACK_PUSHX(stack, int, COPY_LAST_MATCHED_NEXT);
1580 u += i;
1581 break;
1582
1583 case COPY_LAST_MATCHED_NEXT:
1584 uu = tre_stack_pop_voidptr(stack);
1585 up = tre_stack_pop_voidptr(stack);
1586 uu->n_branches = up->n_branches;
1587 uu->branches = b;
1588 uu->start_tag = up->start_tag;
1589 if (up->next)
1590 {
1591 STACK_PUSHX(stack, voidptr, up->next);
1592 STACK_PUSHX(stack, voidptr, uu + 1);
1593 STACK_PUSHX(stack, int, COPY_LAST_MATCHED_NEXT);
1594 }
1595 STACK_PUSHX(stack, voidptr, up->branches);
1596 STACK_PUSHX(stack, int, up->n_branches);
1597 STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH);
1598 break;
1599 }
1600 }
1601 if (status != REG_OK)
1602 goto error_post_compile;
1603 #ifdef TRE_DEBUG
1604 DPRINT(("last_matched_branch:\n"));
1605 print_last_match_branch(buf, 0);
1606 if (b != _b + tree->last_matched_branch->tot_branches)
1607 DPRINT(("b/%p != _b + tree->last_matched_branch->tot_branches/%p\n",
1608 b, _b + tree->last_matched_branch->tot_branches));
1609 if (u != _u + tree->last_matched_branch->tot_last_matched)
1610 DPRINT(("u/%p != _u + "
1611 "tree->last_matched_branch->tot_last_matched/%p\n",
1612 u, _u + tree->last_matched_branch->tot_last_matched));
1613 if (t != _t + tree->last_matched_branch->tot_tags)
1614 DPRINT(("t/%p != _t + tree->last_matched_branch->tot_tags/%p\n",
1615 t, _t + tree->last_matched_branch->tot_tags));
1616 #endif /* TRE_DEBUG */
1617 tnfa->last_matched_branch = buf;
1618 }
1619 #ifdef TRE_DEBUG
1620 else
1621 DPRINT(("No last_match_branch_pre\n"));
1622 #endif /* TRE_DEBUG */
1623 }
1624
1625 DPRINT(("tre_add_tags: %s complete. Number of tags %d.\n",
1626 first_pass? "First pass" : "Second pass", num_tags));
1627 #ifdef TRE_DEBUG
1628 tre_ast_print(tree);
1629 #endif /* TRE_DEBUG */
1630 DPRINT(("tre_add_tags: tree->num_tags=%d num_tags=%d\n", tree->num_tags,
1631 num_tags));
1632 assert(tree->num_tags == num_tags);
1633 tnfa->end_tag = num_tags;
1634 tnfa->num_tags = num_tags;
1635 tnfa->num_minimals = num_minimals;
1636 error_post_compile:
1637 xfree(reorder_tags);
1638 error_reorder_tags:
1639 xfree(orig_regset);
1640 error_regset:
1641 return status;
1642 }
1643
1644
1645
1646 /*
1647 AST to TNFA compilation routines.
1648 */
1649
/* Work items that tre_copy_ast() keeps on its explicit stack. */
typedef enum {
  COPY_RECURSE = 0,        /* copy the node stored beneath this symbol */
  COPY_SET_RESULT_PTR = 1  /* redirect where the next copy is stored */
} tre_copyast_symbol_t;

/* Bit flags accepted by the `flags' argument of tre_copy_ast(). */
#define COPY_REMOVE_TAGS         1  /* turn tag literals into EMPTY */
#define COPY_MAXIMIZE_FIRST_TAG  2  /* first tag: left-maximize -> maximize */
1658
1659 static reg_errcode_t
1660 tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1661 int flags, int *pos_add, tre_tag_direction_t *tag_directions,
1662 tre_ast_node_t **copy, int *max_pos)
1663 {
1664 reg_errcode_t status = REG_OK;
1665 int bottom = tre_stack_num_objects(stack);
1666 int num_copied = 0;
1667 int first_tag = 1;
1668 tre_ast_node_t **result = copy;
1669 tre_copyast_symbol_t symbol;
1670
1671 STACK_PUSH(stack, voidptr, ast);
1672 STACK_PUSH(stack, int, COPY_RECURSE);
1673
1674 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1675 {
1676 tre_ast_node_t *node;
1677 if (status != REG_OK)
1678 break;
1679
1680 symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
1681 switch (symbol)
1682 {
1683 case COPY_SET_RESULT_PTR:
1684 result = tre_stack_pop_voidptr(stack);
1685 break;
1686 case COPY_RECURSE:
1687 node = tre_stack_pop_voidptr(stack);
1688 switch (node->type)
1689 {
1690 case LITERAL:
1691 {
1692 tre_literal_t *lit = node->obj;
1693 int pos = lit->position;
1694 int min = lit->code_min;
1695 int max = lit->code_max;
1696 tre_bracket_match_list_t *list = !IS_SPECIAL(lit) ?
1697 lit->u.bracket_match_list :
1698 NULL;
1699 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1700 {
1701 /* XXX - e.g. [ab] has only one position but two
1702 nodes, so we are creating holes in the state space
1703 here. Not fatal, just wastes memory. */
1704 pos += *pos_add;
1705 num_copied++;
1706 }
1707 else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
1708 {
1709 /* Change this tag to empty. */
1710 min = EMPTY;
1711 max = pos = -1;
1712 }
1713 else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
1714 && first_tag)
1715 {
1716 /* Maximize the first tag. */
1717 if (tag_directions[max] == TRE_TAG_LEFT_MAXIMIZE)
1718 tag_directions[max] = TRE_TAG_MAXIMIZE;
1719 first_tag = 0;
1720 }
1721 *result = tre_ast_new_literal(mem, min, max, pos);
1722 if (*result == NULL)
1723 status = REG_ESPACE;
1724
1725 if (pos > *max_pos)
1726 *max_pos = pos;
1727
1728 if (!IS_SPECIAL(lit))
1729 ((tre_literal_t *)(*result)->obj)->u.bracket_match_list
1730 = list;
1731 break;
1732 }
1733 case UNION:
1734 {
1735 tre_union_t *uni = node->obj;
1736 tre_union_t *tmp;
1737 *result = tre_ast_new_union(mem, uni->left, uni->right);
1738 if (*result == NULL)
1739 {
1740 status = REG_ESPACE;
1741 break;
1742 }
1743 tmp = (*result)->obj;
1744 result = &tmp->left;
1745 STACK_PUSHX(stack, voidptr, uni->right);
1746 STACK_PUSHX(stack, int, COPY_RECURSE);
1747 STACK_PUSHX(stack, voidptr, &tmp->right);
1748 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1749 STACK_PUSHX(stack, voidptr, uni->left);
1750 STACK_PUSHX(stack, int, COPY_RECURSE);
1751 break;
1752 }
1753 case CATENATION:
1754 {
1755 tre_catenation_t *cat = node->obj;
1756 tre_catenation_t *tmp;
1757 *result = tre_ast_new_catenation(mem, cat->left, cat->right);
1758 if (*result == NULL)
1759 {
1760 status = REG_ESPACE;
1761 break;
1762 }
1763 tmp = (*result)->obj;
1764 tmp->left = NULL;
1765 tmp->right = NULL;
1766 result = &tmp->left;
1767
1768 STACK_PUSHX(stack, voidptr, cat->right);
1769 STACK_PUSHX(stack, int, COPY_RECURSE);
1770 STACK_PUSHX(stack, voidptr, &tmp->right);
1771 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1772 STACK_PUSHX(stack, voidptr, cat->left);
1773 STACK_PUSHX(stack, int, COPY_RECURSE);
1774 break;
1775 }
1776 case ITERATION:
1777 {
1778 tre_iteration_t *iter = node->obj;
1779 STACK_PUSHX(stack, voidptr, iter->arg);
1780 STACK_PUSHX(stack, int, COPY_RECURSE);
1781 *result = tre_ast_new_iter(mem, iter->arg, iter->min,
1782 iter->max, iter->minimal);
1783 if (*result == NULL)
1784 {
1785 status = REG_ESPACE;
1786 break;
1787 }
1788 iter = (*result)->obj;
1789 result = &iter->arg;
1790 break;
1791 }
1792 default:
1793 assert(0);
1794 break;
1795 }
1796 break;
1797 }
1798 }
1799 *pos_add += num_copied;
1800 return status;
1801 }
1802
/* Work items that tre_expand_ast() keeps on its explicit stack. */
typedef enum {
  EXPAND_RECURSE = 0,    /* visit the node and descend into its children */
  EXPAND_AFTER_ITER = 1  /* an iteration's argument has been processed */
} tre_expand_ast_symbol_t;
1807
1808 /* Expands each iteration node that has a finite nonzero minimum or maximum
1809 iteration count to a catenated sequence of copies of the node. */
1810 static reg_errcode_t
1811 tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1812 int *position, tre_tag_direction_t *tag_directions,
1813 int __unused *max_depth)
1814 {
1815 reg_errcode_t status = REG_OK;
1816 int bottom = tre_stack_num_objects(stack);
1817 int pos_add = 0;
1818 int pos_add_total = 0;
1819 int max_pos = 0;
1820 #ifdef TRE_APPROX
1821 /* Current approximate matching parameters. */
1822 int params[TRE_PARAM_LAST];
1823 /* Approximate parameter nesting level. */
1824 int params_depth = 0;
1825 #endif /* TRE_APPROX */
1826 int iter_depth = 0;
1827 #ifdef TRE_APPROX
1828 int i;
1829 #endif /* TRE_APPROX */
1830
1831 #ifdef TRE_APPROX
1832 for (i = 0; i < TRE_PARAM_LAST; i++)
1833 params[i] = TRE_PARAM_DEFAULT;
1834 #endif /* TRE_APPROX */
1835
1836 STACK_PUSHR(stack, voidptr, ast);
1837 STACK_PUSHR(stack, int, EXPAND_RECURSE);
1838 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1839 {
1840 tre_ast_node_t *node;
1841 tre_expand_ast_symbol_t symbol;
1842
1843 if (status != REG_OK)
1844 break;
1845
1846 DPRINT(("pos_add %d\n", pos_add));
1847
1848 symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
1849 node = tre_stack_pop_voidptr(stack);
1850 switch (symbol)
1851 {
1852 case EXPAND_RECURSE:
1853 switch (node->type)
1854 {
1855 case LITERAL:
1856 {
1857 tre_literal_t *lit= node->obj;
1858 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1859 {
1860 lit->position += pos_add;
1861 if (lit->position > max_pos)
1862 max_pos = lit->position;
1863 }
1864 break;
1865 }
1866 case UNION:
1867 {
1868 tre_union_t *uni = node->obj;
1869 STACK_PUSHX(stack, voidptr, uni->right);
1870 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1871 STACK_PUSHX(stack, voidptr, uni->left);
1872 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1873 break;
1874 }
1875 case CATENATION:
1876 {
1877 tre_catenation_t *cat = node->obj;
1878 STACK_PUSHX(stack, voidptr, cat->right);
1879 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1880 STACK_PUSHX(stack, voidptr, cat->left);
1881 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1882 break;
1883 }
1884 case ITERATION:
1885 {
1886 tre_iteration_t *iter = node->obj;
1887 STACK_PUSHX(stack, int, pos_add);
1888 STACK_PUSHX(stack, voidptr, node);
1889 STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
1890 STACK_PUSHX(stack, voidptr, iter->arg);
1891 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1892 /* If we are going to expand this node at EXPAND_AFTER_ITER
1893 then don't increase the `pos' fields of the nodes now, it
1894 will get done when expanding. */
1895 if (iter->min > 1 || iter->max > 1)
1896 pos_add = 0;
1897 iter_depth++;
1898 DPRINT(("iter\n"));
1899 break;
1900 }
1901 default:
1902 assert(0);
1903 break;
1904 }
1905 break;
1906 case EXPAND_AFTER_ITER:
1907 {
1908 tre_iteration_t *iter = node->obj;
1909 int pos_add_last;
1910 pos_add = tre_stack_pop_int(stack);
1911 pos_add_last = pos_add;
1912 /* Originally (in tre_parse_bound), if min == 0 && max == 0, we
1913 immediate replace the whole iteration with EMPTY. This
1914 unfortunately drops any submatches, and messes up setting the
1915 pmatch values (we can get tags of -1, and tag values in the
1916 billions). So we left it there and replace with EMPTY here. */
1917 if (iter->min == 0 && iter->max == 0)
1918 {
1919 tre_ast_node_t *empty = tre_ast_new_literal(mem, EMPTY, -1, -1);
1920 if (empty == NULL)
1921 return REG_ESPACE;
1922 node->obj = empty->obj;
1923 node->type = empty->type;
1924 }
1925 else if (iter->min > 1 || iter->max > 1)
1926 {
1927 tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
1928 int j;
1929 int pos_add_save = pos_add;
1930
1931 /* Create a catenated sequence of copies of the node. */
1932 for (j = 0; j < iter->min; j++)
1933 {
1934 tre_ast_node_t *copy;
1935 /* Remove tags from all but the last copy. */
1936 int flags = ((j + 1 < iter->min)
1937 ? COPY_REMOVE_TAGS
1938 : COPY_MAXIMIZE_FIRST_TAG);
1939 DPRINT((" pos_add %d\n", pos_add));
1940 pos_add_save = pos_add;
1941 status = tre_copy_ast(mem, stack, iter->arg, flags,
1942 &pos_add, tag_directions, &copy,
1943 &max_pos);
1944 if (status != REG_OK)
1945 return status;
1946 if (seq1 != NULL)
1947 seq1 = tre_ast_new_catenation(mem, seq1, copy);
1948 else
1949 seq1 = copy;
1950 if (seq1 == NULL)
1951 return REG_ESPACE;
1952 }
1953
1954 if (iter->max == -1)
1955 {
1956 /* No upper limit. */
1957 pos_add_save = pos_add;
1958 status = tre_copy_ast(mem, stack, iter->arg, 0,
1959 &pos_add, NULL, &seq2, &max_pos);
1960 if (status != REG_OK)
1961 return status;
1962 seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
1963 if (seq2 == NULL)
1964 return REG_ESPACE;
1965 }
1966 else
1967 {
1968 for (j = iter->min; j < iter->max; j++)
1969 {
1970 tre_ast_node_t *tmp, *copy;
1971 pos_add_save = pos_add;
1972 status = tre_copy_ast(mem, stack, iter->arg, 0,
1973 &pos_add, NULL, &copy, &max_pos);
1974 if (status != REG_OK)
1975 return status;
1976 if (seq2 != NULL)
1977 seq2 = tre_ast_new_catenation(mem, copy, seq2);
1978 else
1979 seq2 = copy;
1980 if (seq2 == NULL)
1981 return REG_ESPACE;
1982 tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
1983 if (tmp == NULL)
1984 return REG_ESPACE;
1985 seq2 = tre_ast_new_union(mem, tmp, seq2);
1986 if (seq2 == NULL)
1987 return REG_ESPACE;
1988 }
1989 }
1990
1991 pos_add = pos_add_save;
1992 if (seq1 == NULL)
1993 seq1 = seq2;
1994 else if (seq2 != NULL)
1995 seq1 = tre_ast_new_catenation(mem, seq1, seq2);
1996 if (seq1 == NULL)
1997 return REG_ESPACE;
1998 node->obj = seq1->obj;
1999 node->type = seq1->type;
2000 }
2001
2002 iter_depth--;
2003 pos_add_total += pos_add - pos_add_last;
2004 if (iter_depth == 0)
2005 pos_add = pos_add_total;
2006
2007 #ifdef TRE_APPROX
2008 /* If approximate parameters are specified, surround the result
2009 with two parameter setting nodes. The one on the left sets
2010 the specified parameters, and the one on the right restores
2011 the old parameters. */
2012 if (iter->params)
2013 {
2014 tre_ast_node_t *tmp_l, *tmp_r, *tmp_node, *node_copy;
2015 int *old_params;
2016
2017 tmp_l = tre_ast_new_literal(mem, PARAMETER, 0, -1);
2018 if (!tmp_l)
2019 return REG_ESPACE;
2020 ((tre_literal_t *)tmp_l->obj)->u.params = iter->params;
2021 iter->params[TRE_PARAM_DEPTH] = params_depth + 1;
2022 tmp_r = tre_ast_new_literal(mem, PARAMETER, 0, -1);
2023 if (!tmp_r)
2024 return REG_ESPACE;
2025 old_params = tre_mem_alloc(mem, sizeof(*old_params)
2026 * TRE_PARAM_LAST);
2027 if (!old_params)
2028 return REG_ESPACE;
2029 for (i = 0; i < TRE_PARAM_LAST; i++)
2030 old_params[i] = params[i];
2031 ((tre_literal_t *)tmp_r->obj)->u.params = old_params;
2032 old_params[TRE_PARAM_DEPTH] = params_depth;
2033 /* XXX - this is the only place where ast_new_node is
2034 needed -- should be moved inside AST module. */
2035 node_copy = tre_ast_new_node(mem, ITERATION,
2036 sizeof(tre_iteration_t));
2037 if (!node_copy)
2038 return REG_ESPACE;
2039 node_copy->obj = node->obj;
2040 tmp_node = tre_ast_new_catenation(mem, tmp_l, node_copy);
2041 if (!tmp_node)
2042 return REG_ESPACE;
2043 tmp_node = tre_ast_new_catenation(mem, tmp_node, tmp_r);
2044 if (!tmp_node)
2045 return REG_ESPACE;
2046 /* Replace the contents of `node' with `tmp_node'. */
2047 memcpy(node, tmp_node, sizeof(*node));
2048 node->obj = tmp_node->obj;
2049 node->type = tmp_node->type;
2050 params_depth++;
2051 if (params_depth > *max_depth)
2052 *max_depth = params_depth;
2053 }
2054 #endif /* TRE_APPROX */
2055 break;
2056 }
2057 default:
2058 assert(0);
2059 break;
2060 }
2061 }
2062
2063 *position += pos_add_total;
2064
2065 /* `max_pos' should never be larger than `*position' if the above
2066 code works, but just an extra safeguard let's make sure
2067 `*position' is set large enough so enough memory will be
2068 allocated for the transition table. */
2069 if (max_pos > *position)
2070 *position = max_pos;
2071
2072 #ifdef TRE_DEBUG
2073 DPRINT(("Expanded AST:\n"));
2074 tre_ast_print(ast);
2075 DPRINT(("*position %d, max_pos %d\n", *position, max_pos));
2076 #endif
2077
2078 return status;
2079 }
2080
2081 static tre_pos_and_tags_t *
2082 tre_set_empty(tre_mem_t mem)
2083 {
2084 tre_pos_and_tags_t *new_set;
2085
2086 new_set = tre_mem_calloc(mem, sizeof(*new_set));
2087 if (new_set == NULL)
2088 return NULL;
2089
2090 new_set[0].position = -1;
2091 new_set[0].code_min = -1;
2092 new_set[0].code_max = -1;
2093
2094 return new_set;
2095 }
2096
2097 static tre_pos_and_tags_t *
2098 tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
2099 tre_bracket_match_list_t *bracket_match_list, int backref)
2100 {
2101 tre_pos_and_tags_t *new_set;
2102
2103 new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
2104 if (new_set == NULL)
2105 return NULL;
2106
2107 new_set[0].position = position;
2108 new_set[0].code_min = code_min;
2109 new_set[0].code_max = code_max;
2110 new_set[0].bracket_match_list = bracket_match_list;
2111 new_set[0].backref = backref;
2112 new_set[1].position = -1;
2113 new_set[1].code_min = -1;
2114 new_set[1].code_max = -1;
2115
2116 return new_set;
2117 }
2118
2119 static tre_pos_and_tags_t *
2120 tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
2121 int *tags, int assertions, int *params)
2122 {
2123 int s1, s2, i, j;
2124 tre_pos_and_tags_t *new_set;
2125 int *new_tags;
2126 int num_tags;
2127
2128 for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
2129 for (s1 = 0; set1[s1].position >= 0; s1++);
2130 for (s2 = 0; set2[s2].position >= 0; s2++);
2131 new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
2132 if (!new_set )
2133 return NULL;
2134
2135 for (s1 = 0; set1[s1].position >= 0; s1++)
2136 {
2137 new_set[s1].position = set1[s1].position;
2138 new_set[s1].code_min = set1[s1].code_min;
2139 new_set[s1].code_max = set1[s1].code_max;
2140 new_set[s1].assertions = set1[s1].assertions | assertions;
2141 new_set[s1].bracket_match_list = set1[s1].bracket_match_list;
2142 new_set[s1].backref = set1[s1].backref;
2143 if (set1[s1].tags == NULL && tags == NULL)
2144 new_set[s1].tags = NULL;
2145 else
2146 {
2147 for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
2148 new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
2149 * (i + num_tags + 1)));
2150 if (new_tags == NULL)
2151 return NULL;
2152 for (j = 0; j < i; j++)
2153 new_tags[j] = set1[s1].tags[j];
2154 for (i = 0; i < num_tags; i++)
2155 new_tags[j + i] = tags[i];
2156 new_tags[j + i] = -1;
2157 new_set[s1].tags = new_tags;
2158 }
2159 if (set1[s1].params)
2160 new_set[s1].params = set1[s1].params;
2161 if (params)
2162 {
2163 if (!new_set[s1].params)
2164 new_set[s1].params = params;
2165 else
2166 {
2167 new_set[s1].params = tre_mem_alloc(mem, sizeof(*params) *
2168 TRE_PARAM_LAST);
2169 if (!new_set[s1].params)
2170 return NULL;
2171 for (i = 0; i < TRE_PARAM_LAST; i++)
2172 if (params[i] != TRE_PARAM_UNSET)
2173 new_set[s1].params[i] = params[i];
2174 }
2175 }
2176 }
2177
2178 for (s2 = 0; set2[s2].position >= 0; s2++)
2179 {
2180 new_set[s1 + s2].position = set2[s2].position;
2181 new_set[s1 + s2].code_min = set2[s2].code_min;
2182 new_set[s1 + s2].code_max = set2[s2].code_max;
2183 /* XXX - why not | assertions here as well? */
2184 new_set[s1 + s2].assertions = set2[s2].assertions;
2185 new_set[s1 + s2].bracket_match_list = set2[s2].bracket_match_list;
2186 new_set[s1 + s2].backref = set2[s2].backref;
2187 if (set2[s2].tags == NULL)
2188 new_set[s1 + s2].tags = NULL;
2189 else
2190 {
2191 for (i = 0; set2[s2].tags[i] >= 0; i++);
2192 new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
2193 if (new_tags == NULL)
2194 return NULL;
2195 for (j = 0; j < i; j++)
2196 new_tags[j] = set2[s2].tags[j];
2197 new_tags[j] = -1;
2198 new_set[s1 + s2].tags = new_tags;
2199 }
2200 if (set2[s2].params)
2201 new_set[s1 + s2].params = set2[s2].params;
2202 if (params)
2203 {
2204 if (!new_set[s1 + s2].params)
2205 new_set[s1 + s2].params = params;
2206 else
2207 {
2208 new_set[s1 + s2].params = tre_mem_alloc(mem, sizeof(*params) *
2209 TRE_PARAM_LAST);
2210 if (!new_set[s1 + s2].params)
2211 return NULL;
2212 for (i = 0; i < TRE_PARAM_LAST; i++)
2213 if (params[i] != TRE_PARAM_UNSET)
2214 new_set[s1 + s2].params[i] = params[i];
2215 }
2216 }
2217 }
2218 new_set[s1 + s2].position = -1;
2219 return new_set;
2220 }
2221
2222 /* Finds the empty path through `node' which is the one that should be
2223 taken according to POSIX.2 rules, and adds the tags on that path to
2224 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
2225 set to the number of tags seen on the path. */
2226 static reg_errcode_t
2227 tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
2228 int *assertions, int *params, int *num_tags_seen,
2229 int *params_seen)
2230 {
2231 tre_literal_t *lit;
2232 tre_union_t *uni;
2233 tre_catenation_t *cat;
2234 tre_iteration_t *iter;
2235 int i;
2236 int bottom = tre_stack_num_objects(stack);
2237 reg_errcode_t status = REG_OK;
2238 if (num_tags_seen)
2239 *num_tags_seen = 0;
2240 if (params_seen)
2241 *params_seen = 0;
2242
2243 status = tre_stack_push_voidptr(stack, node);
2244
2245 /* Walk through the tree recursively. */
2246 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
2247 {
2248 node = tre_stack_pop_voidptr(stack);
2249
2250 switch (node->type)
2251 {
2252 case LITERAL:
2253 lit = (tre_literal_t *)node->obj;
2254 switch (lit->code_min)
2255 {
2256 case TAG:
2257 if (lit->code_max >= 0)
2258 {
2259 if (tags != NULL)
2260 {
2261 /* Add the tag to `tags'. */
2262 for (i = 0; tags[i] >= 0; i++)
2263 if (tags[i] == lit->code_max)
2264 break;
2265 if (tags[i] < 0)
2266 {
2267 tags[i] = lit->code_max;
2268 tags[i + 1] = -1;
2269 }
2270 }
2271 if (num_tags_seen)
2272 (*num_tags_seen)++;
2273 }
2274 break;
2275 case ASSERTION:
2276 assert(lit->code_max >= 1
2277 || lit->code_max <= ASSERT_LAST);
2278 if (assertions != NULL)
2279 *assertions |= lit->code_max;
2280 break;
2281 case PARAMETER:
2282 if (params != NULL)
2283 for (i = 0; i < TRE_PARAM_LAST; i++)
2284 params[i] = lit->u.params[i];
2285 if (params_seen != NULL)
2286 *params_seen = 1;
2287 break;
2288 case EMPTY:
2289 break;
2290 default:
2291 assert(0);
2292 break;
2293 }
2294 break;
2295
2296 case UNION:
2297 /* Subexpressions starting earlier take priority over ones
2298 starting later, so we prefer the left subexpression over the
2299 right subexpression. */
2300 uni = (tre_union_t *)node->obj;
2301 if (uni->left->nullable)
2302 STACK_PUSHX(stack, voidptr, uni->left)
2303 else if (uni->right->nullable)
2304 STACK_PUSHX(stack, voidptr, uni->right)
2305 else
2306 assert(0);
2307 break;
2308
2309 case CATENATION:
2310 /* The path must go through both children. */
2311 cat = (tre_catenation_t *)node->obj;
2312 assert(cat->left->nullable);
2313 assert(cat->right->nullable);
2314 STACK_PUSHX(stack, voidptr, cat->left);
2315 STACK_PUSHX(stack, voidptr, cat->right);
2316 break;
2317
2318 case ITERATION:
2319 /* A match with an empty string is preferred over no match at
2320 all, so we go through the argument if possible. */
2321 iter = (tre_iteration_t *)node->obj;
2322 if (iter->arg->nullable)
2323 STACK_PUSHX(stack, voidptr, iter->arg);
2324 break;
2325
2326 default:
2327 assert(0);
2328 break;
2329 }
2330 }
2331
2332 return status;
2333 }
2334
2335
/* Work-item markers pushed on the stack by tre_compute_nfl().  NFL_RECURSE
   asks for a node to be visited; each NFL_POST_* value marks a node of the
   named kind whose children were pushed for processing before it. */
typedef enum {
  NFL_RECURSE = 0,
  NFL_POST_UNION = 1,
  NFL_POST_CATENATION = 2,
  NFL_POST_ITERATION = 3
} tre_nfl_stack_symbol_t;
2342
2343
2344 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2345 the nodes of the AST `tree'. */
2346 static reg_errcode_t
2347 tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
2348 {
2349 int bottom = tre_stack_num_objects(stack);
2350
2351 STACK_PUSHR(stack, voidptr, tree);
2352 STACK_PUSHR(stack, int, NFL_RECURSE);
2353
2354 while (tre_stack_num_objects(stack) > bottom)
2355 {
2356 tre_nfl_stack_symbol_t symbol;
2357 tre_ast_node_t *node;
2358
2359 symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2360 node = tre_stack_pop_voidptr(stack);
2361 switch (symbol)
2362 {
2363 case NFL_RECURSE:
2364 switch (node->type)
2365 {
2366 case LITERAL:
2367 {
2368 tre_literal_t *lit = (tre_literal_t *)node->obj;
2369 if (IS_BACKREF(lit))
2370 {
2371 /* Back references: nullable = false, firstpos = {i},
2372 lastpos = {i}. */
2373 node->nullable = 0;
2374 node->firstpos = tre_set_one(mem, lit->position, 0,
2375 TRE_CHAR_MAX, NULL, -1);
2376 if (!node->firstpos)
2377 return REG_ESPACE;
2378 node->lastpos = tre_set_one(mem, lit->position, 0,
2379 TRE_CHAR_MAX, NULL,
2380 (int)lit->code_max);
2381 if (!node->lastpos)
2382 return REG_ESPACE;
2383 }
2384 else if (lit->code_min < 0)
2385 {
2386 /* Tags, empty strings, params, and zero width assertions:
2387 nullable = true, firstpos = {}, and lastpos = {}. */
2388 node->nullable = 1;
2389 node->firstpos = tre_set_empty(mem);
2390 if (!node->firstpos)
2391 return REG_ESPACE;
2392 node->lastpos = tre_set_empty(mem);
2393 if (!node->lastpos)
2394 return REG_ESPACE;
2395 }
2396 else
2397 {
2398 /* Literal at position i: nullable = false, firstpos = {i},
2399 lastpos = {i}. */
2400 node->nullable = 0;
2401 node->firstpos =
2402 tre_set_one(mem, lit->position, (int)lit->code_min,
2403 (int)lit->code_max, NULL, -1);
2404 if (!node->firstpos)
2405 return REG_ESPACE;
2406 node->lastpos = tre_set_one(mem, lit->position,
2407 (int)lit->code_min,
2408 (int)lit->code_max,
2409 lit->u.bracket_match_list,
2410 -1);
2411 if (!node->lastpos)
2412 return REG_ESPACE;
2413 }
2414 break;
2415 }
2416
2417 case UNION:
2418 /* Compute the attributes for the two subtrees, and after that
2419 for this node. */
2420 STACK_PUSHR(stack, voidptr, node);
2421 STACK_PUSHR(stack, int, NFL_POST_UNION);
2422 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
2423 STACK_PUSHR(stack, int, NFL_RECURSE);
2424 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
2425 STACK_PUSHR(stack, int, NFL_RECURSE);
2426 break;
2427
2428 case CATENATION:
2429 /* Compute the attributes for the two subtrees, and after that
2430 for this node. */
2431 STACK_PUSHR(stack, voidptr, node);
2432 STACK_PUSHR(stack, int, NFL_POST_CATENATION);
2433 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
2434 STACK_PUSHR(stack, int, NFL_RECURSE);
2435 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
2436 STACK_PUSHR(stack, int, NFL_RECURSE);
2437 break;
2438
2439 case ITERATION:
2440 /* Compute the attributes for the subtree, and after that for
2441 this node. */
2442 STACK_PUSHR(stack, voidptr, node);
2443 STACK_PUSHR(stack, int, NFL_POST_ITERATION);
2444 STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
2445 STACK_PUSHR(stack, int, NFL_RECURSE);
2446 break;
2447 }
2448 break; /* end case: NFL_RECURSE */
2449
2450 case NFL_POST_UNION:
2451 {
2452 tre_union_t *uni = (tre_union_t *)node->obj;
2453 node->nullable = uni->left->nullable || uni->right->nullable;
2454 node->firstpos = tre_set_union(mem, uni->left->firstpos,
2455 uni->right->firstpos, NULL, 0, NULL);
2456 if (!node->firstpos)
2457 return REG_ESPACE;
2458 node->lastpos = tre_set_union(mem, uni->left->lastpos,
2459 uni->right->lastpos, NULL, 0, NULL);
2460 if (!node->lastpos)
2461 return REG_ESPACE;
2462 break;
2463 }
2464
2465 case NFL_POST_ITERATION:
2466 {
2467 int num_tags, *tags, assertions, params_seen;
2468 int *params;
2469 reg_errcode_t status;
2470 tre_iteration_t *iter = (tre_iteration_t *)node->obj;
2471
2472 /* From Ville Laurikari's original 2001 Master's thesis, the
2473 firstpos(n) and lastpos(n) of an iteration is just the
2474 corresponding values of the iteration's argument. Unfortunately,
2475 this isn't sufficient for the following BRE:
2476
2477 \(a*\)*b\(\1\) matched against ab
2478
2479 The backreference wants to force the first subexpression to
2480 be the empty string, but there is no transition for this. So
2481 we need to modify the lastpos(n) of an iteration to be the
2482 equivalent of that of catentation. Using the same notation as
2483 in the thesis, lastpos(n) is redefined as:
2484
2485 if nullable(c1) then
2486 lastpos(c1) U
2487 addtags(lastpos(c1),
2488 emptymatch(c1))
2489 else
2490 lastpos(c1)
2491
2492 where c1 is the argument node. firstpos(n) remains the same. */
2493
2494 /* Compute lastpos. */
2495 if (iter->min == 0 || iter->arg->nullable)
2496 {
2497 node->nullable = 1;
2498 if (iter->arg->nullable)
2499 {
2500 /* The arg matches the empty string. Make a first pass
2501 with tre_match_empty() to get the number of tags and
2502 parameters. */
2503 status = tre_match_empty(stack, iter->arg,
2504 NULL, NULL, NULL, &num_tags,
2505 &params_seen);
2506 if (status != REG_OK)
2507 return status;
2508 /* Allocate arrays for the tags and parameters. */
2509 tags = xmalloc(sizeof(int) * (num_tags + 1));
2510 if (!tags)
2511 return REG_ESPACE;
2512 tags[0] = -1;
2513 assertions = 0;
2514 params = NULL;
2515 if (params_seen)
2516 {
2517 params = tre_mem_alloc(mem, sizeof(*params)
2518 * TRE_PARAM_LAST);
2519 if (!params)
2520 {
2521 xfree(tags);
2522 return REG_ESPACE;
2523 }
2524 }
2525 /* Second pass with tre_mach_empty() to get the list of
2526 tags and parameters. */
2527 status = tre_match_empty(stack, iter->arg, tags,
2528 &assertions, params, NULL, NULL);
2529 if (status != REG_OK)
2530 {
2531 xfree(tags);
2532 return status;
2533 }
2534 node->lastpos =
2535 tre_set_union(mem, iter->arg->lastpos, iter->arg->lastpos,
2536 tags, assertions, params);
2537 xfree(tags);
2538 if (!node->lastpos)
2539 return REG_ESPACE;
2540 }
2541 else
2542 node->lastpos = iter->arg->lastpos;
2543 }
2544 else
2545 {
2546 node->nullable = 0;
2547 node->lastpos = iter->arg->lastpos;
2548 }
2549 node->firstpos = iter->arg->firstpos;
2550 break;
2551 }
2552
2553 case NFL_POST_CATENATION:
2554 {
2555 int num_tags, *tags, assertions, params_seen;
2556 int *params;
2557 reg_errcode_t status;
2558 tre_catenation_t *cat = node->obj;
2559 node->nullable = cat->left->nullable && cat->right->nullable;
2560
2561 /* Compute firstpos. */
2562 if (cat->left->nullable)
2563 {
2564 /* The left side matches the empty string. Make a first pass
2565 with tre_match_empty() to get the number of tags and
2566 parameters. */
2567 status = tre_match_empty(stack, cat->left,
2568 NULL, NULL, NULL, &num_tags,
2569 &params_seen);
2570 if (status != REG_OK)
2571 return status;
2572 /* Allocate arrays for the tags and parameters. */
2573 tags = xmalloc(sizeof(*tags) * (num_tags + 1));
2574 if (!tags)
2575 return REG_ESPACE;
2576 tags[0] = -1;
2577 assertions = 0;
2578 params = NULL;
2579 if (params_seen)
2580 {
2581 params = tre_mem_alloc(mem, sizeof(*params)
2582 * TRE_PARAM_LAST);
2583 if (!params)
2584 {
2585 xfree(tags);
2586 return REG_ESPACE;
2587 }
2588 }
2589 /* Second pass with tre_mach_empty() to get the list of
2590 tags and parameters. */
2591 status = tre_match_empty(stack, cat->left, tags,
2592 &assertions, params, NULL, NULL);
2593 if (status != REG_OK)
2594 {
2595 xfree(tags);
2596 return status;
2597 }
2598 node->firstpos =
2599 tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
2600 tags, assertions, params);
2601 xfree(tags);
2602 if (!node->firstpos)
2603 return REG_ESPACE;
2604 }
2605 else
2606 {
2607 node->firstpos = cat->left->firstpos;
2608 }
2609
2610 /* Compute lastpos. */
2611 if (cat->right->nullable)
2612 {
2613 /* The right side matches the empty string. Make a first pass
2614 with tre_match_empty() to get the number of tags and
2615 parameters. */
2616 status = tre_match_empty(stack, cat->right,
2617 NULL, NULL, NULL, &num_tags,
2618 &params_seen);
2619 if (status != REG_OK)
2620 return status;
2621 /* Allocate arrays for the tags and parameters. */
2622 tags = xmalloc(sizeof(int) * (num_tags + 1));
2623 if (!tags)
2624 return REG_ESPACE;
2625 tags[0] = -1;
2626 assertions = 0;
2627 params = NULL;
2628 if (params_seen)
2629 {
2630 params = tre_mem_alloc(mem, sizeof(*params)
2631 * TRE_PARAM_LAST);
2632 if (!params)
2633 {
2634 xfree(tags);
2635 return REG_ESPACE;
2636 }
2637 }
2638 /* Second pass with tre_mach_empty() to get the list of
2639 tags and parameters. */
2640 status = tre_match_empty(stack, cat->right, tags,
2641 &assertions, params, NULL, NULL);
2642 if (status != REG_OK)
2643 {
2644 xfree(tags);
2645 return status;
2646 }
2647 node->lastpos =
2648 tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2649 tags, assertions, params);
2650 xfree(tags);
2651 if (!node->lastpos)
2652 return REG_ESPACE;
2653 }
2654 else
2655 {
2656 node->lastpos = cat->right->lastpos;
2657 }
2658 break;
2659 }
2660
2661 default:
2662 assert(0);
2663 break;
2664 }
2665 }
2666
2667 return REG_OK;
2668 }
2669
2670
2671 /* Adds a transition from each position in `p1' to each position in `p2'. */
2672 static reg_errcode_t
2673 tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
2674 tre_tnfa_transition_t *transitions,
2675 int *counts, int *offs)
2676 {
2677 tre_pos_and_tags_t *orig_p2 = p2;
2678 tre_tnfa_transition_t *trans;
2679 int i, j, k, l, dup, prev_p2_pos;
2680
2681 if (transitions != NULL)
2682 while (p1->position >= 0)
2683 {
2684 p2 = orig_p2;
2685 prev_p2_pos = -1;
2686 while (p2->position >= 0)
2687 {
2688 /* Optimization: if this position was already handled, skip it. */
2689 if (p2->position == prev_p2_pos)
2690 {
2691 p2++;
2692 continue;
2693 }
2694 prev_p2_pos = p2->position;
2695 /* Set `trans' to point to the next unused transition from
2696 position `p1->position'. */
2697 trans = transitions + offs[p1->position];
2698 while (trans->state != NULL)
2699 {
2700 #if 0
2701 /* If we find a previous transition from `p1->position' to
2702 `p2->position', it is overwritten. This can happen only
2703 if there are nested loops in the regexp, like in "((a)*)*".
2704 In POSIX.2 repetition using the outer loop is always
2705 preferred over using the inner loop. Therefore the
2706 transition for the inner loop is useless and can be thrown
2707 away. */
2708 /* XXX - The same position is used for all nodes in a bracket
2709 expression, so this optimization cannot be used (it will
2710 break bracket expressions) unless I figure out a way to
2711 detect it here. */
2712 if (trans->state_id == p2->position)
2713 {
2714 DPRINT(("*"));
2715 break;
2716 }
2717 #endif
2718 trans++;
2719 }
2720
2721 if (trans->state == NULL)
2722 (trans + 1)->state = NULL;
2723 /* Use the character ranges, assertions, etc. from `p1' for
2724 the transition from `p1' to `p2'. */
2725 trans->code_min = p1->code_min;
2726 trans->code_max = p1->code_max;
2727 trans->state = transitions + offs[p2->position];
2728 trans->state_id = p2->position;
2729 trans->assertions = p1->assertions | p2->assertions
2730 | (p1->bracket_match_list != NULL ? ASSERT_BRACKET_MATCH : 0);
2731 if (p1->backref >= 0)
2732 {
2733 assert((trans->assertions & ASSERT_BRACKET_MATCH) == 0);
2734 assert(p2->backref < 0);
2735 trans->u.backref = p1->backref;
2736 trans->assertions |= ASSERT_BACKREF;
2737 }
2738 if (p1->bracket_match_list != NULL)
2739 {
2740 trans->u.bracket_match_list =
2741 xmalloc(SIZEOF_BRACKET_MATCH_LIST(p1->bracket_match_list));
2742 if (trans->u.bracket_match_list == NULL)
2743 return REG_ESPACE;
2744 memcpy(trans->u.bracket_match_list, p1->bracket_match_list,
2745 SIZEOF_BRACKET_MATCH_LIST(p1->bracket_match_list));
2746 }
2747
2748 /* Find out how many tags this transition has. */
2749 i = 0;
2750 if (p1->tags != NULL)
2751 while(p1->tags[i] >= 0)
2752 i++;
2753 j = 0;
2754 if (p2->tags != NULL)
2755 while(p2->tags[j] >= 0)
2756 j++;
2757
2758 /* If we are overwriting a transition, free the old tag array. */
2759 if (trans->tags != NULL)
2760 xfree(trans->tags);
2761 trans->tags = NULL;
2762
2763 /* If there were any tags, allocate an array and fill it. */
2764 if (i + j > 0)
2765 {
2766 trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
2767 if (!trans->tags)
2768 return REG_ESPACE;
2769 i = 0;
2770 if (p1->tags != NULL)
2771 while(p1->tags[i] >= 0)
2772 {
2773 trans->tags[i] = p1->tags[i];
2774 i++;
2775 }
2776 l = i;
2777 j = 0;
2778 if (p2->tags != NULL)
2779 while (p2->tags[j] >= 0)
2780 {
2781 /* Don't add duplicates. */
2782 dup = 0;
2783 for (k = 0; k < i; k++)
2784 if (trans->tags[k] == p2->tags[j])
2785 {
2786 dup = 1;
2787 break;
2788 }
2789 if (!dup)
2790 trans->tags[l++] = p2->tags[j];
2791 j++;
2792 }
2793 trans->tags[l] = -1;
2794 }
2795
2796 /* Set the parameter array. If both `p2' and `p1' have same
2797 parameters, the values in `p2' override those in `p1'. */
2798 if (p1->params || p2->params)
2799 {
2800 if (!trans->params)
2801 trans->params = xmalloc(sizeof(*trans->params)
2802 * TRE_PARAM_LAST);
2803 if (!trans->params)
2804 return REG_ESPACE;
2805 for (i = 0; i < TRE_PARAM_LAST; i++)
2806 {
2807 trans->params[i] = TRE_PARAM_UNSET;
2808 if (p1->params && p1->params[i] != TRE_PARAM_UNSET)
2809 trans->params[i] = p1->params[i];
2810 if (p2->params && p2->params[i] != TRE_PARAM_UNSET)
2811 trans->params[i] = p2->params[i];
2812 }
2813 }
2814 else
2815 {
2816 if (trans->params)
2817 xfree(trans->params);
2818 trans->params = NULL;
2819 }
2820
2821
2822 #ifdef TRE_DEBUG
2823 {
2824 int *tags;
2825
2826 DPRINT((" %2d -> %2d on %3d", p1->position, p2->position,
2827 p1->code_min));
2828 if (p1->code_max != p1->code_min)
2829 DPRINT(("-%3d", p1->code_max));
2830 tags = trans->tags;
2831 if (tags)
2832 {
2833 DPRINT((", tags ["));
2834 while (*tags >= 0)
2835 {
2836 DPRINT(("%d", *tags));
2837 tags++;
2838 if (*tags >= 0)
2839 DPRINT((","));
2840 }
2841 DPRINT(("]"));
2842 }
2843 if (trans->assertions)
2844 DPRINT((", assert %d", trans->assertions));
2845 if (trans->assertions & ASSERT_BACKREF)
2846 DPRINT((", backref %d", trans->u.backref));
2847 else if (trans->assertions & ASSERT_BRACKET_MATCH)
2848 DPRINT((", bracket_match_list %p",
2849 trans->u.bracket_match_list));
2850 if (trans->params)
2851 {
2852 DPRINT((", "));
2853 tre_print_params(trans->params);
2854 }
2855 DPRINT(("\n"));
2856 }
2857 #endif /* TRE_DEBUG */
2858 p2++;
2859 }
2860 p1++;
2861 }
2862 else
2863 /* Compute a maximum limit for the number of transitions leaving
2864 from each state. */
2865 while (p1->position >= 0)
2866 {
2867 p2 = orig_p2;
2868 while (p2->position >= 0)
2869 {
2870 counts[p1->position]++;
2871 p2++;
2872 }
2873 p1++;
2874 }
2875 return REG_OK;
2876 }
2877
2878 /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
2879 labelled with one character range (there are no transitions on empty
2880 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
2881 the regexp. */
2882 static reg_errcode_t
2883 tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
2884 int *counts, int *offs)
2885 {
2886 tre_union_t *uni;
2887 tre_catenation_t *cat;
2888 tre_iteration_t *iter;
2889 reg_errcode_t errcode = REG_OK;
2890
2891 /* XXX - recurse using a stack!. */
2892 switch (node->type)
2893 {
2894 case LITERAL:
2895 break;
2896 case UNION:
2897 uni = (tre_union_t *)node->obj;
2898 errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
2899 if (errcode != REG_OK)
2900 return errcode;
2901 errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
2902 break;
2903
2904 case CATENATION:
2905 cat = (tre_catenation_t *)node->obj;
2906 /* Add a transition from each position in cat->left->lastpos
2907 to each position in cat->right->firstpos. */
2908 errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
2909 transitions, counts, offs);
2910 if (errcode != REG_OK)
2911 return errcode;
2912 errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
2913 if (errcode != REG_OK)
2914 return errcode;
2915 errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
2916 break;
2917
2918 case ITERATION:
2919 iter = (tre_iteration_t *)node->obj;
2920 assert(iter->max == -1 || iter->max == 1);
2921
2922 if (iter->max == -1)
2923 {
2924 assert(iter->min == 0 || iter->min == 1);
2925 /* Add a transition from each last position in the iterated
2926 expression to each first position. */
2927 errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
2928 transitions, counts, offs);
2929 if (errcode != REG_OK)
2930 return errcode;
2931 }
2932 errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
2933 break;
2934 }
2935 return errcode;
2936 }
2937
2938
/* Stores `err' in the local variable `errcode' and jumps to the label
   `error_exit'; both must exist in the function using the macro.  The
   do/while(0) wrapper lets the macro be used like a single statement.
   NOTE(review): the constant `if' around the goto is presumably there to
   silence unreachable-code diagnostics (see the CONSTCOND markers). */
#define ERROR_EXIT(err)                 \
  do                                    \
    {                                   \
      errcode = (err);                  \
      if (/*CONSTCOND*/1)               \
        goto error_exit;                \
    }                                   \
  while (/*CONSTCOND*/0)
2947
2948
2949 int
2950 tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags,
2951 locale_t loc)
2952 {
2953 tre_stack_t *stack;
2954 tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
2955 tre_pos_and_tags_t *p;
2956 int *counts = NULL, *offs = NULL;
2957 int i, add = 0;
2958 tre_tnfa_transition_t *transitions, *initial;
2959 tre_tnfa_t *tnfa = NULL;
2960 tre_submatch_data_t *submatch_data = NULL;
2961 tre_tag_direction_t *tag_directions = NULL;
2962 reg_errcode_t errcode;
2963 tre_mem_t mem;
2964
2965 /* Parse context. */
2966 tre_parse_ctx_t parse_ctx;
2967
2968 /* Allocate a stack used throughout the compilation process for various
2969 purposes. */
2970 stack = tre_stack_new(512, 10240, 128);
2971 if (!stack)
2972 return REG_ESPACE;
2973 /* Allocate a fast memory allocator. */
2974 mem = tre_mem_new();
2975 if (!mem)
2976 {
2977 tre_stack_destroy(stack);
2978 return REG_ESPACE;
2979 }
2980
2981 /* Parse the regexp. */
2982 memset(&parse_ctx, 0, sizeof(parse_ctx));
2983 parse_ctx.mem = mem;
2984 parse_ctx.stack = stack;
2985 parse_ctx.re = regex;
2986 parse_ctx.len = n;
2987 /* Only allow REG_UNGREEDY to be set if both REG_ENHANCED and REG_EXTENDED
2988 are also set */
2989 if ((cflags & (REG_ENHANCED | REG_EXTENDED)) != (REG_ENHANCED | REG_EXTENDED))
2990 cflags &= ~REG_UNGREEDY;
2991 parse_ctx.cflags = cflags;
2992 parse_ctx.max_backref = -1;
2993 parse_ctx.loc = loc;
2994 parse_ctx.submatch_id_invisible = SUBMATCH_ID_INVISIBLE_START;
2995
2996 DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex));
2997 errcode = tre_parse(&parse_ctx);
2998 if (errcode != REG_OK)
2999 ERROR_EXIT(errcode);
3000 preg->re_nsub = parse_ctx.submatch_id - 1;
3001 tree = parse_ctx.result;
3002
3003 /* Back references and approximate matching cannot currently be used
3004 in the same regexp. */
3005 if (parse_ctx.max_backref >= 0 && parse_ctx.have_approx)
3006 ERROR_EXIT(REG_BADPAT);
3007
3008 #ifdef TRE_DEBUG
3009 tre_ast_print(tree);
3010 #endif /* TRE_DEBUG */
3011
3012 /* Referring to nonexistent subexpressions is illegal. */
3013 if (parse_ctx.max_backref > (int)preg->re_nsub)
3014 ERROR_EXIT(REG_ESUBREG);
3015
3016 /* Allocate the TNFA struct. */
3017 tnfa = xcalloc(1, sizeof(tre_tnfa_t));
3018 if (tnfa == NULL)
3019 ERROR_EXIT(REG_ESPACE);
3020 tnfa->have_backrefs = parse_ctx.max_backref >= 0;
3021 tnfa->have_approx = parse_ctx.have_approx;
3022 tnfa->num_submatches = parse_ctx.submatch_id;
3023 tnfa->num_submatches_invisible = parse_ctx.submatch_id_invisible
3024 - SUBMATCH_ID_INVISIBLE_START;
3025 tnfa->num_reorder_tags = parse_ctx.num_reorder_tags;
3026 tnfa->loc = parse_ctx.loc;
3027
3028 /* Set up tags for submatch addressing. If REG_NOSUB is set and the
3029 regexp does not have back references, this can be skipped. */
3030 if (tnfa->num_reorder_tags > 0 || !(cflags & REG_NOSUB))
3031 {
3032 DPRINT(("tre_compile: setting up tags\n"));
3033
3034 /* Figure out how many tags we will need. */
3035 errcode = tre_add_tags(NULL, stack, tree, tnfa);
3036 if (errcode != REG_OK)
3037 ERROR_EXIT(errcode);
3038 #ifdef TRE_DEBUG
3039 tre_ast_print(tree);
3040 #endif /* TRE_DEBUG */
3041
3042 if (tnfa->num_tags > 0)
3043 {
3044 tag_directions = xmalloc(sizeof(*tag_directions)
3045 * (tnfa->num_tags + 1));
3046 if (tag_directions == NULL)
3047 ERROR_EXIT(REG_ESPACE);
3048 tnfa->tag_directions = tag_directions;
3049 memset(tag_directions, -1,
3050 sizeof(*tag_directions) * (tnfa->num_tags + 1));
3051 }
3052 tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 3,
3053 sizeof(*tnfa->minimal_tags));
3054 if (tnfa->minimal_tags == NULL)
3055 ERROR_EXIT(REG_ESPACE);
3056
3057 submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
3058 sizeof(*submatch_data));
3059 if (submatch_data == NULL)
3060 ERROR_EXIT(REG_ESPACE);
3061 /* Set the eo_tag value to -1 to indicate that that corresponding
3062 * submatch has not be completed yet */
3063 for (i = 0; i < parse_ctx.submatch_id; i++)
3064 {
3065 submatch_data[i].eo_tag = -1;
3066 }
3067 tnfa->submatch_data = submatch_data;
3068
3069 errcode = tre_add_tags(mem, stack, tree, tnfa);
3070 if (errcode != REG_OK)
3071 ERROR_EXIT(errcode);
3072
3073 #ifdef TRE_DEBUG
3074 for (i = 0; i < parse_ctx.submatch_id; i++)
3075 DPRINT(("pmatch[%d] = {t%d, t%d}\n",
3076 i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
3077 for (i = 0; i <= tnfa->num_tags; i++)
3078 DPRINT(("t%d is %s\n", i, tag_dir_str[tag_directions[i]]));
3079 #endif /* TRE_DEBUG */
3080 }
3081
3082 /* Expand iteration nodes. */
3083 errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
3084 tag_directions, &tnfa->params_depth);
3085 if (errcode != REG_OK)
3086 ERROR_EXIT(errcode);
3087
3088 /* Add a dummy node for the final state.
3089 XXX - For certain patterns this dummy node can be optimized away,
3090 for example "a*" or "ab*". Figure out a simple way to detect
3091 this possibility. */
3092 tmp_ast_l = tree;
3093 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
3094 if (tmp_ast_r == NULL)
3095 ERROR_EXIT(REG_ESPACE);
3096
3097 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
3098 if (tree == NULL)
3099 ERROR_EXIT(REG_ESPACE);
3100
3101 #ifdef TRE_DEBUG
3102 tre_ast_print(tree);
3103 DPRINT(("Number of states: %d\n", parse_ctx.position));
3104 if (submatch_data)
3105 for (i = 0; i < parse_ctx.submatch_id; i++)
3106 DPRINT(("pmatch[%d] = {t%d, t%d}\n",
3107 i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
3108 if (tag_directions)
3109 for (i = 0; i <= tnfa->num_tags; i++)
3110 DPRINT(("t%d is %s\n", i, tag_dir_str[tag_directions[i]]));
3111 #endif /* TRE_DEBUG */
3112
3113 errcode = tre_compute_nfl(mem, stack, tree);
3114 if (errcode != REG_OK)
3115 ERROR_EXIT(errcode);
3116
3117 counts = xmalloc(sizeof(int) * parse_ctx.position);
3118 if (counts == NULL)
3119 ERROR_EXIT(REG_ESPACE);
3120
3121 offs = xmalloc(sizeof(int) * parse_ctx.position);
3122 if (offs == NULL)
3123 ERROR_EXIT(REG_ESPACE);
3124
3125 for (i = 0; i < parse_ctx.position; i++)
3126 counts[i] = 0;
3127 tre_ast_to_tnfa(tree, NULL, counts, NULL);
3128
3129 add = 0;
3130 for (i = 0; i < parse_ctx.position; i++)
3131 {
3132 offs[i] = add;
3133 add += counts[i] + 1;
3134 counts[i] = 0;
3135 }
3136 transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
3137 if (transitions == NULL)
3138 ERROR_EXIT(REG_ESPACE);
3139 tnfa->transitions = transitions;
3140 tnfa->num_transitions = add;
3141
3142 DPRINT(("Converting to TNFA:\n"));
3143 errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
3144 if (errcode != REG_OK)
3145 ERROR_EXIT(errcode);
3146
3147 #ifdef USE_FIRSTPOS_CHARS /* not defined */
3148 /* If in eight bit mode, compute a table of characters that can be the
3149 first character of a match. */
3150 tnfa->first_char = -1;
3151 if (TRE_MB_CUR_MAX_L(tnfa->loc) == 1 && !tmp_ast_l->nullable)
3152 {
3153 int count = 0;
3154 tre_cint_t k;
3155 DPRINT(("Characters that can start a match:"));
3156 tnfa->firstpos_chars = xcalloc(256, sizeof(char));
3157 if (tnfa->firstpos_chars == NULL)
3158 ERROR_EXIT(REG_ESPACE);
3159 for (p = tree->firstpos; p->position >= 0; p++)
3160 {
3161 tre_tnfa_transition_t *j = transitions + offs[p->position];
3162 while (j->state != NULL)
3163 {
3164 for (k = j->code_min; k <= j->code_max && k < 256; k++)
3165 {
3166 DPRINT((" %d", k));
3167 tnfa->firstpos_chars[k] = 1;
3168 count++;
3169 }
3170 j++;
3171 }
3172 }
3173 DPRINT(("\n"));
3174 #define TRE_OPTIMIZE_FIRST_CHAR 1
3175 #if TRE_OPTIMIZE_FIRST_CHAR
3176 if (count == 1)
3177 {
3178 for (k = 0; k < 256; k++)
3179 if (tnfa->firstpos_chars[k])
3180 {
3181 DPRINT(("first char must be %d\n", k));
3182 tnfa->first_char = k;
3183 xfree(tnfa->firstpos_chars);
3184 tnfa->firstpos_chars = NULL;
3185 break;
3186 }
3187 }
3188 #endif
3189
3190 }
3191 else
3192 tnfa->firstpos_chars = NULL;
3193 #else /* !USE_FIRSTPOS_CHARS */
3194
3195 /* Set first_char only if there is only one character that can be the
3196 first character of a match */
3197 tnfa->first_char = -1;
3198 if (!tmp_ast_l->nullable)
3199 {
3200 int scanning = 1;
3201 for (p = tree->firstpos; scanning && p->position >= 0; p++)
3202 {
3203 tre_tnfa_transition_t *j = transitions + offs[p->position];
3204 while (j->state != NULL)
3205 {
3206 if (j->code_min <= j->code_max)
3207 {
3208 if (j->code_max != j->code_min || j->code_min == -1 || tnfa->first_char != -1)
3209 {
3210 tnfa->first_char = -1;
3211 scanning = 0;
3212 break;
3213 }
3214 tnfa->first_char = j->code_min;
3215 }
3216 j++;
3217 }
3218 }
3219 #ifdef TRE_DEBUG
3220 if (tnfa->first_char >= 0)
3221 DPRINT(("first char must be %d\n", tnfa->first_char));
3222 #endif /* TRE_DEBUG */
3223 }
3224 #endif /* !USE_FIRSTPOS_CHARS */
3225
3226 p = tree->firstpos;
3227 i = 0;
3228 while (p->position >= 0)
3229 {
3230 i++;
3231
3232 #ifdef TRE_DEBUG
3233 {
3234 int *tags;
3235 DPRINT(("initial: %d", p->position));
3236 tags = p->tags;
3237 if (tags != NULL)
3238 {
3239 if (*tags >= 0)
3240 DPRINT(("/"));
3241 while (*tags >= 0)
3242 {
3243 DPRINT(("%d", *tags));
3244 tags++;
3245 if (*tags >= 0)
3246 DPRINT((","));
3247 }
3248 }
3249 DPRINT((", assert %d", p->assertions));
3250 if (p->params)
3251 {
3252 DPRINT((", "));
3253 tre_print_params(p->params);
3254 }
3255 DPRINT(("\n"));
3256 }
3257 #endif /* TRE_DEBUG */
3258
3259 p++;
3260 }
3261
3262 initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
3263 if (initial == NULL)
3264 ERROR_EXIT(REG_ESPACE);
3265 tnfa->initial = initial;
3266
3267 i = 0;
3268 for (p = tree->firstpos; p->position >= 0; p++)
3269 {
3270 initial[i].state = transitions + offs[p->position];
3271 initial[i].state_id = p->position;
3272 initial[i].tags = NULL;
3273 /* Copy the arrays p->tags, and p->params, they are allocated
3274 from a tre_mem object. */
3275 if (p->tags)
3276 {
3277 int j;
3278 for (j = 0; p->tags[j] >= 0; j++);
3279 initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
3280 if (!initial[i].tags)
3281 ERROR_EXIT(REG_ESPACE);
3282 memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
3283 }
3284 initial[i].params = NULL;
3285 if (p->params)
3286 {
3287 initial[i].params = xmalloc(sizeof(*p->params) * TRE_PARAM_LAST);
3288 if (!initial[i].params)
3289 ERROR_EXIT(REG_ESPACE);
3290 memcpy(initial[i].params, p->params,
3291 sizeof(*p->params) * TRE_PARAM_LAST);
3292 }
3293 initial[i].assertions = p->assertions;
3294 i++;
3295 }
3296 initial[i].state = NULL;
3297
3298 tnfa->num_transitions = add;
3299 tnfa->final = transitions + offs[tree->lastpos[0].position];
3300 tnfa->num_states = parse_ctx.position;
3301 tnfa->cflags = cflags;
3302
3303 DPRINT(("final state %d (%p)\n", tree->lastpos[0].position,
3304 (void *)tnfa->final));
3305
3306 tre_mem_destroy(mem);
3307 tre_stack_destroy(stack);
3308 xfree(counts);
3309 xfree(offs);
3310
3311 #ifdef TRE_USE_SYSTEM_REGEX_H
3312 preg->re_magic = RE_MAGIC;
3313 #endif /* TRE_USE_SYSTEM_REGEX_H */
3314 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
3315 #ifdef __LIBC__
3316 /* In Libc, we need to retain the locale. Outside Libc, we already called
3317 duplocale() which does the retaining. */
3318 XL_RETAIN(tnfa->loc);
3319 #endif /* __LIBC__ */
3320 return REG_OK;
3321
3322 error_exit:
3323 /* Free everything that was allocated and return the error code. */
3324 tre_mem_destroy(mem);
3325 if (stack != NULL)
3326 tre_stack_destroy(stack);
3327 if (counts != NULL)
3328 xfree(counts);
3329 if (offs != NULL)
3330 xfree(offs);
3331
3332 /* Set tnfa into preg, so that calling tre_free() will free the contents
3333 of tnfa. But in Libc, NULL out the loc field since we never retained
3334 the locale. Outside Libc, we let tre_free() call freelocale(). */
3335 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
3336 #ifdef __LIBC__
3337 if(tnfa) tnfa->loc = NULL;
3338 #endif /* __LIBC__ */
3339
3340 tre_free(preg);
3341 return errcode;
3342 }
3343
3344
3345
3346
3347 void
3348 tre_free(regex_t *preg)
3349 {
3350 tre_tnfa_t *tnfa;
3351 unsigned int i;
3352 tre_tnfa_transition_t *trans;
3353
3354 #ifdef TRE_USE_SYSTEM_REGEX_H
3355 preg->re_magic = 0;
3356 #endif /* TRE_USE_SYSTEM_REGEX_H */
3357 tnfa = (void *)preg->TRE_REGEX_T_FIELD;
3358 if (!tnfa)
3359 return;
3360 preg->TRE_REGEX_T_FIELD = NULL;
3361
3362 for (i = 0; i < tnfa->num_transitions; i++)
3363 if (tnfa->transitions[i].state)
3364 {
3365 if (tnfa->transitions[i].tags)
3366 xfree(tnfa->transitions[i].tags);
3367 if (tnfa->transitions[i].assertions & ASSERT_BRACKET_MATCH)
3368 xfree(tnfa->transitions[i].u.bracket_match_list);
3369 if (tnfa->transitions[i].params)
3370 xfree(tnfa->transitions[i].params);
3371 }
3372 if (tnfa->transitions)
3373 xfree(tnfa->transitions);
3374
3375 if (tnfa->initial)
3376 {
3377 for (trans = tnfa->initial; trans->state; trans++)
3378 {
3379 if (trans->tags)
3380 xfree(trans->tags);
3381 if (trans->params)
3382 xfree(trans->params);
3383 }
3384 xfree(tnfa->initial);
3385 }
3386
3387 if (tnfa->submatch_data)
3388 {
3389 xfree(tnfa->submatch_data);
3390 }
3391
3392 if (tnfa->tag_directions)
3393 xfree(tnfa->tag_directions);
3394 #ifdef USE_FIRSTPOS_CHARS /* not defined */
3395 if (tnfa->firstpos_chars)
3396 xfree(tnfa->firstpos_chars);
3397 #endif /* USE_FIRSTPOS_CHARS */
3398 if (tnfa->minimal_tags)
3399 xfree(tnfa->minimal_tags);
3400
3401 if (tnfa->loc)
3402 #ifdef __LIBC__
3403 XL_RELEASE(tnfa->loc);
3404 #else /* !__LIBC__ */
3405 freelocale(tnfa->loc);
3406 #endif /* !__LIBC__ */
3407
3408 if (tnfa->last_matched_branch)
3409 xfree(tnfa->last_matched_branch);
3410
3411 xfree(tnfa);
3412 }
3413
3414 #ifndef __LIBC__
3415 char *
3416 tre_version(void)
3417 {
3418 static char str[256];
3419 char *version;
3420
3421 if (str[0] == 0)
3422 {
3423 (void) tre_config(TRE_CONFIG_VERSION, &version);
3424 (void) snprintf(str, sizeof(str), "TRE %s (BSD)", version);
3425 }
3426 return str;
3427 }
3428
3429 int
3430 tre_config(int query, void *result)
3431 {
3432 int *int_result = result;
3433 const char **string_result = result;
3434
3435 switch (query)
3436 {
3437 case TRE_CONFIG_APPROX:
3438 #ifdef TRE_APPROX
3439 *int_result = 1;
3440 #else /* !TRE_APPROX */
3441 *int_result = 0;
3442 #endif /* !TRE_APPROX */
3443 return REG_OK;
3444
3445 case TRE_CONFIG_WCHAR:
3446 #ifdef TRE_WCHAR
3447 *int_result = 1;
3448 #else /* !TRE_WCHAR */
3449 *int_result = 0;
3450 #endif /* !TRE_WCHAR */
3451 return REG_OK;
3452
3453 case TRE_CONFIG_MULTIBYTE:
3454 #ifdef TRE_MULTIBYTE
3455 *int_result = 1;
3456 #else /* !TRE_MULTIBYTE */
3457 *int_result = 0;
3458 #endif /* !TRE_MULTIBYTE */
3459 return REG_OK;
3460
3461 case TRE_CONFIG_SYSTEM_ABI:
3462 #ifdef TRE_CONFIG_SYSTEM_ABI
3463 *int_result = 1;
3464 #else /* !TRE_CONFIG_SYSTEM_ABI */
3465 *int_result = 0;
3466 #endif /* !TRE_CONFIG_SYSTEM_ABI */
3467 return REG_OK;
3468
3469 case TRE_CONFIG_VERSION:
3470 *string_result = TRE_VERSION;
3471 return REG_OK;
3472 }
3473
3474 return REG_NOMATCH;
3475 }
3476 #endif /* !__LIBC__ */
3477
3478
3479 /* EOF */