2 tre-compile.c - TRE regex compiler
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
11 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
18 #endif /* HAVE_CONFIG_H */
24 #include "tre-internal.h"
26 #include "tre-stack.h"
28 #include "tre-parse.h"
29 #include "tre-compile.h"
31 #include "tre-last-matched.h"
34 #pragma clang diagnostic push
35 #pragma clang diagnostic ignored "-Wunreachable-code"
38 The bit_ffs() macro in bitstring.h is flawed. Replace it with a working one.
41 #define bit_ffs(name, nbits, value) { \
42 register bitstr_t *_name = name; \
43 register int _byte, _nbits = nbits; \
44 register int _stopbyte = _bit_byte(_nbits), _value = -1; \
45 for (_byte = 0; _byte <= _stopbyte; ++_byte) \
47 _value = _byte << 3; \
48 for (_stopbyte = _name[_byte]; !(_stopbyte&0x1); \
49 ++_value, _stopbyte >>= 1); \
56 Algorithms to setup tags so that submatch addressing can be done.
61 static const char *tag_dir_str
[] = {
67 static const char _indent
[] = " ";
70 print_indent(int indent
)
76 static void print_last_matched_pre(tre_last_matched_pre_t
*lm
, int indent
,
79 print_last_match_branch_pre(tre_last_matched_branch_pre_t
*branch
, int indent
,
82 tre_last_matched_pre_t
*u
= branch
->last_matched
;
83 int n_last_matched
= 0;
92 DPRINT(("BRANCH: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
93 branch
->tot_branches
, branch
->tot_last_matched
, branch
->tot_tags
));
95 DPRINT(("..n_last_matched=%d last_matched=%d\n", branch
->n_last_matched
,
97 if (branch
->n_last_matched
!= n_last_matched
)
98 DPRINT(("*** mismatch between n_last_matched and unions ***\n"));
99 if (branch
->cmp_tag
> 0)
102 const char *sep
= " tags=";
103 print_indent(indent
);
104 DPRINT(("..cmp_tag=%d n_tags=%d", branch
->cmp_tag
, branch
->n_tags
));
105 for (i
= 0; i
< num_tags
; i
++)
106 if (bit_test(branch
->tags
, i
))
108 DPRINT(("%s%d", sep
, i
));
114 u
= branch
->last_matched
;
118 print_last_matched_pre(u
, indent
, num_tags
);
124 print_last_matched_pre(tre_last_matched_pre_t
*lm
, int indent
, int num_tags
)
126 tre_last_matched_branch_pre_t
*b
= lm
->branches
;
135 print_indent(indent
);
136 DPRINT(("LAST_MATCHED: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
137 lm
->tot_branches
, lm
->tot_last_matched
, lm
->tot_tags
));
138 print_indent(indent
);
139 DPRINT(("..start_tag=%d n_branches=%d branches=%d\n", lm
->start_tag
,
140 lm
->n_branches
, n_branches
));
141 if (lm
->n_branches
!= n_branches
)
142 DPRINT(("*** mismatch between n and branches ***\n"));
148 print_last_match_branch_pre(b
, indent
, num_tags
);
153 static void print_last_matched(tre_last_matched_t
*lm
, int indent
);
155 print_last_match_branch(tre_last_matched_branch_t
*branch
, int indent
)
157 tre_last_matched_t
*u
;
160 print_indent(indent
);
161 DPRINT(("BRANCH: n_last_matched=%d\n", branch
->n_last_matched
));
162 if (branch
->cmp_tag
> 0)
164 print_indent(indent
);
165 DPRINT(("..cmp_tag=%d n_tags=%d", branch
->cmp_tag
, branch
->n_tags
));
166 if (branch
->n_tags
> 0)
168 const char *sep
= " tags=";
169 for (i
= 0; i
< branch
->n_tags
; i
++)
171 DPRINT(("%s%d", sep
, branch
->tags
[i
]));
178 u
= branch
->last_matched
;
180 for (i
= branch
->n_last_matched
; i
> 0; i
--, u
++)
181 print_last_matched(u
, indent
);
185 print_last_matched(tre_last_matched_t
*lm
, int indent
)
188 tre_last_matched_branch_t
*b
;
190 print_indent(indent
);
191 DPRINT(("LAST_MATCHED: n_branches=%d start_tag=%d\n", lm
->n_branches
,
196 for (i
= lm
->n_branches
; i
> 0; i
--, b
++)
197 print_last_match_branch(b
, indent
);
199 #endif /* TRE_DEBUG */
202 /* Merge the tre_last_matched_branch_pre_t of src into dst, creating a new
203 one if needed. If tag_id > 0, add that tag as well (a negative tag_id will
204 create an unset tre_last_matched_branch_pre_t. */
206 tre_merge_branches(tre_mem_t mem
, tre_ast_node_t
*dst
, tre_ast_node_t
*src
,
207 int tag_id
, int num_tags
)
209 tre_last_matched_branch_pre_t
*db
= dst
->last_matched_branch
;
210 tre_last_matched_branch_pre_t
*sb
= (src
? src
->last_matched_branch
: NULL
);
216 bitstr_t
*l
= db
->tags
;
217 bitstr_t
*r
= sb
->tags
;
218 int i
= bitstr_size(num_tags
);
222 /* db and sb are the info from two parallel sub-trees, so the tags
223 must be mutually exclusive, and we can just add their numbers */
224 db
->n_tags
+= sb
->n_tags
;
225 db
->tot_tags
+= sb
->tot_tags
;
226 if (db
->last_matched
)
228 if (sb
->last_matched
)
230 tre_last_matched_pre_t
*u
= db
->last_matched
;
234 u
->next
= sb
->last_matched
;
235 db
->n_last_matched
+= sb
->n_last_matched
;
236 db
->tot_branches
+= sb
->tot_branches
;
237 db
->tot_last_matched
+= sb
->tot_last_matched
;
240 else if (sb
->last_matched
)
242 db
->last_matched
= sb
->last_matched
;
243 db
->n_last_matched
= sb
->n_last_matched
;
244 db
->tot_branches
= sb
->tot_branches
;
245 db
->tot_last_matched
= sb
->tot_last_matched
;
256 db
= tre_mem_calloc(mem
, sizeof(tre_last_matched_branch_pre_t
)
257 + bitstr_size(num_tags
));
260 db
->tot_branches
= 1;
264 /* tag_id is a new tag, and shouldn't exist in db's tags,
265 so we can always increment n_tags */
266 bit_set(db
->tags
, tag_id
);
271 dst
->last_matched_branch
= db
;
276 /* Inserts a catenation node to the root of the tree given in `node'.
277 As the left child a new tag with number `tag_id' to `node' is added,
278 and the right child is the old root. */
280 tre_add_tag_left(tre_mem_t mem
, tre_ast_node_t
*node
, int tag_id
)
284 DPRINT(("add_tag_left: tag %d\n", tag_id
));
286 c
= tre_mem_alloc(mem
, sizeof(*c
));
289 c
->left
= tre_ast_new_literal(mem
, TAG
, tag_id
, -1);
292 c
->right
= tre_mem_calloc(mem
, sizeof(tre_ast_node_t
));
293 if (c
->right
== NULL
)
296 c
->right
->obj
= node
->obj
;
297 c
->right
->type
= node
->type
;
298 c
->right
->last_matched_branch
= node
->last_matched_branch
;
299 c
->right
->nullable
= -1;
300 c
->right
->submatch_id
= -1;
302 node
->type
= CATENATION
;
303 node
->original
= c
->right
;
307 /* Inserts a catenation node to the root of the tree given in `node'.
308 As the right child a new tag with number `tag_id' to `node' is added,
309 and the left child is the old root. */
311 tre_add_tag_right(tre_mem_t mem
, tre_ast_node_t
*node
, int tag_id
)
315 DPRINT(("tre_add_tag_right: tag %d\n", tag_id
));
317 c
= tre_mem_alloc(mem
, sizeof(*c
));
320 c
->right
= tre_ast_new_literal(mem
, TAG
, tag_id
, -1);
321 if (c
->right
== NULL
)
323 c
->left
= tre_mem_calloc(mem
, sizeof(tre_ast_node_t
));
327 c
->left
->obj
= node
->obj
;
328 c
->left
->type
= node
->type
;
329 c
->left
->last_matched_branch
= node
->last_matched_branch
;
330 c
->left
->nullable
= -1;
331 c
->left
->submatch_id
= -1;
333 node
->type
= CATENATION
;
334 node
->original
= c
->left
;
340 ADDTAGS_RECURSE_NOT_TOP_UNION
,
341 ADDTAGS_AFTER_ITERATION
,
342 ADDTAGS_AFTER_UNION_LEFT
,
343 ADDTAGS_AFTER_UNION_RIGHT
,
344 ADDTAGS_AFTER_CAT_LEFT
,
345 ADDTAGS_AFTER_CAT_RIGHT
,
346 ADDTAGS_SET_SUBMATCH_END
,
347 ADDTAGS_UNION_RECURSE
,
348 ADDTAGS_UNION_RIGHT_RECURSE
,
349 ADDTAGS_AFTER_UNION_TOP
,
350 } tre_addtags_symbol_t
;
353 COPY_LAST_MATCHED_BRANCH
,
354 COPY_LAST_MATCHED_BRANCH_NEXT
,
356 COPY_LAST_MATCHED_NEXT
,
360 #define REGSET_UNSET ((unsigned)-1)
362 /* Go through `regset' and set submatch data for submatches that are
365 tre_purge_regset(unsigned *regset
, tre_tnfa_t
*tnfa
, int tag
)
369 for (i
= 0; regset
[i
] != REGSET_UNSET
; i
++)
371 int id
= regset
[i
] / 2;
372 int start
= !(regset
[i
] % 2);
373 if (id
>= SUBMATCH_ID_INVISIBLE_START
)
375 DPRINT((" Using tag %d for %s offset of "
376 "submatch %d\n", tag
,
377 start
? "start" : "end", id
));
379 tnfa
->submatch_data
[id
].so_tag
= tag
;
381 tnfa
->submatch_data
[id
].eo_tag
= tag
;
387 #define REGSET_HAS_STARTS 0x1
388 #define REGSET_HAS_ENDS 0x2
391 /* Adds tags to appropriate locations in the parse tree in `tree', so that
392 subexpressions marked for submatch addressing can be traced. */
394 tre_add_tags(tre_mem_t mem
, tre_stack_t
*stack
, tre_ast_node_t
*tree
,
397 reg_errcode_t status
= REG_OK
;
398 tre_addtags_symbol_t symbol
;
399 tre_ast_node_t
*node
= tree
; /* Tree node we are currently looking at. */
400 int bottom
= tre_stack_num_objects(stack
);
401 /* True for first pass (counting number of needed tags) */
402 int first_pass
= (mem
== NULL
|| tnfa
== NULL
);
403 unsigned *regset
, *orig_regset
;
404 int regset_contains
= 0;
405 int num_tags
= 0; /* Total number of tags. */
406 int num_minimals
= 0; /* Number of special minimal tags. */
407 int tag
= 0; /* The tag that is to be added next. */
408 int next_tag
= 1; /* Next tag to use after this one. */
409 int minimal_tag
= -1; /* Tag that marks the beginning of a minimal match. */
410 int *reorder_tags
= NULL
; /* Tag reorder array: a pair for each reorder,
411 * the first is the tag to reorder, the second
412 * is the tag after which the first is reordered */
413 int *rtp
; /* Pointer used to fill in reorder_tags and
415 int *to_reorder
; /* Transform array converting sequential order to
416 * that specified by reorder_tags */
419 tre_tag_direction_t direction
= TRE_TAG_LEFT_MAXIMIZE
;
422 DPRINT(("Initializing direction to %s\n", tag_dir_str
[direction
]));
424 tnfa
->minimal_tags
[0] = -1;
427 regset
= xmalloc(sizeof(*regset
) * ((tnfa
->num_submatches
428 + tnfa
->num_submatches_invisible
+ 1) * 2));
434 regset
[0] = REGSET_UNSET
;
435 orig_regset
= regset
;
439 /* Allocate all memory for reorder_tags, tag_order, to_seq_order and
440 * to_reorder in one batch (assuming all are the same type) */
441 rtp
= reorder_tags
= xmalloc(sizeof(*reorder_tags
) *
442 ((2 * tnfa
->num_reorder_tags
+ 1) +
444 if (reorder_tags
== NULL
)
447 goto error_reorder_tags
;
449 to_reorder
= reorder_tags
+ (2 * tnfa
->num_reorder_tags
+ 1);
452 STACK_PUSH(stack
, voidptr
, node
);
453 STACK_PUSH(stack
, int, ADDTAGS_RECURSE
);
455 while (tre_stack_num_objects(stack
) > bottom
)
457 if (status
!= REG_OK
)
460 symbol
= (tre_addtags_symbol_t
)tre_stack_pop_int(stack
);
465 case ADDTAGS_SET_SUBMATCH_END
:
469 id
= tre_stack_pop_int(stack
);
470 node
= tre_stack_pop_voidptr(stack
);
471 /* Add end of this submatch to regset. */
472 for (i
= 0; regset
[i
] != REGSET_UNSET
; i
++);
473 regset
[i
] = id
* 2 + 1;
475 regset_contains
|= REGSET_HAS_ENDS
;
477 /* Always put a tag after a minimal iterator. */
478 if (minimal_tag
>= 0)
483 DPRINT((" ADDTAGS_SET_SUBMATCH_END: node->num_tags = %d\n",
489 status
= tre_merge_branches(mem
, node
, NULL
, tag
,
491 if (status
!= REG_OK
)
493 status
= tre_add_tag_right(mem
, node
, tag
);
494 if (status
!= REG_OK
)
496 tnfa
->tag_directions
[tag
] = TRE_TAG_MINIMIZE
;
497 DPRINT(("Setting t%d direction to %s\n", tag
,
498 tag_dir_str
[tnfa
->tag_directions
[tag
]]));
499 DPRINT(("Minimal %d, %d\n", minimal_tag
, tag
));
500 for (i
= 0; tnfa
->minimal_tags
[i
] >= 0; i
++);
501 tnfa
->minimal_tags
[i
] = tag
;
502 tnfa
->minimal_tags
[i
+ 1] = minimal_tag
;
503 tnfa
->minimal_tags
[i
+ 2] = -1;
505 DPRINT((" Minimal end: t%d reordered to "
506 "after t%d\n", tag
, minimal_tag
));
507 /* Append to tag_order, move "tag" after
510 *rtp
++ = minimal_tag
;
513 tre_purge_regset(regset
, tnfa
, tag
);
517 DPRINT((" ADDTAGS_SET_SUBMATCH_END num_tags++ tag=%d\n", tag
));
518 regset
[0] = REGSET_UNSET
;
527 case ADDTAGS_RECURSE_NOT_TOP_UNION
:
528 /* Like ADDTAGS_RECURSE, except that top_union is set to zero,
529 * indicating that if a union is being processed, it is not the
530 * top-most of a series */
532 goto do_addtags_recurse
;
534 case ADDTAGS_RECURSE
:
535 /* Setting top_union to 1 means that if a union is begin processed,
536 * it is the top-most of a series, and should recurse through the
537 * series to set the left_tag and right_tag values */
541 node
= tre_stack_pop_voidptr(stack
);
543 id
= node
->submatch_id
;
549 /* Add start of this submatch to regset. */
550 for (i
= 0; regset
[i
] != REGSET_UNSET
; i
++);
553 regset_contains
|= REGSET_HAS_STARTS
;
555 /* Add end of this submatch to regset after processing this
557 STACK_PUSH(stack
, voidptr
, node
);
558 STACK_PUSHX(stack
, int, id
);
559 STACK_PUSHX(stack
, int, ADDTAGS_SET_SUBMATCH_END
);
566 tre_literal_t
*lit
= node
->obj
;
568 if (!IS_SPECIAL(lit
) || IS_BACKREF(lit
) || IS_EMPTY(lit
) || IS_ASSERTION(lit
))
570 DPRINT(("Literal %d-%d\n",
571 (int)lit
->code_min
, (int)lit
->code_max
));
574 /* Regset is not empty, so add a tag before the
575 literal or backref. */
578 DPRINT((" ADDTAGS_RECURSE:LITERAL node->num_tags = 1\n"));
583 status
= tre_merge_branches(mem
, node
, NULL
, tag
,
585 if (status
!= REG_OK
)
587 status
= tre_add_tag_left(mem
, node
, tag
);
588 if (status
!= REG_OK
)
590 if (regset_contains
== REGSET_HAS_STARTS
)
591 tnfa
->tag_directions
[tag
] = TRE_TAG_LEFT_MAXIMIZE
;
593 tnfa
->tag_directions
[tag
] = direction
;
594 DPRINT(("Setting t%d direction to %s\n", tag
,
595 tag_dir_str
[tnfa
->tag_directions
[tag
]]));
596 tre_purge_regset(regset
, tnfa
, tag
);
600 int b
= lit
->code_max
;
601 int t
= tnfa
->submatch_data
[b
].so_tag
;
602 /* Fail if the referenced submatch hasn't been
604 if (tnfa
->submatch_data
[b
].eo_tag
< 0)
606 status
= REG_ESUBREG
;
611 DPRINT((" Backref %d start: "
612 "t%d reordered to before t%d\n",
616 /* Append to tag_order, move "tag" after
623 DPRINT((" Backref %d start: "
624 "(t%d already before t%d)\n",
626 #endif /* TRE_DEBUG */
630 DPRINT((" ADDTAGS_RECURSE:LITERAL num_tags++ tag=%d\n",
632 regset
[0] = REGSET_UNSET
;
641 assert(!IS_TAG(lit
));
647 tre_catenation_t
*cat
= node
->obj
;
648 tre_ast_node_t
*left
= cat
->left
;
649 tre_ast_node_t
*right
= cat
->right
;
650 int reserved_tag
= -1;
651 DPRINT(("Catenation, next_tag = %d\n", next_tag
));
654 /* After processing right child. */
655 STACK_PUSHX(stack
, voidptr
, node
);
656 STACK_PUSHX(stack
, int, ADDTAGS_AFTER_CAT_RIGHT
);
658 /* Process right child. */
659 STACK_PUSHX(stack
, voidptr
, right
);
660 STACK_PUSHX(stack
, int, ADDTAGS_RECURSE
);
662 /* After processing left child. */
663 STACK_PUSHX(stack
, int, next_tag
+ left
->num_tags
);
664 DPRINT((" Pushing %d for after left\n",
665 next_tag
+ left
->num_tags
));
666 if (left
->num_tags
> 0 && right
->num_tags
> 0)
668 /* Reserve the next tag to the right child. */
669 DPRINT((" ADDTAGS_RECURSE:CATENATION num_tags++ "
670 "Reserving next_tag %d to right child\n",
672 reserved_tag
= next_tag
;
675 STACK_PUSHX(stack
, int, reserved_tag
);
676 STACK_PUSHX(stack
, int, ADDTAGS_AFTER_CAT_LEFT
);
678 /* Process left child. */
679 STACK_PUSHX(stack
, voidptr
, left
);
680 STACK_PUSHX(stack
, int, ADDTAGS_RECURSE
);
686 tre_iteration_t
*iter
= node
->obj
;
687 DPRINT(("Iteration\n"));
690 STACK_PUSHX(stack
, int, regset_contains
!= 0);
691 STACK_PUSHX(stack
, int, tag
);
692 STACK_PUSHX(stack
, voidptr
, node
);
693 STACK_PUSHX(stack
, int, ADDTAGS_AFTER_ITERATION
);
695 STACK_PUSHX(stack
, voidptr
, iter
->arg
);
696 STACK_PUSHX(stack
, int, ADDTAGS_RECURSE
);
698 /* Regset is not empty, so add a tag here (this always happens
699 because iterators always get submatch id, even if in the
705 status
= tre_merge_branches(mem
, node
, NULL
, tag
,
707 if (status
!= REG_OK
)
709 status
= tre_add_tag_left(mem
, node
, tag
);
710 if (status
!= REG_OK
)
712 if (regset_contains
== REGSET_HAS_STARTS
&& tag
!= 0)
713 tnfa
->tag_directions
[tag
] = iter
->minimal
?
715 TRE_TAG_LEFT_MAXIMIZE
;
717 tnfa
->tag_directions
[tag
] = direction
;
718 DPRINT(("Setting t%d direction to %s\n", tag
,
719 tag_dir_str
[tnfa
->tag_directions
[tag
]]));
720 tre_purge_regset(regset
, tnfa
, tag
);
723 DPRINT((" ADDTAGS_RECURSE:ITERATION num_tags++ tag=%d\n",
725 regset
[0] = REGSET_UNSET
;
731 direction
= TRE_TAG_LEFT_MAXIMIZE
;
732 DPRINT((" Setting direction to %s\n", tag_dir_str
[direction
]));
738 tre_ast_node_t
*left
;
739 tre_ast_node_t
*right
;
746 DPRINT((" UNION num_tags++ tag=%d\n", tag
));
753 /* For the top union, walk the tree of consecutive unions,
754 * setting the left_tag and right_tag values in increasing
755 * order (left to right priority) */
757 (node
->num_submatches
-
758 (node
->submatch_id
>= 0 &&
759 node
->submatch_id
< SUBMATCH_ID_INVISIBLE_START
)) > 0)
762 int last
= tre_stack_num_objects(stack
);
764 STACK_PUSH(stack
, voidptr
, node
);
765 STACK_PUSH(stack
, int, ADDTAGS_UNION_RECURSE
);
767 while (tre_stack_num_objects(stack
) > last
)
769 symbol
= (tre_addtags_symbol_t
)tre_stack_pop_int(stack
);
772 case ADDTAGS_UNION_RECURSE
:
773 n
= tre_stack_pop_voidptr(stack
);
777 /* Since the top union has num_submatches > 0,
778 * we set all the consecutive union's
779 * make_branches to 1 to force the generation
780 * of end tags for each union branch. */
781 n
->make_branches
= 1;
783 STACK_PUSH(stack
, voidptr
, n
);
784 STACK_PUSH(stack
, int,
785 ADDTAGS_UNION_RIGHT_RECURSE
);
787 if (left
->type
== UNION
)
789 STACK_PUSH(stack
, voidptr
, left
);
790 STACK_PUSH(stack
, int,
791 ADDTAGS_UNION_RECURSE
);
795 DPRINT((" ADDTAGS_UNION_RECURSE "
796 "num_tags++ tag=%d\n", tag
));
804 case ADDTAGS_UNION_RIGHT_RECURSE
:
805 n
= tre_stack_pop_voidptr(stack
);
809 if (right
->type
== UNION
)
811 STACK_PUSH(stack
, voidptr
, right
);
812 STACK_PUSH(stack
, int,
813 ADDTAGS_UNION_RECURSE
);
817 DPRINT((" ADDTAGS_UNION_RIGHT_RECURSE "
818 "num_tags++ tag=%d\n", tag
));
819 uni
->right_tag
= tag
;
831 } /* end switch(symbol) */
832 } /* end while(tre_stack_num_objects(stack) > last */
835 STACK_PUSHX(stack
, int, front_tag
);
836 STACK_PUSHX(stack
, voidptr
, node
);
837 STACK_PUSHX(stack
, int, ADDTAGS_AFTER_UNION_TOP
);
839 } /* end if (top_union && ...) */
845 /* After processing right child. */
846 STACK_PUSHX(stack
, voidptr
, regset
);
847 STACK_PUSHX(stack
, int, regset_contains
!= 0);
848 STACK_PUSHX(stack
, voidptr
, node
);
849 STACK_PUSHX(stack
, int, ADDTAGS_AFTER_UNION_RIGHT
);
851 /* Process right child. */
852 STACK_PUSHX(stack
, voidptr
, right
);
853 STACK_PUSHX(stack
, int, ADDTAGS_RECURSE_NOT_TOP_UNION
);
855 /* After processing left child. */
856 STACK_PUSHX(stack
, int, ADDTAGS_AFTER_UNION_LEFT
);
858 /* Process left child. */
859 STACK_PUSHX(stack
, voidptr
, left
);
860 STACK_PUSHX(stack
, int, ADDTAGS_RECURSE_NOT_TOP_UNION
);
862 /* Regset is not empty, so add a tag here. */
867 status
= tre_merge_branches(mem
, node
, NULL
, front_tag
,
869 if (status
!= REG_OK
)
871 status
= tre_add_tag_left(mem
, node
, front_tag
);
872 if (status
!= REG_OK
)
874 if (regset_contains
== REGSET_HAS_STARTS
)
875 tnfa
->tag_directions
[front_tag
] = TRE_TAG_LEFT_MAXIMIZE
;
877 tnfa
->tag_directions
[front_tag
] = direction
;
878 DPRINT(("Setting t%d direction to %s\n", front_tag
,
879 tag_dir_str
[tnfa
->tag_directions
[front_tag
]]));
880 tre_purge_regset(regset
, tnfa
, front_tag
);
883 regset
[0] = REGSET_UNSET
;
889 } /* end switch (node->type) */
891 break; /* end case: ADDTAGS_RECURSE */
893 case ADDTAGS_AFTER_ITERATION
:
895 tre_iteration_t
*iter
;
896 tre_ast_node_t
*orig
;
899 node
= tre_stack_pop_voidptr(stack
);
900 orig
= node
->original
? node
->original
: node
;
901 iter
= (tre_iteration_t
*)orig
->obj
;
902 enter_tag
= tre_stack_pop_int(stack
);
904 minimal_tag
= enter_tag
;
906 DPRINT(("After iteration\n"));
909 node
->num_tags
= iter
->arg
->num_tags
+ tre_stack_pop_int(stack
);
910 DPRINT((" ADDTAGS_AFTER_ITERATION: node->num_tags = %d\n",
915 /* node->last_matched_branch will have the start tag (the tag
916 just *before* the iteration). iter->arg->last_matched_branch
917 will have the tag(s) inside the iteration, the ones that
918 may need to be reset if the iteration doesn't match. So
919 before we merge iter->arg into node, we need to set up
920 a new tre_last_matched_t and tre_last_matched_branch_t,
921 using any of the inside tags as cmp_tag (we choose the first
922 tag found by bit_ffs). If there are no inside tags, we
923 don't bother creating the extra structures. */
924 tre_last_matched_branch_pre_t
*b
=
925 iter
->arg
->last_matched_branch
;
927 if (b
&& b
->n_tags
> 0)
929 tre_last_matched_pre_t
*u
;
931 bit_ffs(b
->tags
, num_tags
, &b
->cmp_tag
);
932 DPRINT((" ADDTAGS_AFTER_ITERATION: n_tags=%d "
933 "cmp_tag = %d\n", b
->n_tags
, b
->cmp_tag
));
935 u
= tre_mem_calloc(mem
, sizeof(tre_last_matched_pre_t
) +
936 sizeof(tre_last_matched_branch_pre_t
)
937 + bitstr_size(tnfa
->num_tags
));
945 u
->start_tag
= b
->cmp_tag
;
946 u
->tot_branches
= b
->tot_branches
;
947 u
->tot_last_matched
= 1 + b
->tot_last_matched
;
948 u
->tot_tags
= b
->tot_tags
;
950 b
= (tre_last_matched_branch_pre_t
*)(u
+ 1);
952 b
->n_last_matched
= 1;
953 b
->tot_branches
= 1 + u
->tot_branches
;
954 b
->tot_last_matched
= u
->tot_last_matched
;
955 b
->tot_tags
= u
->tot_tags
;
957 iter
->arg
->last_matched_branch
= b
;
959 status
= tre_merge_branches(mem
, node
, iter
->arg
, 0,
961 if (status
!= REG_OK
)
966 /* Add a union with a left EMPTY literal and the right
967 being iter->arg. This should force the tags inside
968 the minimal iteration to prefer being unset */
969 if (iter
->min
== 0 && iter
->max
<= 1)
971 tre_ast_node_t
*u
, *e
;
973 e
= tre_ast_new_literal(mem
, EMPTY
, -1, -1);
979 u
= tre_ast_new_union(mem
, e
, iter
->arg
);
988 direction
= TRE_TAG_MINIMIZE
;
991 direction
= TRE_TAG_MAXIMIZE
;
992 DPRINT((" Setting direction to %s\n", tag_dir_str
[direction
]));
997 case ADDTAGS_AFTER_CAT_LEFT
:
999 int new_tag
= tre_stack_pop_int(stack
);
1000 next_tag
= tre_stack_pop_int(stack
);
1001 DPRINT(("After cat left, tag = %d, next_tag = %d\n",
1005 DPRINT((" Setting tag to %d\n", new_tag
));
1011 case ADDTAGS_AFTER_CAT_RIGHT
:
1013 tre_catenation_t
*cat
;
1015 DPRINT(("After cat right\n"));
1016 node
= tre_stack_pop_voidptr(stack
);
1020 node
->num_tags
= cat
->left
->num_tags
+ cat
->right
->num_tags
;
1021 DPRINT((" ADDTAGS_AFTER_CAT_RIGHT: node->num_tags = %d\n",
1026 status
= tre_merge_branches(mem
, cat
->left
, cat
->right
, 0,
1028 if (status
!= REG_OK
)
1030 status
= tre_merge_branches(mem
, node
, cat
->left
, 0,
1036 case ADDTAGS_AFTER_UNION_LEFT
:
1037 DPRINT(("After union left\n"));
1038 /* Lift the bottom of the `regset' array so that when processing
1039 the right operand the items currently in the array are
1040 invisible. The original bottom was saved at ADDTAGS_UNION and
1041 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1042 while (*regset
!= REGSET_UNSET
)
1044 regset_contains
= 0;
1047 case ADDTAGS_AFTER_UNION_RIGHT
:
1050 tre_ast_node_t
*orig
;
1052 /* Note: node may not be a UNION, but a CATENATION with a left
1053 * tag. So that is why we pass the original node->obj on the
1054 * stack, to get the union's true values. */
1056 DPRINT(("After union right\n"));
1057 node
= tre_stack_pop_voidptr(stack
);
1058 orig
= node
->original
? node
->original
: node
;
1059 uni
= (tre_union_t
*)orig
->obj
;
1060 added_tags
= tre_stack_pop_int(stack
);
1063 node
->num_tags
= uni
->left
->num_tags
+ uni
->right
->num_tags
1065 if (uni
->left_tag
> 0)
1067 if (uni
->right_tag
> 0)
1069 DPRINT((" ADDTAGS_AFTER_UNION_RIGHT: node->num_tags = %d\n",
1072 regset
= tre_stack_pop_voidptr(stack
);
1074 /* Add tags after both children, the left child gets a smaller
1075 tag than the right child. This guarantees that we prefer
1076 the left child over the right child. */
1077 /* XXX - This is not always necessary (if the children have
1078 tags which must be seen for every match of that child). */
1079 if (!first_pass
&& node
->make_branches
)
1081 tre_last_matched_branch_pre_t
*lb
=
1082 uni
->left
->last_matched_branch
;
1083 tre_last_matched_branch_pre_t
*rb
=
1084 uni
->right
->last_matched_branch
;
1085 tre_last_matched_pre_t
*lu
=
1086 uni
->left
->last_matched_in_progress
;
1087 tre_last_matched_pre_t
*ru
=
1088 uni
->right
->last_matched_in_progress
;
1089 tre_last_matched_pre_t
*u
;
1090 /* We don't need to call tre_merge_branches because these
1091 * tags don't participate in submatch ranges, so don't need
1092 * to be recorded. But we do set the cmp_tag entry of the
1093 * tre_last_matched_branch_pre_t, so we might call
1094 * tre_merge_branches if we need to create an empty
1095 * tre_last_matched_branch_pre_t. */
1096 if (uni
->left_tag
> 0)
1098 DPRINT(("Setting t%d direction to maximize\n",
1100 status
= tre_add_tag_right(mem
, uni
->left
, uni
->left_tag
);
1101 if (status
!= REG_OK
)
1103 tnfa
->tag_directions
[uni
->left_tag
] = TRE_TAG_MAXIMIZE
;
1106 status
= tre_merge_branches(mem
, uni
->left
, NULL
, -1,
1108 if (status
!= REG_OK
)
1110 lb
= uni
->left
->last_matched_branch
;
1112 lb
->cmp_tag
= uni
->left_tag
;
1114 if (uni
->right_tag
> 0)
1116 DPRINT(("Setting t%d direction to maximize\n",
1118 status
= tre_add_tag_right(mem
, uni
->right
, uni
->right_tag
);
1119 if (status
!= REG_OK
)
1121 tnfa
->tag_directions
[uni
->right_tag
] = TRE_TAG_MAXIMIZE
;
1124 status
= tre_merge_branches(mem
, uni
->right
, NULL
, -1,
1126 if (status
!= REG_OK
)
1128 rb
= uni
->right
->last_matched_branch
;
1130 rb
->cmp_tag
= uni
->right_tag
;
1132 /* Now merge the tre_last_matched_branch_pre_t into a
1133 tre_last_matched_pre_t */
1138 /* Create a new tre_last_matched_pre_t */
1139 u
= tre_mem_calloc(mem
, sizeof(tre_last_matched_pre_t
));
1142 status
= REG_ESPACE
;
1145 u
->tot_last_matched
= 1;
1151 u
->tot_branches
+= lb
->tot_branches
;
1152 u
->tot_last_matched
+= lb
->tot_last_matched
;
1153 u
->tot_tags
+= lb
->tot_tags
;
1158 u
->tot_branches
+= rb
->tot_branches
;
1159 u
->tot_last_matched
+= rb
->tot_last_matched
;
1160 u
->tot_tags
+= rb
->tot_tags
;
1167 u
->tot_branches
+= rb
->tot_branches
;
1168 u
->tot_last_matched
+= rb
->tot_last_matched
;
1169 u
->tot_tags
+= rb
->tot_tags
;
1174 /* Use ru, and add lb */
1178 lb
->next
= u
->branches
;
1181 u
->tot_branches
+= lb
->tot_branches
;
1182 u
->tot_last_matched
+= lb
->tot_last_matched
;
1183 u
->tot_tags
+= lb
->tot_tags
;
1187 else if (ru
== NULL
)
1189 /* Use lu, and add rb */
1193 rb
->next
= u
->branches
;
1196 u
->tot_branches
+= rb
->tot_branches
;
1197 u
->tot_last_matched
+= rb
->tot_last_matched
;
1198 u
->tot_tags
+= rb
->tot_tags
;
1203 /* Merge lu and ru into lu */
1208 tre_last_matched_branch_pre_t
*b
= lu
->branches
;
1209 while (b
->next
) b
= b
->next
;
1210 b
->next
= ru
->branches
;
1211 lu
->n_branches
+= ru
->n_branches
;
1214 else if (ru
->branches
)
1216 lu
->branches
= ru
->branches
;
1217 lu
->n_branches
= ru
->n_branches
;
1219 lu
->tot_branches
+= ru
->tot_branches
;
1220 lu
->tot_last_matched
+= ru
->tot_last_matched
- 1;
1221 lu
->tot_tags
+= ru
->tot_tags
;
1224 node
->last_matched_in_progress
= u
;
1226 direction
= TRE_TAG_MAXIMIZE
;
1230 case ADDTAGS_AFTER_UNION_TOP
: /* only called when not first_pass */
1232 tre_last_matched_branch_pre_t
*b
;
1233 tre_last_matched_pre_t
*u
;
1236 DPRINT(("After union top\n"));
1237 node
= tre_stack_pop_voidptr(stack
);
1238 start_tag
= tre_stack_pop_int(stack
);
1239 b
= tre_mem_calloc(mem
, sizeof(tre_last_matched_branch_pre_t
)
1240 + bitstr_size(tnfa
->num_tags
));
1243 status
= REG_ESPACE
;
1247 u
= node
->last_matched_in_progress
;
1248 u
->start_tag
= start_tag
;
1249 b
->tot_branches
= 1 + u
->tot_branches
;
1250 b
->tot_last_matched
= u
->tot_last_matched
;
1251 b
->tot_tags
= u
->tot_tags
;
1252 b
->last_matched
= u
;
1253 b
->n_last_matched
= 1;
1254 node
->last_matched_branch
= b
;
1255 node
->last_matched_in_progress
= NULL
;
1263 } /* end switch(symbol) */
1264 } /* end while(tre_stack_num_objects(stack) > bottom) */
1266 if (status
!= REG_OK
)
1268 DPRINT(("Error during %s pass\n", first_pass
? "first" : "second"));
1269 goto error_post_compile
;
1275 if (num_tags
!= tnfa
->num_tags
)
1277 DPRINT(("num_tags(%d) != tnfa->num_tags(%d)\n", num_tags
,
1279 status
= REG_BADPAT
;
1280 goto error_post_compile
;
1283 tre_purge_regset(regset
, tnfa
, tag
);
1284 DPRINT(("Setting t%d to %s\n", num_tags
,
1285 tag_dir_str
[direction
]));
1286 tnfa
->tag_directions
[num_tags
] = direction
;
1288 if (rtp
> reorder_tags
+ 2 * tnfa
->num_reorder_tags
)
1290 DPRINT(("Processed %d reorder tags instead of %d\n",
1291 (int)(rtp
- reorder_tags
) / 2, tnfa
->num_reorder_tags
));
1292 status
= REG_BADPAT
;
1293 goto error_post_compile
;
1297 if (reorder_tags
[0] >= 0)
1299 DPRINT(("reorder_tags:\n"));
1300 for (rtp
= reorder_tags
; *rtp
>= 0;)
1302 DPRINT(("%d after ", *rtp
++));
1303 DPRINT(("%d\n", *rtp
++));
1307 DPRINT(("No reorder_tags\n"));
1308 #endif /* TRE_DEBUG */
1310 /* Initialize to_reorder */
1311 for (i
= 0; i
< num_tags
; i
++)
1313 /* Use to_seq_order to convert reorder_tags values, and use those to
1314 * reorder to_reorder */
1315 for (rtp
= reorder_tags
; *rtp
>= 0;)
1320 /* Skip reordering the final tag */
1323 DPRINT(("Skipping reorder of %d\n", ti
));
1327 /* The number of the tag to reorder */
1328 high
= to_reorder
[ti
];
1329 /* Reorder after this tag */
1330 low
= to_reorder
[*rtp
++];
1332 DPRINT(("ti=%d high=%d low=%d\n", ti
, high
, low
));
1335 DPRINT(("Tag %d already before %d\n", high
, low
));
1338 for (j
= 0; j
< num_tags
; j
++)
1339 if (to_reorder
[j
] > low
&& to_reorder
[j
] < high
)
1341 to_reorder
[ti
] = low
+ 1;
1343 DPRINT(("to_reorder=("));
1344 for (j
= 0; j
< num_tags
; j
++)
1346 DPRINT(("%d", to_reorder
[j
]));
1347 if (j
< num_tags
- 1)
1351 #endif /* TRE_DEBUG */
1353 /* Determine if reordering in really necessary */
1355 int need_reorder
= 0;
1356 for (i
= 0; i
< num_tags
; i
++)
1357 if(to_reorder
[i
] != i
)
1362 /* If need_reorder is not set, free reorder_tags, and set to NULL,
1363 * indicating no reordering is needed */
1366 DPRINT(("Don't need to reorder\n"));
1367 xfree(reorder_tags
);
1368 reorder_tags
= NULL
;
1376 tre_tag_direction_t
*new_tag_directions
;
1378 DPRINT(("to_reorder:"));
1379 for (i
= 0; i
< num_tags
; i
++)
1380 DPRINT((" %d->%d", i
, to_reorder
[i
]));
1382 #endif /* TRE_DEBUG */
1384 DPRINT(("Reordering submatch_data\n"));
1385 for (i
= 0; i
< (int)tnfa
->num_submatches
; i
++)
1388 int so
= tnfa
->submatch_data
[i
].so_tag
;
1389 int eo
= tnfa
->submatch_data
[i
].eo_tag
;
1390 #endif /* TRE_DEBUG */
1391 tnfa
->submatch_data
[i
].so_tag
=
1392 to_reorder
[tnfa
->submatch_data
[i
].so_tag
];
1393 tnfa
->submatch_data
[i
].eo_tag
=
1394 tnfa
->submatch_data
[i
].eo_tag
< num_tags
?
1395 to_reorder
[tnfa
->submatch_data
[i
].eo_tag
] :
1396 tnfa
->submatch_data
[i
].eo_tag
;
1397 DPRINT(("pmatch[%d]: {%d, %d}->{%d, %d}\n", i
, so
, eo
,
1398 tnfa
->submatch_data
[i
].so_tag
,
1399 tnfa
->submatch_data
[i
].eo_tag
));
1402 DPRINT(("Reordering tag_directions\n"));
1403 /* We only allocate num_tags directions and reorder them. The
1404 * num_tags-th direction (end tag) is left unchanged. */
1405 new_tag_directions
= xmalloc(sizeof(*new_tag_directions
) * num_tags
);
1406 if (new_tag_directions
== NULL
)
1408 status
= REG_ESPACE
;
1409 goto error_post_compile
;
1411 for (i
= 0; i
< num_tags
; i
++)
1413 new_tag_directions
[to_reorder
[i
]] = tnfa
->tag_directions
[i
];
1416 for (i
= 0; i
< num_tags
; i
++)
1418 DPRINT(("t%d %s->%s\n", i
,
1419 tag_dir_str
[tnfa
->tag_directions
[i
]],
1420 tag_dir_str
[new_tag_directions
[i
]]));
1422 DPRINT(("t%d %s->%s\n", num_tags
,
1423 tag_dir_str
[tnfa
->tag_directions
[num_tags
]],
1424 tag_dir_str
[tnfa
->tag_directions
[num_tags
]]));
1425 #endif /* TRE_DEBUG */
1426 memcpy(tnfa
->tag_directions
, new_tag_directions
, sizeof(*new_tag_directions
) * num_tags
);
1427 xfree(new_tag_directions
);
1429 DPRINT(("Reordering minimal_tags\n"));
1430 for (i
= 0; tnfa
->minimal_tags
[i
] >= 0; i
++)
1431 tnfa
->minimal_tags
[i
] = tnfa
->minimal_tags
[i
] < num_tags
?
1432 to_reorder
[tnfa
->minimal_tags
[i
]] :
1433 tnfa
->minimal_tags
[i
];
1435 DPRINT(("Reordering AST tags\n"));
1436 STACK_PUSH(stack
, voidptr
, tree
);
1437 while (status
== REG_OK
&& tre_stack_num_objects(stack
) > bottom
)
1439 node
= tre_stack_pop_voidptr(stack
);
1445 tre_literal_t
*lit
= (tre_literal_t
*)node
->obj
;
1447 lit
->code_max
= to_reorder
[lit
->code_max
];
1453 tre_union_t
*uni
= (tre_union_t
*)node
->obj
;
1454 STACK_PUSHX(stack
, voidptr
, uni
->right
);
1455 STACK_PUSHX(stack
, voidptr
, uni
->left
);
1461 tre_catenation_t
*cat
= (tre_catenation_t
*)node
->obj
;
1462 STACK_PUSHX(stack
, voidptr
, cat
->right
);
1463 STACK_PUSHX(stack
, voidptr
, cat
->left
);
1469 tre_iteration_t
*iter
= (tre_iteration_t
*)node
->obj
;
1470 STACK_PUSHX(stack
, voidptr
, iter
->arg
);
1479 if (status
!= REG_OK
)
1481 DPRINT(("Error while reordering tags\n"));
1482 goto error_post_compile
;
1489 if (tree
->last_matched_branch
)
1491 tre_last_matched_branch_t
*buf
, *b
, *bb
;
1492 tre_last_matched_branch_pre_t
*bp
;
1493 tre_last_matched_t
*u
, *uu
;
1494 tre_last_matched_pre_t
*up
;
1498 tre_last_matched_branch_t
*_b
;
1499 tre_last_matched_t
*_u
;
1502 DPRINT(("last_match_branch_pre:\n"));
1503 print_last_match_branch_pre(tree
->last_matched_branch
, 0, num_tags
);
1504 #endif /* TRE_DEBUG */
1505 buf
= (tre_last_matched_branch_t
*)xcalloc(1,
1506 tree
->last_matched_branch
->tot_branches
1507 * sizeof(tre_last_matched_branch_t
) +
1508 tree
->last_matched_branch
->tot_last_matched
1509 * sizeof(tre_last_matched_t
) +
1510 tree
->last_matched_branch
->tot_tags
*
1514 status
= REG_ESPACE
;
1515 goto error_post_compile
;
1519 u
= (tre_last_matched_t
*)(b
+
1520 tree
->last_matched_branch
->tot_branches
);
1521 t
= (int *)(u
+ tree
->last_matched_branch
->tot_last_matched
);
1526 #endif /* TRE_DEBUG */
1527 DPRINT(("Copying info_pre to info\n"));
1528 STACK_PUSH(stack
, voidptr
, tree
->last_matched_branch
);
1529 STACK_PUSH(stack
, int, 1);
1530 STACK_PUSH(stack
, int, COPY_LAST_MATCHED_BRANCH
);
1532 while (status
== REG_OK
&& tre_stack_num_objects(stack
) > bottom
)
1534 switch (tre_stack_pop_int(stack
))
1536 case COPY_LAST_MATCHED_BRANCH
:
1537 i
= tre_stack_pop_int(stack
);
1538 /* The tre_last_matched_branch_pre_t * is still on the
1540 STACK_PUSHX(stack
, voidptr
, b
);
1541 STACK_PUSHX(stack
, int, COPY_LAST_MATCHED_BRANCH_NEXT
);
1545 case COPY_LAST_MATCHED_BRANCH_NEXT
:
1546 bb
= tre_stack_pop_voidptr(stack
);
1547 bp
= tre_stack_pop_voidptr(stack
);
1548 bb
->n_last_matched
= bp
->n_last_matched
;
1549 bb
->cmp_tag
= bp
->cmp_tag
;
1553 n
= bb
->n_tags
= bp
->n_tags
;
1555 for (i
= 0; i
< num_tags
; i
++)
1556 if (bit_test(bp
->tags
, i
))
1565 STACK_PUSHX(stack
, voidptr
, bp
->next
);
1566 STACK_PUSHX(stack
, voidptr
, bb
+ 1);
1567 STACK_PUSHX(stack
, int, COPY_LAST_MATCHED_BRANCH_NEXT
);
1569 if (bp
->n_last_matched
> 0)
1571 bb
->last_matched
= u
;
1572 STACK_PUSHX(stack
, voidptr
, bp
->last_matched
);
1573 STACK_PUSHX(stack
, int, bp
->n_last_matched
);
1574 STACK_PUSHX(stack
, int, COPY_LAST_MATCHED
);
1578 case COPY_LAST_MATCHED
:
1579 i
= tre_stack_pop_int(stack
);
1580 /* The tre_last_matched_pre_t * is still on the stack */
1581 STACK_PUSHX(stack
, voidptr
, u
);
1582 STACK_PUSHX(stack
, int, COPY_LAST_MATCHED_NEXT
);
1586 case COPY_LAST_MATCHED_NEXT
:
1587 uu
= tre_stack_pop_voidptr(stack
);
1588 up
= tre_stack_pop_voidptr(stack
);
1589 uu
->n_branches
= up
->n_branches
;
1591 uu
->start_tag
= up
->start_tag
;
1594 STACK_PUSHX(stack
, voidptr
, up
->next
);
1595 STACK_PUSHX(stack
, voidptr
, uu
+ 1);
1596 STACK_PUSHX(stack
, int, COPY_LAST_MATCHED_NEXT
);
1598 STACK_PUSHX(stack
, voidptr
, up
->branches
);
1599 STACK_PUSHX(stack
, int, up
->n_branches
);
1600 STACK_PUSHX(stack
, int, COPY_LAST_MATCHED_BRANCH
);
1604 if (status
!= REG_OK
)
1605 goto error_post_compile
;
1607 DPRINT(("last_matched_branch:\n"));
1608 print_last_match_branch(buf
, 0);
1609 if (b
!= _b
+ tree
->last_matched_branch
->tot_branches
)
1610 DPRINT(("b/%p != _b + tree->last_matched_branch->tot_branches/%p\n",
1611 b
, _b
+ tree
->last_matched_branch
->tot_branches
));
1612 if (u
!= _u
+ tree
->last_matched_branch
->tot_last_matched
)
1613 DPRINT(("u/%p != _u + "
1614 "tree->last_matched_branch->tot_last_matched/%p\n",
1615 u
, _u
+ tree
->last_matched_branch
->tot_last_matched
));
1616 if (t
!= _t
+ tree
->last_matched_branch
->tot_tags
)
1617 DPRINT(("t/%p != _t + tree->last_matched_branch->tot_tags/%p\n",
1618 t
, _t
+ tree
->last_matched_branch
->tot_tags
));
1619 #endif /* TRE_DEBUG */
1620 tnfa
->last_matched_branch
= buf
;
1624 DPRINT(("No last_match_branch_pre\n"));
1625 #endif /* TRE_DEBUG */
1628 DPRINT(("tre_add_tags: %s complete. Number of tags %d.\n",
1629 first_pass
? "First pass" : "Second pass", num_tags
));
1631 tre_ast_print(tree
);
1632 #endif /* TRE_DEBUG */
1633 DPRINT(("tre_add_tags: tree->num_tags=%d num_tags=%d\n", tree
->num_tags
,
1635 assert(tree
->num_tags
== num_tags
);
1636 tnfa
->end_tag
= num_tags
;
1637 tnfa
->num_tags
= num_tags
;
1638 tnfa
->num_minimals
= num_minimals
;
1640 xfree(reorder_tags
);
1650 AST to TNFA compilation routines.
1656 } tre_copyast_symbol_t
;
1658 /* Flags for tre_copy_ast(). */
1659 #define COPY_REMOVE_TAGS 1
1660 #define COPY_MAXIMIZE_FIRST_TAG 2
1662 static reg_errcode_t
1663 tre_copy_ast(tre_mem_t mem
, tre_stack_t
*stack
, tre_ast_node_t
*ast
,
1664 int flags
, int *pos_add
, tre_tag_direction_t
*tag_directions
,
1665 tre_ast_node_t
**copy
, int *max_pos
)
1667 reg_errcode_t status
= REG_OK
;
1668 int bottom
= tre_stack_num_objects(stack
);
1671 tre_ast_node_t
**result
= copy
;
1672 tre_copyast_symbol_t symbol
;
1674 STACK_PUSH(stack
, voidptr
, ast
);
1675 STACK_PUSH(stack
, int, COPY_RECURSE
);
1677 while (status
== REG_OK
&& tre_stack_num_objects(stack
) > bottom
)
1679 tre_ast_node_t
*node
;
1680 if (status
!= REG_OK
)
1683 symbol
= (tre_copyast_symbol_t
)tre_stack_pop_int(stack
);
1686 case COPY_SET_RESULT_PTR
:
1687 result
= tre_stack_pop_voidptr(stack
);
1690 node
= tre_stack_pop_voidptr(stack
);
1695 tre_literal_t
*lit
= node
->obj
;
1696 int pos
= lit
->position
;
1697 int min
= lit
->code_min
;
1698 int max
= lit
->code_max
;
1699 tre_bracket_match_list_t
*list
= !IS_SPECIAL(lit
) ?
1700 lit
->u
.bracket_match_list
:
1702 if (!IS_SPECIAL(lit
) || IS_BACKREF(lit
))
1704 /* XXX - e.g. [ab] has only one position but two
1705 nodes, so we are creating holes in the state space
1706 here. Not fatal, just wastes memory. */
1710 else if (IS_TAG(lit
) && (flags
& COPY_REMOVE_TAGS
))
1712 /* Change this tag to empty. */
1716 else if (IS_TAG(lit
) && (flags
& COPY_MAXIMIZE_FIRST_TAG
)
1719 /* Maximize the first tag. */
1720 if (tag_directions
[max
] == TRE_TAG_LEFT_MAXIMIZE
)
1721 tag_directions
[max
] = TRE_TAG_MAXIMIZE
;
1724 *result
= tre_ast_new_literal(mem
, min
, max
, pos
);
1725 if (*result
== NULL
)
1726 status
= REG_ESPACE
;
1731 if (!IS_SPECIAL(lit
))
1732 ((tre_literal_t
*)(*result
)->obj
)->u
.bracket_match_list
1738 tre_union_t
*uni
= node
->obj
;
1740 *result
= tre_ast_new_union(mem
, uni
->left
, uni
->right
);
1741 if (*result
== NULL
)
1743 status
= REG_ESPACE
;
1746 tmp
= (*result
)->obj
;
1747 result
= &tmp
->left
;
1748 STACK_PUSHX(stack
, voidptr
, uni
->right
);
1749 STACK_PUSHX(stack
, int, COPY_RECURSE
);
1750 STACK_PUSHX(stack
, voidptr
, &tmp
->right
);
1751 STACK_PUSHX(stack
, int, COPY_SET_RESULT_PTR
);
1752 STACK_PUSHX(stack
, voidptr
, uni
->left
);
1753 STACK_PUSHX(stack
, int, COPY_RECURSE
);
1758 tre_catenation_t
*cat
= node
->obj
;
1759 tre_catenation_t
*tmp
;
1760 *result
= tre_ast_new_catenation(mem
, cat
->left
, cat
->right
);
1761 if (*result
== NULL
)
1763 status
= REG_ESPACE
;
1766 tmp
= (*result
)->obj
;
1769 result
= &tmp
->left
;
1771 STACK_PUSHX(stack
, voidptr
, cat
->right
);
1772 STACK_PUSHX(stack
, int, COPY_RECURSE
);
1773 STACK_PUSHX(stack
, voidptr
, &tmp
->right
);
1774 STACK_PUSHX(stack
, int, COPY_SET_RESULT_PTR
);
1775 STACK_PUSHX(stack
, voidptr
, cat
->left
);
1776 STACK_PUSHX(stack
, int, COPY_RECURSE
);
1781 tre_iteration_t
*iter
= node
->obj
;
1782 STACK_PUSHX(stack
, voidptr
, iter
->arg
);
1783 STACK_PUSHX(stack
, int, COPY_RECURSE
);
1784 *result
= tre_ast_new_iter(mem
, iter
->arg
, iter
->min
,
1785 iter
->max
, iter
->minimal
);
1786 if (*result
== NULL
)
1788 status
= REG_ESPACE
;
1791 iter
= (*result
)->obj
;
1792 result
= &iter
->arg
;
1802 *pos_add
+= num_copied
;
1809 } tre_expand_ast_symbol_t
;
1811 /* Expands each iteration node that has a finite nonzero minimum or maximum
1812 iteration count to a catenated sequence of copies of the node. */
1813 static reg_errcode_t
1814 tre_expand_ast(tre_mem_t mem
, tre_stack_t
*stack
, tre_ast_node_t
*ast
,
1815 int *position
, tre_tag_direction_t
*tag_directions
,
1816 int __unused
*max_depth
)
1818 reg_errcode_t status
= REG_OK
;
1819 int bottom
= tre_stack_num_objects(stack
);
1821 int pos_add_total
= 0;
1824 /* Current approximate matching parameters. */
1825 int params
[TRE_PARAM_LAST
];
1826 /* Approximate parameter nesting level. */
1827 int params_depth
= 0;
1828 #endif /* TRE_APPROX */
1832 #endif /* TRE_APPROX */
1835 for (i
= 0; i
< TRE_PARAM_LAST
; i
++)
1836 params
[i
] = TRE_PARAM_DEFAULT
;
1837 #endif /* TRE_APPROX */
1839 STACK_PUSHR(stack
, voidptr
, ast
);
1840 STACK_PUSHR(stack
, int, EXPAND_RECURSE
);
1841 while (status
== REG_OK
&& tre_stack_num_objects(stack
) > bottom
)
1843 tre_ast_node_t
*node
;
1844 tre_expand_ast_symbol_t symbol
;
1846 if (status
!= REG_OK
)
1849 DPRINT(("pos_add %d\n", pos_add
));
1851 symbol
= (tre_expand_ast_symbol_t
)tre_stack_pop_int(stack
);
1852 node
= tre_stack_pop_voidptr(stack
);
1855 case EXPAND_RECURSE
:
1860 tre_literal_t
*lit
= node
->obj
;
1861 if (!IS_SPECIAL(lit
) || IS_BACKREF(lit
))
1863 lit
->position
+= pos_add
;
1864 if (lit
->position
> max_pos
)
1865 max_pos
= lit
->position
;
1871 tre_union_t
*uni
= node
->obj
;
1872 STACK_PUSHX(stack
, voidptr
, uni
->right
);
1873 STACK_PUSHX(stack
, int, EXPAND_RECURSE
);
1874 STACK_PUSHX(stack
, voidptr
, uni
->left
);
1875 STACK_PUSHX(stack
, int, EXPAND_RECURSE
);
1880 tre_catenation_t
*cat
= node
->obj
;
1881 STACK_PUSHX(stack
, voidptr
, cat
->right
);
1882 STACK_PUSHX(stack
, int, EXPAND_RECURSE
);
1883 STACK_PUSHX(stack
, voidptr
, cat
->left
);
1884 STACK_PUSHX(stack
, int, EXPAND_RECURSE
);
1889 tre_iteration_t
*iter
= node
->obj
;
1890 STACK_PUSHX(stack
, int, pos_add
);
1891 STACK_PUSHX(stack
, voidptr
, node
);
1892 STACK_PUSHX(stack
, int, EXPAND_AFTER_ITER
);
1893 STACK_PUSHX(stack
, voidptr
, iter
->arg
);
1894 STACK_PUSHX(stack
, int, EXPAND_RECURSE
);
1895 /* If we are going to expand this node at EXPAND_AFTER_ITER
1896 then don't increase the `pos' fields of the nodes now, it
1897 will get done when expanding. */
1898 if (iter
->min
> 1 || iter
->max
> 1)
1909 case EXPAND_AFTER_ITER
:
1911 tre_iteration_t
*iter
= node
->obj
;
1913 pos_add
= tre_stack_pop_int(stack
);
1914 pos_add_last
= pos_add
;
1915 /* Originally (in tre_parse_bound), if min == 0 && max == 0, we
1916 immediate replace the whole iteration with EMPTY. This
1917 unfortunately drops any submatches, and messes up setting the
1918 pmatch values (we can get tags of -1, and tag values in the
1919 billions). So we left it there and replace with EMPTY here. */
1920 if (iter
->min
== 0 && iter
->max
== 0)
1922 tre_ast_node_t
*empty
= tre_ast_new_literal(mem
, EMPTY
, -1, -1);
1925 node
->obj
= empty
->obj
;
1926 node
->type
= empty
->type
;
1928 else if (iter
->min
> 1 || iter
->max
> 1)
1930 tre_ast_node_t
*seq1
= NULL
, *seq2
= NULL
;
1932 int pos_add_save
= pos_add
;
1934 /* Create a catenated sequence of copies of the node. */
1935 for (j
= 0; j
< iter
->min
; j
++)
1937 tre_ast_node_t
*copy
;
1938 /* Remove tags from all but the last copy. */
1939 int flags
= ((j
+ 1 < iter
->min
)
1941 : COPY_MAXIMIZE_FIRST_TAG
);
1942 DPRINT((" pos_add %d\n", pos_add
));
1943 pos_add_save
= pos_add
;
1944 status
= tre_copy_ast(mem
, stack
, iter
->arg
, flags
,
1945 &pos_add
, tag_directions
, ©
,
1947 if (status
!= REG_OK
)
1950 seq1
= tre_ast_new_catenation(mem
, seq1
, copy
);
1957 if (iter
->max
== -1)
1959 /* No upper limit. */
1960 pos_add_save
= pos_add
;
1961 status
= tre_copy_ast(mem
, stack
, iter
->arg
, 0,
1962 &pos_add
, NULL
, &seq2
, &max_pos
);
1963 if (status
!= REG_OK
)
1965 seq2
= tre_ast_new_iter(mem
, seq2
, 0, -1, 0);
1971 for (j
= iter
->min
; j
< iter
->max
; j
++)
1973 tre_ast_node_t
*tmp
, *copy
;
1974 pos_add_save
= pos_add
;
1975 status
= tre_copy_ast(mem
, stack
, iter
->arg
, 0,
1976 &pos_add
, NULL
, ©
, &max_pos
);
1977 if (status
!= REG_OK
)
1980 seq2
= tre_ast_new_catenation(mem
, copy
, seq2
);
1985 tmp
= tre_ast_new_literal(mem
, EMPTY
, -1, -1);
1988 seq2
= tre_ast_new_union(mem
, tmp
, seq2
);
1994 pos_add
= pos_add_save
;
1997 else if (seq2
!= NULL
)
1998 seq1
= tre_ast_new_catenation(mem
, seq1
, seq2
);
2001 node
->obj
= seq1
->obj
;
2002 node
->type
= seq1
->type
;
2006 pos_add_total
+= pos_add
- pos_add_last
;
2007 if (iter_depth
== 0)
2008 pos_add
= pos_add_total
;
2011 /* If approximate parameters are specified, surround the result
2012 with two parameter setting nodes. The one on the left sets
2013 the specified parameters, and the one on the right restores
2014 the old parameters. */
2017 tre_ast_node_t
*tmp_l
, *tmp_r
, *tmp_node
, *node_copy
;
2020 tmp_l
= tre_ast_new_literal(mem
, PARAMETER
, 0, -1);
2023 ((tre_literal_t
*)tmp_l
->obj
)->u
.params
= iter
->params
;
2024 iter
->params
[TRE_PARAM_DEPTH
] = params_depth
+ 1;
2025 tmp_r
= tre_ast_new_literal(mem
, PARAMETER
, 0, -1);
2028 old_params
= tre_mem_alloc(mem
, sizeof(*old_params
)
2032 for (i
= 0; i
< TRE_PARAM_LAST
; i
++)
2033 old_params
[i
] = params
[i
];
2034 ((tre_literal_t
*)tmp_r
->obj
)->u
.params
= old_params
;
2035 old_params
[TRE_PARAM_DEPTH
] = params_depth
;
2036 /* XXX - this is the only place where ast_new_node is
2037 needed -- should be moved inside AST module. */
2038 node_copy
= tre_ast_new_node(mem
, ITERATION
,
2039 sizeof(tre_iteration_t
));
2042 node_copy
->obj
= node
->obj
;
2043 tmp_node
= tre_ast_new_catenation(mem
, tmp_l
, node_copy
);
2046 tmp_node
= tre_ast_new_catenation(mem
, tmp_node
, tmp_r
);
2049 /* Replace the contents of `node' with `tmp_node'. */
2050 memcpy(node
, tmp_node
, sizeof(*node
));
2051 node
->obj
= tmp_node
->obj
;
2052 node
->type
= tmp_node
->type
;
2054 if (params_depth
> *max_depth
)
2055 *max_depth
= params_depth
;
2057 #endif /* TRE_APPROX */
2066 *position
+= pos_add_total
;
2068 /* `max_pos' should never be larger than `*position' if the above
2069 code works, but just an extra safeguard let's make sure
2070 `*position' is set large enough so enough memory will be
2071 allocated for the transition table. */
2072 if (max_pos
> *position
)
2073 *position
= max_pos
;
2076 DPRINT(("Expanded AST:\n"));
2078 DPRINT(("*position %d, max_pos %d\n", *position
, max_pos
));
2084 static tre_pos_and_tags_t
*
2085 tre_set_empty(tre_mem_t mem
)
2087 tre_pos_and_tags_t
*new_set
;
2089 new_set
= tre_mem_calloc(mem
, sizeof(*new_set
));
2090 if (new_set
== NULL
)
2093 new_set
[0].position
= -1;
2094 new_set
[0].code_min
= -1;
2095 new_set
[0].code_max
= -1;
2100 static tre_pos_and_tags_t
*
2101 tre_set_one(tre_mem_t mem
, int position
, int code_min
, int code_max
,
2102 tre_bracket_match_list_t
*bracket_match_list
, int backref
)
2104 tre_pos_and_tags_t
*new_set
;
2106 new_set
= tre_mem_calloc(mem
, sizeof(*new_set
) * 2);
2107 if (new_set
== NULL
)
2110 new_set
[0].position
= position
;
2111 new_set
[0].code_min
= code_min
;
2112 new_set
[0].code_max
= code_max
;
2113 new_set
[0].bracket_match_list
= bracket_match_list
;
2114 new_set
[0].backref
= backref
;
2115 new_set
[1].position
= -1;
2116 new_set
[1].code_min
= -1;
2117 new_set
[1].code_max
= -1;
2122 static tre_pos_and_tags_t
*
2123 tre_set_union(tre_mem_t mem
, tre_pos_and_tags_t
*set1
, tre_pos_and_tags_t
*set2
,
2124 int *tags
, int assertions
, int *params
)
2127 tre_pos_and_tags_t
*new_set
;
2131 for (num_tags
= 0; tags
!= NULL
&& tags
[num_tags
] >= 0; num_tags
++);
2132 for (s1
= 0; set1
[s1
].position
>= 0; s1
++);
2133 for (s2
= 0; set2
[s2
].position
>= 0; s2
++);
2134 new_set
= tre_mem_calloc(mem
, sizeof(*new_set
) * (s1
+ s2
+ 1));
2138 for (s1
= 0; set1
[s1
].position
>= 0; s1
++)
2140 new_set
[s1
].position
= set1
[s1
].position
;
2141 new_set
[s1
].code_min
= set1
[s1
].code_min
;
2142 new_set
[s1
].code_max
= set1
[s1
].code_max
;
2143 new_set
[s1
].assertions
= set1
[s1
].assertions
| assertions
;
2144 new_set
[s1
].bracket_match_list
= set1
[s1
].bracket_match_list
;
2145 new_set
[s1
].backref
= set1
[s1
].backref
;
2146 if (set1
[s1
].tags
== NULL
&& tags
== NULL
)
2147 new_set
[s1
].tags
= NULL
;
2150 for (i
= 0; set1
[s1
].tags
!= NULL
&& set1
[s1
].tags
[i
] >= 0; i
++);
2151 new_tags
= tre_mem_alloc(mem
, (sizeof(*new_tags
)
2152 * (i
+ num_tags
+ 1)));
2153 if (new_tags
== NULL
)
2155 for (j
= 0; j
< i
; j
++)
2156 new_tags
[j
] = set1
[s1
].tags
[j
];
2157 for (i
= 0; i
< num_tags
; i
++)
2158 new_tags
[j
+ i
] = tags
[i
];
2159 new_tags
[j
+ i
] = -1;
2160 new_set
[s1
].tags
= new_tags
;
2162 if (set1
[s1
].params
)
2163 new_set
[s1
].params
= set1
[s1
].params
;
2166 if (!new_set
[s1
].params
)
2167 new_set
[s1
].params
= params
;
2170 new_set
[s1
].params
= tre_mem_alloc(mem
, sizeof(*params
) *
2172 if (!new_set
[s1
].params
)
2174 for (i
= 0; i
< TRE_PARAM_LAST
; i
++)
2175 if (params
[i
] != TRE_PARAM_UNSET
)
2176 new_set
[s1
].params
[i
] = params
[i
];
2181 for (s2
= 0; set2
[s2
].position
>= 0; s2
++)
2183 new_set
[s1
+ s2
].position
= set2
[s2
].position
;
2184 new_set
[s1
+ s2
].code_min
= set2
[s2
].code_min
;
2185 new_set
[s1
+ s2
].code_max
= set2
[s2
].code_max
;
2186 /* XXX - why not | assertions here as well? */
2187 new_set
[s1
+ s2
].assertions
= set2
[s2
].assertions
;
2188 new_set
[s1
+ s2
].bracket_match_list
= set2
[s2
].bracket_match_list
;
2189 new_set
[s1
+ s2
].backref
= set2
[s2
].backref
;
2190 if (set2
[s2
].tags
== NULL
)
2191 new_set
[s1
+ s2
].tags
= NULL
;
2194 for (i
= 0; set2
[s2
].tags
[i
] >= 0; i
++);
2195 new_tags
= tre_mem_alloc(mem
, sizeof(*new_tags
) * (i
+ 1));
2196 if (new_tags
== NULL
)
2198 for (j
= 0; j
< i
; j
++)
2199 new_tags
[j
] = set2
[s2
].tags
[j
];
2201 new_set
[s1
+ s2
].tags
= new_tags
;
2203 if (set2
[s2
].params
)
2204 new_set
[s1
+ s2
].params
= set2
[s2
].params
;
2207 if (!new_set
[s1
+ s2
].params
)
2208 new_set
[s1
+ s2
].params
= params
;
2211 new_set
[s1
+ s2
].params
= tre_mem_alloc(mem
, sizeof(*params
) *
2213 if (!new_set
[s1
+ s2
].params
)
2215 for (i
= 0; i
< TRE_PARAM_LAST
; i
++)
2216 if (params
[i
] != TRE_PARAM_UNSET
)
2217 new_set
[s1
+ s2
].params
[i
] = params
[i
];
2221 new_set
[s1
+ s2
].position
= -1;
2225 /* Finds the empty path through `node' which is the one that should be
2226 taken according to POSIX.2 rules, and adds the tags on that path to
2227 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
2228 set to the number of tags seen on the path. */
2229 static reg_errcode_t
2230 tre_match_empty(tre_stack_t
*stack
, tre_ast_node_t
*node
, int *tags
,
2231 int *assertions
, int *params
, int *num_tags_seen
,
2236 tre_catenation_t
*cat
;
2237 tre_iteration_t
*iter
;
2239 int bottom
= tre_stack_num_objects(stack
);
2240 reg_errcode_t status
= REG_OK
;
2246 status
= tre_stack_push_voidptr(stack
, node
);
2248 /* Walk through the tree recursively. */
2249 while (status
== REG_OK
&& tre_stack_num_objects(stack
) > bottom
)
2251 node
= tre_stack_pop_voidptr(stack
);
2256 lit
= (tre_literal_t
*)node
->obj
;
2257 switch (lit
->code_min
)
2260 if (lit
->code_max
>= 0)
2264 /* Add the tag to `tags'. */
2265 for (i
= 0; tags
[i
] >= 0; i
++)
2266 if (tags
[i
] == lit
->code_max
)
2270 tags
[i
] = lit
->code_max
;
2279 assert(lit
->code_max
>= 1
2280 || lit
->code_max
<= ASSERT_LAST
);
2281 if (assertions
!= NULL
)
2282 *assertions
|= lit
->code_max
;
2286 for (i
= 0; i
< TRE_PARAM_LAST
; i
++)
2287 params
[i
] = lit
->u
.params
[i
];
2288 if (params_seen
!= NULL
)
2300 /* Subexpressions starting earlier take priority over ones
2301 starting later, so we prefer the left subexpression over the
2302 right subexpression. */
2303 uni
= (tre_union_t
*)node
->obj
;
2304 if (uni
->left
->nullable
)
2305 STACK_PUSHX(stack
, voidptr
, uni
->left
)
2306 else if (uni
->right
->nullable
)
2307 STACK_PUSHX(stack
, voidptr
, uni
->right
)
2313 /* The path must go through both children. */
2314 cat
= (tre_catenation_t
*)node
->obj
;
2315 assert(cat
->left
->nullable
);
2316 assert(cat
->right
->nullable
);
2317 STACK_PUSHX(stack
, voidptr
, cat
->left
);
2318 STACK_PUSHX(stack
, voidptr
, cat
->right
);
2322 /* A match with an empty string is preferred over no match at
2323 all, so we go through the argument if possible. */
2324 iter
= (tre_iteration_t
*)node
->obj
;
2325 if (iter
->arg
->nullable
)
2326 STACK_PUSHX(stack
, voidptr
, iter
->arg
);
2342 NFL_POST_CATENATION
,
2344 } tre_nfl_stack_symbol_t
;
2347 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2348 the nodes of the AST `tree'. */
2349 static reg_errcode_t
2350 tre_compute_nfl(tre_mem_t mem
, tre_stack_t
*stack
, tre_ast_node_t
*tree
)
2352 int bottom
= tre_stack_num_objects(stack
);
2354 STACK_PUSHR(stack
, voidptr
, tree
);
2355 STACK_PUSHR(stack
, int, NFL_RECURSE
);
2357 while (tre_stack_num_objects(stack
) > bottom
)
2359 tre_nfl_stack_symbol_t symbol
;
2360 tre_ast_node_t
*node
;
2362 symbol
= (tre_nfl_stack_symbol_t
)tre_stack_pop_int(stack
);
2363 node
= tre_stack_pop_voidptr(stack
);
2371 tre_literal_t
*lit
= (tre_literal_t
*)node
->obj
;
2372 if (IS_BACKREF(lit
))
2374 /* Back references: nullable = false, firstpos = {i},
2377 node
->firstpos
= tre_set_one(mem
, lit
->position
, 0,
2378 TRE_CHAR_MAX
, NULL
, -1);
2379 if (!node
->firstpos
)
2381 node
->lastpos
= tre_set_one(mem
, lit
->position
, 0,
2383 (int)lit
->code_max
);
2387 else if (lit
->code_min
< 0)
2389 /* Tags, empty strings, params, and zero width assertions:
2390 nullable = true, firstpos = {}, and lastpos = {}. */
2392 node
->firstpos
= tre_set_empty(mem
);
2393 if (!node
->firstpos
)
2395 node
->lastpos
= tre_set_empty(mem
);
2401 /* Literal at position i: nullable = false, firstpos = {i},
2405 tre_set_one(mem
, lit
->position
, (int)lit
->code_min
,
2406 (int)lit
->code_max
, NULL
, -1);
2407 if (!node
->firstpos
)
2409 node
->lastpos
= tre_set_one(mem
, lit
->position
,
2412 lit
->u
.bracket_match_list
,
2421 /* Compute the attributes for the two subtrees, and after that
2423 STACK_PUSHR(stack
, voidptr
, node
);
2424 STACK_PUSHR(stack
, int, NFL_POST_UNION
);
2425 STACK_PUSHR(stack
, voidptr
, ((tre_union_t
*)node
->obj
)->right
);
2426 STACK_PUSHR(stack
, int, NFL_RECURSE
);
2427 STACK_PUSHR(stack
, voidptr
, ((tre_union_t
*)node
->obj
)->left
);
2428 STACK_PUSHR(stack
, int, NFL_RECURSE
);
2432 /* Compute the attributes for the two subtrees, and after that
2434 STACK_PUSHR(stack
, voidptr
, node
);
2435 STACK_PUSHR(stack
, int, NFL_POST_CATENATION
);
2436 STACK_PUSHR(stack
, voidptr
, ((tre_catenation_t
*)node
->obj
)->right
);
2437 STACK_PUSHR(stack
, int, NFL_RECURSE
);
2438 STACK_PUSHR(stack
, voidptr
, ((tre_catenation_t
*)node
->obj
)->left
);
2439 STACK_PUSHR(stack
, int, NFL_RECURSE
);
2443 /* Compute the attributes for the subtree, and after that for
2445 STACK_PUSHR(stack
, voidptr
, node
);
2446 STACK_PUSHR(stack
, int, NFL_POST_ITERATION
);
2447 STACK_PUSHR(stack
, voidptr
, ((tre_iteration_t
*)node
->obj
)->arg
);
2448 STACK_PUSHR(stack
, int, NFL_RECURSE
);
2451 break; /* end case: NFL_RECURSE */
2453 case NFL_POST_UNION
:
2455 tre_union_t
*uni
= (tre_union_t
*)node
->obj
;
2456 node
->nullable
= uni
->left
->nullable
|| uni
->right
->nullable
;
2457 node
->firstpos
= tre_set_union(mem
, uni
->left
->firstpos
,
2458 uni
->right
->firstpos
, NULL
, 0, NULL
);
2459 if (!node
->firstpos
)
2461 node
->lastpos
= tre_set_union(mem
, uni
->left
->lastpos
,
2462 uni
->right
->lastpos
, NULL
, 0, NULL
);
2468 case NFL_POST_ITERATION
:
2470 int num_tags
, *tags
, assertions
, params_seen
;
2472 reg_errcode_t status
;
2473 tre_iteration_t
*iter
= (tre_iteration_t
*)node
->obj
;
2475 /* From Ville Laurikari's original 2001 Master's thesis, the
2476 firstpos(n) and lastpos(n) of an iteration is just the
2477 corresponding values of the iteration's argument. Unfortunately,
2478 this isn't sufficient for the following BRE:
2480 \(a*\)*b\(\1\) matched against ab
2482 The backreference wants to force the first subexpression to
2483 be the empty string, but there is no transition for this. So
2484 we need to modify the lastpos(n) of an iteration to be the
2485 equivalent of that of catentation. Using the same notation as
2486 in the thesis, lastpos(n) is redefined as:
2488 if nullable(c1) then
2490 addtags(lastpos(c1),
2495 where c1 is the argument node. firstpos(n) remains the same. */
2497 /* Compute lastpos. */
2498 if (iter
->min
== 0 || iter
->arg
->nullable
)
2501 if (iter
->arg
->nullable
)
2503 /* The arg matches the empty string. Make a first pass
2504 with tre_match_empty() to get the number of tags and
2506 status
= tre_match_empty(stack
, iter
->arg
,
2507 NULL
, NULL
, NULL
, &num_tags
,
2509 if (status
!= REG_OK
)
2511 /* Allocate arrays for the tags and parameters. */
2512 tags
= xmalloc(sizeof(int) * (num_tags
+ 1));
2520 params
= tre_mem_alloc(mem
, sizeof(*params
)
2528 /* Second pass with tre_mach_empty() to get the list of
2529 tags and parameters. */
2530 status
= tre_match_empty(stack
, iter
->arg
, tags
,
2531 &assertions
, params
, NULL
, NULL
);
2532 if (status
!= REG_OK
)
2538 tre_set_union(mem
, iter
->arg
->lastpos
, iter
->arg
->lastpos
,
2539 tags
, assertions
, params
);
2545 node
->lastpos
= iter
->arg
->lastpos
;
2550 node
->lastpos
= iter
->arg
->lastpos
;
2552 node
->firstpos
= iter
->arg
->firstpos
;
2556 case NFL_POST_CATENATION
:
2558 int num_tags
, *tags
, assertions
, params_seen
;
2560 reg_errcode_t status
;
2561 tre_catenation_t
*cat
= node
->obj
;
2562 node
->nullable
= cat
->left
->nullable
&& cat
->right
->nullable
;
2564 /* Compute firstpos. */
2565 if (cat
->left
->nullable
)
2567 /* The left side matches the empty string. Make a first pass
2568 with tre_match_empty() to get the number of tags and
2570 status
= tre_match_empty(stack
, cat
->left
,
2571 NULL
, NULL
, NULL
, &num_tags
,
2573 if (status
!= REG_OK
)
2575 /* Allocate arrays for the tags and parameters. */
2576 tags
= xmalloc(sizeof(*tags
) * (num_tags
+ 1));
2584 params
= tre_mem_alloc(mem
, sizeof(*params
)
2592 /* Second pass with tre_mach_empty() to get the list of
2593 tags and parameters. */
2594 status
= tre_match_empty(stack
, cat
->left
, tags
,
2595 &assertions
, params
, NULL
, NULL
);
2596 if (status
!= REG_OK
)
2602 tre_set_union(mem
, cat
->right
->firstpos
, cat
->left
->firstpos
,
2603 tags
, assertions
, params
);
2605 if (!node
->firstpos
)
2610 node
->firstpos
= cat
->left
->firstpos
;
2613 /* Compute lastpos. */
2614 if (cat
->right
->nullable
)
2616 /* The right side matches the empty string. Make a first pass
2617 with tre_match_empty() to get the number of tags and
2619 status
= tre_match_empty(stack
, cat
->right
,
2620 NULL
, NULL
, NULL
, &num_tags
,
2622 if (status
!= REG_OK
)
2624 /* Allocate arrays for the tags and parameters. */
2625 tags
= xmalloc(sizeof(int) * (num_tags
+ 1));
2633 params
= tre_mem_alloc(mem
, sizeof(*params
)
2641 /* Second pass with tre_mach_empty() to get the list of
2642 tags and parameters. */
2643 status
= tre_match_empty(stack
, cat
->right
, tags
,
2644 &assertions
, params
, NULL
, NULL
);
2645 if (status
!= REG_OK
)
2651 tre_set_union(mem
, cat
->left
->lastpos
, cat
->right
->lastpos
,
2652 tags
, assertions
, params
);
2659 node
->lastpos
= cat
->right
->lastpos
;
2674 /* Adds a transition from each position in `p1' to each position in `p2'. */
2675 static reg_errcode_t
2676 tre_make_trans(tre_pos_and_tags_t
*p1
, tre_pos_and_tags_t
*p2
,
2677 tre_tnfa_transition_t
*transitions
,
2678 int *counts
, int *offs
)
2680 tre_pos_and_tags_t
*orig_p2
= p2
;
2681 tre_tnfa_transition_t
*trans
;
2682 int i
, j
, k
, l
, dup
, prev_p2_pos
;
2684 if (transitions
!= NULL
)
2685 while (p1
->position
>= 0)
2689 while (p2
->position
>= 0)
2691 /* Optimization: if this position was already handled, skip it. */
2692 if (p2
->position
== prev_p2_pos
)
2697 prev_p2_pos
= p2
->position
;
2698 /* Set `trans' to point to the next unused transition from
2699 position `p1->position'. */
2700 trans
= transitions
+ offs
[p1
->position
];
2701 while (trans
->state
!= NULL
)
2704 /* If we find a previous transition from `p1->position' to
2705 `p2->position', it is overwritten. This can happen only
2706 if there are nested loops in the regexp, like in "((a)*)*".
2707 In POSIX.2 repetition using the outer loop is always
2708 preferred over using the inner loop. Therefore the
2709 transition for the inner loop is useless and can be thrown
2711 /* XXX - The same position is used for all nodes in a bracket
2712 expression, so this optimization cannot be used (it will
2713 break bracket expressions) unless I figure out a way to
2715 if (trans
->state_id
== p2
->position
)
2724 if (trans
->state
== NULL
)
2725 (trans
+ 1)->state
= NULL
;
2726 /* Use the character ranges, assertions, etc. from `p1' for
2727 the transition from `p1' to `p2'. */
2728 trans
->code_min
= p1
->code_min
;
2729 trans
->code_max
= p1
->code_max
;
2730 trans
->state
= transitions
+ offs
[p2
->position
];
2731 trans
->state_id
= p2
->position
;
2732 trans
->assertions
= p1
->assertions
| p2
->assertions
2733 | (p1
->bracket_match_list
!= NULL
? ASSERT_BRACKET_MATCH
: 0);
2734 if (p1
->backref
>= 0)
2736 assert((trans
->assertions
& ASSERT_BRACKET_MATCH
) == 0);
2737 assert(p2
->backref
< 0);
2738 trans
->u
.backref
= p1
->backref
;
2739 trans
->assertions
|= ASSERT_BACKREF
;
2741 if (p1
->bracket_match_list
!= NULL
)
2743 trans
->u
.bracket_match_list
=
2744 xmalloc(SIZEOF_BRACKET_MATCH_LIST(p1
->bracket_match_list
));
2745 if (trans
->u
.bracket_match_list
== NULL
)
2747 memcpy(trans
->u
.bracket_match_list
, p1
->bracket_match_list
,
2748 SIZEOF_BRACKET_MATCH_LIST(p1
->bracket_match_list
));
2751 /* Find out how many tags this transition has. */
2753 if (p1
->tags
!= NULL
)
2754 while(p1
->tags
[i
] >= 0)
2757 if (p2
->tags
!= NULL
)
2758 while(p2
->tags
[j
] >= 0)
2761 /* If we are overwriting a transition, free the old tag array. */
2762 if (trans
->tags
!= NULL
)
2766 /* If there were any tags, allocate an array and fill it. */
2769 trans
->tags
= xmalloc(sizeof(*trans
->tags
) * (i
+ j
+ 1));
2773 if (p1
->tags
!= NULL
)
2774 while(p1
->tags
[i
] >= 0)
2776 trans
->tags
[i
] = p1
->tags
[i
];
2781 if (p2
->tags
!= NULL
)
2782 while (p2
->tags
[j
] >= 0)
2784 /* Don't add duplicates. */
2786 for (k
= 0; k
< i
; k
++)
2787 if (trans
->tags
[k
] == p2
->tags
[j
])
2793 trans
->tags
[l
++] = p2
->tags
[j
];
2796 trans
->tags
[l
] = -1;
2799 /* Set the parameter array. If both `p2' and `p1' have same
2800 parameters, the values in `p2' override those in `p1'. */
2801 if (p1
->params
|| p2
->params
)
2804 trans
->params
= xmalloc(sizeof(*trans
->params
)
2808 for (i
= 0; i
< TRE_PARAM_LAST
; i
++)
2810 trans
->params
[i
] = TRE_PARAM_UNSET
;
2811 if (p1
->params
&& p1
->params
[i
] != TRE_PARAM_UNSET
)
2812 trans
->params
[i
] = p1
->params
[i
];
2813 if (p2
->params
&& p2
->params
[i
] != TRE_PARAM_UNSET
)
2814 trans
->params
[i
] = p2
->params
[i
];
2820 xfree(trans
->params
);
2821 trans
->params
= NULL
;
2829 DPRINT((" %2d -> %2d on %3d", p1
->position
, p2
->position
,
2831 if (p1
->code_max
!= p1
->code_min
)
2832 DPRINT(("-%3d", p1
->code_max
));
2836 DPRINT((", tags ["));
2839 DPRINT(("%d", *tags
));
2846 if (trans
->assertions
)
2847 DPRINT((", assert %d", trans
->assertions
));
2848 if (trans
->assertions
& ASSERT_BACKREF
)
2849 DPRINT((", backref %d", trans
->u
.backref
));
2850 else if (trans
->assertions
& ASSERT_BRACKET_MATCH
)
2851 DPRINT((", bracket_match_list %p",
2852 trans
->u
.bracket_match_list
));
2856 tre_print_params(trans
->params
);
2860 #endif /* TRE_DEBUG */
2866 /* Compute a maximum limit for the number of transitions leaving
2868 while (p1
->position
>= 0)
2871 while (p2
->position
>= 0)
2873 counts
[p1
->position
]++;
2881 /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
2882 labelled with one character range (there are no transitions on empty
2883 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
2885 static reg_errcode_t
2886 tre_ast_to_tnfa(tre_ast_node_t
*node
, tre_tnfa_transition_t
*transitions
,
2887 int *counts
, int *offs
)
2890 tre_catenation_t
*cat
;
2891 tre_iteration_t
*iter
;
2892 reg_errcode_t errcode
= REG_OK
;
2894 /* XXX - recurse using a stack!. */
2900 uni
= (tre_union_t
*)node
->obj
;
2901 errcode
= tre_ast_to_tnfa(uni
->left
, transitions
, counts
, offs
);
2902 if (errcode
!= REG_OK
)
2904 errcode
= tre_ast_to_tnfa(uni
->right
, transitions
, counts
, offs
);
2908 cat
= (tre_catenation_t
*)node
->obj
;
2909 /* Add a transition from each position in cat->left->lastpos
2910 to each position in cat->right->firstpos. */
2911 errcode
= tre_make_trans(cat
->left
->lastpos
, cat
->right
->firstpos
,
2912 transitions
, counts
, offs
);
2913 if (errcode
!= REG_OK
)
2915 errcode
= tre_ast_to_tnfa(cat
->left
, transitions
, counts
, offs
);
2916 if (errcode
!= REG_OK
)
2918 errcode
= tre_ast_to_tnfa(cat
->right
, transitions
, counts
, offs
);
2922 iter
= (tre_iteration_t
*)node
->obj
;
2923 assert(iter
->max
== -1 || iter
->max
== 1);
2925 if (iter
->max
== -1)
2927 assert(iter
->min
== 0 || iter
->min
== 1);
2928 /* Add a transition from each last position in the iterated
2929 expression to each first position. */
2930 errcode
= tre_make_trans(iter
->arg
->lastpos
, iter
->arg
->firstpos
,
2931 transitions
, counts
, offs
);
2932 if (errcode
!= REG_OK
)
2935 errcode
= tre_ast_to_tnfa(iter
->arg
, transitions
, counts
, offs
);
2942 #define ERROR_EXIT(err) \
2946 if (/*CONSTCOND*/1) \
2949 while (/*CONSTCOND*/0)
2953 tre_compile(regex_t
*preg
, const tre_char_t
*regex
, size_t n
, int cflags
,
2957 tre_ast_node_t
*tree
, *tmp_ast_l
, *tmp_ast_r
;
2958 tre_pos_and_tags_t
*p
;
2959 int *counts
= NULL
, *offs
= NULL
;
2961 tre_tnfa_transition_t
*transitions
, *initial
;
2962 tre_tnfa_t
*tnfa
= NULL
;
2963 tre_submatch_data_t
*submatch_data
= NULL
;
2964 tre_tag_direction_t
*tag_directions
= NULL
;
2965 reg_errcode_t errcode
;
2968 /* Parse context. */
2969 tre_parse_ctx_t parse_ctx
;
2971 /* Allocate a stack used throughout the compilation process for various
2973 stack
= tre_stack_new(512, 10240, 128);
2976 /* Allocate a fast memory allocator. */
2977 mem
= tre_mem_new();
2980 tre_stack_destroy(stack
);
2984 /* Parse the regexp. */
2985 memset(&parse_ctx
, 0, sizeof(parse_ctx
));
2986 parse_ctx
.mem
= mem
;
2987 parse_ctx
.stack
= stack
;
2988 parse_ctx
.re
= regex
;
2990 /* Only allow REG_UNGREEDY to be set if both REG_ENHANCED and REG_EXTENDED
2992 if ((cflags
& (REG_ENHANCED
| REG_EXTENDED
)) != (REG_ENHANCED
| REG_EXTENDED
))
2993 cflags
&= ~REG_UNGREEDY
;
2994 parse_ctx
.cflags
= cflags
;
2995 parse_ctx
.max_backref
= -1;
2996 parse_ctx
.loc
= loc
;
2997 parse_ctx
.submatch_id_invisible
= SUBMATCH_ID_INVISIBLE_START
;
2999 DPRINT(("tre_compile: parsing '%.*" STRF
"'\n", (int)n
, regex
));
3000 errcode
= tre_parse(&parse_ctx
);
3001 if (errcode
!= REG_OK
)
3002 ERROR_EXIT(errcode
);
3003 preg
->re_nsub
= parse_ctx
.submatch_id
- 1;
3004 tree
= parse_ctx
.result
;
3006 /* Back references and approximate matching cannot currently be used
3007 in the same regexp. */
3008 if (parse_ctx
.max_backref
>= 0 && parse_ctx
.have_approx
)
3009 ERROR_EXIT(REG_BADPAT
);
3012 tre_ast_print(tree
);
3013 #endif /* TRE_DEBUG */
3015 /* Referring to nonexistent subexpressions is illegal. */
3016 if (parse_ctx
.max_backref
> (int)preg
->re_nsub
)
3017 ERROR_EXIT(REG_ESUBREG
);
3019 /* Allocate the TNFA struct. */
3020 tnfa
= xcalloc(1, sizeof(tre_tnfa_t
));
3022 ERROR_EXIT(REG_ESPACE
);
3023 tnfa
->have_backrefs
= parse_ctx
.max_backref
>= 0;
3024 tnfa
->have_approx
= parse_ctx
.have_approx
;
3025 tnfa
->num_submatches
= parse_ctx
.submatch_id
;
3026 tnfa
->num_submatches_invisible
= parse_ctx
.submatch_id_invisible
3027 - SUBMATCH_ID_INVISIBLE_START
;
3028 tnfa
->num_reorder_tags
= parse_ctx
.num_reorder_tags
;
3029 tnfa
->loc
= parse_ctx
.loc
;
3031 /* Set up tags for submatch addressing. If REG_NOSUB is set and the
3032 regexp does not have back references, this can be skipped. */
3033 if (tnfa
->num_reorder_tags
> 0 || !(cflags
& REG_NOSUB
))
3035 DPRINT(("tre_compile: setting up tags\n"));
3037 /* Figure out how many tags we will need. */
3038 errcode
= tre_add_tags(NULL
, stack
, tree
, tnfa
);
3039 if (errcode
!= REG_OK
)
3040 ERROR_EXIT(errcode
);
3042 tre_ast_print(tree
);
3043 #endif /* TRE_DEBUG */
3045 if (tnfa
->num_tags
> 0)
3047 tag_directions
= xmalloc(sizeof(*tag_directions
)
3048 * (tnfa
->num_tags
+ 1));
3049 if (tag_directions
== NULL
)
3050 ERROR_EXIT(REG_ESPACE
);
3051 tnfa
->tag_directions
= tag_directions
;
3052 memset(tag_directions
, -1,
3053 sizeof(*tag_directions
) * (tnfa
->num_tags
+ 1));
3055 tnfa
->minimal_tags
= xcalloc((unsigned)tnfa
->num_tags
* 2 + 3,
3056 sizeof(*tnfa
->minimal_tags
));
3057 if (tnfa
->minimal_tags
== NULL
)
3058 ERROR_EXIT(REG_ESPACE
);
3060 submatch_data
= xcalloc((unsigned)parse_ctx
.submatch_id
,
3061 sizeof(*submatch_data
));
3062 if (submatch_data
== NULL
)
3063 ERROR_EXIT(REG_ESPACE
);
3064 /* Set the eo_tag value to -1 to indicate that that corresponding
3065 * submatch has not be completed yet */
3066 for (i
= 0; i
< parse_ctx
.submatch_id
; i
++)
3068 submatch_data
[i
].eo_tag
= -1;
3070 tnfa
->submatch_data
= submatch_data
;
3072 errcode
= tre_add_tags(mem
, stack
, tree
, tnfa
);
3073 if (errcode
!= REG_OK
)
3074 ERROR_EXIT(errcode
);
3077 for (i
= 0; i
< parse_ctx
.submatch_id
; i
++)
3078 DPRINT(("pmatch[%d] = {t%d, t%d}\n",
3079 i
, submatch_data
[i
].so_tag
, submatch_data
[i
].eo_tag
));
3080 for (i
= 0; i
<= tnfa
->num_tags
; i
++)
3081 DPRINT(("t%d is %s\n", i
, tag_dir_str
[tag_directions
[i
]]));
3082 #endif /* TRE_DEBUG */
3085 /* Expand iteration nodes. */
3086 errcode
= tre_expand_ast(mem
, stack
, tree
, &parse_ctx
.position
,
3087 tag_directions
, &tnfa
->params_depth
);
3088 if (errcode
!= REG_OK
)
3089 ERROR_EXIT(errcode
);
3091 /* Add a dummy node for the final state.
3092 XXX - For certain patterns this dummy node can be optimized away,
3093 for example "a*" or "ab*". Figure out a simple way to detect
3094 this possibility. */
3096 tmp_ast_r
= tre_ast_new_literal(mem
, 0, 0, parse_ctx
.position
++);
3097 if (tmp_ast_r
== NULL
)
3098 ERROR_EXIT(REG_ESPACE
);
3100 tree
= tre_ast_new_catenation(mem
, tmp_ast_l
, tmp_ast_r
);
3102 ERROR_EXIT(REG_ESPACE
);
3105 tre_ast_print(tree
);
3106 DPRINT(("Number of states: %d\n", parse_ctx
.position
));
3108 for (i
= 0; i
< parse_ctx
.submatch_id
; i
++)
3109 DPRINT(("pmatch[%d] = {t%d, t%d}\n",
3110 i
, submatch_data
[i
].so_tag
, submatch_data
[i
].eo_tag
));
3112 for (i
= 0; i
<= tnfa
->num_tags
; i
++)
3113 DPRINT(("t%d is %s\n", i
, tag_dir_str
[tag_directions
[i
]]));
3114 #endif /* TRE_DEBUG */
3116 errcode
= tre_compute_nfl(mem
, stack
, tree
);
3117 if (errcode
!= REG_OK
)
3118 ERROR_EXIT(errcode
);
3120 counts
= xmalloc(sizeof(int) * parse_ctx
.position
);
3122 ERROR_EXIT(REG_ESPACE
);
3124 offs
= xmalloc(sizeof(int) * parse_ctx
.position
);
3126 ERROR_EXIT(REG_ESPACE
);
3128 for (i
= 0; i
< parse_ctx
.position
; i
++)
3130 tre_ast_to_tnfa(tree
, NULL
, counts
, NULL
);
3133 for (i
= 0; i
< parse_ctx
.position
; i
++)
3136 add
+= counts
[i
] + 1;
3139 transitions
= xcalloc((unsigned)add
+ 1, sizeof(*transitions
));
3140 if (transitions
== NULL
)
3141 ERROR_EXIT(REG_ESPACE
);
3142 tnfa
->transitions
= transitions
;
3143 tnfa
->num_transitions
= add
;
3145 DPRINT(("Converting to TNFA:\n"));
3146 errcode
= tre_ast_to_tnfa(tree
, transitions
, counts
, offs
);
3147 if (errcode
!= REG_OK
)
3148 ERROR_EXIT(errcode
);
3150 #ifdef USE_FIRSTPOS_CHARS /* not defined */
3151 /* If in eight bit mode, compute a table of characters that can be the
3152 first character of a match. */
3153 tnfa
->first_char
= -1;
3154 if (TRE_MB_CUR_MAX_L(tnfa
->loc
) == 1 && !tmp_ast_l
->nullable
)
3158 DPRINT(("Characters that can start a match:"));
3159 tnfa
->firstpos_chars
= xcalloc(256, sizeof(char));
3160 if (tnfa
->firstpos_chars
== NULL
)
3161 ERROR_EXIT(REG_ESPACE
);
3162 for (p
= tree
->firstpos
; p
->position
>= 0; p
++)
3164 tre_tnfa_transition_t
*j
= transitions
+ offs
[p
->position
];
3165 while (j
->state
!= NULL
)
3167 for (k
= j
->code_min
; k
<= j
->code_max
&& k
< 256; k
++)
3170 tnfa
->firstpos_chars
[k
] = 1;
3177 #define TRE_OPTIMIZE_FIRST_CHAR 1
3178 #if TRE_OPTIMIZE_FIRST_CHAR
3181 for (k
= 0; k
< 256; k
++)
3182 if (tnfa
->firstpos_chars
[k
])
3184 DPRINT(("first char must be %d\n", k
));
3185 tnfa
->first_char
= k
;
3186 xfree(tnfa
->firstpos_chars
);
3187 tnfa
->firstpos_chars
= NULL
;
3195 tnfa
->firstpos_chars
= NULL
;
3196 #else /* !USE_FIRSTPOS_CHARS */
3198 /* Set first_char only if there is only one character that can be the
3199 first character of a match */
3200 tnfa
->first_char
= -1;
3201 if (!tmp_ast_l
->nullable
)
3204 for (p
= tree
->firstpos
; scanning
&& p
->position
>= 0; p
++)
3206 tre_tnfa_transition_t
*j
= transitions
+ offs
[p
->position
];
3207 while (j
->state
!= NULL
)
3209 if (j
->code_min
<= j
->code_max
)
3211 if (j
->code_max
!= j
->code_min
|| j
->code_min
== -1 || tnfa
->first_char
!= -1)
3213 tnfa
->first_char
= -1;
3217 tnfa
->first_char
= j
->code_min
;
3223 if (tnfa
->first_char
>= 0)
3224 DPRINT(("first char must be %d\n", tnfa
->first_char
));
3225 #endif /* TRE_DEBUG */
3227 #endif /* !USE_FIRSTPOS_CHARS */
3231 while (p
->position
>= 0)
3238 DPRINT(("initial: %d", p
->position
));
3246 DPRINT(("%d", *tags
));
3252 DPRINT((", assert %d", p
->assertions
));
3256 tre_print_params(p
->params
);
3260 #endif /* TRE_DEBUG */
3265 initial
= xcalloc((unsigned)i
+ 1, sizeof(tre_tnfa_transition_t
));
3266 if (initial
== NULL
)
3267 ERROR_EXIT(REG_ESPACE
);
3268 tnfa
->initial
= initial
;
3271 for (p
= tree
->firstpos
; p
->position
>= 0; p
++)
3273 initial
[i
].state
= transitions
+ offs
[p
->position
];
3274 initial
[i
].state_id
= p
->position
;
3275 initial
[i
].tags
= NULL
;
3276 /* Copy the arrays p->tags, and p->params, they are allocated
3277 from a tre_mem object. */
3281 for (j
= 0; p
->tags
[j
] >= 0; j
++);
3282 initial
[i
].tags
= xmalloc(sizeof(*p
->tags
) * (j
+ 1));
3283 if (!initial
[i
].tags
)
3284 ERROR_EXIT(REG_ESPACE
);
3285 memcpy(initial
[i
].tags
, p
->tags
, sizeof(*p
->tags
) * (j
+ 1));
3287 initial
[i
].params
= NULL
;
3290 initial
[i
].params
= xmalloc(sizeof(*p
->params
) * TRE_PARAM_LAST
);
3291 if (!initial
[i
].params
)
3292 ERROR_EXIT(REG_ESPACE
);
3293 memcpy(initial
[i
].params
, p
->params
,
3294 sizeof(*p
->params
) * TRE_PARAM_LAST
);
3296 initial
[i
].assertions
= p
->assertions
;
3299 initial
[i
].state
= NULL
;
3301 tnfa
->num_transitions
= add
;
3302 tnfa
->final
= transitions
+ offs
[tree
->lastpos
[0].position
];
3303 tnfa
->num_states
= parse_ctx
.position
;
3304 tnfa
->cflags
= cflags
;
3306 DPRINT(("final state %d (%p)\n", tree
->lastpos
[0].position
,
3307 (void *)tnfa
->final
));
3309 tre_mem_destroy(mem
);
3310 tre_stack_destroy(stack
);
3314 #ifdef TRE_USE_SYSTEM_REGEX_H
3315 preg
->re_magic
= RE_MAGIC
;
3316 #endif /* TRE_USE_SYSTEM_REGEX_H */
3317 preg
->TRE_REGEX_T_FIELD
= (void *)tnfa
;
3319 /* In Libc, we need to retain the locale. Outside Libc, we already called
3320 duplocale() which does the retaining. */
3321 XL_RETAIN(tnfa
->loc
);
3322 #endif /* __LIBC__ */
3326 /* Free everything that was allocated and return the error code. */
3327 tre_mem_destroy(mem
);
3329 tre_stack_destroy(stack
);
3335 /* Set tnfa into preg, so that calling tre_free() will free the contents
3336 of tnfa. But in Libc, NULL out the loc field since we never retained
3337 the locale. Outside Libc, we let tre_free() call freelocale(). */
3338 preg
->TRE_REGEX_T_FIELD
= (void *)tnfa
;
3340 if(tnfa
) tnfa
->loc
= NULL
;
3341 #endif /* __LIBC__ */
3351 tre_free(regex_t
*preg
)
3355 tre_tnfa_transition_t
*trans
;
3357 #ifdef TRE_USE_SYSTEM_REGEX_H
3359 #endif /* TRE_USE_SYSTEM_REGEX_H */
3360 tnfa
= (void *)preg
->TRE_REGEX_T_FIELD
;
3363 preg
->TRE_REGEX_T_FIELD
= NULL
;
3365 for (i
= 0; i
< tnfa
->num_transitions
; i
++)
3366 if (tnfa
->transitions
[i
].state
)
3368 if (tnfa
->transitions
[i
].tags
)
3369 xfree(tnfa
->transitions
[i
].tags
);
3370 if (tnfa
->transitions
[i
].assertions
& ASSERT_BRACKET_MATCH
)
3371 xfree(tnfa
->transitions
[i
].u
.bracket_match_list
);
3372 if (tnfa
->transitions
[i
].params
)
3373 xfree(tnfa
->transitions
[i
].params
);
3375 if (tnfa
->transitions
)
3376 xfree(tnfa
->transitions
);
3380 for (trans
= tnfa
->initial
; trans
->state
; trans
++)
3385 xfree(trans
->params
);
3387 xfree(tnfa
->initial
);
3390 if (tnfa
->submatch_data
)
3392 xfree(tnfa
->submatch_data
);
3395 if (tnfa
->tag_directions
)
3396 xfree(tnfa
->tag_directions
);
3397 #ifdef USE_FIRSTPOS_CHARS /* not defined */
3398 if (tnfa
->firstpos_chars
)
3399 xfree(tnfa
->firstpos_chars
);
3400 #endif /* USE_FIRSTPOS_CHARS */
3401 if (tnfa
->minimal_tags
)
3402 xfree(tnfa
->minimal_tags
);
3406 XL_RELEASE(tnfa
->loc
);
3407 #else /* !__LIBC__ */
3408 freelocale(tnfa
->loc
);
3409 #endif /* !__LIBC__ */
3411 if (tnfa
->last_matched_branch
)
3412 xfree(tnfa
->last_matched_branch
);
3421 static char str
[256];
3426 (void) tre_config(TRE_CONFIG_VERSION
, &version
);
3427 (void) snprintf(str
, sizeof(str
), "TRE %s (BSD)", version
);
3433 tre_config(int query
, void *result
)
3435 int *int_result
= result
;
3436 const char **string_result
= result
;
3440 case TRE_CONFIG_APPROX
:
3443 #else /* !TRE_APPROX */
3445 #endif /* !TRE_APPROX */
3448 case TRE_CONFIG_WCHAR
:
3451 #else /* !TRE_WCHAR */
3453 #endif /* !TRE_WCHAR */
3456 case TRE_CONFIG_MULTIBYTE
:
3457 #ifdef TRE_MULTIBYTE
3459 #else /* !TRE_MULTIBYTE */
3461 #endif /* !TRE_MULTIBYTE */
3464 case TRE_CONFIG_SYSTEM_ABI
:
3465 #ifdef TRE_CONFIG_SYSTEM_ABI
3467 #else /* !TRE_CONFIG_SYSTEM_ABI */
3469 #endif /* !TRE_CONFIG_SYSTEM_ABI */
3472 case TRE_CONFIG_VERSION
:
3473 *string_result
= TRE_VERSION
;
3479 #endif /* !__LIBC__ */
3481 #pragma clang diagnostic push