2 tre-compile.c - TRE regex compiler
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
11 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
18 #endif /* HAVE_CONFIG_H */
24 #include "tre-internal.h"
26 #include "tre-stack.h"
28 #include "tre-parse.h"
29 #include "tre-compile.h"
31 #include "tre-last-matched.h"
35 The bit_ffs() macro in bitstring.h is flawed. Replace it with a working one.
38 #define bit_ffs(name, nbits, value) { \
39 register bitstr_t *_name = name; \
40 register int _byte, _nbits = nbits; \
41 register int _stopbyte = _bit_byte(_nbits), _value = -1; \
42 for (_byte = 0; _byte <= _stopbyte; ++_byte) \
44 _value = _byte << 3; \
45 for (_stopbyte = _name[_byte]; !(_stopbyte&0x1); \
46 ++_value, _stopbyte >>= 1); \
53 Algorithms to setup tags so that submatch addressing can be done.
58 static const char *tag_dir_str
[] = {
64 static const char _indent
[] = " ";
67 print_indent(int indent
)
73 static void print_last_matched_pre(tre_last_matched_pre_t
*lm
, int indent
,
76 print_last_match_branch_pre(tre_last_matched_branch_pre_t
*branch
, int indent
,
79 tre_last_matched_pre_t
*u
= branch
->last_matched
;
80 int n_last_matched
= 0;
89 DPRINT(("BRANCH: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
90 branch
->tot_branches
, branch
->tot_last_matched
, branch
->tot_tags
));
92 DPRINT(("..n_last_matched=%d last_matched=%d\n", branch
->n_last_matched
,
94 if (branch
->n_last_matched
!= n_last_matched
)
95 DPRINT(("*** mismatch between n_last_matched and unions ***\n"));
96 if (branch
->cmp_tag
> 0)
99 const char *sep
= " tags=";
100 print_indent(indent
);
101 DPRINT(("..cmp_tag=%d n_tags=%d", branch
->cmp_tag
, branch
->n_tags
));
102 for (i
= 0; i
< num_tags
; i
++)
103 if (bit_test(branch
->tags
, i
))
105 DPRINT(("%s%d", sep
, i
));
111 u
= branch
->last_matched
;
115 print_last_matched_pre(u
, indent
, num_tags
);
121 print_last_matched_pre(tre_last_matched_pre_t
*lm
, int indent
, int num_tags
)
123 tre_last_matched_branch_pre_t
*b
= lm
->branches
;
132 print_indent(indent
);
133 DPRINT(("LAST_MATCHED: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
134 lm
->tot_branches
, lm
->tot_last_matched
, lm
->tot_tags
));
135 print_indent(indent
);
136 DPRINT(("..start_tag=%d n_branches=%d branches=%d\n", lm
->start_tag
,
137 lm
->n_branches
, n_branches
));
138 if (lm
->n_branches
!= n_branches
)
139 DPRINT(("*** mismatch between n and branches ***\n"));
145 print_last_match_branch_pre(b
, indent
, num_tags
);
150 static void print_last_matched(tre_last_matched_t
*lm
, int indent
);
152 print_last_match_branch(tre_last_matched_branch_t
*branch
, int indent
)
154 tre_last_matched_t
*u
;
157 print_indent(indent
);
158 DPRINT(("BRANCH: n_last_matched=%d\n", branch
->n_last_matched
));
159 if (branch
->cmp_tag
> 0)
161 print_indent(indent
);
162 DPRINT(("..cmp_tag=%d n_tags=%d", branch
->cmp_tag
, branch
->n_tags
));
163 if (branch
->n_tags
> 0)
165 const char *sep
= " tags=";
166 for (i
= 0; i
< branch
->n_tags
; i
++)
168 DPRINT(("%s%d", sep
, branch
->tags
[i
]));
175 u
= branch
->last_matched
;
177 for (i
= branch
->n_last_matched
; i
> 0; i
--, u
++)
178 print_last_matched(u
, indent
);
182 print_last_matched(tre_last_matched_t
*lm
, int indent
)
185 tre_last_matched_branch_t
*b
;
187 print_indent(indent
);
188 DPRINT(("LAST_MATCHED: n_branches=%d start_tag=%d\n", lm
->n_branches
,
193 for (i
= lm
->n_branches
; i
> 0; i
--, b
++)
194 print_last_match_branch(b
, indent
);
196 #endif /* TRE_DEBUG */
199 /* Merge the tre_last_matched_branch_pre_t of src into dst, creating a new
200 one if needed. If tag_id > 0, add that tag as well (a negative tag_id will
201 create an unset tre_last_matched_branch_pre_t. */
203 tre_merge_branches(tre_mem_t mem
, tre_ast_node_t
*dst
, tre_ast_node_t
*src
,
204 int tag_id
, int num_tags
)
206 tre_last_matched_branch_pre_t
*db
= dst
->last_matched_branch
;
207 tre_last_matched_branch_pre_t
*sb
= (src
? src
->last_matched_branch
: NULL
);
213 bitstr_t
*l
= db
->tags
;
214 bitstr_t
*r
= sb
->tags
;
215 int i
= bitstr_size(num_tags
);
219 /* db and sb are the info from two parallel sub-trees, so the tags
220 must be mutually exclusive, and we can just add their numbers */
221 db
->n_tags
+= sb
->n_tags
;
222 db
->tot_tags
+= sb
->tot_tags
;
223 if (db
->last_matched
)
225 if (sb
->last_matched
)
227 tre_last_matched_pre_t
*u
= db
->last_matched
;
231 u
->next
= sb
->last_matched
;
232 db
->n_last_matched
+= sb
->n_last_matched
;
233 db
->tot_branches
+= sb
->tot_branches
;
234 db
->tot_last_matched
+= sb
->tot_last_matched
;
237 else if (sb
->last_matched
)
239 db
->last_matched
= sb
->last_matched
;
240 db
->n_last_matched
= sb
->n_last_matched
;
241 db
->tot_branches
= sb
->tot_branches
;
242 db
->tot_last_matched
= sb
->tot_last_matched
;
253 db
= tre_mem_calloc(mem
, sizeof(tre_last_matched_branch_pre_t
)
254 + bitstr_size(num_tags
));
257 db
->tot_branches
= 1;
261 /* tag_id is a new tag, and shouldn't exist in db's tags,
262 so we can always increment n_tags */
263 bit_set(db
->tags
, tag_id
);
268 dst
->last_matched_branch
= db
;
273 /* Inserts a catenation node to the root of the tree given in `node'.
274 As the left child a new tag with number `tag_id' to `node' is added,
275 and the right child is the old root. */
277 tre_add_tag_left(tre_mem_t mem
, tre_ast_node_t
*node
, int tag_id
)
281 DPRINT(("add_tag_left: tag %d\n", tag_id
));
283 c
= tre_mem_alloc(mem
, sizeof(*c
));
286 c
->left
= tre_ast_new_literal(mem
, TAG
, tag_id
, -1);
289 c
->right
= tre_mem_calloc(mem
, sizeof(tre_ast_node_t
));
290 if (c
->right
== NULL
)
293 c
->right
->obj
= node
->obj
;
294 c
->right
->type
= node
->type
;
295 c
->right
->last_matched_branch
= node
->last_matched_branch
;
296 c
->right
->nullable
= -1;
297 c
->right
->submatch_id
= -1;
299 node
->type
= CATENATION
;
300 node
->original
= c
->right
;
304 /* Inserts a catenation node to the root of the tree given in `node'.
305 As the right child a new tag with number `tag_id' to `node' is added,
306 and the left child is the old root. */
308 tre_add_tag_right(tre_mem_t mem
, tre_ast_node_t
*node
, int tag_id
)
312 DPRINT(("tre_add_tag_right: tag %d\n", tag_id
));
314 c
= tre_mem_alloc(mem
, sizeof(*c
));
317 c
->right
= tre_ast_new_literal(mem
, TAG
, tag_id
, -1);
318 if (c
->right
== NULL
)
320 c
->left
= tre_mem_calloc(mem
, sizeof(tre_ast_node_t
));
324 c
->left
->obj
= node
->obj
;
325 c
->left
->type
= node
->type
;
326 c
->left
->last_matched_branch
= node
->last_matched_branch
;
327 c
->left
->nullable
= -1;
328 c
->left
->submatch_id
= -1;
330 node
->type
= CATENATION
;
331 node
->original
= c
->left
;
337 ADDTAGS_RECURSE_NOT_TOP_UNION
,
338 ADDTAGS_AFTER_ITERATION
,
339 ADDTAGS_AFTER_UNION_LEFT
,
340 ADDTAGS_AFTER_UNION_RIGHT
,
341 ADDTAGS_AFTER_CAT_LEFT
,
342 ADDTAGS_AFTER_CAT_RIGHT
,
343 ADDTAGS_SET_SUBMATCH_END
,
344 ADDTAGS_UNION_RECURSE
,
345 ADDTAGS_UNION_RIGHT_RECURSE
,
346 ADDTAGS_AFTER_UNION_TOP
,
347 } tre_addtags_symbol_t
;
350 COPY_LAST_MATCHED_BRANCH
,
351 COPY_LAST_MATCHED_BRANCH_NEXT
,
353 COPY_LAST_MATCHED_NEXT
,
357 #define REGSET_UNSET ((unsigned)-1)
359 /* Go through `regset' and set submatch data for submatches that are
362 tre_purge_regset(unsigned *regset
, tre_tnfa_t
*tnfa
, int tag
)
366 for (i
= 0; regset
[i
] != REGSET_UNSET
; i
++)
368 int id
= regset
[i
] / 2;
369 int start
= !(regset
[i
] % 2);
370 if (id
>= SUBMATCH_ID_INVISIBLE_START
)
372 DPRINT((" Using tag %d for %s offset of "
373 "submatch %d\n", tag
,
374 start
? "start" : "end", id
));
376 tnfa
->submatch_data
[id
].so_tag
= tag
;
378 tnfa
->submatch_data
[id
].eo_tag
= tag
;
384 #define REGSET_HAS_STARTS 0x1
385 #define REGSET_HAS_ENDS 0x2
388 /* Adds tags to appropriate locations in the parse tree in `tree', so that
389 subexpressions marked for submatch addressing can be traced. */
391 tre_add_tags(tre_mem_t mem
, tre_stack_t
*stack
, tre_ast_node_t
*tree
,
394 reg_errcode_t status
= REG_OK
;
395 tre_addtags_symbol_t symbol
;
396 tre_ast_node_t
*node
= tree
; /* Tree node we are currently looking at. */
397 int bottom
= tre_stack_num_objects(stack
);
398 /* True for first pass (counting number of needed tags) */
399 int first_pass
= (mem
== NULL
|| tnfa
== NULL
);
400 unsigned *regset
, *orig_regset
;
401 int regset_contains
= 0;
402 int num_tags
= 0; /* Total number of tags. */
403 int num_minimals
= 0; /* Number of special minimal tags. */
404 int tag
= 0; /* The tag that is to be added next. */
405 int next_tag
= 1; /* Next tag to use after this one. */
406 int minimal_tag
= -1; /* Tag that marks the beginning of a minimal match. */
407 int *reorder_tags
= NULL
; /* Tag reorder array: a pair for each reorder,
408 * the first is the tag to reorder, the second
409 * is the tag after which the first is reordered */
410 int *rtp
; /* Pointer used to fill in reorder_tags and
412 int *to_reorder
; /* Transform array converting sequential order to
413 * that specified by reorder_tags */
416 tre_tag_direction_t direction
= TRE_TAG_LEFT_MAXIMIZE
;
419 DPRINT(("Initializing direction to %s\n", tag_dir_str
[direction
]));
421 tnfa
->minimal_tags
[0] = -1;
424 regset
= xmalloc(sizeof(*regset
) * ((tnfa
->num_submatches
425 + tnfa
->num_submatches_invisible
+ 1) * 2));
431 regset
[0] = REGSET_UNSET
;
432 orig_regset
= regset
;
436 /* Allocate all memory for reorder_tags, tag_order, to_seq_order and
437 * to_reorder in one batch (assuming all are the same type) */
438 rtp
= reorder_tags
= xmalloc(sizeof(*reorder_tags
) *
439 ((2 * tnfa
->num_reorder_tags
+ 1) +
441 if (reorder_tags
== NULL
)
444 goto error_reorder_tags
;
446 to_reorder
= reorder_tags
+ (2 * tnfa
->num_reorder_tags
+ 1);
449 STACK_PUSH(stack
, voidptr
, node
);
450 STACK_PUSH(stack
, int, ADDTAGS_RECURSE
);
452 while (tre_stack_num_objects(stack
) > bottom
)
454 if (status
!= REG_OK
)
457 symbol
= (tre_addtags_symbol_t
)tre_stack_pop_int(stack
);
462 case ADDTAGS_SET_SUBMATCH_END
:
466 id
= tre_stack_pop_int(stack
);
467 node
= tre_stack_pop_voidptr(stack
);
468 /* Add end of this submatch to regset. */
469 for (i
= 0; regset
[i
] != REGSET_UNSET
; i
++);
470 regset
[i
] = id
* 2 + 1;
472 regset_contains
|= REGSET_HAS_ENDS
;
474 /* Always put a tag after a minimal iterator. */
475 if (minimal_tag
>= 0)
480 DPRINT((" ADDTAGS_SET_SUBMATCH_END: node->num_tags = %d\n",
486 status
= tre_merge_branches(mem
, node
, NULL
, tag
,
488 if (status
!= REG_OK
)
490 status
= tre_add_tag_right(mem
, node
, tag
);
491 if (status
!= REG_OK
)
493 tnfa
->tag_directions
[tag
] = TRE_TAG_MINIMIZE
;
494 DPRINT(("Setting t%d direction to %s\n", tag
,
495 tag_dir_str
[tnfa
->tag_directions
[tag
]]));
496 DPRINT(("Minimal %d, %d\n", minimal_tag
, tag
));
497 for (i
= 0; tnfa
->minimal_tags
[i
] >= 0; i
++);
498 tnfa
->minimal_tags
[i
] = tag
;
499 tnfa
->minimal_tags
[i
+ 1] = minimal_tag
;
500 tnfa
->minimal_tags
[i
+ 2] = -1;
502 DPRINT((" Minimal end: t%d reordered to "
503 "after t%d\n", tag
, minimal_tag
));
504 /* Append to tag_order, move "tag" after
507 *rtp
++ = minimal_tag
;
510 tre_purge_regset(regset
, tnfa
, tag
);
514 DPRINT((" ADDTAGS_SET_SUBMATCH_END num_tags++ tag=%d\n", tag
));
515 regset
[0] = REGSET_UNSET
;
524 case ADDTAGS_RECURSE_NOT_TOP_UNION
:
525 /* Like ADDTAGS_RECURSE, except that top_union is set to zero,
526 * indicating that if a union is being processed, it is not the
527 * top-most of a series */
529 goto do_addtags_recurse
;
531 case ADDTAGS_RECURSE
:
532 /* Setting top_union to 1 means that if a union is begin processed,
533 * it is the top-most of a series, and should recurse through the
534 * series to set the left_tag and right_tag values */
538 node
= tre_stack_pop_voidptr(stack
);
540 id
= node
->submatch_id
;
546 /* Add start of this submatch to regset. */
547 for (i
= 0; regset
[i
] != REGSET_UNSET
; i
++);
550 regset_contains
|= REGSET_HAS_STARTS
;
552 /* Add end of this submatch to regset after processing this
554 STACK_PUSH(stack
, voidptr
, node
);
555 STACK_PUSHX(stack
, int, id
);
556 STACK_PUSHX(stack
, int, ADDTAGS_SET_SUBMATCH_END
);
563 tre_literal_t
*lit
= node
->obj
;
565 if (!IS_SPECIAL(lit
) || IS_BACKREF(lit
) || IS_EMPTY(lit
) || IS_ASSERTION(lit
))
567 DPRINT(("Literal %d-%d\n",
568 (int)lit
->code_min
, (int)lit
->code_max
));
571 /* Regset is not empty, so add a tag before the
572 literal or backref. */
575 DPRINT((" ADDTAGS_RECURSE:LITERAL node->num_tags = 1\n"));
580 status
= tre_merge_branches(mem
, node
, NULL
, tag
,
582 if (status
!= REG_OK
)
584 status
= tre_add_tag_left(mem
, node
, tag
);
585 if (status
!= REG_OK
)
587 if (regset_contains
== REGSET_HAS_STARTS
)
588 tnfa
->tag_directions
[tag
] = TRE_TAG_LEFT_MAXIMIZE
;
590 tnfa
->tag_directions
[tag
] = direction
;
591 DPRINT(("Setting t%d direction to %s\n", tag
,
592 tag_dir_str
[tnfa
->tag_directions
[tag
]]));
593 tre_purge_regset(regset
, tnfa
, tag
);
597 int b
= lit
->code_max
;
598 int t
= tnfa
->submatch_data
[b
].so_tag
;
599 /* Fail if the referenced submatch hasn't been
601 if (tnfa
->submatch_data
[b
].eo_tag
< 0)
603 status
= REG_ESUBREG
;
608 DPRINT((" Backref %d start: "
609 "t%d reordered to before t%d\n",
613 /* Append to tag_order, move "tag" after
620 DPRINT((" Backref %d start: "
621 "(t%d already before t%d)\n",
623 #endif /* TRE_DEBUG */
627 DPRINT((" ADDTAGS_RECURSE:LITERAL num_tags++ tag=%d\n",
629 regset
[0] = REGSET_UNSET
;
638 assert(!IS_TAG(lit
));
644 tre_catenation_t
*cat
= node
->obj
;
645 tre_ast_node_t
*left
= cat
->left
;
646 tre_ast_node_t
*right
= cat
->right
;
647 int reserved_tag
= -1;
648 DPRINT(("Catenation, next_tag = %d\n", next_tag
));
651 /* After processing right child. */
652 STACK_PUSHX(stack
, voidptr
, node
);
653 STACK_PUSHX(stack
, int, ADDTAGS_AFTER_CAT_RIGHT
);
655 /* Process right child. */
656 STACK_PUSHX(stack
, voidptr
, right
);
657 STACK_PUSHX(stack
, int, ADDTAGS_RECURSE
);
659 /* After processing left child. */
660 STACK_PUSHX(stack
, int, next_tag
+ left
->num_tags
);
661 DPRINT((" Pushing %d for after left\n",
662 next_tag
+ left
->num_tags
));
663 if (left
->num_tags
> 0 && right
->num_tags
> 0)
665 /* Reserve the next tag to the right child. */
666 DPRINT((" ADDTAGS_RECURSE:CATENATION num_tags++ "
667 "Reserving next_tag %d to right child\n",
669 reserved_tag
= next_tag
;
672 STACK_PUSHX(stack
, int, reserved_tag
);
673 STACK_PUSHX(stack
, int, ADDTAGS_AFTER_CAT_LEFT
);
675 /* Process left child. */
676 STACK_PUSHX(stack
, voidptr
, left
);
677 STACK_PUSHX(stack
, int, ADDTAGS_RECURSE
);
683 tre_iteration_t
*iter
= node
->obj
;
684 DPRINT(("Iteration\n"));
687 STACK_PUSHX(stack
, int, regset_contains
!= 0);
688 STACK_PUSHX(stack
, int, tag
);
689 STACK_PUSHX(stack
, voidptr
, node
);
690 STACK_PUSHX(stack
, int, ADDTAGS_AFTER_ITERATION
);
692 STACK_PUSHX(stack
, voidptr
, iter
->arg
);
693 STACK_PUSHX(stack
, int, ADDTAGS_RECURSE
);
695 /* Regset is not empty, so add a tag here (this always happens
696 because iterators always get submatch id, even if in the
702 status
= tre_merge_branches(mem
, node
, NULL
, tag
,
704 if (status
!= REG_OK
)
706 status
= tre_add_tag_left(mem
, node
, tag
);
707 if (status
!= REG_OK
)
709 if (regset_contains
== REGSET_HAS_STARTS
&& tag
!= 0)
710 tnfa
->tag_directions
[tag
] = iter
->minimal
?
712 TRE_TAG_LEFT_MAXIMIZE
;
714 tnfa
->tag_directions
[tag
] = direction
;
715 DPRINT(("Setting t%d direction to %s\n", tag
,
716 tag_dir_str
[tnfa
->tag_directions
[tag
]]));
717 tre_purge_regset(regset
, tnfa
, tag
);
720 DPRINT((" ADDTAGS_RECURSE:ITERATION num_tags++ tag=%d\n",
722 regset
[0] = REGSET_UNSET
;
728 direction
= TRE_TAG_LEFT_MAXIMIZE
;
729 DPRINT((" Setting direction to %s\n", tag_dir_str
[direction
]));
735 tre_ast_node_t
*left
;
736 tre_ast_node_t
*right
;
743 DPRINT((" UNION num_tags++ tag=%d\n", tag
));
750 /* For the top union, walk the tree of consecutive unions,
751 * setting the left_tag and right_tag values in increasing
752 * order (left to right priority) */
754 (node
->num_submatches
-
755 (node
->submatch_id
>= 0 &&
756 node
->submatch_id
< SUBMATCH_ID_INVISIBLE_START
)) > 0)
759 int last
= tre_stack_num_objects(stack
);
761 STACK_PUSH(stack
, voidptr
, node
);
762 STACK_PUSH(stack
, int, ADDTAGS_UNION_RECURSE
);
764 while (tre_stack_num_objects(stack
) > last
)
766 symbol
= (tre_addtags_symbol_t
)tre_stack_pop_int(stack
);
769 case ADDTAGS_UNION_RECURSE
:
770 n
= tre_stack_pop_voidptr(stack
);
774 /* Since the top union has num_submatches > 0,
775 * we set all the consecutive union's
776 * make_branches to 1 to force the generation
777 * of end tags for each union branch. */
778 n
->make_branches
= 1;
780 STACK_PUSH(stack
, voidptr
, n
);
781 STACK_PUSH(stack
, int,
782 ADDTAGS_UNION_RIGHT_RECURSE
);
784 if (left
->type
== UNION
)
786 STACK_PUSH(stack
, voidptr
, left
);
787 STACK_PUSH(stack
, int,
788 ADDTAGS_UNION_RECURSE
);
792 DPRINT((" ADDTAGS_UNION_RECURSE "
793 "num_tags++ tag=%d\n", tag
));
801 case ADDTAGS_UNION_RIGHT_RECURSE
:
802 n
= tre_stack_pop_voidptr(stack
);
806 if (right
->type
== UNION
)
808 STACK_PUSH(stack
, voidptr
, right
);
809 STACK_PUSH(stack
, int,
810 ADDTAGS_UNION_RECURSE
);
814 DPRINT((" ADDTAGS_UNION_RIGHT_RECURSE "
815 "num_tags++ tag=%d\n", tag
));
816 uni
->right_tag
= tag
;
828 } /* end switch(symbol) */
829 } /* end while(tre_stack_num_objects(stack) > last */
832 STACK_PUSHX(stack
, int, front_tag
);
833 STACK_PUSHX(stack
, voidptr
, node
);
834 STACK_PUSHX(stack
, int, ADDTAGS_AFTER_UNION_TOP
);
836 } /* end if (top_union && ...) */
842 /* After processing right child. */
843 STACK_PUSHX(stack
, voidptr
, regset
);
844 STACK_PUSHX(stack
, int, regset_contains
!= 0);
845 STACK_PUSHX(stack
, voidptr
, node
);
846 STACK_PUSHX(stack
, int, ADDTAGS_AFTER_UNION_RIGHT
);
848 /* Process right child. */
849 STACK_PUSHX(stack
, voidptr
, right
);
850 STACK_PUSHX(stack
, int, ADDTAGS_RECURSE_NOT_TOP_UNION
);
852 /* After processing left child. */
853 STACK_PUSHX(stack
, int, ADDTAGS_AFTER_UNION_LEFT
);
855 /* Process left child. */
856 STACK_PUSHX(stack
, voidptr
, left
);
857 STACK_PUSHX(stack
, int, ADDTAGS_RECURSE_NOT_TOP_UNION
);
859 /* Regset is not empty, so add a tag here. */
864 status
= tre_merge_branches(mem
, node
, NULL
, front_tag
,
866 if (status
!= REG_OK
)
868 status
= tre_add_tag_left(mem
, node
, front_tag
);
869 if (status
!= REG_OK
)
871 if (regset_contains
== REGSET_HAS_STARTS
)
872 tnfa
->tag_directions
[front_tag
] = TRE_TAG_LEFT_MAXIMIZE
;
874 tnfa
->tag_directions
[front_tag
] = direction
;
875 DPRINT(("Setting t%d direction to %s\n", front_tag
,
876 tag_dir_str
[tnfa
->tag_directions
[front_tag
]]));
877 tre_purge_regset(regset
, tnfa
, front_tag
);
880 regset
[0] = REGSET_UNSET
;
886 } /* end switch (node->type) */
888 break; /* end case: ADDTAGS_RECURSE */
890 case ADDTAGS_AFTER_ITERATION
:
892 tre_iteration_t
*iter
;
893 tre_ast_node_t
*orig
;
896 node
= tre_stack_pop_voidptr(stack
);
897 orig
= node
->original
? node
->original
: node
;
898 iter
= (tre_iteration_t
*)orig
->obj
;
899 enter_tag
= tre_stack_pop_int(stack
);
901 minimal_tag
= enter_tag
;
903 DPRINT(("After iteration\n"));
906 node
->num_tags
= iter
->arg
->num_tags
+ tre_stack_pop_int(stack
);
907 DPRINT((" ADDTAGS_AFTER_ITERATION: node->num_tags = %d\n",
912 /* node->last_matched_branch will have the start tag (the tag
913 just *before* the iteration). iter->arg->last_matched_branch
914 will have the tag(s) inside the iteration, the ones that
915 may need to be reset if the iteration doesn't match. So
916 before we merge iter->arg into node, we need to set up
917 a new tre_last_matched_t and tre_last_matched_branch_t,
918 using any of the inside tags as cmp_tag (we choose the first
919 tag found by bit_ffs). If there are no inside tags, we
920 don't bother creating the extra structures. */
921 tre_last_matched_branch_pre_t
*b
=
922 iter
->arg
->last_matched_branch
;
924 if (b
&& b
->n_tags
> 0)
926 tre_last_matched_pre_t
*u
;
928 bit_ffs(b
->tags
, num_tags
, &b
->cmp_tag
);
929 DPRINT((" ADDTAGS_AFTER_ITERATION: n_tags=%d "
930 "cmp_tag = %d\n", b
->n_tags
, b
->cmp_tag
));
932 u
= tre_mem_calloc(mem
, sizeof(tre_last_matched_pre_t
) +
933 sizeof(tre_last_matched_branch_pre_t
)
934 + bitstr_size(tnfa
->num_tags
));
942 u
->start_tag
= b
->cmp_tag
;
943 u
->tot_branches
= b
->tot_branches
;
944 u
->tot_last_matched
= 1 + b
->tot_last_matched
;
945 u
->tot_tags
= b
->tot_tags
;
947 b
= (tre_last_matched_branch_pre_t
*)(u
+ 1);
949 b
->n_last_matched
= 1;
950 b
->tot_branches
= 1 + u
->tot_branches
;
951 b
->tot_last_matched
= u
->tot_last_matched
;
952 b
->tot_tags
= u
->tot_tags
;
954 iter
->arg
->last_matched_branch
= b
;
956 status
= tre_merge_branches(mem
, node
, iter
->arg
, 0,
958 if (status
!= REG_OK
)
963 /* Add a union with a left EMPTY literal and the right
964 being iter->arg. This should force the tags inside
965 the minimal iteration to prefer being unset */
966 if (iter
->min
== 0 && iter
->max
<= 1)
968 tre_ast_node_t
*u
, *e
;
970 e
= tre_ast_new_literal(mem
, EMPTY
, -1, -1);
976 u
= tre_ast_new_union(mem
, e
, iter
->arg
);
985 direction
= TRE_TAG_MINIMIZE
;
988 direction
= TRE_TAG_MAXIMIZE
;
989 DPRINT((" Setting direction to %s\n", tag_dir_str
[direction
]));
994 case ADDTAGS_AFTER_CAT_LEFT
:
996 int new_tag
= tre_stack_pop_int(stack
);
997 next_tag
= tre_stack_pop_int(stack
);
998 DPRINT(("After cat left, tag = %d, next_tag = %d\n",
1002 DPRINT((" Setting tag to %d\n", new_tag
));
1008 case ADDTAGS_AFTER_CAT_RIGHT
:
1010 tre_catenation_t
*cat
;
1012 DPRINT(("After cat right\n"));
1013 node
= tre_stack_pop_voidptr(stack
);
1017 node
->num_tags
= cat
->left
->num_tags
+ cat
->right
->num_tags
;
1018 DPRINT((" ADDTAGS_AFTER_CAT_RIGHT: node->num_tags = %d\n",
1023 status
= tre_merge_branches(mem
, cat
->left
, cat
->right
, 0,
1025 if (status
!= REG_OK
)
1027 status
= tre_merge_branches(mem
, node
, cat
->left
, 0,
1033 case ADDTAGS_AFTER_UNION_LEFT
:
1034 DPRINT(("After union left\n"));
1035 /* Lift the bottom of the `regset' array so that when processing
1036 the right operand the items currently in the array are
1037 invisible. The original bottom was saved at ADDTAGS_UNION and
1038 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1039 while (*regset
!= REGSET_UNSET
)
1041 regset_contains
= 0;
1044 case ADDTAGS_AFTER_UNION_RIGHT
:
1047 tre_ast_node_t
*orig
;
1049 /* Note: node may not be a UNION, but a CATENATION with a left
1050 * tag. So that is why we pass the original node->obj on the
1051 * stack, to get the union's true values. */
1053 DPRINT(("After union right\n"));
1054 node
= tre_stack_pop_voidptr(stack
);
1055 orig
= node
->original
? node
->original
: node
;
1056 uni
= (tre_union_t
*)orig
->obj
;
1057 added_tags
= tre_stack_pop_int(stack
);
1060 node
->num_tags
= uni
->left
->num_tags
+ uni
->right
->num_tags
1062 if (uni
->left_tag
> 0)
1064 if (uni
->right_tag
> 0)
1066 DPRINT((" ADDTAGS_AFTER_UNION_RIGHT: node->num_tags = %d\n",
1069 regset
= tre_stack_pop_voidptr(stack
);
1071 /* Add tags after both children, the left child gets a smaller
1072 tag than the right child. This guarantees that we prefer
1073 the left child over the right child. */
1074 /* XXX - This is not always necessary (if the children have
1075 tags which must be seen for every match of that child). */
1076 if (!first_pass
&& node
->make_branches
)
1078 tre_last_matched_branch_pre_t
*lb
=
1079 uni
->left
->last_matched_branch
;
1080 tre_last_matched_branch_pre_t
*rb
=
1081 uni
->right
->last_matched_branch
;
1082 tre_last_matched_pre_t
*lu
=
1083 uni
->left
->last_matched_in_progress
;
1084 tre_last_matched_pre_t
*ru
=
1085 uni
->right
->last_matched_in_progress
;
1086 tre_last_matched_pre_t
*u
;
1087 /* We don't need to call tre_merge_branches because these
1088 * tags don't participate in submatch ranges, so don't need
1089 * to be recorded. But we do set the cmp_tag entry of the
1090 * tre_last_matched_branch_pre_t, so we might call
1091 * tre_merge_branches if we need to create an empty
1092 * tre_last_matched_branch_pre_t. */
1093 if (uni
->left_tag
> 0)
1095 DPRINT(("Setting t%d direction to maximize\n",
1097 status
= tre_add_tag_right(mem
, uni
->left
, uni
->left_tag
);
1098 if (status
!= REG_OK
)
1100 tnfa
->tag_directions
[uni
->left_tag
] = TRE_TAG_MAXIMIZE
;
1103 status
= tre_merge_branches(mem
, uni
->left
, NULL
, -1,
1105 if (status
!= REG_OK
)
1107 lb
= uni
->left
->last_matched_branch
;
1109 lb
->cmp_tag
= uni
->left_tag
;
1111 if (uni
->right_tag
> 0)
1113 DPRINT(("Setting t%d direction to maximize\n",
1115 status
= tre_add_tag_right(mem
, uni
->right
, uni
->right_tag
);
1116 if (status
!= REG_OK
)
1118 tnfa
->tag_directions
[uni
->right_tag
] = TRE_TAG_MAXIMIZE
;
1121 status
= tre_merge_branches(mem
, uni
->right
, NULL
, -1,
1123 if (status
!= REG_OK
)
1125 rb
= uni
->right
->last_matched_branch
;
1127 rb
->cmp_tag
= uni
->right_tag
;
1129 /* Now merge the tre_last_matched_branch_pre_t into a
1130 tre_last_matched_pre_t */
1135 /* Create a new tre_last_matched_pre_t */
1136 u
= tre_mem_calloc(mem
, sizeof(tre_last_matched_pre_t
));
1139 status
= REG_ESPACE
;
1142 u
->tot_last_matched
= 1;
1148 u
->tot_branches
+= lb
->tot_branches
;
1149 u
->tot_last_matched
+= lb
->tot_last_matched
;
1150 u
->tot_tags
+= lb
->tot_tags
;
1155 u
->tot_branches
+= rb
->tot_branches
;
1156 u
->tot_last_matched
+= rb
->tot_last_matched
;
1157 u
->tot_tags
+= rb
->tot_tags
;
1164 u
->tot_branches
+= rb
->tot_branches
;
1165 u
->tot_last_matched
+= rb
->tot_last_matched
;
1166 u
->tot_tags
+= rb
->tot_tags
;
1171 /* Use ru, and add lb */
1175 lb
->next
= u
->branches
;
1178 u
->tot_branches
+= lb
->tot_branches
;
1179 u
->tot_last_matched
+= lb
->tot_last_matched
;
1180 u
->tot_tags
+= lb
->tot_tags
;
1184 else if (ru
== NULL
)
1186 /* Use lu, and add rb */
1190 rb
->next
= u
->branches
;
1193 u
->tot_branches
+= rb
->tot_branches
;
1194 u
->tot_last_matched
+= rb
->tot_last_matched
;
1195 u
->tot_tags
+= rb
->tot_tags
;
1200 /* Merge lu and ru into lu */
1205 tre_last_matched_branch_pre_t
*b
= lu
->branches
;
1206 while (b
->next
) b
= b
->next
;
1207 b
->next
= ru
->branches
;
1208 lu
->n_branches
+= ru
->n_branches
;
1211 else if (ru
->branches
)
1213 lu
->branches
= ru
->branches
;
1214 lu
->n_branches
= ru
->n_branches
;
1216 lu
->tot_branches
+= ru
->tot_branches
;
1217 lu
->tot_last_matched
+= ru
->tot_last_matched
- 1;
1218 lu
->tot_tags
+= ru
->tot_tags
;
1221 node
->last_matched_in_progress
= u
;
1223 direction
= TRE_TAG_MAXIMIZE
;
1227 case ADDTAGS_AFTER_UNION_TOP
: /* only called when not first_pass */
1229 tre_last_matched_branch_pre_t
*b
;
1230 tre_last_matched_pre_t
*u
;
1233 DPRINT(("After union top\n"));
1234 node
= tre_stack_pop_voidptr(stack
);
1235 start_tag
= tre_stack_pop_int(stack
);
1236 b
= tre_mem_calloc(mem
, sizeof(tre_last_matched_branch_pre_t
)
1237 + bitstr_size(tnfa
->num_tags
));
1240 status
= REG_ESPACE
;
1244 u
= node
->last_matched_in_progress
;
1245 u
->start_tag
= start_tag
;
1246 b
->tot_branches
= 1 + u
->tot_branches
;
1247 b
->tot_last_matched
= u
->tot_last_matched
;
1248 b
->tot_tags
= u
->tot_tags
;
1249 b
->last_matched
= u
;
1250 b
->n_last_matched
= 1;
1251 node
->last_matched_branch
= b
;
1252 node
->last_matched_in_progress
= NULL
;
1260 } /* end switch(symbol) */
1261 } /* end while(tre_stack_num_objects(stack) > bottom) */
1263 if (status
!= REG_OK
)
1265 DPRINT(("Error during %s pass\n", first_pass
? "first" : "second"));
1266 goto error_post_compile
;
1272 if (num_tags
!= tnfa
->num_tags
)
1274 DPRINT(("num_tags(%d) != tnfa->num_tags(%d)\n", num_tags
,
1276 status
= REG_BADPAT
;
1277 goto error_post_compile
;
1280 tre_purge_regset(regset
, tnfa
, tag
);
1281 DPRINT(("Setting t%d to %s\n", num_tags
,
1282 tag_dir_str
[direction
]));
1283 tnfa
->tag_directions
[num_tags
] = direction
;
1285 if (rtp
> reorder_tags
+ 2 * tnfa
->num_reorder_tags
)
1287 DPRINT(("Processed %d reorder tags instead of %d\n",
1288 (int)(rtp
- reorder_tags
) / 2, tnfa
->num_reorder_tags
));
1289 status
= REG_BADPAT
;
1290 goto error_post_compile
;
1294 if (reorder_tags
[0] >= 0)
1296 DPRINT(("reorder_tags:\n"));
1297 for (rtp
= reorder_tags
; *rtp
>= 0;)
1299 DPRINT(("%d after ", *rtp
++));
1300 DPRINT(("%d\n", *rtp
++));
1304 DPRINT(("No reorder_tags\n"));
1305 #endif /* TRE_DEBUG */
1307 /* Initialize to_reorder */
1308 for (i
= 0; i
< num_tags
; i
++)
1310 /* Use to_seq_order to convert reorder_tags values, and use those to
1311 * reorder to_reorder */
1312 for (rtp
= reorder_tags
; *rtp
>= 0;)
1317 /* Skip reordering the final tag */
1320 DPRINT(("Skipping reorder of %d\n", ti
));
1324 /* The number of the tag to reorder */
1325 high
= to_reorder
[ti
];
1326 /* Reorder after this tag */
1327 low
= to_reorder
[*rtp
++];
1329 DPRINT(("ti=%d high=%d low=%d\n", ti
, high
, low
));
1332 DPRINT(("Tag %d already before %d\n", high
, low
));
1335 for (j
= 0; j
< num_tags
; j
++)
1336 if (to_reorder
[j
] > low
&& to_reorder
[j
] < high
)
1338 to_reorder
[ti
] = low
+ 1;
1340 DPRINT(("to_reorder=("));
1341 for (j
= 0; j
< num_tags
; j
++)
1343 DPRINT(("%d", to_reorder
[j
]));
1344 if (j
< num_tags
- 1)
1348 #endif /* TRE_DEBUG */
1350 /* Determine if reordering in really necessary */
1352 int need_reorder
= 0;
1353 for (i
= 0; i
< num_tags
; i
++)
1354 if(to_reorder
[i
] != i
)
1359 /* If need_reorder is not set, free reorder_tags, and set to NULL,
1360 * indicating no reordering is needed */
1363 DPRINT(("Don't need to reorder\n"));
1364 xfree(reorder_tags
);
1365 reorder_tags
= NULL
;
1373 tre_tag_direction_t
*new_tag_directions
;
1375 DPRINT(("to_reorder:"));
1376 for (i
= 0; i
< num_tags
; i
++)
1377 DPRINT((" %d->%d", i
, to_reorder
[i
]));
1379 #endif /* TRE_DEBUG */
1381 DPRINT(("Reordering submatch_data\n"));
1382 for (i
= 0; i
< (int)tnfa
->num_submatches
; i
++)
1385 int so
= tnfa
->submatch_data
[i
].so_tag
;
1386 int eo
= tnfa
->submatch_data
[i
].eo_tag
;
1387 #endif /* TRE_DEBUG */
1388 tnfa
->submatch_data
[i
].so_tag
=
1389 to_reorder
[tnfa
->submatch_data
[i
].so_tag
];
1390 tnfa
->submatch_data
[i
].eo_tag
=
1391 tnfa
->submatch_data
[i
].eo_tag
< num_tags
?
1392 to_reorder
[tnfa
->submatch_data
[i
].eo_tag
] :
1393 tnfa
->submatch_data
[i
].eo_tag
;
1394 DPRINT(("pmatch[%d]: {%d, %d}->{%d, %d}\n", i
, so
, eo
,
1395 tnfa
->submatch_data
[i
].so_tag
,
1396 tnfa
->submatch_data
[i
].eo_tag
));
1399 DPRINT(("Reordering tag_directions\n"));
1400 /* We only allocate num_tags directions and reorder them. The
1401 * num_tags-th direction (end tag) is left unchanged. */
1402 new_tag_directions
= xmalloc(sizeof(*new_tag_directions
) * num_tags
);
1403 if (new_tag_directions
== NULL
)
1405 status
= REG_ESPACE
;
1406 goto error_post_compile
;
1408 for (i
= 0; i
< num_tags
; i
++)
1410 new_tag_directions
[to_reorder
[i
]] = tnfa
->tag_directions
[i
];
1413 for (i
= 0; i
< num_tags
; i
++)
1415 DPRINT(("t%d %s->%s\n", i
,
1416 tag_dir_str
[tnfa
->tag_directions
[i
]],
1417 tag_dir_str
[new_tag_directions
[i
]]));
1419 DPRINT(("t%d %s->%s\n", num_tags
,
1420 tag_dir_str
[tnfa
->tag_directions
[num_tags
]],
1421 tag_dir_str
[tnfa
->tag_directions
[num_tags
]]));
1422 #endif /* TRE_DEBUG */
1423 memcpy(tnfa
->tag_directions
, new_tag_directions
, sizeof(*new_tag_directions
) * num_tags
);
1424 xfree(new_tag_directions
);
1426 DPRINT(("Reordering minimal_tags\n"));
1427 for (i
= 0; tnfa
->minimal_tags
[i
] >= 0; i
++)
1428 tnfa
->minimal_tags
[i
] = tnfa
->minimal_tags
[i
] < num_tags
?
1429 to_reorder
[tnfa
->minimal_tags
[i
]] :
1430 tnfa
->minimal_tags
[i
];
1432 DPRINT(("Reordering AST tags\n"));
1433 STACK_PUSH(stack
, voidptr
, tree
);
1434 while (status
== REG_OK
&& tre_stack_num_objects(stack
) > bottom
)
1436 node
= tre_stack_pop_voidptr(stack
);
1442 tre_literal_t
*lit
= (tre_literal_t
*)node
->obj
;
1444 lit
->code_max
= to_reorder
[lit
->code_max
];
1450 tre_union_t
*uni
= (tre_union_t
*)node
->obj
;
1451 STACK_PUSHX(stack
, voidptr
, uni
->right
);
1452 STACK_PUSHX(stack
, voidptr
, uni
->left
);
1458 tre_catenation_t
*cat
= (tre_catenation_t
*)node
->obj
;
1459 STACK_PUSHX(stack
, voidptr
, cat
->right
);
1460 STACK_PUSHX(stack
, voidptr
, cat
->left
);
1466 tre_iteration_t
*iter
= (tre_iteration_t
*)node
->obj
;
1467 STACK_PUSHX(stack
, voidptr
, iter
->arg
);
1476 if (status
!= REG_OK
)
1478 DPRINT(("Error while reordering tags\n"));
1479 goto error_post_compile
;
1486 if (tree
->last_matched_branch
)
1488 tre_last_matched_branch_t
*buf
, *b
, *bb
;
1489 tre_last_matched_branch_pre_t
*bp
;
1490 tre_last_matched_t
*u
, *uu
;
1491 tre_last_matched_pre_t
*up
;
1495 tre_last_matched_branch_t
*_b
;
1496 tre_last_matched_t
*_u
;
1499 DPRINT(("last_match_branch_pre:\n"));
1500 print_last_match_branch_pre(tree
->last_matched_branch
, 0, num_tags
);
1501 #endif /* TRE_DEBUG */
1502 buf
= (tre_last_matched_branch_t
*)xcalloc(1,
1503 tree
->last_matched_branch
->tot_branches
1504 * sizeof(tre_last_matched_branch_t
) +
1505 tree
->last_matched_branch
->tot_last_matched
1506 * sizeof(tre_last_matched_t
) +
1507 tree
->last_matched_branch
->tot_tags
*
1511 status
= REG_ESPACE
;
1512 goto error_post_compile
;
1516 u
= (tre_last_matched_t
*)(b
+
1517 tree
->last_matched_branch
->tot_branches
);
1518 t
= (int *)(u
+ tree
->last_matched_branch
->tot_last_matched
);
1523 #endif /* TRE_DEBUG */
1524 DPRINT(("Copying info_pre to info\n"));
1525 STACK_PUSH(stack
, voidptr
, tree
->last_matched_branch
);
1526 STACK_PUSH(stack
, int, 1);
1527 STACK_PUSH(stack
, int, COPY_LAST_MATCHED_BRANCH
);
1529 while (status
== REG_OK
&& tre_stack_num_objects(stack
) > bottom
)
1531 switch (tre_stack_pop_int(stack
))
1533 case COPY_LAST_MATCHED_BRANCH
:
1534 i
= tre_stack_pop_int(stack
);
1535 /* The tre_last_matched_branch_pre_t * is still on the
1537 STACK_PUSHX(stack
, voidptr
, b
);
1538 STACK_PUSHX(stack
, int, COPY_LAST_MATCHED_BRANCH_NEXT
);
1542 case COPY_LAST_MATCHED_BRANCH_NEXT
:
1543 bb
= tre_stack_pop_voidptr(stack
);
1544 bp
= tre_stack_pop_voidptr(stack
);
1545 bb
->n_last_matched
= bp
->n_last_matched
;
1546 bb
->cmp_tag
= bp
->cmp_tag
;
1550 n
= bb
->n_tags
= bp
->n_tags
;
1552 for (i
= 0; i
< num_tags
; i
++)
1553 if (bit_test(bp
->tags
, i
))
1562 STACK_PUSHX(stack
, voidptr
, bp
->next
);
1563 STACK_PUSHX(stack
, voidptr
, bb
+ 1);
1564 STACK_PUSHX(stack
, int, COPY_LAST_MATCHED_BRANCH_NEXT
);
1566 if (bp
->n_last_matched
> 0)
1568 bb
->last_matched
= u
;
1569 STACK_PUSHX(stack
, voidptr
, bp
->last_matched
);
1570 STACK_PUSHX(stack
, int, bp
->n_last_matched
);
1571 STACK_PUSHX(stack
, int, COPY_LAST_MATCHED
);
1575 case COPY_LAST_MATCHED
:
1576 i
= tre_stack_pop_int(stack
);
1577 /* The tre_last_matched_pre_t * is still on the stack */
1578 STACK_PUSHX(stack
, voidptr
, u
);
1579 STACK_PUSHX(stack
, int, COPY_LAST_MATCHED_NEXT
);
1583 case COPY_LAST_MATCHED_NEXT
:
1584 uu
= tre_stack_pop_voidptr(stack
);
1585 up
= tre_stack_pop_voidptr(stack
);
1586 uu
->n_branches
= up
->n_branches
;
1588 uu
->start_tag
= up
->start_tag
;
1591 STACK_PUSHX(stack
, voidptr
, up
->next
);
1592 STACK_PUSHX(stack
, voidptr
, uu
+ 1);
1593 STACK_PUSHX(stack
, int, COPY_LAST_MATCHED_NEXT
);
1595 STACK_PUSHX(stack
, voidptr
, up
->branches
);
1596 STACK_PUSHX(stack
, int, up
->n_branches
);
1597 STACK_PUSHX(stack
, int, COPY_LAST_MATCHED_BRANCH
);
1601 if (status
!= REG_OK
)
1602 goto error_post_compile
;
1604 DPRINT(("last_matched_branch:\n"));
1605 print_last_match_branch(buf
, 0);
1606 if (b
!= _b
+ tree
->last_matched_branch
->tot_branches
)
1607 DPRINT(("b/%p != _b + tree->last_matched_branch->tot_branches/%p\n",
1608 b
, _b
+ tree
->last_matched_branch
->tot_branches
));
1609 if (u
!= _u
+ tree
->last_matched_branch
->tot_last_matched
)
1610 DPRINT(("u/%p != _u + "
1611 "tree->last_matched_branch->tot_last_matched/%p\n",
1612 u
, _u
+ tree
->last_matched_branch
->tot_last_matched
));
1613 if (t
!= _t
+ tree
->last_matched_branch
->tot_tags
)
1614 DPRINT(("t/%p != _t + tree->last_matched_branch->tot_tags/%p\n",
1615 t
, _t
+ tree
->last_matched_branch
->tot_tags
));
1616 #endif /* TRE_DEBUG */
1617 tnfa
->last_matched_branch
= buf
;
1621 DPRINT(("No last_match_branch_pre\n"));
1622 #endif /* TRE_DEBUG */
1625 DPRINT(("tre_add_tags: %s complete. Number of tags %d.\n",
1626 first_pass
? "First pass" : "Second pass", num_tags
));
1628 tre_ast_print(tree
);
1629 #endif /* TRE_DEBUG */
1630 DPRINT(("tre_add_tags: tree->num_tags=%d num_tags=%d\n", tree
->num_tags
,
1632 assert(tree
->num_tags
== num_tags
);
1633 tnfa
->end_tag
= num_tags
;
1634 tnfa
->num_tags
= num_tags
;
1635 tnfa
->num_minimals
= num_minimals
;
1637 xfree(reorder_tags
);
1647 AST to TNFA compilation routines.
1653 } tre_copyast_symbol_t
;
1655 /* Flags for tre_copy_ast(). */
1656 #define COPY_REMOVE_TAGS 1
1657 #define COPY_MAXIMIZE_FIRST_TAG 2
1659 static reg_errcode_t
1660 tre_copy_ast(tre_mem_t mem
, tre_stack_t
*stack
, tre_ast_node_t
*ast
,
1661 int flags
, int *pos_add
, tre_tag_direction_t
*tag_directions
,
1662 tre_ast_node_t
**copy
, int *max_pos
)
1664 reg_errcode_t status
= REG_OK
;
1665 int bottom
= tre_stack_num_objects(stack
);
1668 tre_ast_node_t
**result
= copy
;
1669 tre_copyast_symbol_t symbol
;
1671 STACK_PUSH(stack
, voidptr
, ast
);
1672 STACK_PUSH(stack
, int, COPY_RECURSE
);
1674 while (status
== REG_OK
&& tre_stack_num_objects(stack
) > bottom
)
1676 tre_ast_node_t
*node
;
1677 if (status
!= REG_OK
)
1680 symbol
= (tre_copyast_symbol_t
)tre_stack_pop_int(stack
);
1683 case COPY_SET_RESULT_PTR
:
1684 result
= tre_stack_pop_voidptr(stack
);
1687 node
= tre_stack_pop_voidptr(stack
);
1692 tre_literal_t
*lit
= node
->obj
;
1693 int pos
= lit
->position
;
1694 int min
= lit
->code_min
;
1695 int max
= lit
->code_max
;
1696 tre_bracket_match_list_t
*list
= !IS_SPECIAL(lit
) ?
1697 lit
->u
.bracket_match_list
:
1699 if (!IS_SPECIAL(lit
) || IS_BACKREF(lit
))
1701 /* XXX - e.g. [ab] has only one position but two
1702 nodes, so we are creating holes in the state space
1703 here. Not fatal, just wastes memory. */
1707 else if (IS_TAG(lit
) && (flags
& COPY_REMOVE_TAGS
))
1709 /* Change this tag to empty. */
1713 else if (IS_TAG(lit
) && (flags
& COPY_MAXIMIZE_FIRST_TAG
)
1716 /* Maximize the first tag. */
1717 if (tag_directions
[max
] == TRE_TAG_LEFT_MAXIMIZE
)
1718 tag_directions
[max
] = TRE_TAG_MAXIMIZE
;
1721 *result
= tre_ast_new_literal(mem
, min
, max
, pos
);
1722 if (*result
== NULL
)
1723 status
= REG_ESPACE
;
1728 if (!IS_SPECIAL(lit
))
1729 ((tre_literal_t
*)(*result
)->obj
)->u
.bracket_match_list
1735 tre_union_t
*uni
= node
->obj
;
1737 *result
= tre_ast_new_union(mem
, uni
->left
, uni
->right
);
1738 if (*result
== NULL
)
1740 status
= REG_ESPACE
;
1743 tmp
= (*result
)->obj
;
1744 result
= &tmp
->left
;
1745 STACK_PUSHX(stack
, voidptr
, uni
->right
);
1746 STACK_PUSHX(stack
, int, COPY_RECURSE
);
1747 STACK_PUSHX(stack
, voidptr
, &tmp
->right
);
1748 STACK_PUSHX(stack
, int, COPY_SET_RESULT_PTR
);
1749 STACK_PUSHX(stack
, voidptr
, uni
->left
);
1750 STACK_PUSHX(stack
, int, COPY_RECURSE
);
1755 tre_catenation_t
*cat
= node
->obj
;
1756 tre_catenation_t
*tmp
;
1757 *result
= tre_ast_new_catenation(mem
, cat
->left
, cat
->right
);
1758 if (*result
== NULL
)
1760 status
= REG_ESPACE
;
1763 tmp
= (*result
)->obj
;
1766 result
= &tmp
->left
;
1768 STACK_PUSHX(stack
, voidptr
, cat
->right
);
1769 STACK_PUSHX(stack
, int, COPY_RECURSE
);
1770 STACK_PUSHX(stack
, voidptr
, &tmp
->right
);
1771 STACK_PUSHX(stack
, int, COPY_SET_RESULT_PTR
);
1772 STACK_PUSHX(stack
, voidptr
, cat
->left
);
1773 STACK_PUSHX(stack
, int, COPY_RECURSE
);
1778 tre_iteration_t
*iter
= node
->obj
;
1779 STACK_PUSHX(stack
, voidptr
, iter
->arg
);
1780 STACK_PUSHX(stack
, int, COPY_RECURSE
);
1781 *result
= tre_ast_new_iter(mem
, iter
->arg
, iter
->min
,
1782 iter
->max
, iter
->minimal
);
1783 if (*result
== NULL
)
1785 status
= REG_ESPACE
;
1788 iter
= (*result
)->obj
;
1789 result
= &iter
->arg
;
1799 *pos_add
+= num_copied
;
1806 } tre_expand_ast_symbol_t
;
1808 /* Expands each iteration node that has a finite nonzero minimum or maximum
1809 iteration count to a catenated sequence of copies of the node. */
1810 static reg_errcode_t
1811 tre_expand_ast(tre_mem_t mem
, tre_stack_t
*stack
, tre_ast_node_t
*ast
,
1812 int *position
, tre_tag_direction_t
*tag_directions
,
1813 int __unused
*max_depth
)
1815 reg_errcode_t status
= REG_OK
;
1816 int bottom
= tre_stack_num_objects(stack
);
1818 int pos_add_total
= 0;
1821 /* Current approximate matching parameters. */
1822 int params
[TRE_PARAM_LAST
];
1823 /* Approximate parameter nesting level. */
1824 int params_depth
= 0;
1825 #endif /* TRE_APPROX */
1829 #endif /* TRE_APPROX */
1832 for (i
= 0; i
< TRE_PARAM_LAST
; i
++)
1833 params
[i
] = TRE_PARAM_DEFAULT
;
1834 #endif /* TRE_APPROX */
1836 STACK_PUSHR(stack
, voidptr
, ast
);
1837 STACK_PUSHR(stack
, int, EXPAND_RECURSE
);
1838 while (status
== REG_OK
&& tre_stack_num_objects(stack
) > bottom
)
1840 tre_ast_node_t
*node
;
1841 tre_expand_ast_symbol_t symbol
;
1843 if (status
!= REG_OK
)
1846 DPRINT(("pos_add %d\n", pos_add
));
1848 symbol
= (tre_expand_ast_symbol_t
)tre_stack_pop_int(stack
);
1849 node
= tre_stack_pop_voidptr(stack
);
1852 case EXPAND_RECURSE
:
1857 tre_literal_t
*lit
= node
->obj
;
1858 if (!IS_SPECIAL(lit
) || IS_BACKREF(lit
))
1860 lit
->position
+= pos_add
;
1861 if (lit
->position
> max_pos
)
1862 max_pos
= lit
->position
;
1868 tre_union_t
*uni
= node
->obj
;
1869 STACK_PUSHX(stack
, voidptr
, uni
->right
);
1870 STACK_PUSHX(stack
, int, EXPAND_RECURSE
);
1871 STACK_PUSHX(stack
, voidptr
, uni
->left
);
1872 STACK_PUSHX(stack
, int, EXPAND_RECURSE
);
1877 tre_catenation_t
*cat
= node
->obj
;
1878 STACK_PUSHX(stack
, voidptr
, cat
->right
);
1879 STACK_PUSHX(stack
, int, EXPAND_RECURSE
);
1880 STACK_PUSHX(stack
, voidptr
, cat
->left
);
1881 STACK_PUSHX(stack
, int, EXPAND_RECURSE
);
1886 tre_iteration_t
*iter
= node
->obj
;
1887 STACK_PUSHX(stack
, int, pos_add
);
1888 STACK_PUSHX(stack
, voidptr
, node
);
1889 STACK_PUSHX(stack
, int, EXPAND_AFTER_ITER
);
1890 STACK_PUSHX(stack
, voidptr
, iter
->arg
);
1891 STACK_PUSHX(stack
, int, EXPAND_RECURSE
);
1892 /* If we are going to expand this node at EXPAND_AFTER_ITER
1893 then don't increase the `pos' fields of the nodes now, it
1894 will get done when expanding. */
1895 if (iter
->min
> 1 || iter
->max
> 1)
1906 case EXPAND_AFTER_ITER
:
1908 tre_iteration_t
*iter
= node
->obj
;
1910 pos_add
= tre_stack_pop_int(stack
);
1911 pos_add_last
= pos_add
;
1912 /* Originally (in tre_parse_bound), if min == 0 && max == 0, we
1913 immediate replace the whole iteration with EMPTY. This
1914 unfortunately drops any submatches, and messes up setting the
1915 pmatch values (we can get tags of -1, and tag values in the
1916 billions). So we left it there and replace with EMPTY here. */
1917 if (iter
->min
== 0 && iter
->max
== 0)
1919 tre_ast_node_t
*empty
= tre_ast_new_literal(mem
, EMPTY
, -1, -1);
1922 node
->obj
= empty
->obj
;
1923 node
->type
= empty
->type
;
1925 else if (iter
->min
> 1 || iter
->max
> 1)
1927 tre_ast_node_t
*seq1
= NULL
, *seq2
= NULL
;
1929 int pos_add_save
= pos_add
;
1931 /* Create a catenated sequence of copies of the node. */
1932 for (j
= 0; j
< iter
->min
; j
++)
1934 tre_ast_node_t
*copy
;
1935 /* Remove tags from all but the last copy. */
1936 int flags
= ((j
+ 1 < iter
->min
)
1938 : COPY_MAXIMIZE_FIRST_TAG
);
1939 DPRINT((" pos_add %d\n", pos_add
));
1940 pos_add_save
= pos_add
;
1941 status
= tre_copy_ast(mem
, stack
, iter
->arg
, flags
,
1942 &pos_add
, tag_directions
, ©
,
1944 if (status
!= REG_OK
)
1947 seq1
= tre_ast_new_catenation(mem
, seq1
, copy
);
1954 if (iter
->max
== -1)
1956 /* No upper limit. */
1957 pos_add_save
= pos_add
;
1958 status
= tre_copy_ast(mem
, stack
, iter
->arg
, 0,
1959 &pos_add
, NULL
, &seq2
, &max_pos
);
1960 if (status
!= REG_OK
)
1962 seq2
= tre_ast_new_iter(mem
, seq2
, 0, -1, 0);
1968 for (j
= iter
->min
; j
< iter
->max
; j
++)
1970 tre_ast_node_t
*tmp
, *copy
;
1971 pos_add_save
= pos_add
;
1972 status
= tre_copy_ast(mem
, stack
, iter
->arg
, 0,
1973 &pos_add
, NULL
, ©
, &max_pos
);
1974 if (status
!= REG_OK
)
1977 seq2
= tre_ast_new_catenation(mem
, copy
, seq2
);
1982 tmp
= tre_ast_new_literal(mem
, EMPTY
, -1, -1);
1985 seq2
= tre_ast_new_union(mem
, tmp
, seq2
);
1991 pos_add
= pos_add_save
;
1994 else if (seq2
!= NULL
)
1995 seq1
= tre_ast_new_catenation(mem
, seq1
, seq2
);
1998 node
->obj
= seq1
->obj
;
1999 node
->type
= seq1
->type
;
2003 pos_add_total
+= pos_add
- pos_add_last
;
2004 if (iter_depth
== 0)
2005 pos_add
= pos_add_total
;
2008 /* If approximate parameters are specified, surround the result
2009 with two parameter setting nodes. The one on the left sets
2010 the specified parameters, and the one on the right restores
2011 the old parameters. */
2014 tre_ast_node_t
*tmp_l
, *tmp_r
, *tmp_node
, *node_copy
;
2017 tmp_l
= tre_ast_new_literal(mem
, PARAMETER
, 0, -1);
2020 ((tre_literal_t
*)tmp_l
->obj
)->u
.params
= iter
->params
;
2021 iter
->params
[TRE_PARAM_DEPTH
] = params_depth
+ 1;
2022 tmp_r
= tre_ast_new_literal(mem
, PARAMETER
, 0, -1);
2025 old_params
= tre_mem_alloc(mem
, sizeof(*old_params
)
2029 for (i
= 0; i
< TRE_PARAM_LAST
; i
++)
2030 old_params
[i
] = params
[i
];
2031 ((tre_literal_t
*)tmp_r
->obj
)->u
.params
= old_params
;
2032 old_params
[TRE_PARAM_DEPTH
] = params_depth
;
2033 /* XXX - this is the only place where ast_new_node is
2034 needed -- should be moved inside AST module. */
2035 node_copy
= tre_ast_new_node(mem
, ITERATION
,
2036 sizeof(tre_iteration_t
));
2039 node_copy
->obj
= node
->obj
;
2040 tmp_node
= tre_ast_new_catenation(mem
, tmp_l
, node_copy
);
2043 tmp_node
= tre_ast_new_catenation(mem
, tmp_node
, tmp_r
);
2046 /* Replace the contents of `node' with `tmp_node'. */
2047 memcpy(node
, tmp_node
, sizeof(*node
));
2048 node
->obj
= tmp_node
->obj
;
2049 node
->type
= tmp_node
->type
;
2051 if (params_depth
> *max_depth
)
2052 *max_depth
= params_depth
;
2054 #endif /* TRE_APPROX */
2063 *position
+= pos_add_total
;
2065 /* `max_pos' should never be larger than `*position' if the above
2066 code works, but just an extra safeguard let's make sure
2067 `*position' is set large enough so enough memory will be
2068 allocated for the transition table. */
2069 if (max_pos
> *position
)
2070 *position
= max_pos
;
2073 DPRINT(("Expanded AST:\n"));
2075 DPRINT(("*position %d, max_pos %d\n", *position
, max_pos
));
2081 static tre_pos_and_tags_t
*
2082 tre_set_empty(tre_mem_t mem
)
2084 tre_pos_and_tags_t
*new_set
;
2086 new_set
= tre_mem_calloc(mem
, sizeof(*new_set
));
2087 if (new_set
== NULL
)
2090 new_set
[0].position
= -1;
2091 new_set
[0].code_min
= -1;
2092 new_set
[0].code_max
= -1;
2097 static tre_pos_and_tags_t
*
2098 tre_set_one(tre_mem_t mem
, int position
, int code_min
, int code_max
,
2099 tre_bracket_match_list_t
*bracket_match_list
, int backref
)
2101 tre_pos_and_tags_t
*new_set
;
2103 new_set
= tre_mem_calloc(mem
, sizeof(*new_set
) * 2);
2104 if (new_set
== NULL
)
2107 new_set
[0].position
= position
;
2108 new_set
[0].code_min
= code_min
;
2109 new_set
[0].code_max
= code_max
;
2110 new_set
[0].bracket_match_list
= bracket_match_list
;
2111 new_set
[0].backref
= backref
;
2112 new_set
[1].position
= -1;
2113 new_set
[1].code_min
= -1;
2114 new_set
[1].code_max
= -1;
2119 static tre_pos_and_tags_t
*
2120 tre_set_union(tre_mem_t mem
, tre_pos_and_tags_t
*set1
, tre_pos_and_tags_t
*set2
,
2121 int *tags
, int assertions
, int *params
)
2124 tre_pos_and_tags_t
*new_set
;
2128 for (num_tags
= 0; tags
!= NULL
&& tags
[num_tags
] >= 0; num_tags
++);
2129 for (s1
= 0; set1
[s1
].position
>= 0; s1
++);
2130 for (s2
= 0; set2
[s2
].position
>= 0; s2
++);
2131 new_set
= tre_mem_calloc(mem
, sizeof(*new_set
) * (s1
+ s2
+ 1));
2135 for (s1
= 0; set1
[s1
].position
>= 0; s1
++)
2137 new_set
[s1
].position
= set1
[s1
].position
;
2138 new_set
[s1
].code_min
= set1
[s1
].code_min
;
2139 new_set
[s1
].code_max
= set1
[s1
].code_max
;
2140 new_set
[s1
].assertions
= set1
[s1
].assertions
| assertions
;
2141 new_set
[s1
].bracket_match_list
= set1
[s1
].bracket_match_list
;
2142 new_set
[s1
].backref
= set1
[s1
].backref
;
2143 if (set1
[s1
].tags
== NULL
&& tags
== NULL
)
2144 new_set
[s1
].tags
= NULL
;
2147 for (i
= 0; set1
[s1
].tags
!= NULL
&& set1
[s1
].tags
[i
] >= 0; i
++);
2148 new_tags
= tre_mem_alloc(mem
, (sizeof(*new_tags
)
2149 * (i
+ num_tags
+ 1)));
2150 if (new_tags
== NULL
)
2152 for (j
= 0; j
< i
; j
++)
2153 new_tags
[j
] = set1
[s1
].tags
[j
];
2154 for (i
= 0; i
< num_tags
; i
++)
2155 new_tags
[j
+ i
] = tags
[i
];
2156 new_tags
[j
+ i
] = -1;
2157 new_set
[s1
].tags
= new_tags
;
2159 if (set1
[s1
].params
)
2160 new_set
[s1
].params
= set1
[s1
].params
;
2163 if (!new_set
[s1
].params
)
2164 new_set
[s1
].params
= params
;
2167 new_set
[s1
].params
= tre_mem_alloc(mem
, sizeof(*params
) *
2169 if (!new_set
[s1
].params
)
2171 for (i
= 0; i
< TRE_PARAM_LAST
; i
++)
2172 if (params
[i
] != TRE_PARAM_UNSET
)
2173 new_set
[s1
].params
[i
] = params
[i
];
2178 for (s2
= 0; set2
[s2
].position
>= 0; s2
++)
2180 new_set
[s1
+ s2
].position
= set2
[s2
].position
;
2181 new_set
[s1
+ s2
].code_min
= set2
[s2
].code_min
;
2182 new_set
[s1
+ s2
].code_max
= set2
[s2
].code_max
;
2183 /* XXX - why not | assertions here as well? */
2184 new_set
[s1
+ s2
].assertions
= set2
[s2
].assertions
;
2185 new_set
[s1
+ s2
].bracket_match_list
= set2
[s2
].bracket_match_list
;
2186 new_set
[s1
+ s2
].backref
= set2
[s2
].backref
;
2187 if (set2
[s2
].tags
== NULL
)
2188 new_set
[s1
+ s2
].tags
= NULL
;
2191 for (i
= 0; set2
[s2
].tags
[i
] >= 0; i
++);
2192 new_tags
= tre_mem_alloc(mem
, sizeof(*new_tags
) * (i
+ 1));
2193 if (new_tags
== NULL
)
2195 for (j
= 0; j
< i
; j
++)
2196 new_tags
[j
] = set2
[s2
].tags
[j
];
2198 new_set
[s1
+ s2
].tags
= new_tags
;
2200 if (set2
[s2
].params
)
2201 new_set
[s1
+ s2
].params
= set2
[s2
].params
;
2204 if (!new_set
[s1
+ s2
].params
)
2205 new_set
[s1
+ s2
].params
= params
;
2208 new_set
[s1
+ s2
].params
= tre_mem_alloc(mem
, sizeof(*params
) *
2210 if (!new_set
[s1
+ s2
].params
)
2212 for (i
= 0; i
< TRE_PARAM_LAST
; i
++)
2213 if (params
[i
] != TRE_PARAM_UNSET
)
2214 new_set
[s1
+ s2
].params
[i
] = params
[i
];
2218 new_set
[s1
+ s2
].position
= -1;
2222 /* Finds the empty path through `node' which is the one that should be
2223 taken according to POSIX.2 rules, and adds the tags on that path to
2224 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
2225 set to the number of tags seen on the path. */
2226 static reg_errcode_t
2227 tre_match_empty(tre_stack_t
*stack
, tre_ast_node_t
*node
, int *tags
,
2228 int *assertions
, int *params
, int *num_tags_seen
,
2233 tre_catenation_t
*cat
;
2234 tre_iteration_t
*iter
;
2236 int bottom
= tre_stack_num_objects(stack
);
2237 reg_errcode_t status
= REG_OK
;
2243 status
= tre_stack_push_voidptr(stack
, node
);
2245 /* Walk through the tree recursively. */
2246 while (status
== REG_OK
&& tre_stack_num_objects(stack
) > bottom
)
2248 node
= tre_stack_pop_voidptr(stack
);
2253 lit
= (tre_literal_t
*)node
->obj
;
2254 switch (lit
->code_min
)
2257 if (lit
->code_max
>= 0)
2261 /* Add the tag to `tags'. */
2262 for (i
= 0; tags
[i
] >= 0; i
++)
2263 if (tags
[i
] == lit
->code_max
)
2267 tags
[i
] = lit
->code_max
;
2276 assert(lit
->code_max
>= 1
2277 || lit
->code_max
<= ASSERT_LAST
);
2278 if (assertions
!= NULL
)
2279 *assertions
|= lit
->code_max
;
2283 for (i
= 0; i
< TRE_PARAM_LAST
; i
++)
2284 params
[i
] = lit
->u
.params
[i
];
2285 if (params_seen
!= NULL
)
2297 /* Subexpressions starting earlier take priority over ones
2298 starting later, so we prefer the left subexpression over the
2299 right subexpression. */
2300 uni
= (tre_union_t
*)node
->obj
;
2301 if (uni
->left
->nullable
)
2302 STACK_PUSHX(stack
, voidptr
, uni
->left
)
2303 else if (uni
->right
->nullable
)
2304 STACK_PUSHX(stack
, voidptr
, uni
->right
)
2310 /* The path must go through both children. */
2311 cat
= (tre_catenation_t
*)node
->obj
;
2312 assert(cat
->left
->nullable
);
2313 assert(cat
->right
->nullable
);
2314 STACK_PUSHX(stack
, voidptr
, cat
->left
);
2315 STACK_PUSHX(stack
, voidptr
, cat
->right
);
2319 /* A match with an empty string is preferred over no match at
2320 all, so we go through the argument if possible. */
2321 iter
= (tre_iteration_t
*)node
->obj
;
2322 if (iter
->arg
->nullable
)
2323 STACK_PUSHX(stack
, voidptr
, iter
->arg
);
2339 NFL_POST_CATENATION
,
2341 } tre_nfl_stack_symbol_t
;
2344 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2345 the nodes of the AST `tree'. */
2346 static reg_errcode_t
2347 tre_compute_nfl(tre_mem_t mem
, tre_stack_t
*stack
, tre_ast_node_t
*tree
)
2349 int bottom
= tre_stack_num_objects(stack
);
2351 STACK_PUSHR(stack
, voidptr
, tree
);
2352 STACK_PUSHR(stack
, int, NFL_RECURSE
);
2354 while (tre_stack_num_objects(stack
) > bottom
)
2356 tre_nfl_stack_symbol_t symbol
;
2357 tre_ast_node_t
*node
;
2359 symbol
= (tre_nfl_stack_symbol_t
)tre_stack_pop_int(stack
);
2360 node
= tre_stack_pop_voidptr(stack
);
2368 tre_literal_t
*lit
= (tre_literal_t
*)node
->obj
;
2369 if (IS_BACKREF(lit
))
2371 /* Back references: nullable = false, firstpos = {i},
2374 node
->firstpos
= tre_set_one(mem
, lit
->position
, 0,
2375 TRE_CHAR_MAX
, NULL
, -1);
2376 if (!node
->firstpos
)
2378 node
->lastpos
= tre_set_one(mem
, lit
->position
, 0,
2380 (int)lit
->code_max
);
2384 else if (lit
->code_min
< 0)
2386 /* Tags, empty strings, params, and zero width assertions:
2387 nullable = true, firstpos = {}, and lastpos = {}. */
2389 node
->firstpos
= tre_set_empty(mem
);
2390 if (!node
->firstpos
)
2392 node
->lastpos
= tre_set_empty(mem
);
2398 /* Literal at position i: nullable = false, firstpos = {i},
2402 tre_set_one(mem
, lit
->position
, (int)lit
->code_min
,
2403 (int)lit
->code_max
, NULL
, -1);
2404 if (!node
->firstpos
)
2406 node
->lastpos
= tre_set_one(mem
, lit
->position
,
2409 lit
->u
.bracket_match_list
,
2418 /* Compute the attributes for the two subtrees, and after that
2420 STACK_PUSHR(stack
, voidptr
, node
);
2421 STACK_PUSHR(stack
, int, NFL_POST_UNION
);
2422 STACK_PUSHR(stack
, voidptr
, ((tre_union_t
*)node
->obj
)->right
);
2423 STACK_PUSHR(stack
, int, NFL_RECURSE
);
2424 STACK_PUSHR(stack
, voidptr
, ((tre_union_t
*)node
->obj
)->left
);
2425 STACK_PUSHR(stack
, int, NFL_RECURSE
);
2429 /* Compute the attributes for the two subtrees, and after that
2431 STACK_PUSHR(stack
, voidptr
, node
);
2432 STACK_PUSHR(stack
, int, NFL_POST_CATENATION
);
2433 STACK_PUSHR(stack
, voidptr
, ((tre_catenation_t
*)node
->obj
)->right
);
2434 STACK_PUSHR(stack
, int, NFL_RECURSE
);
2435 STACK_PUSHR(stack
, voidptr
, ((tre_catenation_t
*)node
->obj
)->left
);
2436 STACK_PUSHR(stack
, int, NFL_RECURSE
);
2440 /* Compute the attributes for the subtree, and after that for
2442 STACK_PUSHR(stack
, voidptr
, node
);
2443 STACK_PUSHR(stack
, int, NFL_POST_ITERATION
);
2444 STACK_PUSHR(stack
, voidptr
, ((tre_iteration_t
*)node
->obj
)->arg
);
2445 STACK_PUSHR(stack
, int, NFL_RECURSE
);
2448 break; /* end case: NFL_RECURSE */
2450 case NFL_POST_UNION
:
2452 tre_union_t
*uni
= (tre_union_t
*)node
->obj
;
2453 node
->nullable
= uni
->left
->nullable
|| uni
->right
->nullable
;
2454 node
->firstpos
= tre_set_union(mem
, uni
->left
->firstpos
,
2455 uni
->right
->firstpos
, NULL
, 0, NULL
);
2456 if (!node
->firstpos
)
2458 node
->lastpos
= tre_set_union(mem
, uni
->left
->lastpos
,
2459 uni
->right
->lastpos
, NULL
, 0, NULL
);
2465 case NFL_POST_ITERATION
:
2467 int num_tags
, *tags
, assertions
, params_seen
;
2469 reg_errcode_t status
;
2470 tre_iteration_t
*iter
= (tre_iteration_t
*)node
->obj
;
2472 /* From Ville Laurikari's original 2001 Master's thesis, the
2473 firstpos(n) and lastpos(n) of an iteration is just the
2474 corresponding values of the iteration's argument. Unfortunately,
2475 this isn't sufficient for the following BRE:
2477 \(a*\)*b\(\1\) matched against ab
2479 The backreference wants to force the first subexpression to
2480 be the empty string, but there is no transition for this. So
2481 we need to modify the lastpos(n) of an iteration to be the
2482 equivalent of that of catentation. Using the same notation as
2483 in the thesis, lastpos(n) is redefined as:
2485 if nullable(c1) then
2487 addtags(lastpos(c1),
2492 where c1 is the argument node. firstpos(n) remains the same. */
2494 /* Compute lastpos. */
2495 if (iter
->min
== 0 || iter
->arg
->nullable
)
2498 if (iter
->arg
->nullable
)
2500 /* The arg matches the empty string. Make a first pass
2501 with tre_match_empty() to get the number of tags and
2503 status
= tre_match_empty(stack
, iter
->arg
,
2504 NULL
, NULL
, NULL
, &num_tags
,
2506 if (status
!= REG_OK
)
2508 /* Allocate arrays for the tags and parameters. */
2509 tags
= xmalloc(sizeof(int) * (num_tags
+ 1));
2517 params
= tre_mem_alloc(mem
, sizeof(*params
)
2525 /* Second pass with tre_mach_empty() to get the list of
2526 tags and parameters. */
2527 status
= tre_match_empty(stack
, iter
->arg
, tags
,
2528 &assertions
, params
, NULL
, NULL
);
2529 if (status
!= REG_OK
)
2535 tre_set_union(mem
, iter
->arg
->lastpos
, iter
->arg
->lastpos
,
2536 tags
, assertions
, params
);
2542 node
->lastpos
= iter
->arg
->lastpos
;
2547 node
->lastpos
= iter
->arg
->lastpos
;
2549 node
->firstpos
= iter
->arg
->firstpos
;
2553 case NFL_POST_CATENATION
:
2555 int num_tags
, *tags
, assertions
, params_seen
;
2557 reg_errcode_t status
;
2558 tre_catenation_t
*cat
= node
->obj
;
2559 node
->nullable
= cat
->left
->nullable
&& cat
->right
->nullable
;
2561 /* Compute firstpos. */
2562 if (cat
->left
->nullable
)
2564 /* The left side matches the empty string. Make a first pass
2565 with tre_match_empty() to get the number of tags and
2567 status
= tre_match_empty(stack
, cat
->left
,
2568 NULL
, NULL
, NULL
, &num_tags
,
2570 if (status
!= REG_OK
)
2572 /* Allocate arrays for the tags and parameters. */
2573 tags
= xmalloc(sizeof(*tags
) * (num_tags
+ 1));
2581 params
= tre_mem_alloc(mem
, sizeof(*params
)
2589 /* Second pass with tre_mach_empty() to get the list of
2590 tags and parameters. */
2591 status
= tre_match_empty(stack
, cat
->left
, tags
,
2592 &assertions
, params
, NULL
, NULL
);
2593 if (status
!= REG_OK
)
2599 tre_set_union(mem
, cat
->right
->firstpos
, cat
->left
->firstpos
,
2600 tags
, assertions
, params
);
2602 if (!node
->firstpos
)
2607 node
->firstpos
= cat
->left
->firstpos
;
2610 /* Compute lastpos. */
2611 if (cat
->right
->nullable
)
2613 /* The right side matches the empty string. Make a first pass
2614 with tre_match_empty() to get the number of tags and
2616 status
= tre_match_empty(stack
, cat
->right
,
2617 NULL
, NULL
, NULL
, &num_tags
,
2619 if (status
!= REG_OK
)
2621 /* Allocate arrays for the tags and parameters. */
2622 tags
= xmalloc(sizeof(int) * (num_tags
+ 1));
2630 params
= tre_mem_alloc(mem
, sizeof(*params
)
2638 /* Second pass with tre_mach_empty() to get the list of
2639 tags and parameters. */
2640 status
= tre_match_empty(stack
, cat
->right
, tags
,
2641 &assertions
, params
, NULL
, NULL
);
2642 if (status
!= REG_OK
)
2648 tre_set_union(mem
, cat
->left
->lastpos
, cat
->right
->lastpos
,
2649 tags
, assertions
, params
);
2656 node
->lastpos
= cat
->right
->lastpos
;
2671 /* Adds a transition from each position in `p1' to each position in `p2'. */
2672 static reg_errcode_t
2673 tre_make_trans(tre_pos_and_tags_t
*p1
, tre_pos_and_tags_t
*p2
,
2674 tre_tnfa_transition_t
*transitions
,
2675 int *counts
, int *offs
)
2677 tre_pos_and_tags_t
*orig_p2
= p2
;
2678 tre_tnfa_transition_t
*trans
;
2679 int i
, j
, k
, l
, dup
, prev_p2_pos
;
2681 if (transitions
!= NULL
)
2682 while (p1
->position
>= 0)
2686 while (p2
->position
>= 0)
2688 /* Optimization: if this position was already handled, skip it. */
2689 if (p2
->position
== prev_p2_pos
)
2694 prev_p2_pos
= p2
->position
;
2695 /* Set `trans' to point to the next unused transition from
2696 position `p1->position'. */
2697 trans
= transitions
+ offs
[p1
->position
];
2698 while (trans
->state
!= NULL
)
2701 /* If we find a previous transition from `p1->position' to
2702 `p2->position', it is overwritten. This can happen only
2703 if there are nested loops in the regexp, like in "((a)*)*".
2704 In POSIX.2 repetition using the outer loop is always
2705 preferred over using the inner loop. Therefore the
2706 transition for the inner loop is useless and can be thrown
2708 /* XXX - The same position is used for all nodes in a bracket
2709 expression, so this optimization cannot be used (it will
2710 break bracket expressions) unless I figure out a way to
2712 if (trans
->state_id
== p2
->position
)
2721 if (trans
->state
== NULL
)
2722 (trans
+ 1)->state
= NULL
;
2723 /* Use the character ranges, assertions, etc. from `p1' for
2724 the transition from `p1' to `p2'. */
2725 trans
->code_min
= p1
->code_min
;
2726 trans
->code_max
= p1
->code_max
;
2727 trans
->state
= transitions
+ offs
[p2
->position
];
2728 trans
->state_id
= p2
->position
;
2729 trans
->assertions
= p1
->assertions
| p2
->assertions
2730 | (p1
->bracket_match_list
!= NULL
? ASSERT_BRACKET_MATCH
: 0);
2731 if (p1
->backref
>= 0)
2733 assert((trans
->assertions
& ASSERT_BRACKET_MATCH
) == 0);
2734 assert(p2
->backref
< 0);
2735 trans
->u
.backref
= p1
->backref
;
2736 trans
->assertions
|= ASSERT_BACKREF
;
2738 if (p1
->bracket_match_list
!= NULL
)
2740 trans
->u
.bracket_match_list
=
2741 xmalloc(SIZEOF_BRACKET_MATCH_LIST(p1
->bracket_match_list
));
2742 if (trans
->u
.bracket_match_list
== NULL
)
2744 memcpy(trans
->u
.bracket_match_list
, p1
->bracket_match_list
,
2745 SIZEOF_BRACKET_MATCH_LIST(p1
->bracket_match_list
));
2748 /* Find out how many tags this transition has. */
2750 if (p1
->tags
!= NULL
)
2751 while(p1
->tags
[i
] >= 0)
2754 if (p2
->tags
!= NULL
)
2755 while(p2
->tags
[j
] >= 0)
2758 /* If we are overwriting a transition, free the old tag array. */
2759 if (trans
->tags
!= NULL
)
2763 /* If there were any tags, allocate an array and fill it. */
2766 trans
->tags
= xmalloc(sizeof(*trans
->tags
) * (i
+ j
+ 1));
2770 if (p1
->tags
!= NULL
)
2771 while(p1
->tags
[i
] >= 0)
2773 trans
->tags
[i
] = p1
->tags
[i
];
2778 if (p2
->tags
!= NULL
)
2779 while (p2
->tags
[j
] >= 0)
2781 /* Don't add duplicates. */
2783 for (k
= 0; k
< i
; k
++)
2784 if (trans
->tags
[k
] == p2
->tags
[j
])
2790 trans
->tags
[l
++] = p2
->tags
[j
];
2793 trans
->tags
[l
] = -1;
2796 /* Set the parameter array. If both `p2' and `p1' have same
2797 parameters, the values in `p2' override those in `p1'. */
2798 if (p1
->params
|| p2
->params
)
2801 trans
->params
= xmalloc(sizeof(*trans
->params
)
2805 for (i
= 0; i
< TRE_PARAM_LAST
; i
++)
2807 trans
->params
[i
] = TRE_PARAM_UNSET
;
2808 if (p1
->params
&& p1
->params
[i
] != TRE_PARAM_UNSET
)
2809 trans
->params
[i
] = p1
->params
[i
];
2810 if (p2
->params
&& p2
->params
[i
] != TRE_PARAM_UNSET
)
2811 trans
->params
[i
] = p2
->params
[i
];
2817 xfree(trans
->params
);
2818 trans
->params
= NULL
;
2826 DPRINT((" %2d -> %2d on %3d", p1
->position
, p2
->position
,
2828 if (p1
->code_max
!= p1
->code_min
)
2829 DPRINT(("-%3d", p1
->code_max
));
2833 DPRINT((", tags ["));
2836 DPRINT(("%d", *tags
));
2843 if (trans
->assertions
)
2844 DPRINT((", assert %d", trans
->assertions
));
2845 if (trans
->assertions
& ASSERT_BACKREF
)
2846 DPRINT((", backref %d", trans
->u
.backref
));
2847 else if (trans
->assertions
& ASSERT_BRACKET_MATCH
)
2848 DPRINT((", bracket_match_list %p",
2849 trans
->u
.bracket_match_list
));
2853 tre_print_params(trans
->params
);
2857 #endif /* TRE_DEBUG */
2863 /* Compute a maximum limit for the number of transitions leaving
2865 while (p1
->position
>= 0)
2868 while (p2
->position
>= 0)
2870 counts
[p1
->position
]++;
2878 /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
2879 labelled with one character range (there are no transitions on empty
2880 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
2882 static reg_errcode_t
2883 tre_ast_to_tnfa(tre_ast_node_t
*node
, tre_tnfa_transition_t
*transitions
,
2884 int *counts
, int *offs
)
2887 tre_catenation_t
*cat
;
2888 tre_iteration_t
*iter
;
2889 reg_errcode_t errcode
= REG_OK
;
2891 /* XXX - recurse using a stack!. */
2897 uni
= (tre_union_t
*)node
->obj
;
2898 errcode
= tre_ast_to_tnfa(uni
->left
, transitions
, counts
, offs
);
2899 if (errcode
!= REG_OK
)
2901 errcode
= tre_ast_to_tnfa(uni
->right
, transitions
, counts
, offs
);
2905 cat
= (tre_catenation_t
*)node
->obj
;
2906 /* Add a transition from each position in cat->left->lastpos
2907 to each position in cat->right->firstpos. */
2908 errcode
= tre_make_trans(cat
->left
->lastpos
, cat
->right
->firstpos
,
2909 transitions
, counts
, offs
);
2910 if (errcode
!= REG_OK
)
2912 errcode
= tre_ast_to_tnfa(cat
->left
, transitions
, counts
, offs
);
2913 if (errcode
!= REG_OK
)
2915 errcode
= tre_ast_to_tnfa(cat
->right
, transitions
, counts
, offs
);
2919 iter
= (tre_iteration_t
*)node
->obj
;
2920 assert(iter
->max
== -1 || iter
->max
== 1);
2922 if (iter
->max
== -1)
2924 assert(iter
->min
== 0 || iter
->min
== 1);
2925 /* Add a transition from each last position in the iterated
2926 expression to each first position. */
2927 errcode
= tre_make_trans(iter
->arg
->lastpos
, iter
->arg
->firstpos
,
2928 transitions
, counts
, offs
);
2929 if (errcode
!= REG_OK
)
2932 errcode
= tre_ast_to_tnfa(iter
->arg
, transitions
, counts
, offs
);
2939 #define ERROR_EXIT(err) \
2943 if (/*CONSTCOND*/1) \
2946 while (/*CONSTCOND*/0)
2950 tre_compile(regex_t
*preg
, const tre_char_t
*regex
, size_t n
, int cflags
,
2954 tre_ast_node_t
*tree
, *tmp_ast_l
, *tmp_ast_r
;
2955 tre_pos_and_tags_t
*p
;
2956 int *counts
= NULL
, *offs
= NULL
;
2958 tre_tnfa_transition_t
*transitions
, *initial
;
2959 tre_tnfa_t
*tnfa
= NULL
;
2960 tre_submatch_data_t
*submatch_data
= NULL
;
2961 tre_tag_direction_t
*tag_directions
= NULL
;
2962 reg_errcode_t errcode
;
2965 /* Parse context. */
2966 tre_parse_ctx_t parse_ctx
;
2968 /* Allocate a stack used throughout the compilation process for various
2970 stack
= tre_stack_new(512, 10240, 128);
2973 /* Allocate a fast memory allocator. */
2974 mem
= tre_mem_new();
2977 tre_stack_destroy(stack
);
2981 /* Parse the regexp. */
2982 memset(&parse_ctx
, 0, sizeof(parse_ctx
));
2983 parse_ctx
.mem
= mem
;
2984 parse_ctx
.stack
= stack
;
2985 parse_ctx
.re
= regex
;
2987 /* Only allow REG_UNGREEDY to be set if both REG_ENHANCED and REG_EXTENDED
2989 if ((cflags
& (REG_ENHANCED
| REG_EXTENDED
)) != (REG_ENHANCED
| REG_EXTENDED
))
2990 cflags
&= ~REG_UNGREEDY
;
2991 parse_ctx
.cflags
= cflags
;
2992 parse_ctx
.max_backref
= -1;
2993 parse_ctx
.loc
= loc
;
2994 parse_ctx
.submatch_id_invisible
= SUBMATCH_ID_INVISIBLE_START
;
2996 DPRINT(("tre_compile: parsing '%.*" STRF
"'\n", (int)n
, regex
));
2997 errcode
= tre_parse(&parse_ctx
);
2998 if (errcode
!= REG_OK
)
2999 ERROR_EXIT(errcode
);
3000 preg
->re_nsub
= parse_ctx
.submatch_id
- 1;
3001 tree
= parse_ctx
.result
;
3003 /* Back references and approximate matching cannot currently be used
3004 in the same regexp. */
3005 if (parse_ctx
.max_backref
>= 0 && parse_ctx
.have_approx
)
3006 ERROR_EXIT(REG_BADPAT
);
3009 tre_ast_print(tree
);
3010 #endif /* TRE_DEBUG */
3012 /* Referring to nonexistent subexpressions is illegal. */
3013 if (parse_ctx
.max_backref
> (int)preg
->re_nsub
)
3014 ERROR_EXIT(REG_ESUBREG
);
3016 /* Allocate the TNFA struct. */
3017 tnfa
= xcalloc(1, sizeof(tre_tnfa_t
));
3019 ERROR_EXIT(REG_ESPACE
);
3020 tnfa
->have_backrefs
= parse_ctx
.max_backref
>= 0;
3021 tnfa
->have_approx
= parse_ctx
.have_approx
;
3022 tnfa
->num_submatches
= parse_ctx
.submatch_id
;
3023 tnfa
->num_submatches_invisible
= parse_ctx
.submatch_id_invisible
3024 - SUBMATCH_ID_INVISIBLE_START
;
3025 tnfa
->num_reorder_tags
= parse_ctx
.num_reorder_tags
;
3026 tnfa
->loc
= parse_ctx
.loc
;
3028 /* Set up tags for submatch addressing. If REG_NOSUB is set and the
3029 regexp does not have back references, this can be skipped. */
3030 if (tnfa
->num_reorder_tags
> 0 || !(cflags
& REG_NOSUB
))
3032 DPRINT(("tre_compile: setting up tags\n"));
3034 /* Figure out how many tags we will need. */
3035 errcode
= tre_add_tags(NULL
, stack
, tree
, tnfa
);
3036 if (errcode
!= REG_OK
)
3037 ERROR_EXIT(errcode
);
3039 tre_ast_print(tree
);
3040 #endif /* TRE_DEBUG */
3042 if (tnfa
->num_tags
> 0)
3044 tag_directions
= xmalloc(sizeof(*tag_directions
)
3045 * (tnfa
->num_tags
+ 1));
3046 if (tag_directions
== NULL
)
3047 ERROR_EXIT(REG_ESPACE
);
3048 tnfa
->tag_directions
= tag_directions
;
3049 memset(tag_directions
, -1,
3050 sizeof(*tag_directions
) * (tnfa
->num_tags
+ 1));
3052 tnfa
->minimal_tags
= xcalloc((unsigned)tnfa
->num_tags
* 2 + 3,
3053 sizeof(*tnfa
->minimal_tags
));
3054 if (tnfa
->minimal_tags
== NULL
)
3055 ERROR_EXIT(REG_ESPACE
);
3057 submatch_data
= xcalloc((unsigned)parse_ctx
.submatch_id
,
3058 sizeof(*submatch_data
));
3059 if (submatch_data
== NULL
)
3060 ERROR_EXIT(REG_ESPACE
);
3061 /* Set the eo_tag value to -1 to indicate that that corresponding
3062 * submatch has not be completed yet */
3063 for (i
= 0; i
< parse_ctx
.submatch_id
; i
++)
3065 submatch_data
[i
].eo_tag
= -1;
3067 tnfa
->submatch_data
= submatch_data
;
3069 errcode
= tre_add_tags(mem
, stack
, tree
, tnfa
);
3070 if (errcode
!= REG_OK
)
3071 ERROR_EXIT(errcode
);
3074 for (i
= 0; i
< parse_ctx
.submatch_id
; i
++)
3075 DPRINT(("pmatch[%d] = {t%d, t%d}\n",
3076 i
, submatch_data
[i
].so_tag
, submatch_data
[i
].eo_tag
));
3077 for (i
= 0; i
<= tnfa
->num_tags
; i
++)
3078 DPRINT(("t%d is %s\n", i
, tag_dir_str
[tag_directions
[i
]]));
3079 #endif /* TRE_DEBUG */
3082 /* Expand iteration nodes. */
3083 errcode
= tre_expand_ast(mem
, stack
, tree
, &parse_ctx
.position
,
3084 tag_directions
, &tnfa
->params_depth
);
3085 if (errcode
!= REG_OK
)
3086 ERROR_EXIT(errcode
);
3088 /* Add a dummy node for the final state.
3089 XXX - For certain patterns this dummy node can be optimized away,
3090 for example "a*" or "ab*". Figure out a simple way to detect
3091 this possibility. */
3093 tmp_ast_r
= tre_ast_new_literal(mem
, 0, 0, parse_ctx
.position
++);
3094 if (tmp_ast_r
== NULL
)
3095 ERROR_EXIT(REG_ESPACE
);
3097 tree
= tre_ast_new_catenation(mem
, tmp_ast_l
, tmp_ast_r
);
3099 ERROR_EXIT(REG_ESPACE
);
3102 tre_ast_print(tree
);
3103 DPRINT(("Number of states: %d\n", parse_ctx
.position
));
3105 for (i
= 0; i
< parse_ctx
.submatch_id
; i
++)
3106 DPRINT(("pmatch[%d] = {t%d, t%d}\n",
3107 i
, submatch_data
[i
].so_tag
, submatch_data
[i
].eo_tag
));
3109 for (i
= 0; i
<= tnfa
->num_tags
; i
++)
3110 DPRINT(("t%d is %s\n", i
, tag_dir_str
[tag_directions
[i
]]));
3111 #endif /* TRE_DEBUG */
3113 errcode
= tre_compute_nfl(mem
, stack
, tree
);
3114 if (errcode
!= REG_OK
)
3115 ERROR_EXIT(errcode
);
3117 counts
= xmalloc(sizeof(int) * parse_ctx
.position
);
3119 ERROR_EXIT(REG_ESPACE
);
3121 offs
= xmalloc(sizeof(int) * parse_ctx
.position
);
3123 ERROR_EXIT(REG_ESPACE
);
3125 for (i
= 0; i
< parse_ctx
.position
; i
++)
3127 tre_ast_to_tnfa(tree
, NULL
, counts
, NULL
);
3130 for (i
= 0; i
< parse_ctx
.position
; i
++)
3133 add
+= counts
[i
] + 1;
3136 transitions
= xcalloc((unsigned)add
+ 1, sizeof(*transitions
));
3137 if (transitions
== NULL
)
3138 ERROR_EXIT(REG_ESPACE
);
3139 tnfa
->transitions
= transitions
;
3140 tnfa
->num_transitions
= add
;
3142 DPRINT(("Converting to TNFA:\n"));
3143 errcode
= tre_ast_to_tnfa(tree
, transitions
, counts
, offs
);
3144 if (errcode
!= REG_OK
)
3145 ERROR_EXIT(errcode
);
3147 #ifdef USE_FIRSTPOS_CHARS /* not defined */
3148 /* If in eight bit mode, compute a table of characters that can be the
3149 first character of a match. */
3150 tnfa
->first_char
= -1;
3151 if (TRE_MB_CUR_MAX_L(tnfa
->loc
) == 1 && !tmp_ast_l
->nullable
)
3155 DPRINT(("Characters that can start a match:"));
3156 tnfa
->firstpos_chars
= xcalloc(256, sizeof(char));
3157 if (tnfa
->firstpos_chars
== NULL
)
3158 ERROR_EXIT(REG_ESPACE
);
3159 for (p
= tree
->firstpos
; p
->position
>= 0; p
++)
3161 tre_tnfa_transition_t
*j
= transitions
+ offs
[p
->position
];
3162 while (j
->state
!= NULL
)
3164 for (k
= j
->code_min
; k
<= j
->code_max
&& k
< 256; k
++)
3167 tnfa
->firstpos_chars
[k
] = 1;
3174 #define TRE_OPTIMIZE_FIRST_CHAR 1
3175 #if TRE_OPTIMIZE_FIRST_CHAR
3178 for (k
= 0; k
< 256; k
++)
3179 if (tnfa
->firstpos_chars
[k
])
3181 DPRINT(("first char must be %d\n", k
));
3182 tnfa
->first_char
= k
;
3183 xfree(tnfa
->firstpos_chars
);
3184 tnfa
->firstpos_chars
= NULL
;
3192 tnfa
->firstpos_chars
= NULL
;
3193 #else /* !USE_FIRSTPOS_CHARS */
3195 /* Set first_char only if there is only one character that can be the
3196 first character of a match */
3197 tnfa
->first_char
= -1;
3198 if (!tmp_ast_l
->nullable
)
3201 for (p
= tree
->firstpos
; scanning
&& p
->position
>= 0; p
++)
3203 tre_tnfa_transition_t
*j
= transitions
+ offs
[p
->position
];
3204 while (j
->state
!= NULL
)
3206 if (j
->code_min
<= j
->code_max
)
3208 if (j
->code_max
!= j
->code_min
|| j
->code_min
== -1 || tnfa
->first_char
!= -1)
3210 tnfa
->first_char
= -1;
3214 tnfa
->first_char
= j
->code_min
;
3220 if (tnfa
->first_char
>= 0)
3221 DPRINT(("first char must be %d\n", tnfa
->first_char
));
3222 #endif /* TRE_DEBUG */
3224 #endif /* !USE_FIRSTPOS_CHARS */
3228 while (p
->position
>= 0)
3235 DPRINT(("initial: %d", p
->position
));
3243 DPRINT(("%d", *tags
));
3249 DPRINT((", assert %d", p
->assertions
));
3253 tre_print_params(p
->params
);
3257 #endif /* TRE_DEBUG */
3262 initial
= xcalloc((unsigned)i
+ 1, sizeof(tre_tnfa_transition_t
));
3263 if (initial
== NULL
)
3264 ERROR_EXIT(REG_ESPACE
);
3265 tnfa
->initial
= initial
;
3268 for (p
= tree
->firstpos
; p
->position
>= 0; p
++)
3270 initial
[i
].state
= transitions
+ offs
[p
->position
];
3271 initial
[i
].state_id
= p
->position
;
3272 initial
[i
].tags
= NULL
;
3273 /* Copy the arrays p->tags, and p->params, they are allocated
3274 from a tre_mem object. */
3278 for (j
= 0; p
->tags
[j
] >= 0; j
++);
3279 initial
[i
].tags
= xmalloc(sizeof(*p
->tags
) * (j
+ 1));
3280 if (!initial
[i
].tags
)
3281 ERROR_EXIT(REG_ESPACE
);
3282 memcpy(initial
[i
].tags
, p
->tags
, sizeof(*p
->tags
) * (j
+ 1));
3284 initial
[i
].params
= NULL
;
3287 initial
[i
].params
= xmalloc(sizeof(*p
->params
) * TRE_PARAM_LAST
);
3288 if (!initial
[i
].params
)
3289 ERROR_EXIT(REG_ESPACE
);
3290 memcpy(initial
[i
].params
, p
->params
,
3291 sizeof(*p
->params
) * TRE_PARAM_LAST
);
3293 initial
[i
].assertions
= p
->assertions
;
3296 initial
[i
].state
= NULL
;
3298 tnfa
->num_transitions
= add
;
3299 tnfa
->final
= transitions
+ offs
[tree
->lastpos
[0].position
];
3300 tnfa
->num_states
= parse_ctx
.position
;
3301 tnfa
->cflags
= cflags
;
3303 DPRINT(("final state %d (%p)\n", tree
->lastpos
[0].position
,
3304 (void *)tnfa
->final
));
3306 tre_mem_destroy(mem
);
3307 tre_stack_destroy(stack
);
3311 #ifdef TRE_USE_SYSTEM_REGEX_H
3312 preg
->re_magic
= RE_MAGIC
;
3313 #endif /* TRE_USE_SYSTEM_REGEX_H */
3314 preg
->TRE_REGEX_T_FIELD
= (void *)tnfa
;
3316 /* In Libc, we need to retain the locale. Outside Libc, we already called
3317 duplocale() which does the retaining. */
3318 XL_RETAIN(tnfa
->loc
);
3319 #endif /* __LIBC__ */
3323 /* Free everything that was allocated and return the error code. */
3324 tre_mem_destroy(mem
);
3326 tre_stack_destroy(stack
);
3332 /* Set tnfa into preg, so that calling tre_free() will free the contents
3333 of tnfa. But in Libc, NULL out the loc field since we never retained
3334 the locale. Outside Libc, we let tre_free() call freelocale(). */
3335 preg
->TRE_REGEX_T_FIELD
= (void *)tnfa
;
3337 if(tnfa
) tnfa
->loc
= NULL
;
3338 #endif /* __LIBC__ */
3348 tre_free(regex_t
*preg
)
3352 tre_tnfa_transition_t
*trans
;
3354 #ifdef TRE_USE_SYSTEM_REGEX_H
3356 #endif /* TRE_USE_SYSTEM_REGEX_H */
3357 tnfa
= (void *)preg
->TRE_REGEX_T_FIELD
;
3360 preg
->TRE_REGEX_T_FIELD
= NULL
;
3362 for (i
= 0; i
< tnfa
->num_transitions
; i
++)
3363 if (tnfa
->transitions
[i
].state
)
3365 if (tnfa
->transitions
[i
].tags
)
3366 xfree(tnfa
->transitions
[i
].tags
);
3367 if (tnfa
->transitions
[i
].assertions
& ASSERT_BRACKET_MATCH
)
3368 xfree(tnfa
->transitions
[i
].u
.bracket_match_list
);
3369 if (tnfa
->transitions
[i
].params
)
3370 xfree(tnfa
->transitions
[i
].params
);
3372 if (tnfa
->transitions
)
3373 xfree(tnfa
->transitions
);
3377 for (trans
= tnfa
->initial
; trans
->state
; trans
++)
3382 xfree(trans
->params
);
3384 xfree(tnfa
->initial
);
3387 if (tnfa
->submatch_data
)
3389 xfree(tnfa
->submatch_data
);
3392 if (tnfa
->tag_directions
)
3393 xfree(tnfa
->tag_directions
);
3394 #ifdef USE_FIRSTPOS_CHARS /* not defined */
3395 if (tnfa
->firstpos_chars
)
3396 xfree(tnfa
->firstpos_chars
);
3397 #endif /* USE_FIRSTPOS_CHARS */
3398 if (tnfa
->minimal_tags
)
3399 xfree(tnfa
->minimal_tags
);
3403 XL_RELEASE(tnfa
->loc
);
3404 #else /* !__LIBC__ */
3405 freelocale(tnfa
->loc
);
3406 #endif /* !__LIBC__ */
3408 if (tnfa
->last_matched_branch
)
3409 xfree(tnfa
->last_matched_branch
);
3418 static char str
[256];
3423 (void) tre_config(TRE_CONFIG_VERSION
, &version
);
3424 (void) snprintf(str
, sizeof(str
), "TRE %s (BSD)", version
);
3430 tre_config(int query
, void *result
)
3432 int *int_result
= result
;
3433 const char **string_result
= result
;
3437 case TRE_CONFIG_APPROX
:
3440 #else /* !TRE_APPROX */
3442 #endif /* !TRE_APPROX */
3445 case TRE_CONFIG_WCHAR
:
3448 #else /* !TRE_WCHAR */
3450 #endif /* !TRE_WCHAR */
3453 case TRE_CONFIG_MULTIBYTE
:
3454 #ifdef TRE_MULTIBYTE
3456 #else /* !TRE_MULTIBYTE */
3458 #endif /* !TRE_MULTIBYTE */
3461 case TRE_CONFIG_SYSTEM_ABI
:
3462 #ifdef TRE_CONFIG_SYSTEM_ABI
3464 #else /* !TRE_CONFIG_SYSTEM_ABI */
3466 #endif /* !TRE_CONFIG_SYSTEM_ABI */
3469 case TRE_CONFIG_VERSION
:
3470 *string_result
= TRE_VERSION
;
3476 #endif /* !__LIBC__ */