]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | tre-ast.h - Abstract syntax tree (AST) definitions | |
3 | ||
4 | This software is released under a BSD-style license. | |
5 | See the file LICENSE for details and copyright. | |
6 | ||
7 | */ | |
8 | ||
9 | ||
10 | #ifndef TRE_AST_H | |
11 | #define TRE_AST_H 1 | |
12 | ||
13 | #include <limits.h> | |
14 | ||
15 | #include "tre-mem.h" | |
16 | #include "tre-internal.h" | |
17 | #include "tre-compile.h" | |
18 | #include "tre-last-matched.h" | |
19 | ||
20 | /* The different AST node types. */ | |
21 | typedef enum { | |
22 | LITERAL, | |
23 | CATENATION, | |
24 | ITERATION, | |
25 | UNION | |
26 | } tre_ast_type_t; | |
27 | ||
28 | /* Special subtypes of TRE_LITERAL. */ | |
29 | #define EMPTY -1 /* Empty leaf (denotes empty string). */ | |
30 | #define ASSERTION -2 /* Assertion leaf. */ | |
31 | #define TAG -3 /* Tag leaf. */ | |
32 | #define BACKREF -4 /* Back reference leaf. */ | |
33 | #define PARAMETER -5 /* Parameter. */ | |
34 | ||
35 | #define IS_SPECIAL(x) ((x)->code_min < 0) | |
36 | #define IS_EMPTY(x) ((x)->code_min == EMPTY) | |
37 | #define IS_ASSERTION(x) ((x)->code_min == ASSERTION) | |
38 | #define IS_TAG(x) ((x)->code_min == TAG) | |
39 | #define IS_BACKREF(x) ((x)->code_min == BACKREF) | |
40 | #define IS_PARAMETER(x) ((x)->code_min == PARAMETER) | |
41 | ||
42 | #define SUBMATCH_ID_INVISIBLE_START (INT_MAX / 2 + 1) | |
43 | ||
44 | ||
45 | /* A generic AST node. All AST nodes consist of this node on the top | |
46 | level with `obj' pointing to the actual content. */ | |
47 | typedef struct _tre_ast_node { | |
48 | void *obj; /* Pointer to actual node. */ | |
49 | tre_last_matched_branch_pre_t *last_matched_branch; | |
50 | tre_last_matched_pre_t *last_matched_in_progress; | |
51 | tre_pos_and_tags_t *firstpos; | |
52 | tre_pos_and_tags_t *lastpos; | |
53 | /* The original pointer is used to point to the original node, when a | |
54 | * CATENATION and tag are added. If NULL, this is node is the original */ | |
55 | struct _tre_ast_node *original; | |
56 | tre_ast_type_t type; /* Type of the node. */ | |
57 | int submatch_id; | |
58 | int num_submatches; | |
59 | int num_tags; | |
60 | short nullable; | |
61 | short make_branches; | |
62 | } tre_ast_node_t; | |
63 | ||
64 | ||
65 | /* A "literal" node. These are created for assertions, back references, | |
66 | tags, matching parameter settings, and all expressions that match one | |
67 | character. */ | |
68 | typedef struct { | |
69 | tre_cint_t code_min; | |
70 | tre_cint_t code_max; | |
71 | int position; | |
72 | union { | |
73 | tre_bracket_match_list_t *bracket_match_list; | |
74 | int *params; | |
75 | } u; | |
76 | } tre_literal_t; | |
77 | ||
78 | /* A "catenation" node. These are created when two regexps are concatenated. | |
79 | If there are more than one subexpressions in sequence, the `left' part | |
80 | holds all but the last, and `right' part holds the last subexpression | |
81 | (catenation is left associative). */ | |
82 | typedef struct { | |
83 | tre_ast_node_t *left; | |
84 | tre_ast_node_t *right; | |
85 | } tre_catenation_t; | |
86 | ||
87 | /* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}" | |
88 | operators. */ | |
89 | typedef struct { | |
90 | /* Subexpression to match. */ | |
91 | tre_ast_node_t *arg; | |
92 | /* Minimum number of consecutive matches. */ | |
93 | int min; | |
94 | /* Maximum number of consecutive matches. */ | |
95 | int max; | |
96 | /* If 0, match as many characters as possible, if 1 match as few as | |
97 | possible. Note that this does not always mean the same thing as | |
98 | matching as many/few repetitions as possible. */ | |
99 | unsigned int minimal:1; | |
100 | /* Approximate matching parameters (or NULL). */ | |
101 | int *params; | |
102 | } tre_iteration_t; | |
103 | ||
104 | /* An "union" node. These are created for the "|" operator. */ | |
105 | typedef struct { | |
106 | tre_ast_node_t *left; | |
107 | tre_ast_node_t *right; | |
108 | /* The left end right end tags if non-zero */ | |
109 | int left_tag; | |
110 | int right_tag; | |
111 | } tre_union_t; | |
112 | ||
113 | __private_extern__ tre_ast_node_t * | |
114 | tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size); | |
115 | ||
116 | __private_extern__ tre_ast_node_t * | |
117 | tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position); | |
118 | ||
119 | __private_extern__ tre_ast_node_t * | |
120 | tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, | |
121 | int minimal); | |
122 | ||
123 | __private_extern__ tre_ast_node_t * | |
124 | tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right); | |
125 | ||
126 | __private_extern__ tre_ast_node_t * | |
127 | tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, | |
128 | tre_ast_node_t *right); | |
129 | ||
130 | #ifdef TRE_DEBUG | |
131 | void | |
132 | tre_ast_print(tre_ast_node_t *tree); | |
133 | ||
134 | /* XXX - rethink AST printing API */ | |
135 | void | |
136 | tre_print_params(int *params); | |
137 | #endif /* TRE_DEBUG */ | |
138 | ||
139 | #endif /* TRE_AST_H */ | |
140 | ||
141 | /* EOF */ |