]>
Commit | Line | Data |
---|---|---|
ad3c9f2a A |
1 | /* |
2 | tre-internal.h - TRE internal definitions | |
3 | ||
4 | This software is released under a BSD-style license. | |
5 | See the file LICENSE for details and copyright. | |
6 | ||
7 | */ | |
8 | ||
9 | #ifndef TRE_INTERNAL_H | |
10 | #define TRE_INTERNAL_H 1 | |
11 | ||
12 | #ifdef HAVE_WCHAR_H | |
13 | #include <wchar.h> | |
14 | #endif /* HAVE_WCHAR_H */ | |
15 | ||
16 | #ifdef HAVE_WCTYPE_H | |
17 | #include <wctype.h> | |
18 | #endif /* HAVE_WCTYPE_H */ | |
19 | ||
20 | #include <ctype.h> | |
21 | ||
22 | #ifdef __LIBC__ | |
23 | #include <xlocale_private.h> | |
24 | #else /* !__LIBC__ */ | |
25 | #include <xlocale.h> | |
26 | #endif /* !__LIBC__ */ | |
27 | ||
28 | #include "tre.h" | |
29 | #include "tre-last-matched.h" | |
30 | ||
31 | #ifdef TRE_DEBUG | |
32 | #include <stdio.h> | |
33 | #define DPRINT(msg) do {printf msg; fflush(stdout);} while(/*CONSTCOND*/0) | |
34 | #else /* !TRE_DEBUG */ | |
35 | #define DPRINT(msg) do { } while(/*CONSTCOND*/0) | |
36 | #endif /* !TRE_DEBUG */ | |
37 | ||
38 | #define elementsof(x) ( sizeof(x) / sizeof(x[0]) ) | |
39 | ||
40 | #ifdef HAVE_MBRTOWC | |
41 | #define tre_mbrtowc(pwc, s, n, ps) (mbrtowc((pwc), (s), (n), (ps))) | |
42 | /* xlocale variant; note that it is only defined in this HAVE_MBRTOWC branch. */ | |
43 | #define tre_mbrtowc_l(pwc, s, n, ps, l) (mbrtowc_l((pwc), (s), (n), (ps), (l))) | |
44 | #else /* !HAVE_MBRTOWC */ | |
45 | #ifdef HAVE_MBTOWC | |
46 | #define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n))) /* fallback: the ps argument is dropped */ | |
47 | #endif /* HAVE_MBTOWC */ | |
48 | #endif /* !HAVE_MBRTOWC */ | |
49 | ||
50 | #ifdef TRE_MULTIBYTE | |
51 | #ifdef HAVE_MBSTATE_T | |
52 | #define TRE_MBSTATE | |
53 | #endif /* HAVE_MBSTATE_T */ | |
54 | #endif /* TRE_MULTIBYTE */ | |
55 | ||
56 | /* Define the character types and functions. */ | |
57 | #ifdef TRE_WCHAR | |
58 | ||
59 | /* Wide characters. */ | |
60 | typedef wint_t tre_cint_t; | |
61 | #define TRE_CHAR_MAX WCHAR_MAX | |
62 | ||
63 | #ifdef TRE_MULTIBYTE | |
64 | #define TRE_MB_CUR_MAX MB_CUR_MAX | |
65 | /* xlocale */ | |
66 | #define TRE_MB_CUR_MAX_L MB_CUR_MAX_L | |
67 | #else /* !TRE_MULTIBYTE */ | |
68 | #define TRE_MB_CUR_MAX 1 | |
69 | #endif /* !TRE_MULTIBYTE */ | |
70 | ||
71 | #define tre_isalnum iswalnum | |
72 | #define tre_isalpha iswalpha | |
73 | #ifdef HAVE_ISWBLANK | |
74 | #define tre_isblank iswblank | |
75 | #endif /* HAVE_ISWBLANK */ | |
76 | #define tre_iscntrl iswcntrl | |
77 | #define tre_isdigit iswdigit | |
78 | #define tre_isgraph iswgraph | |
79 | #define tre_islower iswlower | |
80 | #define tre_isprint iswprint | |
81 | #define tre_ispunct iswpunct | |
82 | #define tre_isspace iswspace | |
83 | #define tre_isupper iswupper | |
84 | #define tre_isxdigit iswxdigit | |
85 | ||
86 | #define tre_tolower towlower | |
87 | #define tre_toupper towupper | |
88 | #define tre_strlen wcslen | |
89 | ||
90 | /* xlocale */ | |
91 | #define tre_isalnum_l iswalnum_l | |
92 | #define tre_isdigit_l iswdigit_l | |
93 | #define tre_islower_l iswlower_l | |
94 | #define tre_isupper_l iswupper_l | |
95 | #define tre_isxdigit_l iswxdigit_l | |
96 | #define tre_tolower_l towlower_l | |
97 | #define tre_toupper_l towupper_l | |
98 | ||
99 | #else /* !TRE_WCHAR */ | |
100 | ||
101 | /* 8 bit characters. */ | |
102 | typedef short tre_cint_t; | |
103 | #define TRE_CHAR_MAX 255 | |
104 | #define TRE_MB_CUR_MAX 1 | |
105 | ||
106 | #define tre_isalnum isalnum | |
107 | #define tre_isalpha isalpha | |
108 | #ifdef HAVE_ISASCII | |
109 | #define tre_isascii isascii | |
110 | #endif /* HAVE_ISASCII */ | |
111 | #ifdef HAVE_ISBLANK | |
112 | #define tre_isblank isblank | |
113 | #endif /* HAVE_ISBLANK */ | |
114 | #define tre_iscntrl iscntrl | |
115 | #define tre_isdigit isdigit | |
116 | #define tre_isgraph isgraph | |
117 | #define tre_islower islower | |
118 | #define tre_isprint isprint | |
119 | #define tre_ispunct ispunct | |
120 | #define tre_isspace isspace | |
121 | #define tre_isupper isupper | |
122 | #define tre_isxdigit isxdigit | |
123 | ||
124 | #define tre_tolower(c) (tre_cint_t)(tolower(c)) | |
125 | #define tre_toupper(c) (tre_cint_t)(toupper(c)) | |
126 | #define tre_strlen(s) (strlen((const char*)s)) | |
127 | ||
128 | #endif /* !TRE_WCHAR */ | |
129 | ||
130 | #if defined(TRE_WCHAR) && defined(HAVE_ISWCTYPE) && defined(HAVE_WCTYPE) | |
131 | #define TRE_USE_SYSTEM_WCTYPE 1 | |
132 | #endif | |
133 | ||
134 | #ifdef TRE_USE_SYSTEM_WCTYPE | |
135 | /* Use system provided iswctype() and wctype(). */ | |
136 | typedef wctype_t tre_ctype_t; | |
137 | #define tre_isctype iswctype | |
138 | #define tre_ctype wctype | |
139 | ||
140 | /* xlocale variants; the fallback branch below defines no tre_isctype_l or tre_ctype_l. */ | |
141 | #define tre_isctype_l iswctype_l | |
142 | #define tre_ctype_l wctype_l | |
143 | ||
144 | #else /* !TRE_USE_SYSTEM_WCTYPE */ | |
145 | /* Define our own versions of iswctype() and wctype(): a character class is a predicate function pointer, and tre_isctype() simply calls it on the character. */ | |
146 | typedef int (*tre_ctype_t)(tre_cint_t); | |
147 | #define tre_isctype(c, type) ( (type)(c) ) | |
148 | tre_ctype_t tre_ctype(const char *name); | |
149 | #endif /* !TRE_USE_SYSTEM_WCTYPE */ | |
150 | ||
151 | typedef enum { STR_WIDE, STR_BYTE, STR_MBS, | |
152 | #ifdef TRE_STR_USER | |
153 | STR_USER | |
154 | #endif /* TRE_STR_USER */ | |
155 | } tre_str_type_t; | |
156 | ||
157 | /* Returns the number of bytes to add to (char *)ptr so that its address becomes a multiple of sizeof(type); evaluates to 0 when it already is. | |
158 | NOTE(review): ptr is cast to long, which is narrower than a pointer on LLP64 targets, and the ptr argument is not parenthesized -- confirm callers only pass simple pointer expressions. */ | |
159 | #define ALIGN(ptr, type) \ | |
160 | ((((long)ptr) % sizeof(type)) \ | |
161 | ? (sizeof(type) - (((long)ptr) % sizeof(type))) \ | |
162 | : 0) | |
163 | ||
164 | #undef MAX | |
165 | #undef MIN | |
166 | #define MAX(a, b) (((a) >= (b)) ? (a) : (b)) | |
167 | #define MIN(a, b) (((a) <= (b)) ? (a) : (b)) | |
168 | ||
169 | /* Define STRF to the correct printf formatter for strings. */ | |
170 | #ifdef TRE_WCHAR | |
171 | #define STRF "ls" | |
172 | #else /* !TRE_WCHAR */ | |
173 | #define STRF "s" | |
174 | #endif /* !TRE_WCHAR */ | |
175 | ||
176 | /* Types to handle bracket expressions. */ | |
177 | typedef enum { | |
178 | TRE_BRACKET_MATCH_TYPE_UNUSED = 0, | |
179 | TRE_BRACKET_MATCH_TYPE_CHAR, /* Single character value */ | |
180 | TRE_BRACKET_MATCH_TYPE_RANGE_BEGIN, /* Collation range begin */ | |
181 | TRE_BRACKET_MATCH_TYPE_RANGE_END, /* Collation range end */ | |
182 | TRE_BRACKET_MATCH_TYPE_CLASS, /* Character class */ | |
183 | TRE_BRACKET_MATCH_TYPE_EQUIVALENCE, /* Collation equivalence value */ | |
184 | } tre_bracket_match_type_t; | |
185 | ||
186 | typedef struct { | |
187 | tre_bracket_match_type_t type; | |
188 | tre_cint_t value; | |
189 | } tre_bracket_match_t; | |
190 | ||
191 | #define TRE_BRACKET_MATCH_FLAG_NEGATE 1 | |
192 | ||
193 | typedef struct { /* Header followed by a variable number of bracket match entries. */ | |
194 | int num_bracket_matches; /* Entry count; SIZEOF_BRACKET_MATCH_LIST uses it to size the trailing array. */ | |
195 | int flags; /* presumably a mask of TRE_BRACKET_MATCH_FLAG_* bits -- confirm against users */ | |
196 | tre_bracket_match_t bracket_matches[0]; /* Trailing array. NOTE(review): [0] is a GNU extension; the C99 flexible array member spelling is []. */ | |
197 | } tre_bracket_match_list_t; | |
198 | ||
199 | #define SIZEOF_BRACKET_MATCH_LIST_N(n) (sizeof(tre_bracket_match_list_t) + \ | |
200 | sizeof(tre_bracket_match_t) * (n)) | |
201 | #define SIZEOF_BRACKET_MATCH_LIST(l) SIZEOF_BRACKET_MATCH_LIST_N( \ | |
202 | (l)->num_bracket_matches) | |
203 | ||
204 | /* The "count" field is the number of times the tag was set, initially zero. | |
205 | The "first" field contains the first set value (when "count" equals 1). | |
206 | The "value" field contains the current value of the tag, if "count" is | |
207 | greater than zero (the tag's current value is -1 if "count" is zero). | |
208 | The "touch" field is the touch value, a monotonically increasing value | |
209 | (maintained by the caller) set each time the tag itself is set. */ | |
210 | typedef struct { | |
211 | int count; | |
212 | int first; | |
213 | int value; | |
214 | int touch; | |
215 | } tre_tag_t; | |
216 | ||
217 | /* TNFA transition type. A TNFA state is an array of transitions; | |
218 | the terminator is a transition with a NULL `state'. */ | |
219 | typedef struct tnfa_transition tre_tnfa_transition_t; | |
220 | ||
221 | struct tnfa_transition { | |
222 | /* Range of accepted characters. */ | |
223 | tre_cint_t code_min; | |
224 | tre_cint_t code_max; | |
225 | /* Pointer to the destination state. */ | |
226 | tre_tnfa_transition_t *state; | |
227 | /* ID number of the destination state. */ | |
228 | int state_id; | |
229 | /* -1 terminated array of tags (or NULL). */ | |
230 | int *tags; | |
231 | /* Matching parameters settings (or NULL). */ | |
232 | int *params; | |
233 | /* Assertion bitmap. */ | |
234 | int assertions; | |
235 | /* Assertion parameters. */ | |
236 | union { | |
237 | /* Bracket matches. */ | |
238 | tre_bracket_match_list_t *bracket_match_list; | |
239 | /* Back reference assertion. */ | |
240 | int backref; | |
241 | } u; | |
242 | }; | |
243 | ||
244 | ||
245 | /* Assertions. */ | |
246 | #define ASSERT_AT_BOL 1 /* Beginning of line. */ | |
247 | #define ASSERT_AT_EOL 2 /* End of line. */ | |
248 | #define ASSERT_BRACKET_MATCH 4 /* Matches in `bracket_match_list'. */ | |
249 | #define ASSERT_AT_BOW 8 /* Beginning of word. */ | |
250 | #define ASSERT_AT_EOW 16 /* End of word. */ | |
251 | #define ASSERT_AT_WB 32 /* Word boundary. */ | |
252 | #define ASSERT_AT_WB_NEG 64 /* Not a word boundary. */ | |
253 | #define ASSERT_BACKREF 128 /* A back reference in `backref'. */ | |
254 | #define ASSERT_LAST 128 | |
255 | ||
256 | /* Tag directions. */ | |
257 | typedef enum { | |
258 | TRE_TAG_MINIMIZE = 0, | |
259 | TRE_TAG_MAXIMIZE, | |
260 | TRE_TAG_LEFT_MAXIMIZE, | |
261 | } tre_tag_direction_t; | |
262 | ||
263 | /* Parameters that can be changed dynamically while matching. */ | |
264 | typedef enum { | |
265 | TRE_PARAM_COST_INS = 0, | |
266 | TRE_PARAM_COST_DEL = 1, | |
267 | TRE_PARAM_COST_SUBST = 2, | |
268 | TRE_PARAM_COST_MAX = 3, | |
269 | TRE_PARAM_MAX_INS = 4, | |
270 | TRE_PARAM_MAX_DEL = 5, | |
271 | TRE_PARAM_MAX_SUBST = 6, | |
272 | TRE_PARAM_MAX_ERR = 7, | |
273 | TRE_PARAM_DEPTH = 8, | |
274 | TRE_PARAM_LAST = 9 | |
275 | } tre_param_t; | |
276 | ||
277 | /* Unset matching parameter */ | |
278 | #define TRE_PARAM_UNSET -1 | |
279 | ||
280 | /* Signifies the default matching parameter value. */ | |
281 | #define TRE_PARAM_DEFAULT -2 | |
282 | ||
283 | /* Instructions to compute submatch register values from tag values | |
284 | after a successful match. */ | |
285 | struct tre_submatch_data { | |
286 | /* Tag that gives the value for rm_so (submatch start offset). */ | |
287 | int so_tag; | |
288 | /* Tag that gives the value for rm_eo (submatch end offset). */ | |
289 | int eo_tag; | |
290 | }; | |
291 | ||
292 | typedef struct tre_submatch_data tre_submatch_data_t; | |
293 | ||
294 | ||
295 | /* TNFA definition. */ | |
296 | typedef struct tnfa tre_tnfa_t; | |
297 | ||
298 | struct tnfa { | |
299 | tre_tnfa_transition_t *transitions; | |
300 | tre_tnfa_transition_t *initial; | |
301 | tre_tnfa_transition_t *final; | |
302 | tre_submatch_data_t *submatch_data; | |
6465356a | 303 | #ifdef USE_FIRSTPOS_CHARS /* not defined */ |
ad3c9f2a | 304 | char *firstpos_chars; |
6465356a | 305 | #endif /* USE_FIRSTPOS_CHARS */ |
ad3c9f2a A |
306 | tre_tag_direction_t *tag_directions; |
307 | int *minimal_tags; | |
308 | tre_last_matched_branch_t *last_matched_branch; | |
309 | locale_t loc; | |
310 | unsigned int num_transitions; | |
311 | int first_char; | |
312 | unsigned int num_submatches; | |
313 | unsigned int num_submatches_invisible; | |
314 | int num_tags; | |
315 | int num_minimals; | |
316 | int end_tag; | |
317 | int num_states; | |
318 | int cflags; | |
319 | int have_backrefs; | |
320 | int num_reorder_tags; | |
321 | int have_approx; | |
322 | int params_depth; | |
323 | }; | |
324 | ||
325 | __private_extern__ int | |
326 | tre_compile(regex_t * __restrict preg, const tre_char_t * __restrict regex, size_t n, int cflags, | |
327 | locale_t __restrict loc); | |
328 | ||
329 | __private_extern__ void | |
330 | tre_free(regex_t *preg); | |
331 | ||
332 | __private_extern__ reg_errcode_t | |
333 | tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[ __restrict ], int cflags, | |
334 | const tre_tnfa_t * __restrict tnfa, const tre_tag_t * __restrict tags, int match_eo); | |
335 | ||
336 | __private_extern__ reg_errcode_t | |
337 | tre_tnfa_run_parallel(const tre_tnfa_t * __restrict tnfa, const void * __restrict string, int len, | |
338 | tre_str_type_t type, tre_tag_t * __restrict match_tags, int eflags, | |
339 | int * __restrict match_end_ofs); | |
340 | ||
341 | __private_extern__ reg_errcode_t | |
342 | tre_tnfa_run_backtrack(const tre_tnfa_t * __restrict tnfa, const void * __restrict string, | |
343 | int len, tre_str_type_t type, tre_tag_t * __restrict match_tags, | |
344 | int eflags, int * __restrict match_end_ofs); | |
345 | ||
346 | #ifdef TRE_APPROX | |
347 | __private_extern__ reg_errcode_t | |
348 | tre_tnfa_run_approx(const tre_tnfa_t * __restrict tnfa, const void * __restrict string, int len, | |
349 | tre_str_type_t type, tre_tag_t * __restrict match_tags, | |
350 | regamatch_t * __restrict match, regaparams_t params, | |
351 | int eflags, int * __restrict match_end_ofs); | |
352 | #endif /* TRE_APPROX */ | |
353 | ||
354 | #endif /* TRE_INTERNAL_H */ | |
355 | ||
356 | /* EOF */ |