]> git.saurik.com Git - apple/libc.git/blob - regex/TRE/lib/tre-internal.h
Libc-1439.100.3.tar.gz
[apple/libc.git] / regex / TRE / lib / tre-internal.h
1 /*
2 tre-internal.h - TRE internal definitions
3
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
6
7 */
8
9 #ifndef TRE_INTERNAL_H
10 #define TRE_INTERNAL_H 1
11
12 #ifdef HAVE_WCHAR_H
13 #include <wchar.h>
14 #endif /* HAVE_WCHAR_H */
15
16 #ifdef HAVE_WCTYPE_H
17 #include <wctype.h>
#endif /* HAVE_WCTYPE_H */
19
20 #include <ctype.h>
21
22 #ifdef __LIBC__
23 #include <xlocale_private.h>
24 #else /* !__LIBC__ */
25 #include <xlocale.h>
26 #endif /* !__LIBC__ */
27
28 #include "tre.h"
29 #include "tre-last-matched.h"
30
#ifdef TRE_DEBUG
#include <stdio.h>
/* Debug build: `msg` is pasted directly after `printf`, so it must be a
   complete, parenthesized printf argument list, e.g.
   DPRINT(("value = %d\n", v)).  stdout is flushed after every message. */
#define DPRINT(msg) do {printf msg; fflush(stdout);} while(/*CONSTCOND*/0)
#else /* !TRE_DEBUG */
/* Non-debug build: DPRINT expands to an empty statement; `msg` is dropped
   and therefore never evaluated. */
#define DPRINT(msg) do { } while(/*CONSTCOND*/0)
#endif /* !TRE_DEBUG */
37
/* Number of elements in the array `x`.  `x` must be an actual array object
   (not a pointer).  The argument is parenthesized before indexing so that an
   expression argument, such as a cast followed by a dereference, is indexed
   as a whole. */
#define elementsof(x) ( sizeof(x) / sizeof((x)[0]) )
39
/* Multibyte-to-wide-character conversion wrappers.
   With HAVE_MBRTOWC both tre_mbrtowc and the xlocale form tre_mbrtowc_l are
   defined.  Without it, tre_mbrtowc is defined only if HAVE_MBTOWC is set;
   that fallback maps to mbtowc() and discards the `ps` conversion-state
   argument, and no tre_mbrtowc_l is provided. */
#ifdef HAVE_MBRTOWC
#define tre_mbrtowc(pwc, s, n, ps) (mbrtowc((pwc), (s), (n), (ps)))
/* xlocale */
#define tre_mbrtowc_l(pwc, s, n, ps, l) (mbrtowc_l((pwc), (s), (n), (ps), (l)))
#else /* !HAVE_MBRTOWC */
#ifdef HAVE_MBTOWC
#define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n)))
#endif /* HAVE_MBTOWC */
#endif /* !HAVE_MBRTOWC */
49
/* TRE_MBSTATE is defined only when both TRE_MULTIBYTE and HAVE_MBSTATE_T
   are defined. */
#ifdef TRE_MULTIBYTE
#ifdef HAVE_MBSTATE_T
#define TRE_MBSTATE
#endif /* HAVE_MBSTATE_T */
#endif /* TRE_MULTIBYTE */
55
56 /* Define the character types and functions. */
57 #ifdef TRE_WCHAR
58
/* Wide characters.  tre_cint_t is wint_t, and each tre_* name below is a
   plain alias for the wide-character library function of the same role. */
typedef wint_t tre_cint_t;
#define TRE_CHAR_MAX WCHAR_MAX

#ifdef TRE_MULTIBYTE
#define TRE_MB_CUR_MAX MB_CUR_MAX
/* xlocale */
#define TRE_MB_CUR_MAX_L MB_CUR_MAX_L
#else /* !TRE_MULTIBYTE */
/* Note: TRE_MB_CUR_MAX_L is not defined in this configuration. */
#define TRE_MB_CUR_MAX 1
#endif /* !TRE_MULTIBYTE */

#define tre_isalnum iswalnum
#define tre_isalpha iswalpha
/* tre_isblank is left undefined when HAVE_ISWBLANK is not set. */
#ifdef HAVE_ISWBLANK
#define tre_isblank iswblank
#endif /* HAVE_ISWBLANK */
#define tre_iscntrl iswcntrl
#define tre_isdigit iswdigit
#define tre_isgraph iswgraph
#define tre_islower iswlower
#define tre_isprint iswprint
#define tre_ispunct iswpunct
#define tre_isspace iswspace
#define tre_isupper iswupper
#define tre_isxdigit iswxdigit

#define tre_tolower towlower
#define tre_toupper towupper
#define tre_strlen wcslen

/* xlocale: locale-taking (_l) forms.  Only this subset of the
   classification functions has an _l alias here. */
#define tre_isalnum_l iswalnum_l
#define tre_isdigit_l iswdigit_l
#define tre_islower_l iswlower_l
#define tre_isupper_l iswupper_l
#define tre_isxdigit_l iswxdigit_l
#define tre_tolower_l towlower_l
#define tre_toupper_l towupper_l
98
99 #else /* !TRE_WCHAR */
100
/* 8 bit characters.  tre_cint_t is a short, and the tre_* names map onto the
   <ctype.h> classification functions.  No locale-taking (_l) forms are
   defined in this configuration. */
typedef short tre_cint_t;
#define TRE_CHAR_MAX 255
#define TRE_MB_CUR_MAX 1

#define tre_isalnum isalnum
#define tre_isalpha isalpha
/* tre_isascii and tre_isblank exist only when the matching HAVE_* macro is
   set. */
#ifdef HAVE_ISASCII
#define tre_isascii isascii
#endif /* HAVE_ISASCII */
#ifdef HAVE_ISBLANK
#define tre_isblank isblank
#endif /* HAVE_ISBLANK */
#define tre_iscntrl iscntrl
#define tre_isdigit isdigit
#define tre_isgraph isgraph
#define tre_islower islower
#define tre_isprint isprint
#define tre_ispunct ispunct
#define tre_isspace isspace
#define tre_isupper isupper
#define tre_isxdigit isxdigit

/* The case-mapping results are narrowed to tre_cint_t.  The whole expansion
   is parenthesized so the macros behave as a single operand in any
   surrounding expression (e.g. after `sizeof`). */
#define tre_tolower(c) ((tre_cint_t)(tolower(c)))
#define tre_toupper(c) ((tre_cint_t)(toupper(c)))
/* Length of a NUL-terminated byte string.  The argument is parenthesized
   before the cast so pointer expressions are converted as a whole.  Note
   that this header does not include <string.h> itself. */
#define tre_strlen(s) (strlen((const char *)(s)))
127
128 #endif /* !TRE_WCHAR */
129
/* The system wctype machinery is used only when wide characters are enabled
   and both HAVE_ISWCTYPE and HAVE_WCTYPE are set. */
#if defined(TRE_WCHAR) && defined(HAVE_ISWCTYPE) && defined(HAVE_WCTYPE)
#define TRE_USE_SYSTEM_WCTYPE 1
#endif

#ifdef TRE_USE_SYSTEM_WCTYPE
/* Use system provided iswctype() and wctype(). */
typedef wctype_t tre_ctype_t;
#define tre_isctype iswctype
#define tre_ctype wctype

/* xlocale */
#define tre_isctype_l iswctype_l
#define tre_ctype_l wctype_l

#else /* !TRE_USE_SYSTEM_WCTYPE */
/* Define our own versions of iswctype() and wctype().  Here a character
   class is simply a pointer to a predicate function: tre_isctype(c, type)
   calls `type` with `c`, and tre_ctype() (defined elsewhere) looks a class
   up by name.  No _l variants are provided in this configuration. */
typedef int (*tre_ctype_t)(tre_cint_t);
#define tre_isctype(c, type) ( (type)(c) )
tre_ctype_t tre_ctype(const char *name);
#endif /* !TRE_USE_SYSTEM_WCTYPE */
150
/* Kind of input string handed to the matchers (the `type` argument of the
   tre_tnfa_run_* functions).  Judging by the names: wide-character,
   single-byte and multibyte strings -- confirm against the matcher sources.
   STR_USER exists only when TRE_STR_USER is defined. */
typedef enum {
  STR_WIDE = 0,
  STR_BYTE = 1,
  STR_MBS = 2,
#ifdef TRE_STR_USER
  STR_USER = 3
#endif /* TRE_STR_USER */
} tre_str_type_t;
156
/* Returns number of bytes to add to (char *)ptr to make it
   properly aligned for the type (0 if it is already aligned).

   `ptr` is parenthesized before the cast so that an expression argument
   such as `p + 1` is converted as a whole; without the parentheses the cast
   would bind to `p` alone and the `+ 1` would be added as a byte count.
   Note that `ptr` is evaluated twice, so it must not have side effects. */
#define ALIGN(ptr, type) \
  ((((long)(ptr)) % sizeof(type)) \
   ? (sizeof(type) - (((long)(ptr)) % sizeof(type))) \
   : 0)
163
/* Replace any MAX/MIN already defined by system headers with local
   versions.  When the arguments compare equal, `a` is returned.  The
   selected argument is evaluated twice, so do not pass expressions with
   side effects. */
#undef MAX
#undef MIN
#define MAX(a, b) (((a) >= (b)) ? (a) : (b))
#define MIN(a, b) (((a) <= (b)) ? (a) : (b))
168
/* Define STRF to the correct printf formatter for strings: "ls" in wide
   character builds, "s" otherwise.  The value does not include the leading
   '%', so it is meant to be concatenated into a format string, e.g.
   "%" STRF. */
#ifdef TRE_WCHAR
#define STRF "ls"
#else /* !TRE_WCHAR */
#define STRF "s"
#endif /* !TRE_WCHAR */
175
/* Types to handle bracket expressions.
   Discriminator for one entry of a bracket expression; 0 is reserved for
   "unused". */
typedef enum {
  TRE_BRACKET_MATCH_TYPE_UNUSED      = 0,
  TRE_BRACKET_MATCH_TYPE_CHAR        = 1, /* Single character value */
  TRE_BRACKET_MATCH_TYPE_RANGE_BEGIN = 2, /* Collation range begin */
  TRE_BRACKET_MATCH_TYPE_RANGE_END   = 3, /* Collation range end */
  TRE_BRACKET_MATCH_TYPE_CLASS       = 4, /* Character class */
  TRE_BRACKET_MATCH_TYPE_EQUIVALENCE = 5  /* Collation equivalence value */
} tre_bracket_match_type_t;
185
/* One entry of a bracket expression: a kind tag plus a value. */
typedef struct {
  /* Which kind of entry this is. */
  tre_bracket_match_type_t type;
  /* Payload; NOTE(review): presumably interpreted according to `type`
     (character, range endpoint, class, equivalence) -- confirm in the
     parser/matcher. */
  tre_cint_t value;
} tre_bracket_match_t;
190
191 #define TRE_BRACKET_MATCH_FLAG_NEGATE 1
192
193 typedef struct {
194 int num_bracket_matches;
195 int flags;
196 tre_bracket_match_t bracket_matches[0];
197 } tre_bracket_match_list_t;
198
199 #define SIZEOF_BRACKET_MATCH_LIST_N(n) (sizeof(tre_bracket_match_list_t) + \
200 sizeof(tre_bracket_match_t) * (n))
201 #define SIZEOF_BRACKET_MATCH_LIST(l) SIZEOF_BRACKET_MATCH_LIST_N( \
202 (l)->num_bracket_matches)
203
/* The "count" field is the number of times the tag was set, initially zero.
   The "first" field contains the first set value (when "count" equals 1).
   The "value" field contains the current value of the tag, if "count" is
   greater than zero (the tag's current value is -1 if "count" is zero).
   The "touch" field is the touch value, a monotonically increasing value
   (maintained by the caller) set each time the tag itself is set. */
typedef struct {
  int count;
  int first;
  int value;
  int touch;
} tre_tag_t;
216
/* TNFA transition type.  A TNFA state is an array of transitions,
   the terminator is a transition with NULL `state'. */
typedef struct tnfa_transition tre_tnfa_transition_t;

struct tnfa_transition {
  /* Range of accepted characters. */
  tre_cint_t code_min;
  tre_cint_t code_max;
  /* Pointer to the destination state (NULL marks the end of a state's
     transition array). */
  tre_tnfa_transition_t *state;
  /* ID number of the destination state. */
  int state_id;
  /* -1 terminated array of tags (or NULL). */
  int *tags;
  /* Matching parameters settings (or NULL). */
  int *params;
  /* Assertion bitmap (ASSERT_* flags). */
  int assertions;
  /* Assertion parameters.  Per the ASSERT_* flag comments,
     ASSERT_BRACKET_MATCH refers to `bracket_match_list' and ASSERT_BACKREF
     to `backref'. */
  union {
    /* Bracket matches. */
    tre_bracket_match_list_t *bracket_match_list;
    /* Back reference assertion. */
    int backref;
  } u;
};
243
244
/* Assertions.  One bit per assertion; the values are OR-ed together in the
   `assertions' bitmap of a transition. */
#define ASSERT_AT_BOL        0x01 /* Beginning of line. */
#define ASSERT_AT_EOL        0x02 /* End of line. */
#define ASSERT_BRACKET_MATCH 0x04 /* Matches in `bracket_match_list'. */
#define ASSERT_AT_BOW        0x08 /* Beginning of word. */
#define ASSERT_AT_EOW        0x10 /* End of word. */
#define ASSERT_AT_WB         0x20 /* Word boundary. */
#define ASSERT_AT_WB_NEG     0x40 /* Not a word boundary. */
#define ASSERT_BACKREF       0x80 /* A back reference in `backref'. */
/* Highest assertion bit in use. */
#define ASSERT_LAST          ASSERT_BACKREF
255
/* Tag directions. */
typedef enum {
  TRE_TAG_MINIMIZE      = 0,
  TRE_TAG_MAXIMIZE      = 1,
  TRE_TAG_LEFT_MAXIMIZE = 2
} tre_tag_direction_t;
262
/* Parameters that can be changed dynamically while matching.
   TRE_PARAM_LAST is one past the last real parameter, i.e. the number of
   parameters. */
typedef enum {
  TRE_PARAM_COST_INS = 0,
  TRE_PARAM_COST_DEL = 1,
  TRE_PARAM_COST_SUBST = 2,
  TRE_PARAM_COST_MAX = 3,
  TRE_PARAM_MAX_INS = 4,
  TRE_PARAM_MAX_DEL = 5,
  TRE_PARAM_MAX_SUBST = 6,
  TRE_PARAM_MAX_ERR = 7,
  TRE_PARAM_DEPTH = 8,
  TRE_PARAM_LAST = 9
} tre_param_t;

/* The two sentinel values below are negative, so their expansions are
   parenthesized to stay a single operand in any surrounding expression. */

/* Unset matching parameter */
#define TRE_PARAM_UNSET (-1)

/* Signifies the default matching parameter value. */
#define TRE_PARAM_DEFAULT (-2)
282
/* Instructions to compute submatch register values from tag values
   after a successful match.  Each member names the tag that supplies one
   end of the submatch. */
typedef struct tre_submatch_data {
  int so_tag; /* Tag that gives the value for rm_so (submatch start offset). */
  int eo_tag; /* Tag that gives the value for rm_eo (submatch end offset). */
} tre_submatch_data_t;
293
294
/* TNFA definition.  This is the object handed (read-only) to
   tre_fill_pmatch() and the tre_tnfa_run_* matchers declared at the end of
   this header.  The fields are not documented in this file; the notes below
   are reviewer assumptions drawn from names and types only. */
typedef struct tnfa tre_tnfa_t;

struct tnfa {
  /* NOTE(review): presumably the transition table plus the initial and
     final transitions within it -- confirm in the compiler. */
  tre_tnfa_transition_t *transitions;
  tre_tnfa_transition_t *initial;
  tre_tnfa_transition_t *final;
  /* NOTE(review): presumably one entry per submatch (see num_submatches). */
  tre_submatch_data_t *submatch_data;
#ifdef USE_FIRSTPOS_CHARS /* not defined */
  char *firstpos_chars;
#endif /* USE_FIRSTPOS_CHARS */
  /* NOTE(review): presumably indexed by tag number (see num_tags). */
  tre_tag_direction_t *tag_directions;
  int *minimal_tags;
  tre_last_matched_branch_t *last_matched_branch;
  /* Locale associated with this TNFA. */
  locale_t loc;
  unsigned int num_transitions;
  int first_char;
  unsigned int num_submatches;
  unsigned int num_submatches_invisible;
  int num_tags;
  int num_minimals;
  int end_tag;
  int num_states;
  /* NOTE(review): presumably the cflags given to tre_compile(). */
  int cflags;
  int have_backrefs;
  int num_reorder_tags;
  int have_approx;
  int params_depth;
};
324
/* Library-private entry points (__private_extern__).  The definitions live
   in other source files; the descriptions below are derived from the
   signatures only and should be confirmed against the implementations. */

/* Compiles `regex` into `preg` using `cflags` and locale `loc`.
   NOTE(review): `n` is presumably the length of `regex` in tre_char_t
   units, and the int result presumably an error code -- confirm. */
__private_extern__ int
tre_compile(regex_t * __restrict preg, const tre_char_t * __restrict regex, size_t n, int cflags,
	    locale_t __restrict loc);

/* Releases what is held by `preg`.  NOTE(review): presumably the
   counterpart of tre_compile() -- confirm. */
__private_extern__ void
tre_free(regex_t *preg);

/* Fills the `nmatch`-element `pmatch` array from the tag values in `tags`
   for the given TNFA; `match_eo` is presumably the end offset of the whole
   match -- confirm. */
__private_extern__ reg_errcode_t
tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[ __restrict ], int cflags,
		const tre_tnfa_t * __restrict tnfa, const tre_tag_t * __restrict tags, int match_eo);

/* Matcher entry point ("parallel" variant).  Takes the TNFA, the input
   `string` of kind `type` with length `len`, and execution flags `eflags`;
   writes tag values to `match_tags` and the match end offset to
   `match_end_ofs`. */
__private_extern__ reg_errcode_t
tre_tnfa_run_parallel(const tre_tnfa_t * __restrict tnfa, const void * __restrict string, int len,
		      tre_str_type_t type, tre_tag_t * __restrict match_tags, int eflags,
		      int * __restrict match_end_ofs);

/* Matcher entry point ("backtrack" variant); same parameters as
   tre_tnfa_run_parallel().  NOTE(review): presumably the variant used when
   the pattern has back references -- confirm. */
__private_extern__ reg_errcode_t
tre_tnfa_run_backtrack(const tre_tnfa_t * __restrict tnfa, const void * __restrict string,
		       int len, tre_str_type_t type, tre_tag_t * __restrict match_tags,
		       int eflags, int * __restrict match_end_ofs);

#ifdef TRE_APPROX
/* Approximate matcher entry point, available only with TRE_APPROX.  In
   addition to the common matcher parameters it takes the approximate-match
   result structure `match` and the matching parameters `params` (passed by
   value). */
__private_extern__ reg_errcode_t
tre_tnfa_run_approx(const tre_tnfa_t * __restrict tnfa, const void * __restrict string, int len,
		    tre_str_type_t type, tre_tag_t * __restrict match_tags,
		    regamatch_t * __restrict match, regaparams_t params,
		    int eflags, int * __restrict match_end_ofs);
#endif /* TRE_APPROX */
352 #endif /* TRE_APPROX */
353
354 #endif /* TRE_INTERNAL_H */
355
356 /* EOF */