2 tre-internal.h - TRE internal definitions
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
10 #define TRE_INTERNAL_H 1
14 #endif /* HAVE_WCHAR_H */
18 #endif /* !HAVE_WCTYPE_H */
23 #include <xlocale_private.h>
26 #endif /* !__LIBC__ */
29 #include "tre-last-matched.h"
33 #define DPRINT(msg) do {printf msg; fflush(stdout);} while(/*CONSTCOND*/0)
34 #else /* !TRE_DEBUG */
35 #define DPRINT(msg) do { } while(/*CONSTCOND*/0)
36 #endif /* !TRE_DEBUG */
/* Number of elements in the array `x'.  `x' must be an actual array object:
   applied to a pointer the result is sizeof(pointer) / sizeof(element),
   which is not an element count.  The argument is parenthesized before
   indexing so that any array-valued expression may be passed. */
#define elementsof(x) (sizeof(x) / sizeof((x)[0]))
41 #define tre_mbrtowc(pwc, s, n, ps) (mbrtowc((pwc), (s), (n), (ps)))
43 #define tre_mbrtowc_l(pwc, s, n, ps, l) (mbrtowc_l((pwc), (s), (n), (ps), (l)))
44 #else /* !HAVE_MBRTOWC */
46 #define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n)))
47 #endif /* HAVE_MBTOWC */
48 #endif /* !HAVE_MBRTOWC */
53 #endif /* TRE_MULTIBYTE */
54 #endif /* HAVE_MBSTATE_T */
56 /* Define the character types and functions. */
59 /* Wide characters. */
60 typedef wint_t tre_cint_t
;
61 #define TRE_CHAR_MAX WCHAR_MAX
64 #define TRE_MB_CUR_MAX MB_CUR_MAX
66 #define TRE_MB_CUR_MAX_L MB_CUR_MAX_L
67 #else /* !TRE_MULTIBYTE */
68 #define TRE_MB_CUR_MAX 1
69 #endif /* !TRE_MULTIBYTE */
71 #define tre_isalnum iswalnum
72 #define tre_isalpha iswalpha
74 #define tre_isblank iswblank
75 #endif /* HAVE_ISWBLANK */
76 #define tre_iscntrl iswcntrl
77 #define tre_isdigit iswdigit
78 #define tre_isgraph iswgraph
79 #define tre_islower iswlower
80 #define tre_isprint iswprint
81 #define tre_ispunct iswpunct
82 #define tre_isspace iswspace
83 #define tre_isupper iswupper
84 #define tre_isxdigit iswxdigit
86 #define tre_tolower towlower
87 #define tre_toupper towupper
88 #define tre_strlen wcslen
91 #define tre_isalnum_l iswalnum_l
92 #define tre_isdigit_l iswdigit_l
93 #define tre_islower_l iswlower_l
94 #define tre_isupper_l iswupper_l
95 #define tre_isxdigit_l iswxdigit_l
96 #define tre_tolower_l towlower_l
97 #define tre_toupper_l towupper_l
99 #else /* !TRE_WCHAR */
101 /* 8 bit characters. */
102 typedef short tre_cint_t
;
103 #define TRE_CHAR_MAX 255
104 #define TRE_MB_CUR_MAX 1
106 #define tre_isalnum isalnum
107 #define tre_isalpha isalpha
109 #define tre_isascii isascii
110 #endif /* HAVE_ISASCII */
112 #define tre_isblank isblank
113 #endif /* HAVE_ISBLANK */
114 #define tre_iscntrl iscntrl
115 #define tre_isdigit isdigit
116 #define tre_isgraph isgraph
117 #define tre_islower islower
118 #define tre_isprint isprint
119 #define tre_ispunct ispunct
120 #define tre_isspace isspace
121 #define tre_isupper isupper
122 #define tre_isxdigit isxdigit
/* Case conversion and string length for the 8-bit character build.
   Every expansion is wrapped in its own parentheses and the argument of
   tre_strlen() is parenthesized before the cast, so the macros can be
   embedded in larger expressions (or handed to sizeof) without precedence
   surprises.  The argument of tre_tolower()/tre_toupper() is passed to the
   <ctype.h> function unchanged. */
#define tre_tolower(c) ((tre_cint_t)(tolower(c)))
#define tre_toupper(c) ((tre_cint_t)(toupper(c)))
#define tre_strlen(s) (strlen((const char *)(s)))
128 #endif /* !TRE_WCHAR */
130 #if defined(TRE_WCHAR) && defined(HAVE_ISWCTYPE) && defined(HAVE_WCTYPE)
131 #define TRE_USE_SYSTEM_WCTYPE 1
134 #ifdef TRE_USE_SYSTEM_WCTYPE
135 /* Use system provided iswctype() and wctype(). */
136 typedef wctype_t tre_ctype_t
;
137 #define tre_isctype iswctype
138 #define tre_ctype wctype
141 #define tre_isctype_l iswctype_l
142 #define tre_ctype_l wctype_l
144 #else /* !TRE_USE_SYSTEM_WCTYPE */
145 /* Define our own versions of iswctype() and wctype(). */
/* In this configuration a character class is a predicate function that
   takes a tre_cint_t and returns int. */
typedef int (*tre_ctype_t)(tre_cint_t);

/* Stand-in for iswctype(): apply the class predicate `type' to `c'. */
#define tre_isctype(c, type) ( (type)(c) )

/* Stand-in for wctype(): obtain the class predicate for `name'.
   Declaration only; the result for an unrecognized name is decided by the
   implementation. */
tre_ctype_t tre_ctype(const char *name);
149 #endif /* !TRE_USE_SYSTEM_WCTYPE */
151 typedef enum { STR_WIDE
, STR_BYTE
, STR_MBS
,
154 #endif /* TRE_STR_USER */
157 /* Returns number of bytes to add to (char *)ptr to make it
158 properly aligned for the type. */
159 #define ALIGN(ptr, type) \
160 ((((long)ptr) % sizeof(type)) \
161 ? (sizeof(type) - (((long)ptr) % sizeof(type))) \
/* Larger and smaller of two values.  These are plain macros, so the
   selected argument is evaluated twice; do not pass expressions with side
   effects.  When the operands compare equal the first one (`a') is the
   result. */
#define MAX(a, b) \
  (((a) >= (b)) ? (a) : (b))
#define MIN(a, b) \
  (((a) <= (b)) ? (a) : (b))
169 /* Define STRF to the correct printf formatter for strings. */
172 #else /* !TRE_WCHAR */
174 #endif /* !TRE_WCHAR */
176 /* Types to handle bracket expressions. */
178 TRE_BRACKET_MATCH_TYPE_UNUSED
= 0,
179 TRE_BRACKET_MATCH_TYPE_CHAR
, /* Single character value */
180 TRE_BRACKET_MATCH_TYPE_RANGE_BEGIN
, /* Collation range begin */
181 TRE_BRACKET_MATCH_TYPE_RANGE_END
, /* Collation range end */
182 TRE_BRACKET_MATCH_TYPE_CLASS
, /* Character class */
183 TRE_BRACKET_MATCH_TYPE_EQUIVALENCE
, /* Collation equivalence value */
184 } tre_bracket_match_type_t
;
187 tre_bracket_match_type_t type
;
189 } tre_bracket_match_t
;
191 #define TRE_BRACKET_MATCH_FLAG_NEGATE 1
194 int num_bracket_matches
;
196 tre_bracket_match_t bracket_matches
[0];
197 } tre_bracket_match_list_t
;
/* Size in bytes of a tre_bracket_match_list_t followed by `n'
   tre_bracket_match_t entries. */
#define SIZEOF_BRACKET_MATCH_LIST_N(n) \
  (sizeof(tre_bracket_match_list_t) + sizeof(tre_bracket_match_t) * (n))

/* Same computation, with the entry count read from
   (l)->num_bracket_matches. */
#define SIZEOF_BRACKET_MATCH_LIST(l) \
  SIZEOF_BRACKET_MATCH_LIST_N((l)->num_bracket_matches)
204 /* The "count" field is the number of times the tag was set, initially zero.
205 The "first" field contains the first set value (when "count" equals 1).
206 The "value" field contains the current value of the tag, if "count" is
207 greater than zero (the tag's current value is -1 if "count" is zero).
208 The "touch" field is the touch value, a monotonically increasing value
209 (maintained by the caller) set each time the tag itself is set. */
217 /* TNFA transition type. A TNFA state is an array of transitions,
218 the terminator is a transition with NULL `state'. */
219 typedef struct tnfa_transition tre_tnfa_transition_t
;
221 struct tnfa_transition
{
222 /* Range of accepted characters. */
225 /* Pointer to the destination state. */
226 tre_tnfa_transition_t
*state
;
227 /* ID number of the destination state. */
229 /* -1 terminated array of tags (or NULL). */
231 /* Matching parameters settings (or NULL). */
233 /* Assertion bitmap. */
235 /* Assertion parameters. */
237 /* Bracket matches. */
238 tre_bracket_match_list_t
*bracket_match_list
;
239 /* Back reference assertion. */
/* Assertion flags.  Each flag is a distinct single bit, so several of them
   can be combined in one bitmap. */
#define ASSERT_AT_BOL        0x01 /* Beginning of line. */
#define ASSERT_AT_EOL        0x02 /* End of line. */
#define ASSERT_BRACKET_MATCH 0x04 /* Matches in `bracket_match_list'. */
#define ASSERT_AT_BOW        0x08 /* Beginning of word. */
#define ASSERT_AT_EOW        0x10 /* End of word. */
#define ASSERT_AT_WB         0x20 /* Word boundary. */
#define ASSERT_AT_WB_NEG     0x40 /* Not a word boundary. */
#define ASSERT_BACKREF       0x80 /* A back reference in `backref'. */
#define ASSERT_LAST          0x80 /* Same value as ASSERT_BACKREF. */
256 /* Tag directions. */
258 TRE_TAG_MINIMIZE
= 0,
260 TRE_TAG_LEFT_MAXIMIZE
,
261 } tre_tag_direction_t
;
263 /* Parameters that can be changed dynamically while matching. */
265 TRE_PARAM_COST_INS
= 0,
266 TRE_PARAM_COST_DEL
= 1,
267 TRE_PARAM_COST_SUBST
= 2,
268 TRE_PARAM_COST_MAX
= 3,
269 TRE_PARAM_MAX_INS
= 4,
270 TRE_PARAM_MAX_DEL
= 5,
271 TRE_PARAM_MAX_SUBST
= 6,
272 TRE_PARAM_MAX_ERR
= 7,
/* Unset matching parameter.  The negative value is parenthesized so the
   macro expands to a single primary expression wherever it is used. */
#define TRE_PARAM_UNSET (-1)

/* Signifies the default matching parameter value.  Parenthesized for the
   same reason. */
#define TRE_PARAM_DEFAULT (-2)
283 /* Instructions to compute submatch register values from tag values
284 after a successful match. */
285 struct tre_submatch_data
{
286 /* Tag that gives the value for rm_so (submatch start offset). */
288 /* Tag that gives the value for rm_eo (submatch end offset). */
292 typedef struct tre_submatch_data tre_submatch_data_t
;
295 /* TNFA definition. */
296 typedef struct tnfa tre_tnfa_t
;
299 tre_tnfa_transition_t
*transitions
;
300 tre_tnfa_transition_t
*initial
;
301 tre_tnfa_transition_t
*final
;
302 tre_submatch_data_t
*submatch_data
;
303 #ifdef USE_FIRSTPOS_CHARS /* not defined */
304 char *firstpos_chars
;
305 #endif /* USE_FIRSTPOS_CHARS */
306 tre_tag_direction_t
*tag_directions
;
308 tre_last_matched_branch_t
*last_matched_branch
;
310 unsigned int num_transitions
;
312 unsigned int num_submatches
;
313 unsigned int num_submatches_invisible
;
320 int num_reorder_tags
;
325 __private_extern__
int
326 tre_compile(regex_t
* __restrict preg
, const tre_char_t
* __restrict regex
, size_t n
, int cflags
,
327 locale_t __restrict loc
);
329 __private_extern__
void
330 tre_free(regex_t
*preg
);
332 __private_extern__ reg_errcode_t
333 tre_fill_pmatch(size_t nmatch
, regmatch_t pmatch
[ __restrict
], int cflags
,
334 const tre_tnfa_t
* __restrict tnfa
, const tre_tag_t
* __restrict tags
, int match_eo
);
336 __private_extern__ reg_errcode_t
337 tre_tnfa_run_parallel(const tre_tnfa_t
* __restrict tnfa
, const void * __restrict string
, int len
,
338 tre_str_type_t type
, tre_tag_t
* __restrict match_tags
, int eflags
,
339 int * __restrict match_end_ofs
);
341 __private_extern__ reg_errcode_t
342 tre_tnfa_run_backtrack(const tre_tnfa_t
* __restrict tnfa
, const void * __restrict string
,
343 int len
, tre_str_type_t type
, tre_tag_t
* __restrict match_tags
,
344 int eflags
, int * __restrict match_end_ofs
);
347 __private_extern__ reg_errcode_t
348 tre_tnfa_run_approx(const tre_tnfa_t
* __restrict tnfa
, const void * __restrict string
, int len
,
349 tre_str_type_t type
, tre_tag_t
* __restrict match_tags
,
350 regamatch_t
* __restrict match
, regaparams_t params
,
351 int eflags
, int * __restrict match_end_ofs
);
352 #endif /* TRE_APPROX */
354 #endif /* TRE_INTERNAL_H */