]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | tre.h - TRE public API definitions | |
3 | ||
4 | This software is released under a BSD-style license. | |
5 | See the file LICENSE for details and copyright. | |
6 | ||
7 | */ | |
8 | ||
9 | #ifndef TRE_H | |
10 | #define TRE_H 1 | |
11 | ||
12 | #include "tre-config.h" | |
13 | ||
14 | #ifdef HAVE_SYS_TYPES_H | |
15 | #include <sys/types.h> | |
16 | #endif /* HAVE_SYS_TYPES_H */ | |
17 | ||
18 | #ifdef HAVE_LIBUTF8_H | |
19 | #include <libutf8.h> | |
20 | #endif /* HAVE_LIBUTF8_H */ | |
21 | ||
22 | #ifdef TRE_USE_SYSTEM_REGEX_H | |
23 | /* Include the system regex.h to make TRE ABI compatible with the | |
24 | system regex, and map each tre_* name below onto the system name. */ | |
25 | #include TRE_SYSTEM_REGEX_H_PATH | |
26 | #define tre_regcomp regcomp | |
27 | #define tre_regexec regexec | |
28 | #define tre_regerror regerror | |
29 | #define tre_regfree regfree | |
30 | ||
31 | #ifdef TRE_APPROX /* Approximate matching names are only mapped with TRE_APPROX. */ | |
32 | #define tre_regaexec regaexec | |
33 | #define tre_reganexec reganexec | |
34 | #define tre_regawexec regawexec | |
35 | #define tre_regawnexec regawnexec | |
36 | #endif /* TRE_APPROX */ | |
37 | #define tre_regncomp regncomp | |
38 | #define tre_regnexec regnexec | |
39 | #define tre_regwcomp regwcomp | |
40 | #define tre_regwexec regwexec | |
41 | #define tre_regwncomp regwncomp | |
42 | #define tre_regwnexec regwnexec | |
43 | ||
44 | #define tre_regcomp_l regcomp_l /* Extended locale variants. */ | |
45 | #define tre_regncomp_l regncomp_l | |
46 | #define tre_regwcomp_l regwcomp_l | |
47 | #define tre_regwncomp_l regwncomp_l | |
48 | #endif /* TRE_USE_SYSTEM_REGEX_H */ | |
49 | ||
50 | #ifdef __cplusplus | |
51 | extern "C" { | |
52 | #endif | |
53 | ||
54 | #ifdef TRE_USE_SYSTEM_REGEX_H | |
55 | ||
56 | #ifndef REG_OK | |
57 | #define REG_OK 0 /* Fallback, used only when REG_OK is not already defined. */ | |
58 | #endif /* !REG_OK */ | |
59 | ||
60 | #ifndef HAVE_REG_ERRCODE_T | |
61 | typedef int reg_errcode_t; /* Plain int stand-in when HAVE_REG_ERRCODE_T is not defined. */ | |
62 | #endif /* !HAVE_REG_ERRCODE_T */ | |
63 | ||
64 | #if !defined(REG_NOSPEC) && !defined(REG_LITERAL) | |
65 | #define REG_LITERAL 0x1000 /* Only defined when neither REG_NOSPEC nor REG_LITERAL exists yet. */ | |
66 | #endif | |
67 | ||
68 | /* Extra tre_regcomp() flags. */ | |
69 | #ifndef REG_BASIC | |
70 | #define REG_BASIC 0 | |
71 | #endif /* !REG_BASIC */ | |
72 | #if 0 /* Compiled out: the four flag definitions below are never seen by the compiler. */ | |
73 | #define REG_LEFT_ASSOC (REG_LITERAL << 1) | |
74 | #define REG_UNGREEDY (REG_LEFT_ASSOC << 1) | |
75 | ||
76 | /* Extra tre_regexec() flags. */ | |
77 | #define REG_APPROX_MATCHER 0x1000 | |
78 | #define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1) | |
79 | #endif /* 0 */ | |
80 | ||
81 | #define RE_MAGIC ((('r'^0200)<<8) | 'e') /* 0xF265 with ASCII character codes. NOTE(review): its use is not visible in this header - confirm against the system regex_t. */ | |
82 | ||
83 | #else /* !TRE_USE_SYSTEM_REGEX_H */ | |
84 | ||
85 | /* If we're not using the system regex.h, we need to define the | |
86 | structs and enums ourselves. */ | |
87 | ||
88 | typedef int regoff_t; /* Type of the rm_so and rm_eo members of regmatch_t. */ | |
89 | typedef struct { /* Pattern object: passed non-const to the tre_reg*comp() functions and const to the matching functions. */ | |
90 | size_t re_nsub; /* Number of parenthesized subexpressions. */ | |
91 | const void *re_endp; /* End pointer of the regex string (REG_PEND). */ | |
92 | void *value; /* For internal use only. */ | |
93 | } regex_t; | |
94 | ||
95 | typedef struct { /* One entry of the pmatch[] arrays taken by the matching functions. */ | |
96 | regoff_t rm_so; /* NOTE(review): presumably the start offset of the match, as in POSIX regmatch_t - confirm units for the wchar_t API. */ | |
97 | regoff_t rm_eo; /* NOTE(review): presumably the end offset of the match, as in POSIX regmatch_t - confirm. */ | |
98 | } regmatch_t; | |
99 | ||
100 | ||
101 | typedef enum { | |
102 | REG_OK = 0, /* No error. Explicitly 0; the enumerators below are numbered 1, 2, ... in declaration order. */ | |
103 | /* POSIX tre_regcomp() return error codes. (In the order listed in the | |
104 | standard.) */ | |
105 | REG_NOMATCH, /* No match. */ | |
106 | REG_BADPAT, /* Invalid regexp. */ | |
107 | REG_ECOLLATE, /* Unknown collating element. */ | |
108 | REG_ECTYPE, /* Unknown character class name. */ | |
109 | REG_EESCAPE, /* Trailing backslash. */ | |
110 | REG_ESUBREG, /* Invalid back reference. */ | |
111 | REG_EBRACK, /* "[]" imbalance. */ | |
112 | REG_EPAREN, /* "\(\)" or "()" imbalance. */ | |
113 | REG_EBRACE, /* "\{\}" or "{}" imbalance. */ | |
114 | REG_BADBR, /* Invalid content of {}. */ | |
115 | REG_ERANGE, /* Invalid use of range operator. */ | |
116 | REG_ESPACE, /* Out of memory. */ | |
117 | REG_BADRPT, /* Invalid use of repetition operators. */ | |
118 | REG_INVARG, /* Invalid argument, e.g. negative-length string. */ | |
119 | REG_ILLSEQ, /* Illegal byte sequence (bad multibyte character). */ | |
120 | } reg_errcode_t; | |
121 | ||
122 | /* POSIX tre_regcomp() flags. Each flag is a single bit derived by shifting the previous one. */ | |
123 | #define REG_EXTENDED 1 /* 0x01 */ | |
124 | #define REG_ICASE (REG_EXTENDED << 1) /* 0x02 */ | |
125 | #define REG_NEWLINE (REG_ICASE << 1) /* 0x04 */ | |
126 | #define REG_NOSUB (REG_NEWLINE << 1) /* 0x08 */ | |
127 | ||
128 | /* Extra tre_regcomp() flags. They continue the bit sequence of the POSIX compile flags. */ | |
129 | #define REG_BASIC 0 /* No bit set. */ | |
130 | #define REG_LITERAL (REG_NOSUB << 1) /* 0x10 */ | |
131 | #define REG_LEFT_ASSOC (REG_LITERAL << 1) /* 0x20 */ | |
132 | #define REG_UNGREEDY (REG_LEFT_ASSOC << 1) /* 0x40 */ | |
133 | #define REG_PEND (REG_UNGREEDY << 1) /* 0x80 */ | |
134 | ||
135 | /* POSIX tre_regexec() flags. These restart at 1, so their values overlap the compile flags above. */ | |
136 | #define REG_NOTBOL 1 /* 0x01 */ | |
137 | #define REG_NOTEOL (REG_NOTBOL << 1) /* 0x02 */ | |
138 | ||
139 | /* Extra tre_regexec() flags. They continue the bit sequence of the POSIX exec flags. */ | |
140 | #define REG_APPROX_MATCHER (REG_NOTEOL << 1) /* 0x04 */ | |
141 | #define REG_BACKTRACKING_MATCHER (REG_APPROX_MATCHER << 1) /* 0x08 */ | |
142 | #define REG_STARTEND (REG_BACKTRACKING_MATCHER << 1) /* 0x10 */ | |
143 | ||
144 | #endif /* !TRE_USE_SYSTEM_REGEX_H */ | |
145 | ||
146 | /* REG_NOSPEC and REG_LITERAL mean the same thing; define whichever one is missing as an alias of the other. */ | |
147 | #if defined(REG_LITERAL) && !defined(REG_NOSPEC) | |
148 | #define REG_NOSPEC REG_LITERAL | |
149 | #elif defined(REG_NOSPEC) && !defined(REG_LITERAL) | |
150 | #define REG_LITERAL REG_NOSPEC | |
151 | #endif /* defined(REG_NOSPEC) */ | |
152 | ||
153 | /* The maximum number of iterations in a bound expression. Any earlier definition is dropped first, so this value always wins. */ | |
154 | #undef RE_DUP_MAX | |
155 | #define RE_DUP_MAX 255 | |
156 | ||
157 | /* The POSIX.2 regexp functions. With TRE_USE_SYSTEM_REGEX_H defined, these tre_* names are macros for regcomp/regexec/regerror/regfree. */ | |
158 | extern int /* `cflags' takes the tre_regcomp() flags defined in this header. */ | |
159 | tre_regcomp(regex_t * __restrict preg, const char * __restrict regex, int cflags); | |
160 | ||
161 | extern int /* `eflags' takes the tre_regexec() flags; `nmatch' and `pmatch' describe the submatch array. */ | |
162 | tre_regexec(const regex_t * __restrict preg, const char * __restrict string, size_t nmatch, | |
163 | regmatch_t pmatch[ __restrict ], int eflags); | |
164 | ||
165 | extern size_t /* NOTE(review): return value presumably follows POSIX regerror() (buffer size needed) - confirm. */ | |
166 | tre_regerror(int errcode, const regex_t * __restrict preg, char * __restrict errbuf, | |
167 | size_t errbuf_size); | |
168 | ||
169 | extern void /* NOTE(review): presumably releases what a tre_reg*comp() call stored in `preg' - confirm. */ | |
170 | tre_regfree(regex_t *preg); | |
171 | ||
172 | #ifdef TRE_WCHAR | |
173 | #ifdef HAVE_WCHAR_H | |
174 | #include <wchar.h> | |
175 | #endif /* HAVE_WCHAR_H */ | |
176 | ||
177 | /* Wide character versions (not in POSIX.2): same parameter lists as tre_regcomp() and tre_regexec(), but with wchar_t strings. Only declared when TRE_WCHAR is defined. */ | |
178 | extern int | |
179 | tre_regwcomp(regex_t * __restrict preg, const wchar_t * __restrict regex, int cflags); | |
180 | ||
181 | extern int | |
182 | tre_regwexec(const regex_t * __restrict preg, const wchar_t * __restrict string, | |
183 | size_t nmatch, regmatch_t pmatch[ __restrict ], int eflags); | |
184 | #endif /* TRE_WCHAR */ | |
185 | ||
186 | /* Versions with a maximum length argument (`len') and therefore the capability to | |
187 | handle null characters in the middle of the strings (not in POSIX.2). */ | |
188 | extern int | |
189 | tre_regncomp(regex_t * __restrict preg, const char * __restrict regex, size_t len, int cflags); | |
190 | ||
191 | extern int | |
192 | tre_regnexec(const regex_t * __restrict preg, const char * __restrict string, size_t len, | |
193 | size_t nmatch, regmatch_t pmatch[ __restrict ], int eflags); | |
194 | ||
195 | #ifdef TRE_WCHAR /* wchar_t counterparts of the two length-limited functions above. */ | |
196 | extern int | |
197 | tre_regwncomp(regex_t * __restrict preg, const wchar_t * __restrict regex, size_t len, int cflags); | |
198 | ||
199 | extern int | |
200 | tre_regwnexec(const regex_t * __restrict preg, const wchar_t * __restrict string, size_t len, | |
201 | size_t nmatch, regmatch_t pmatch[ __restrict ], int eflags); | |
202 | #endif /* TRE_WCHAR */ | |
203 | ||
204 | /* Extended locale versions: each takes the arguments of the corresponding compile function plus a trailing locale_t. */ | |
205 | #include <xlocale.h> /* NOTE(review): presumably supplies locale_t; included unconditionally - confirm it exists on every target libc. */ | |
206 | ||
207 | extern int | |
208 | tre_regcomp_l(regex_t * __restrict preg, const char * __restrict regex, int cflags, locale_t __restrict loc); | |
209 | ||
210 | #ifdef TRE_WCHAR | |
211 | extern int | |
212 | tre_regwcomp_l(regex_t * __restrict preg, const wchar_t * __restrict regex, int cflags, locale_t __restrict loc); | |
213 | #endif /* TRE_WCHAR */ | |
214 | ||
215 | extern int | |
216 | tre_regncomp_l(regex_t * __restrict preg, const char * __restrict regex, size_t len, int cflags, locale_t __restrict loc); | |
217 | ||
218 | #ifdef TRE_WCHAR | |
219 | extern int | |
220 | tre_regwncomp_l(regex_t * __restrict preg, const wchar_t * __restrict regex, size_t len, int cflags, locale_t __restrict loc); | |
221 | #endif /* TRE_WCHAR */ | |
222 | ||
223 | #ifdef TRE_APPROX | |
224 | ||
225 | /* Approximate matching parameter struct. Passed by value as `params' to the tre_rega*exec() functions; tre_regaparams_default() takes a pointer to one. */ | |
226 | typedef struct { | |
227 | int cost_ins; /* Default cost of an inserted character. */ | |
228 | int cost_del; /* Default cost of a deleted character. */ | |
229 | int cost_subst; /* Default cost of a substituted character. */ | |
230 | int max_cost; /* Maximum allowed cost of a match. */ | |
231 | ||
232 | int max_ins; /* Maximum allowed number of inserts. */ | |
233 | int max_del; /* Maximum allowed number of deletes. */ | |
234 | int max_subst; /* Maximum allowed number of substitutes. */ | |
235 | int max_err; /* Maximum allowed number of errors total. */ | |
236 | } regaparams_t; | |
237 | ||
238 | /* Approximate matching result struct. Passed by pointer as `match' to the tre_rega*exec() functions. */ | |
239 | typedef struct { | |
240 | size_t nmatch; /* Length of pmatch[] array. */ | |
241 | regmatch_t *pmatch; /* Submatch data (array of `nmatch' entries). */ | |
242 | int cost; /* Cost of the match. */ | |
243 | int num_ins; /* Number of inserts in the match. */ | |
244 | int num_del; /* Number of deletes in the match. */ | |
245 | int num_subst; /* Number of substitutes in the match. */ | |
246 | } regamatch_t; | |
247 | ||
248 | ||
249 | /* Approximate matching functions. `params' is passed by value; `match' is a non-const pointer (NOTE(review): presumably where the result is stored - confirm). */ | |
250 | extern int | |
251 | tre_regaexec(const regex_t * __restrict preg, const char * __restrict string, | |
252 | regamatch_t * __restrict match, regaparams_t params, int eflags); | |
253 | ||
254 | extern int | |
255 | tre_reganexec(const regex_t * __restrict preg, const char * __restrict string, size_t len, | |
256 | regamatch_t * __restrict match, regaparams_t params, int eflags); | |
257 | #ifdef TRE_WCHAR | |
258 | /* Wide character approximate matching: wchar_t counterparts of the two functions above. */ | |
259 | extern int | |
260 | tre_regawexec(const regex_t * __restrict preg, const wchar_t * __restrict string, | |
261 | regamatch_t * __restrict match, regaparams_t params, int eflags); | |
262 | ||
263 | extern int | |
264 | tre_regawnexec(const regex_t * __restrict preg, const wchar_t * __restrict string, size_t len, | |
265 | regamatch_t * __restrict match, regaparams_t params, int eflags); | |
266 | #endif /* TRE_WCHAR */ | |
267 | ||
268 | /* Sets the parameters to default values. The struct is modified through the `params' pointer. */ | |
269 | extern void | |
270 | tre_regaparams_default(regaparams_t *params); | |
271 | #endif /* TRE_APPROX */ | |
272 | ||
273 | #ifdef TRE_WCHAR /* tre_char_t is the character type handed to the tre_str_source get_next_char callback. */ | |
274 | typedef wchar_t tre_char_t; | |
275 | #else /* !TRE_WCHAR */ | |
276 | typedef unsigned char tre_char_t; | |
277 | #endif /* !TRE_WCHAR */ | |
278 | ||
279 | #ifdef TRE_STR_USER | |
280 | typedef struct { /* Caller-supplied string source: three callbacks plus a context pointer, consumed by tre_reguexec(). */ | |
281 | int (*get_next_char)(tre_char_t *c, unsigned int *pos_add, void *context); /* NOTE(review): presumably stores the next character in *c and the position advance in *pos_add - confirm return convention. */ | |
282 | void (*rewind)(size_t pos, void *context); /* NOTE(review): presumably repositions the source to `pos' - confirm. */ | |
283 | int (*compare)(size_t pos1, size_t pos2, size_t len, void *context); /* NOTE(review): presumably compares `len' characters at pos1 and pos2 - confirm return convention. */ | |
284 | void *context; /* Opaque pointer; NOTE(review): presumably the value handed to each callback as `context' - confirm. */ | |
285 | } tre_str_source; | |
286 | ||
287 | extern int /* Same trailing parameters as tre_regexec(), but the input is a tre_str_source instead of a string. */ | |
288 | tre_reguexec(const regex_t * __restrict preg, const tre_str_source * __restrict string, | |
289 | size_t nmatch, regmatch_t pmatch[ __restrict ], int eflags); | |
290 | #endif /* TRE_STR_USER */ | |
291 | ||
292 | #ifndef __LIBC__ | |
293 | /* Returns the version string. The returned string is static, so it is not allocated for the caller and must not be freed. */ | |
294 | extern char * | |
295 | tre_version(void); | |
296 | ||
297 | /* Returns the value for a config parameter. The type to which `result' | |
298 | must point depends on the value of `query'; see the documentation for | |
299 | more details. */ | |
300 | extern int | |
301 | tre_config(int query, void *result); | |
302 | ||
303 | enum { /* NOTE(review): presumably the accepted `query' values for tre_config() - confirm. Implicitly numbered from 0. */ | |
304 | TRE_CONFIG_APPROX, | |
305 | TRE_CONFIG_WCHAR, | |
306 | TRE_CONFIG_MULTIBYTE, | |
307 | TRE_CONFIG_SYSTEM_ABI, | |
308 | TRE_CONFIG_VERSION | |
309 | }; | |
310 | ||
311 | /* Returns 1 if the compiled pattern has back references, 0 if not. `preg' is taken as const. */ | |
312 | extern int | |
313 | tre_have_backrefs(const regex_t *preg); | |
314 | ||
315 | #ifdef TRE_APPROX | |
316 | /* Returns 1 if the compiled pattern uses approximate matching features, | |
317 | 0 if not. Only declared when TRE_APPROX is defined. */ | |
318 | extern int | |
319 | tre_have_approx(const regex_t *preg); | |
320 | #endif /* TRE_APPROX */ | |
321 | #endif /* !__LIBC__ */ | |
322 | ||
323 | #ifdef __cplusplus | |
324 | } | |
325 | #endif | |
326 | #endif /* TRE_H */ | |
327 | ||
328 | /* EOF */ |