]>
Commit | Line | Data |
---|---|---|
1 | #ifndef _REGEX_H_ | |
2 | #define _REGEX_H_ /* never again */ | |
3 | /* | |
4 | * regular expressions | |
5 | * | |
6 | * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. | |
7 | * | |
8 | * Development of this software was funded, in part, by Cray Research Inc., | |
9 | * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics | |
10 | * Corporation, none of whom are responsible for the results. The author | |
11 | * thanks all of them. | |
12 | * | |
13 | * Redistribution and use in source and binary forms -- with or without | |
14 | * modification -- are permitted for any purpose, provided that | |
15 | * redistributions in source form retain this entire copyright notice and | |
16 | * indicate the origin and nature of any modifications. | |
17 | * | |
18 | * I'd appreciate being given credit for this package in the documentation | |
19 | * of software which uses it, but that is not a requirement. | |
20 | * | |
21 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, | |
22 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY | |
23 | * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL | |
24 | * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
25 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
26 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; | |
27 | * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, | |
28 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | |
29 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF | |
30 | * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
31 | * | |
32 | * | |
33 | * | |
34 | * Prototypes etc. marked with "^" within comments get gathered up (and | |
35 | * possibly edited) by the regfwd program and inserted near the bottom of | |
36 | * this file. | |
37 | * | |
38 | * We offer the option of declaring one wide-character version of the | |
39 | * RE functions as well as the char versions. To do that, define | |
40 | * __REG_WIDE_T to the type of wide characters (unfortunately, there | |
41 | * is no consensus that wchar_t is suitable) and __REG_WIDE_COMPILE and | |
42 | * __REG_WIDE_EXEC to the names to be used for the compile and execute | |
43 | * functions (suggestion: re_Xcomp and re_Xexec, where X is a letter | |
44 | * suggestive of the wide type, e.g. re_ucomp and re_uexec for Unicode). | |
45 | * For cranky old compilers, it may be necessary to do something like: | |
46 | * #define __REG_WIDE_COMPILE(a,b,c,d) re_Xcomp(a,b,c,d) | |
47 | * #define __REG_WIDE_EXEC(a,b,c,d,e,f,g) re_Xexec(a,b,c,d,e,f,g) | |
48 | * rather than just #defining the names as parameterless macros. | |
49 | * | |
50 | * For some specialized purposes, it may be desirable to suppress the | |
51 | * declarations of the "front end" functions, regcomp() and regexec(), | |
52 | * or of the char versions of the compile and execute functions. To | |
53 | * suppress the front-end functions, define __REG_NOFRONT. To suppress | |
54 | * the char versions, define __REG_NOCHAR. | |
55 | * | |
56 | * The right place to do those defines (and some others you may want, see | |
57 | * below) would be <sys/types.h>. If you don't have control of that file, | |
58 | * the right place to add your own defines to this file is marked below. | |
59 | * This is normally done automatically, by the makefile and regmkhdr, based | |
60 | * on the contents of regcustom.h. | |
61 | */ | |
62 | ||
63 | ||
64 | ||
65 | /* | |
66 | * when compiled as C++, give everything declared below C linkage | |
67 | */ | |
68 | #ifdef __cplusplus | |
69 | extern "C" { | |
70 | #endif | |
71 | ||
72 | ||
73 | ||
74 | /* | |
75 | * Add your own defines, if needed, here. | |
76 | */ | |
77 | ||
78 | ||
79 | ||
80 | /* | |
81 | * Location where a chunk of regcustom.h is automatically spliced into | |
82 | * this file (working from its prototype, regproto.h). | |
83 | */ | |
84 | /* --- begin --- */ | |
85 | /* start clean: drop any of our configuration macros that system headers may already have defined */ | |
86 | #ifdef __REG_WIDE_T | |
87 | #undef __REG_WIDE_T | |
88 | #endif | |
89 | #ifdef __REG_WIDE_COMPILE | |
90 | #undef __REG_WIDE_COMPILE | |
91 | #endif | |
92 | #ifdef __REG_WIDE_EXEC | |
93 | #undef __REG_WIDE_EXEC | |
94 | #endif | |
95 | #ifdef __REG_REGOFF_T | |
96 | #undef __REG_REGOFF_T | |
97 | #endif | |
98 | #ifdef __REG_VOID_T | |
99 | #undef __REG_VOID_T | |
100 | #endif | |
101 | #ifdef __REG_CONST | |
102 | #undef __REG_CONST | |
103 | #endif | |
104 | #ifdef __REG_NOFRONT | |
105 | #undef __REG_NOFRONT | |
106 | #endif | |
107 | #ifdef __REG_NOCHAR | |
108 | #undef __REG_NOCHAR | |
109 | #endif | |
110 | #if wxUSE_UNICODE | |
111 | # define __REG_WIDE_T wxChar /* type of wide characters */ | |
112 | # define __REG_WIDE_COMPILE wx_re_comp /* name of the wide compile function */ | |
113 | # define __REG_WIDE_EXEC wx_re_exec /* name of the wide execute function */ | |
114 | # define __REG_NOCHAR /* suppress the char versions of compile/execute */ | |
115 | #endif | |
116 | #define __REG_NOFRONT /* suppress the front-end functions regcomp() and regexec() */ | |
117 | #define _ANSI_ARGS_(x) x /* expands to its argument unchanged; wraps the prototype argument lists below */ | |
118 | /* --- end --- */ | |
119 | ||
120 | ||
121 | /* | |
122 | * interface types etc. | |
123 | */ | |
124 | ||
125 | /* | |
126 | * regoff_t has to be large enough to hold either off_t or ssize_t, | |
127 | * and must be signed; it's only a guess that long is suitable, so we | |
128 | * offer <sys/types.h> an override via __REG_REGOFF_T. | |
129 | * NOTE: the spliced section earlier in this file #undefs __REG_REGOFF_T, so as configured here the `long' fallback is the one used. */ | |
130 | #ifdef __REG_REGOFF_T | |
131 | typedef __REG_REGOFF_T regoff_t; /* externally supplied override */ | |
132 | #else | |
133 | typedef long regoff_t; /* default guess */ | |
134 | #endif | |
135 | ||
136 | /* | |
137 | * For benefit of old compilers, we offer <sys/types.h> the option of | |
138 | * overriding the `void' type used to declare nonexistent return types | |
139 | * (via __REG_VOID_T). NOTE: the spliced section earlier in this file #undefs __REG_VOID_T, so as configured here re_void is plain `void'. */ | |
140 | #ifdef __REG_VOID_T | |
141 | typedef __REG_VOID_T re_void; /* externally supplied override */ | |
142 | #else | |
143 | typedef void re_void; /* default */ | |
144 | #endif | |
145 | ||
146 | /* | |
147 | * Also for benefit of old compilers, <sys/types.h> can supply a macro | |
148 | * (__REG_CONST) which expands to a substitute for `const'. | |
149 | * NOTE: the spliced section earlier in this file #undefs __REG_CONST, so as configured here it is always defined as `const' below. */ | |
150 | #ifndef __REG_CONST | |
151 | #define __REG_CONST const /* default: the real keyword */ | |
152 | #endif | |
153 | ||
154 | ||
155 | ||
156 | /* | |
157 | * other interface types | |
158 | */ | |
159 | ||
160 | /* regex_t: the biggie, a compiled RE (or rather, a front end to same): a few public fields plus opaque pointers to the real innards */ | |
161 | typedef struct { | |
162 | int re_magic; /* magic number */ | |
163 | size_t re_nsub; /* number of subexpressions */ | |
164 | long re_info; /* information about RE; presumably a bit mask of the REG_U* values below -- confirm */ | |
165 | # define REG_UBACKREF 000001 /* the REG_U* values are octal, one distinct bit each */ | |
166 | # define REG_ULOOKAHEAD 000002 | |
167 | # define REG_UBOUNDS 000004 | |
168 | # define REG_UBRACES 000010 | |
169 | # define REG_UBSALNUM 000020 | |
170 | # define REG_UPBOTCH 000040 | |
171 | # define REG_UBBS 000100 | |
172 | # define REG_UNONPOSIX 000200 | |
173 | # define REG_UUNSPEC 000400 | |
174 | # define REG_UUNPORT 001000 | |
175 | # define REG_ULOCALE 002000 | |
176 | # define REG_UEMPTYMATCH 004000 | |
177 | # define REG_UIMPOSSIBLE 010000 | |
178 | # define REG_USHORTEST 020000 | |
179 | int re_csize; /* sizeof(character) */ | |
180 | char *re_endp; /* backward compatibility kludge */ | |
181 | /* the rest is opaque pointers to hidden innards; callers should not interpret them */ | |
182 | char *re_guts; /* `char *' is more portable than `void *' */ | |
183 | char *re_fns; /* opaque, like re_guts */ | |
184 | } regex_t; | |
185 | ||
186 | /* regmatch_t: result reporting -- where one substring matched, as a pair of regoff_t offsets (may acquire more fields later) */ | |
187 | typedef struct { | |
188 | regoff_t rm_so; /* start of substring */ | |
189 | regoff_t rm_eo; /* end of substring (in POSIX regexec this is the offset just past the last character -- presumably the same here, confirm) */ | |
190 | } regmatch_t; | |
191 | ||
192 | /* rm_detail_t: supplementary control and reporting; the execute functions declared below take a pointer to one */ | |
193 | typedef struct { | |
194 | regmatch_t rm_extend; /* see REG_EXPECT (compile flag: report details on partial/limited matches) */ | |
195 | } rm_detail_t; | |
196 | ||
197 | ||
198 | ||
199 | /* | |
200 | * compilation | |
201 | ^ #ifndef __REG_NOCHAR | |
202 | ^ int re_comp(regex_t *, __REG_CONST char *, size_t, int); | |
203 | ^ #endif | |
204 | ^ #ifndef __REG_NOFRONT | |
205 | ^ int regcomp(regex_t *, __REG_CONST char *, int); | |
206 | ^ #endif | |
207 | ^ #ifdef __REG_WIDE_T | |
208 | ^ int __REG_WIDE_COMPILE(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int); | |
209 | ^ #endif | |
210 | */ | |
211 | #define REG_BASIC 000000 /* BREs (convenience name for no flag bits set; values here are octal) */ | |
212 | #define REG_EXTENDED 000001 /* EREs */ | |
213 | #define REG_ADVF 000002 /* advanced features in EREs */ | |
214 | #define REG_ADVANCED 000003 /* AREs (which are also EREs); equals REG_EXTENDED|REG_ADVF */ | |
215 | #define REG_QUOTE 000004 /* no special characters, none */ | |
216 | #define REG_NOSPEC REG_QUOTE /* historical synonym */ | |
217 | #define REG_ICASE 000010 /* ignore case */ | |
218 | #define REG_NOSUB 000020 /* don't care about subexpressions */ | |
219 | #define REG_EXPANDED 000040 /* expanded format, white space & comments */ | |
220 | #define REG_NLSTOP 000100 /* \n doesn't match . or [^ ] */ | |
221 | #define REG_NLANCH 000200 /* ^ matches after \n, $ before */ | |
222 | #define REG_NEWLINE 000300 /* newlines are line terminators; equals REG_NLSTOP|REG_NLANCH */ | |
223 | #define REG_PEND 000400 /* ugh -- backward-compatibility hack */ | |
224 | #define REG_EXPECT 001000 /* report details on partial/limited matches */ | |
225 | #define REG_BOSONLY 002000 /* temporary kludge for BOS-only matches */ | |
226 | #define REG_DUMP 004000 /* none of your business :-) */ | |
227 | #define REG_FAKE 010000 /* none of your business :-) */ | |
228 | #define REG_PROGRESS 020000 /* none of your business :-) */ | |
229 | ||
230 | ||
231 | ||
232 | /* | |
233 | * execution | |
234 | ^ #ifndef __REG_NOCHAR | |
235 | ^ int re_exec(regex_t *, __REG_CONST char *, size_t, | |
236 | ^ rm_detail_t *, size_t, regmatch_t [], int); | |
237 | ^ #endif | |
238 | ^ #ifndef __REG_NOFRONT | |
239 | ^ int regexec(regex_t *, __REG_CONST char *, size_t, regmatch_t [], int); | |
240 | ^ #endif | |
241 | ^ #ifdef __REG_WIDE_T | |
242 | ^ int __REG_WIDE_EXEC(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, | |
243 | ^ rm_detail_t *, size_t, regmatch_t [], int); | |
244 | ^ #endif | |
245 | */ | |
246 | #define REG_NOTBOL 0001 /* BOS (beginning of string) is not BOL (beginning of line); values here are octal bits */ | |
247 | #define REG_NOTEOL 0002 /* EOS (end of string) is not EOL (end of line) */ | |
248 | #define REG_STARTEND 0004 /* backward compatibility kludge */ | |
249 | #define REG_FTRACE 0010 /* none of your business */ | |
250 | #define REG_MTRACE 0020 /* none of your business */ | |
251 | #define REG_SMALL 0040 /* none of your business */ | |
252 | ||
253 | ||
254 | ||
255 | /* | |
256 | * misc generics (may be more functions here eventually) | |
257 | ^ re_void regfree(regex_t *); | |
258 | */ | |
259 | ||
260 | ||
261 | ||
262 | /* | |
263 | * error reporting | |
264 | * Be careful if modifying the list of error codes -- the table used by | |
265 | * regerror() is generated automatically from this file (presumably including each code's comment text -- check the generator before rewording any of it)! | |
266 | * | |
267 | * Note that there is no wide-char variant of regerror at this time; what | |
268 | * kind of character is used for error reports is independent of what kind | |
269 | * is used in matching. | |
270 | * | |
271 | ^ extern size_t regerror(int, __REG_CONST regex_t *, char *, size_t); | |
272 | */ | |
273 | #define REG_OKAY 0 /* no errors detected */ | |
274 | #define REG_NOMATCH 1 /* failed to match */ | |
275 | #define REG_BADPAT 2 /* invalid regexp */ | |
276 | #define REG_ECOLLATE 3 /* invalid collating element */ | |
277 | #define REG_ECTYPE 4 /* invalid character class */ | |
278 | #define REG_EESCAPE 5 /* invalid escape \ sequence */ | |
279 | #define REG_ESUBREG 6 /* invalid backreference number */ | |
280 | #define REG_EBRACK 7 /* brackets [] not balanced */ | |
281 | #define REG_EPAREN 8 /* parentheses () not balanced */ | |
282 | #define REG_EBRACE 9 /* braces {} not balanced */ | |
283 | #define REG_BADBR 10 /* invalid repetition count(s) */ | |
284 | #define REG_ERANGE 11 /* invalid character range */ | |
285 | #define REG_ESPACE 12 /* out of memory */ | |
286 | #define REG_BADRPT 13 /* quantifier operand invalid */ | |
287 | #define REG_ASSERT 15 /* "can't happen" -- you found a bug */ | |
288 | #define REG_INVARG 16 /* invalid argument to regex function */ | |
289 | #define REG_MIXED 17 /* character widths of regex and string differ */ | |
290 | #define REG_BADOPT 18 /* invalid embedded option */ | |
291 | /* two specials for debugging and testing */ | |
292 | #define REG_ATOI 101 /* convert error-code name to number */ | |
293 | #define REG_ITOA 102 /* convert error-code number to name */ | |
294 | ||
295 | ||
296 | ||
297 | /* | |
298 | * the prototypes, as possibly munched by regfwd | |
299 | */ | |
300 | /* =====^!^===== begin forwards =====^!^===== */ | |
301 | /* automatically gathered by fwd; do not hand-edit */ | |
302 | /* === regproto.h === */ | |
303 | #ifndef __REG_NOCHAR | |
304 | int wx_re_comp _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, int)); | |
305 | #endif | |
306 | #ifndef __REG_NOFRONT | |
307 | int regcomp _ANSI_ARGS_((regex_t *, __REG_CONST char *, int)); | |
308 | #endif | |
309 | #ifdef __REG_WIDE_T | |
310 | int __REG_WIDE_COMPILE _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int)); | |
311 | #endif | |
312 | #ifndef __REG_NOCHAR | |
313 | int wx_re_exec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, rm_detail_t *, size_t, regmatch_t [], int)); | |
314 | #endif | |
315 | #ifndef __REG_NOFRONT | |
316 | int regexec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, regmatch_t [], int)); | |
317 | #endif | |
318 | #ifdef __REG_WIDE_T | |
319 | int __REG_WIDE_EXEC _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, rm_detail_t *, size_t, regmatch_t [], int)); | |
320 | #endif | |
321 | re_void regfree _ANSI_ARGS_((regex_t *)); | |
322 | extern size_t regerror _ANSI_ARGS_((int, __REG_CONST regex_t *, char *, size_t)); | |
323 | /* automatically gathered by fwd; do not hand-edit */ | |
324 | /* =====^!^===== end forwards =====^!^===== */ | |
325 | ||
326 | ||
327 | ||
328 | /* | |
329 | * close the extern "C" block opened near the top of this file (C++ only) | |
330 | */ | |
331 | #ifdef __cplusplus | |
332 | } | |
333 | #endif | |
334 | ||
335 | ||
336 | ||
337 | #endif |