]>
Commit | Line | Data |
---|---|---|
224c7076 A |
1 | /* |
2 | * Copyright (c) 1989, 1993, 1994 | |
3 | * The Regents of the University of California. All rights reserved. | |
4 | * | |
5 | * This code is derived from software contributed to Berkeley by | |
6 | * Guido van Rossum. | |
7 | * | |
8 | * Redistribution and use in source and binary forms, with or without | |
9 | * modification, are permitted provided that the following conditions | |
10 | * are met: | |
11 | * 1. Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * 2. Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in the | |
15 | * documentation and/or other materials provided with the distribution. | |
16 | * 3. All advertising materials mentioning features or use of this software | |
17 | * must display the following acknowledgement: | |
18 | * This product includes software developed by the University of | |
19 | * California, Berkeley and its contributors. | |
20 | * 4. Neither the name of the University nor the names of its contributors | |
21 | * may be used to endorse or promote products derived from this software | |
22 | * without specific prior written permission. | |
23 | * | |
24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
34 | * SUCH DAMAGE. | |
35 | */ | |
36 | ||
37 | #if defined(LIBC_SCCS) && !defined(lint) | |
38 | static char sccsid[] = "@(#)fnmatch.c 8.2 (Berkeley) 4/16/94"; | |
39 | #endif /* LIBC_SCCS and not lint */ | |
40 | #include <sys/cdefs.h> | |
41 | __FBSDID("$FreeBSD: src/lib/libc/gen/fnmatch.c,v 1.16 2004/07/29 03:13:10 tjr Exp $"); | |
42 | ||
43 | #include "xlocale_private.h" | |
44 | ||
45 | /* | |
46 | * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6. | |
47 | * Compares a filename or pathname to a pattern. | |
48 | */ | |
49 | ||
50 | /* | |
51 | * Some notes on multibyte character support: | |
52 | * 1. Patterns with illegal byte sequences match nothing. | |
53 | * 2. Illegal byte sequences in the "string" argument are handled by treating | |
54 | * them as single-byte characters with a value of the first byte of the | |
55 | * sequence cast to wchar_t. | |
56 | * 3. Multibyte conversion state objects (mbstate_t) are passed around and | |
57 | * used for most, but not all, conversions. Further work will be required | |
58 | * to support state-dependent encodings. | |
59 | */ | |
60 | ||
61 | #include <fnmatch.h> | |
62 | #include <limits.h> | |
63 | #include <string.h> | |
64 | #include <wchar.h> | |
65 | #include <wctype.h> | |
66 | ||
67 | #include "collate.h" | |
68 | ||
69 | #define EOS '\0' | |
70 | ||
71 | #if __DARWIN_UNIX03 | |
72 | #define RETURN_ERROR 2 /* neither 0 or FNM_NOMATCH */ | |
73 | #endif /* __DARWIN_UNIX03 */ | |
74 | #define RANGE_MATCH 1 | |
75 | #define RANGE_NOMATCH 0 | |
76 | #define RANGE_ERROR (-1) | |
77 | ||
78 | __private_extern__ int rangematch(const char *, wchar_t, const char *, int, char **, char **, mbstate_t *, mbstate_t *, locale_t); | |
79 | static int fnmatch1(const char *, const char *, int, mbstate_t, mbstate_t, locale_t); | |
80 | ||
81 | int | |
82 | fnmatch(pattern, string, flags) | |
83 | const char *pattern, *string; | |
84 | int flags; | |
85 | { | |
86 | static const mbstate_t initial; | |
87 | ||
88 | return (fnmatch1(pattern, string, flags, initial, initial, __current_locale())); | |
89 | } | |
90 | ||
91 | static int | |
92 | fnmatch1(pattern, string, flags, patmbs, strmbs, loc) | |
93 | const char *pattern, *string; | |
94 | int flags; | |
95 | mbstate_t patmbs, strmbs; | |
96 | locale_t loc; | |
97 | { | |
98 | const char *stringstart; | |
99 | char *newp, *news; | |
100 | char c; | |
101 | wchar_t pc, sc; | |
102 | size_t pclen, sclen; | |
103 | ||
104 | for (stringstart = string;;) { | |
105 | pclen = mbrtowc_l(&pc, pattern, MB_LEN_MAX, &patmbs, loc); | |
106 | if (pclen == (size_t)-1 || pclen == (size_t)-2) | |
107 | #if __DARWIN_UNIX03 | |
108 | return (RETURN_ERROR); | |
109 | #else /* !__DARWIN_UNIX03 */ | |
110 | return (FNM_NOMATCH); | |
111 | #endif /* __DARWIN_UNIX03 */ | |
112 | pattern += pclen; | |
113 | sclen = mbrtowc_l(&sc, string, MB_LEN_MAX, &strmbs, loc); | |
114 | if (sclen == (size_t)-1 || sclen == (size_t)-2) { | |
115 | sc = (unsigned char)*string; | |
116 | sclen = 1; | |
117 | memset(&strmbs, 0, sizeof(strmbs)); | |
118 | } | |
119 | switch (pc) { | |
120 | case EOS: | |
121 | if ((flags & FNM_LEADING_DIR) && sc == '/') | |
122 | return (0); | |
123 | return (sc == EOS ? 0 : FNM_NOMATCH); | |
124 | case '?': | |
125 | if (sc == EOS) | |
126 | return (FNM_NOMATCH); | |
127 | if (sc == '/' && (flags & FNM_PATHNAME)) | |
128 | return (FNM_NOMATCH); | |
129 | if (sc == '.' && (flags & FNM_PERIOD) && | |
130 | (string == stringstart || | |
131 | ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) | |
132 | return (FNM_NOMATCH); | |
133 | string += sclen; | |
134 | break; | |
135 | case '*': | |
136 | c = *pattern; | |
137 | /* Collapse multiple stars. */ | |
138 | while (c == '*') | |
139 | c = *++pattern; | |
140 | ||
141 | if (sc == '.' && (flags & FNM_PERIOD) && | |
142 | (string == stringstart || | |
143 | ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) | |
144 | return (FNM_NOMATCH); | |
145 | ||
146 | /* Optimize for pattern with * at end or before /. */ | |
147 | if (c == EOS) | |
148 | if (flags & FNM_PATHNAME) | |
149 | return ((flags & FNM_LEADING_DIR) || | |
150 | strchr(string, '/') == NULL ? | |
151 | 0 : FNM_NOMATCH); | |
152 | else | |
153 | return (0); | |
154 | else if (c == '/' && flags & FNM_PATHNAME) { | |
155 | if ((string = strchr(string, '/')) == NULL) | |
156 | return (FNM_NOMATCH); | |
157 | break; | |
158 | } | |
159 | ||
160 | /* General case, use recursion. */ | |
161 | while (sc != EOS) { | |
162 | if (!fnmatch1(pattern, string, | |
163 | flags & ~FNM_PERIOD, patmbs, strmbs, loc)) | |
164 | return (0); | |
165 | sclen = mbrtowc_l(&sc, string, MB_LEN_MAX, | |
166 | &strmbs, loc); | |
167 | if (sclen == (size_t)-1 || | |
168 | sclen == (size_t)-2) { | |
169 | sc = (unsigned char)*string; | |
170 | sclen = 1; | |
171 | memset(&strmbs, 0, sizeof(strmbs)); | |
172 | } | |
173 | if (sc == '/' && flags & FNM_PATHNAME) | |
174 | break; | |
175 | string += sclen; | |
176 | } | |
177 | return (FNM_NOMATCH); | |
178 | case '[': | |
179 | if (sc == EOS) | |
180 | return (FNM_NOMATCH); | |
181 | if (sc == '/' && (flags & FNM_PATHNAME)) | |
182 | return (FNM_NOMATCH); | |
183 | if (sc == '.' && (flags & FNM_PERIOD) && | |
184 | (string == stringstart || | |
185 | ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) | |
186 | return (FNM_NOMATCH); | |
187 | ||
188 | switch (rangematch(pattern, sc, string + sclen, flags, | |
189 | &newp, &news, &patmbs, &strmbs, loc)) { | |
190 | case RANGE_ERROR: | |
191 | #if __DARWIN_UNIX03 | |
192 | return (RETURN_ERROR); | |
193 | #else /* !__DARWIN_UNIX03 */ | |
194 | goto norm; | |
195 | #endif /* __DARWIN_UNIX03 */ | |
196 | case RANGE_MATCH: | |
197 | pattern = newp; | |
198 | string = news; | |
199 | break; | |
200 | case RANGE_NOMATCH: | |
201 | return (FNM_NOMATCH); | |
202 | } | |
203 | break; | |
204 | case '\\': | |
205 | if (!(flags & FNM_NOESCAPE)) { | |
206 | pclen = mbrtowc_l(&pc, pattern, MB_LEN_MAX, | |
207 | &patmbs, loc); | |
208 | if (pclen == (size_t)-1 || pclen == (size_t)-2) | |
209 | #if __DARWIN_UNIX03 | |
210 | return (RETURN_ERROR); | |
211 | #else /* !__DARWIN_UNIX03 */ | |
212 | return (FNM_NOMATCH); | |
213 | #endif /* __DARWIN_UNIX03 */ | |
214 | if (pclen == 0) | |
215 | pc = '\\'; | |
216 | pattern += pclen; | |
217 | } | |
218 | /* FALLTHROUGH */ | |
219 | default: | |
220 | #if !__DARWIN_UNIX03 | |
221 | norm: | |
222 | #endif /* !__DARWIN_UNIX03 */ | |
223 | if (pc == sc) | |
224 | ; | |
225 | else if ((flags & FNM_CASEFOLD) && | |
226 | (towlower_l(pc, loc) == towlower_l(sc, loc))) | |
227 | ; | |
228 | else | |
229 | return (FNM_NOMATCH); | |
230 | string += sclen; | |
231 | break; | |
232 | } | |
233 | } | |
234 | /* NOTREACHED */ | |
235 | } | |
236 | ||
237 | #ifndef BUILDING_VARIANT | |
238 | __private_extern__ int | |
239 | rangematch(pattern, test, string, flags, newp, news, patmbs, strmbs, loc) | |
240 | const char *pattern, *string; | |
241 | wchar_t test; | |
242 | int flags; | |
243 | char **newp, **news; | |
244 | mbstate_t *patmbs, *strmbs; | |
245 | locale_t loc; | |
246 | { | |
247 | int negate, ok, special; | |
248 | wchar_t c, c2; | |
249 | wchar_t buf[STR_LEN]; /* STR_LEN defined in collate.h */ | |
250 | size_t pclen, sclen, len; | |
251 | const char *origpat, *cp, *savestring; | |
252 | mbstate_t save; | |
253 | ||
254 | /* | |
255 | * A bracket expression starting with an unquoted circumflex | |
256 | * character produces unspecified results (IEEE 1003.2-1992, | |
257 | * 3.13.2). This implementation treats it like '!', for | |
258 | * consistency with the regular expression syntax. | |
259 | * J.T. Conklin (conklin@ngai.kaleida.com) | |
260 | */ | |
261 | if ( (negate = (*pattern == '!' || *pattern == '^')) ) | |
262 | ++pattern; | |
263 | ||
264 | if (flags & FNM_CASEFOLD) | |
265 | test = towlower_l(test, loc); | |
266 | ||
267 | /* | |
268 | * A right bracket shall lose its special meaning and represent | |
269 | * itself in a bracket expression if it occurs first in the list. | |
270 | * -- POSIX.2 2.8.3.2 | |
271 | */ | |
272 | ok = 0; | |
273 | origpat = pattern; | |
274 | for (;;) { | |
275 | c = 0; | |
276 | if (*pattern == ']' && pattern > origpat) { | |
277 | break; | |
278 | } else if (*pattern == '\0') { | |
279 | return (RANGE_ERROR); | |
280 | } else if (*pattern == '/' && (flags & FNM_PATHNAME)) { | |
281 | pattern++; | |
282 | return (RANGE_NOMATCH); | |
283 | } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE)) | |
284 | pattern++; | |
285 | else if (*pattern == '[' && ((special = *(pattern + 1)) == '.' || special == '=' || special == ':')) { | |
286 | cp = (pattern += 2); | |
287 | while(cp = strchr(cp, special)) { | |
288 | if (*(cp + 1) == ']') | |
289 | break; | |
290 | cp++; | |
291 | } | |
292 | if (!cp) | |
293 | return (RANGE_ERROR); | |
294 | if (special == '.') { | |
295 | treat_like_collating_symbol: | |
296 | len = __collate_collating_symbol(buf, STR_LEN, pattern, cp - pattern, patmbs, loc); | |
297 | if (len == (size_t)-1 || len == 0) | |
298 | return (RANGE_ERROR); | |
299 | pattern = cp + 2; | |
300 | if (len > 1) { | |
301 | wchar_t *wp, sc; | |
302 | /* no multi-character collation symbols as start of range */ | |
303 | if (*(cp + 2) == '-' && *(cp + 3) != EOS | |
304 | && *(cp + 3) != ']') | |
305 | return (RANGE_ERROR); | |
306 | wp = buf; | |
307 | if (test != *wp++) | |
308 | continue; | |
309 | if (len == 1) { | |
310 | ok = 1; | |
311 | break; | |
312 | } | |
313 | memcpy(&save, strmbs, sizeof(save)); | |
314 | savestring = string; | |
315 | while (--len > 0) { | |
316 | sclen = mbrtowc_l(&sc, string, MB_LEN_MAX, strmbs, loc); | |
317 | if (sclen == (size_t)-1 || sclen == (size_t)-2) { | |
318 | sc = (unsigned char)*string; | |
319 | sclen = 1; | |
320 | memset(&strmbs, 0, sizeof(strmbs)); | |
321 | } | |
322 | if (sc != *wp++) { | |
323 | memcpy(strmbs, &save, sizeof(save)); | |
324 | string = savestring; | |
325 | break; | |
326 | } | |
327 | string += sclen; | |
328 | } | |
329 | if (len == 0) { | |
330 | ok = 1; | |
331 | break; | |
332 | } | |
333 | continue; /* no match */ | |
334 | } | |
335 | c = *buf; | |
336 | } else if (special == '=') { | |
337 | int ec; | |
338 | memcpy(&save, patmbs, sizeof(save)); | |
339 | ec = __collate_equiv_class(pattern, cp - pattern, patmbs, loc); | |
340 | if (ec < 0) | |
341 | return (RANGE_ERROR); | |
342 | if (ec == 0) { | |
343 | memcpy(patmbs, &save, sizeof(save)); | |
344 | goto treat_like_collating_symbol; | |
345 | } | |
346 | pattern = cp + 2; | |
347 | /* no equivalence classes as start of range */ | |
348 | if (*(cp + 2) == '-' && *(cp + 3) != EOS && | |
349 | *(cp + 3) != ']') | |
350 | return (RANGE_ERROR); | |
351 | len = __collate_equiv_match(ec, NULL, 0, test, string, strlen(string), strmbs, &sclen, loc); | |
352 | if (len < 0) | |
353 | return (RANGE_ERROR); | |
354 | if (len > 0) { | |
355 | ok = 1; | |
356 | string += sclen; | |
357 | break; | |
358 | } | |
359 | continue; | |
360 | } else { /* special == ':' */ | |
361 | wctype_t charclass; | |
362 | char name[CHARCLASS_NAME_MAX + 1]; | |
363 | /* no character classes as start of range */ | |
364 | if (*(cp + 2) == '-' && *(cp + 3) != EOS && | |
365 | *(cp + 3) != ']') | |
366 | return (RANGE_ERROR); | |
367 | /* assume character class names are ascii */ | |
368 | if (cp - pattern > CHARCLASS_NAME_MAX) | |
369 | return (RANGE_ERROR); | |
370 | strlcpy(name, pattern, cp - pattern + 1); | |
371 | pattern = cp + 2; | |
372 | if ((charclass = wctype(name)) == 0) | |
373 | return (RANGE_ERROR); | |
374 | if (iswctype_l(test, charclass, loc)) { | |
375 | ok = 1; | |
376 | break; | |
377 | } | |
378 | continue; | |
379 | } | |
380 | } | |
381 | if (!c) { | |
382 | pclen = mbrtowc_l(&c, pattern, MB_LEN_MAX, patmbs, loc); | |
383 | if (pclen == (size_t)-1 || pclen == (size_t)-2) | |
384 | return (RANGE_ERROR); | |
385 | pattern += pclen; | |
386 | } | |
387 | ||
388 | if (flags & FNM_CASEFOLD) | |
389 | c = towlower_l(c, loc); | |
390 | ||
391 | if (*pattern == '-' && *(pattern + 1) != EOS && | |
392 | *(pattern + 1) != ']') { | |
393 | if (*++pattern == '\\' && !(flags & FNM_NOESCAPE)) | |
394 | if (*pattern != EOS) | |
395 | pattern++; | |
396 | pclen = mbrtowc_l(&c2, pattern, MB_LEN_MAX, patmbs, loc); | |
397 | if (pclen == (size_t)-1 || pclen == (size_t)-2) | |
398 | return (RANGE_ERROR); | |
399 | pattern += pclen; | |
400 | if (c2 == EOS) | |
401 | return (RANGE_ERROR); | |
402 | ||
403 | if (c2 == '[' && (special = *pattern) == '.' || special == '=' || special == ':') { | |
404 | /* no equivalence classes or character classes as end of range */ | |
405 | if (special == '=' || special == ':') | |
406 | return (RANGE_ERROR); | |
407 | cp = ++pattern; | |
408 | while(cp = strchr(cp, special)) { | |
409 | if (*(cp + 1) == ']') | |
410 | break; | |
411 | cp++; | |
412 | } | |
413 | if (!cp) | |
414 | return (RANGE_ERROR); | |
415 | len = __collate_collating_symbol(buf, STR_LEN, pattern, cp - pattern, patmbs, loc); | |
416 | /* no multi-character collation symbols as end of range */ | |
417 | if (len != 1) | |
418 | return (RANGE_ERROR); | |
419 | pattern = cp + 2; | |
420 | c2 = *buf; | |
421 | } | |
422 | ||
423 | if (flags & FNM_CASEFOLD) | |
424 | c2 = towlower_l(c2, loc); | |
425 | ||
426 | if (loc->__collate_load_error ? | |
427 | c <= test && test <= c2 : | |
428 | __collate_range_cmp(c, test, loc) <= 0 | |
429 | && __collate_range_cmp(test, c2, loc) <= 0 | |
430 | ) { | |
431 | ok = 1; | |
432 | break; | |
433 | } | |
434 | } else if (c == test) { | |
435 | ok = 1; | |
436 | break; | |
437 | } | |
438 | } | |
439 | /* go to end of bracket expression */ | |
440 | special = 0; | |
441 | while(*pattern != ']') { | |
442 | if (*pattern == 0) | |
443 | return (RANGE_ERROR); | |
444 | if (*pattern == special) { | |
445 | if (*++pattern == ']') { | |
446 | special = 0; | |
447 | pattern++; | |
448 | } | |
449 | continue; | |
450 | } | |
451 | if (!special && *pattern == '[') { | |
452 | special = *++pattern; | |
453 | if (special != '.' && special != '=' && special != ':') | |
454 | special = 0; | |
455 | else | |
456 | pattern++; | |
457 | continue; | |
458 | } | |
459 | pclen = mbrtowc_l(&c, pattern, MB_LEN_MAX, patmbs, loc); | |
460 | if (pclen == (size_t)-1 || pclen == (size_t)-2) | |
461 | return (RANGE_ERROR); | |
462 | pattern += pclen; | |
463 | } | |
464 | ||
465 | *newp = (char *)++pattern; | |
466 | *news = (char *)string; | |
467 | return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH); | |
468 | } | |
469 | #endif /* BUILDING_VARIANT */ |