]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 1989, 1993, 1994 | |
3 | * The Regents of the University of California. All rights reserved. | |
4 | * | |
5 | * This code is derived from software contributed to Berkeley by | |
6 | * Guido van Rossum. | |
7 | * | |
8 | * Redistribution and use in source and binary forms, with or without | |
9 | * modification, are permitted provided that the following conditions | |
10 | * are met: | |
11 | * 1. Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * 2. Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in the | |
15 | * documentation and/or other materials provided with the distribution. | |
16 | * 4. Neither the name of the University nor the names of its contributors | |
17 | * may be used to endorse or promote products derived from this software | |
18 | * without specific prior written permission. | |
19 | * | |
20 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
21 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
23 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
26 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
27 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
28 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
29 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
30 | * SUCH DAMAGE. | |
31 | */ | |
32 | ||
33 | #if defined(LIBC_SCCS) && !defined(lint) | |
34 | static char sccsid[] = "@(#)fnmatch.c 8.2 (Berkeley) 4/16/94"; | |
35 | #endif /* LIBC_SCCS and not lint */ | |
36 | #include <sys/cdefs.h> | |
37 | __FBSDID("$FreeBSD: src/lib/libc/gen/fnmatch.c,v 1.19 2010/04/16 22:29:24 jilles Exp $"); | |
38 | ||
39 | #include "xlocale_private.h" | |
40 | ||
41 | /* | |
42 | * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6. | |
43 | * Compares a filename or pathname to a pattern. | |
44 | */ | |
45 | ||
46 | /* | |
47 | * Some notes on multibyte character support: | |
48 | * 1. Patterns with illegal byte sequences match nothing. | |
49 | * 2. Illegal byte sequences in the "string" argument are handled by treating | |
50 | * them as single-byte characters with a value of the first byte of the | |
51 | * sequence cast to wchar_t. | |
52 | * 3. Multibyte conversion state objects (mbstate_t) are passed around and | |
53 | * used for most, but not all, conversions. Further work will be required | |
54 | * to support state-dependent encodings. | |
55 | */ | |
56 | ||
57 | #include <fnmatch.h> | |
58 | #include <limits.h> | |
59 | #include <string.h> | |
60 | #include <wchar.h> | |
61 | #include <wctype.h> | |
62 | ||
63 | #include "collate.h" | |
64 | ||
65 | #define EOS '\0' | |
66 | ||
67 | #define RETURN_ERROR 2 /* neither 0 or FNM_NOMATCH */ | |
68 | #define RANGE_MATCH 1 | |
69 | #define RANGE_NOMATCH 0 | |
70 | #define RANGE_ERROR (-1) | |
71 | ||
72 | #define RECURSION_MAX 64 | |
73 | ||
74 | __private_extern__ int rangematch(const char *, wchar_t, const char *, int, char **, char **, mbstate_t *, mbstate_t *, locale_t); | |
75 | static int fnmatch1(const char *, const char *, const char *, int, mbstate_t, | |
76 | mbstate_t, locale_t, int); | |
77 | ||
78 | int | |
79 | fnmatch(pattern, string, flags) | |
80 | const char *pattern, *string; | |
81 | int flags; | |
82 | { | |
83 | static const mbstate_t initial; | |
84 | #if __DARWIN_UNIX03 | |
85 | return (fnmatch1(pattern, string, string, flags, initial, initial, __current_locale(), RECURSION_MAX)); | |
86 | #else /* !__DARWIN_UNIX03 */ | |
87 | return (fnmatch1(pattern, string, string, flags, initial, initial, __current_locale(), RECURSION_MAX) != 0 ? FNM_NOMATCH : 0); | |
88 | #endif /* __DARWIN_UNIX03 */ | |
89 | } | |
90 | ||
91 | static int | |
92 | fnmatch1(pattern, string, stringstart, flags, patmbs, strmbs, loc, recursion) | |
93 | const char *pattern, *string, *stringstart; | |
94 | int flags; | |
95 | mbstate_t patmbs, strmbs; | |
96 | locale_t loc; | |
97 | int recursion; | |
98 | { | |
99 | char *newp, *news; | |
100 | char c; | |
101 | wchar_t pc, sc; | |
102 | size_t pclen, sclen; | |
103 | ||
104 | if (recursion-- <= 0) | |
105 | return RETURN_ERROR; | |
106 | for (;;) { | |
107 | pclen = mbrtowc_l(&pc, pattern, MB_LEN_MAX, &patmbs, loc); | |
108 | if (pclen == (size_t)-1 || pclen == (size_t)-2) | |
109 | #if __DARWIN_UNIX03 | |
110 | return (RETURN_ERROR); | |
111 | #else /* !__DARWIN_UNIX03 */ | |
112 | return (FNM_NOMATCH); | |
113 | #endif /* __DARWIN_UNIX03 */ | |
114 | pattern += pclen; | |
115 | sclen = mbrtowc_l(&sc, string, MB_LEN_MAX, &strmbs, loc); | |
116 | if (sclen == (size_t)-1 || sclen == (size_t)-2) { | |
117 | sc = (unsigned char)*string; | |
118 | sclen = 1; | |
119 | memset(&strmbs, 0, sizeof(strmbs)); | |
120 | } | |
121 | switch (pc) { | |
122 | case EOS: | |
123 | if ((flags & FNM_LEADING_DIR) && sc == '/') | |
124 | return (0); | |
125 | return (sc == EOS ? 0 : FNM_NOMATCH); | |
126 | case '?': | |
127 | if (sc == EOS) | |
128 | return (FNM_NOMATCH); | |
129 | if (sc == '/' && (flags & FNM_PATHNAME)) | |
130 | return (FNM_NOMATCH); | |
131 | if (sc == '.' && (flags & FNM_PERIOD) && | |
132 | (string == stringstart || | |
133 | ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) | |
134 | return (FNM_NOMATCH); | |
135 | string += sclen; | |
136 | break; | |
137 | case '*': | |
138 | c = *pattern; | |
139 | /* Collapse multiple stars. */ | |
140 | while (c == '*') | |
141 | c = *++pattern; | |
142 | ||
143 | if (sc == '.' && (flags & FNM_PERIOD) && | |
144 | (string == stringstart || | |
145 | ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) | |
146 | return (FNM_NOMATCH); | |
147 | ||
148 | /* Optimize for pattern with * at end or before /. */ | |
149 | if (c == EOS) | |
150 | if (flags & FNM_PATHNAME) | |
151 | return ((flags & FNM_LEADING_DIR) || | |
152 | strchr(string, '/') == NULL ? | |
153 | 0 : FNM_NOMATCH); | |
154 | else | |
155 | return (0); | |
156 | else if (c == '/' && flags & FNM_PATHNAME) { | |
157 | if ((string = strchr(string, '/')) == NULL) | |
158 | return (FNM_NOMATCH); | |
159 | break; | |
160 | } | |
161 | ||
162 | /* General case, use recursion. */ | |
163 | int ret; | |
164 | while (sc != EOS) { | |
165 | if ((ret = fnmatch1(pattern, string, stringstart, | |
166 | flags, patmbs, strmbs, loc, recursion)) != FNM_NOMATCH) | |
167 | return (ret); | |
168 | sclen = mbrtowc_l(&sc, string, MB_LEN_MAX, | |
169 | &strmbs, loc); | |
170 | if (sclen == (size_t)-1 || | |
171 | sclen == (size_t)-2) { | |
172 | sc = (unsigned char)*string; | |
173 | sclen = 1; | |
174 | memset(&strmbs, 0, sizeof(strmbs)); | |
175 | } | |
176 | if (sc == '/' && flags & FNM_PATHNAME) | |
177 | break; | |
178 | string += sclen; | |
179 | } | |
180 | return (FNM_NOMATCH); | |
181 | case '[': | |
182 | if (sc == EOS) | |
183 | return (FNM_NOMATCH); | |
184 | if (sc == '/' && (flags & FNM_PATHNAME)) | |
185 | return (FNM_NOMATCH); | |
186 | if (sc == '.' && (flags & FNM_PERIOD) && | |
187 | (string == stringstart || | |
188 | ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) | |
189 | return (FNM_NOMATCH); | |
190 | ||
191 | switch (rangematch(pattern, sc, string + sclen, flags, | |
192 | &newp, &news, &patmbs, &strmbs, loc)) { | |
193 | case RANGE_ERROR: | |
194 | #if __DARWIN_UNIX03 | |
195 | return (RETURN_ERROR); | |
196 | #else /* !__DARWIN_UNIX03 */ | |
197 | goto norm; | |
198 | #endif /* __DARWIN_UNIX03 */ | |
199 | case RANGE_MATCH: | |
200 | pattern = newp; | |
201 | string = news; | |
202 | break; | |
203 | case RANGE_NOMATCH: | |
204 | return (FNM_NOMATCH); | |
205 | } | |
206 | break; | |
207 | case '\\': | |
208 | if (!(flags & FNM_NOESCAPE)) { | |
209 | pclen = mbrtowc_l(&pc, pattern, MB_LEN_MAX, | |
210 | &patmbs, loc); | |
211 | if (pclen == (size_t)-1 || pclen == (size_t)-2) | |
212 | #if __DARWIN_UNIX03 | |
213 | return (RETURN_ERROR); | |
214 | #else /* !__DARWIN_UNIX03 */ | |
215 | return (FNM_NOMATCH); | |
216 | #endif /* __DARWIN_UNIX03 */ | |
217 | if (pclen == 0) | |
218 | pc = '\\'; | |
219 | pattern += pclen; | |
220 | } | |
221 | /* FALLTHROUGH */ | |
222 | default: | |
223 | #if !__DARWIN_UNIX03 | |
224 | norm: | |
225 | #endif /* !__DARWIN_UNIX03 */ | |
226 | if (pc == sc) | |
227 | ; | |
228 | else if ((flags & FNM_CASEFOLD) && | |
229 | (towlower_l(pc, loc) == towlower_l(sc, loc))) | |
230 | ; | |
231 | else | |
232 | return (FNM_NOMATCH); | |
233 | string += sclen; | |
234 | break; | |
235 | } | |
236 | } | |
237 | /* NOTREACHED */ | |
238 | } | |
239 | ||
240 | #ifndef BUILDING_VARIANT | |
241 | __private_extern__ int | |
242 | rangematch(pattern, test, string, flags, newp, news, patmbs, strmbs, loc) | |
243 | const char *pattern, *string; | |
244 | wchar_t test; | |
245 | int flags; | |
246 | char **newp, **news; | |
247 | mbstate_t *patmbs, *strmbs; | |
248 | locale_t loc; | |
249 | { | |
250 | int negate, ok, special; | |
251 | wchar_t c, c2; | |
252 | wchar_t buf[STR_LEN]; /* STR_LEN defined in collate.h */ | |
253 | size_t pclen, sclen, len; | |
254 | const char *origpat, *cp, *savestring; | |
255 | mbstate_t save; | |
256 | ||
257 | /* | |
258 | * A bracket expression starting with an unquoted circumflex | |
259 | * character produces unspecified results (IEEE 1003.2-1992, | |
260 | * 3.13.2). This implementation treats it like '!', for | |
261 | * consistency with the regular expression syntax. | |
262 | * J.T. Conklin (conklin@ngai.kaleida.com) | |
263 | */ | |
264 | if ( (negate = (*pattern == '!' || *pattern == '^')) ) | |
265 | ++pattern; | |
266 | ||
267 | if (flags & FNM_CASEFOLD) | |
268 | test = towlower_l(test, loc); | |
269 | ||
270 | /* | |
271 | * A right bracket shall lose its special meaning and represent | |
272 | * itself in a bracket expression if it occurs first in the list. | |
273 | * -- POSIX.2 2.8.3.2 | |
274 | */ | |
275 | ok = 0; | |
276 | origpat = pattern; | |
277 | for (;;) { | |
278 | c = 0; | |
279 | if (*pattern == ']' && pattern > origpat) { | |
280 | break; | |
281 | } else if (*pattern == '\0') { | |
282 | return (RANGE_ERROR); | |
283 | } else if (*pattern == '/' && (flags & FNM_PATHNAME)) { | |
284 | return (RANGE_NOMATCH); | |
285 | } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE)) | |
286 | pattern++; | |
287 | else if (*pattern == '[' && ((special = *(pattern + 1)) == '.' || special == '=' || special == ':')) { | |
288 | cp = (pattern += 2); | |
289 | while((cp = strchr(cp, special))) { | |
290 | if (*(cp + 1) == ']') | |
291 | break; | |
292 | cp++; | |
293 | } | |
294 | if (!cp) | |
295 | return (RANGE_ERROR); | |
296 | if (special == '.') { | |
297 | treat_like_collating_symbol: | |
298 | len = __collate_collating_symbol(buf, STR_LEN, pattern, cp - pattern, patmbs, loc); | |
299 | if (len == (size_t)-1 || len == 0) | |
300 | return (RANGE_ERROR); | |
301 | pattern = cp + 2; | |
302 | if (len > 1) { | |
303 | wchar_t *wp, sc; | |
304 | /* no multi-character collation symbols as start of range */ | |
305 | if (*(cp + 2) == '-' && *(cp + 3) != EOS | |
306 | && *(cp + 3) != ']') | |
307 | return (RANGE_ERROR); | |
308 | wp = buf; | |
309 | if (test != *wp++) | |
310 | continue; | |
311 | if (len == 1) { | |
312 | ok = 1; | |
313 | break; | |
314 | } | |
315 | memcpy(&save, strmbs, sizeof(save)); | |
316 | savestring = string; | |
317 | while (--len > 0) { | |
318 | sclen = mbrtowc_l(&sc, string, MB_LEN_MAX, strmbs, loc); | |
319 | if (sclen == (size_t)-1 || sclen == (size_t)-2) { | |
320 | sc = (unsigned char)*string; | |
321 | sclen = 1; | |
322 | memset(&strmbs, 0, sizeof(strmbs)); | |
323 | } | |
324 | if (sc != *wp++) { | |
325 | memcpy(strmbs, &save, sizeof(save)); | |
326 | string = savestring; | |
327 | break; | |
328 | } | |
329 | string += sclen; | |
330 | } | |
331 | if (len == 0) { | |
332 | ok = 1; | |
333 | break; | |
334 | } | |
335 | continue; /* no match */ | |
336 | } | |
337 | c = *buf; | |
338 | } else if (special == '=') { | |
339 | int ec; | |
340 | memcpy(&save, patmbs, sizeof(save)); | |
341 | ec = __collate_equiv_class(pattern, cp - pattern, patmbs, loc); | |
342 | if (ec < 0) | |
343 | return (RANGE_ERROR); | |
344 | if (ec == 0) { | |
345 | memcpy(patmbs, &save, sizeof(save)); | |
346 | goto treat_like_collating_symbol; | |
347 | } | |
348 | pattern = cp + 2; | |
349 | /* no equivalence classes as start of range */ | |
350 | if (*(cp + 2) == '-' && *(cp + 3) != EOS && | |
351 | *(cp + 3) != ']') | |
352 | return (RANGE_ERROR); | |
353 | len = __collate_equiv_match(ec, NULL, 0, test, string, strlen(string), strmbs, &sclen, loc); | |
354 | if (len == (size_t)-1) { | |
355 | return (RANGE_ERROR); | |
356 | } | |
357 | if (len > 0) { | |
358 | ok = 1; | |
359 | string += sclen; | |
360 | break; | |
361 | } | |
362 | continue; | |
363 | } else { /* special == ':' */ | |
364 | wctype_t charclass; | |
365 | char name[CHARCLASS_NAME_MAX + 1]; | |
366 | /* no character classes as start of range */ | |
367 | if (*(cp + 2) == '-' && *(cp + 3) != EOS && | |
368 | *(cp + 3) != ']') | |
369 | return (RANGE_ERROR); | |
370 | /* assume character class names are ascii */ | |
371 | if (cp - pattern > CHARCLASS_NAME_MAX) | |
372 | return (RANGE_ERROR); | |
373 | strlcpy(name, pattern, cp - pattern + 1); | |
374 | pattern = cp + 2; | |
375 | if ((charclass = wctype(name)) == 0) | |
376 | return (RANGE_ERROR); | |
377 | if (iswctype_l(test, charclass, loc)) { | |
378 | ok = 1; | |
379 | break; | |
380 | } | |
381 | continue; | |
382 | } | |
383 | } | |
384 | if (!c) { | |
385 | pclen = mbrtowc_l(&c, pattern, MB_LEN_MAX, patmbs, loc); | |
386 | if (pclen == (size_t)-1 || pclen == (size_t)-2) | |
387 | return (RANGE_ERROR); | |
388 | pattern += pclen; | |
389 | } | |
390 | ||
391 | if (flags & FNM_CASEFOLD) | |
392 | c = towlower_l(c, loc); | |
393 | ||
394 | if (*pattern == '-' && *(pattern + 1) != EOS && | |
395 | *(pattern + 1) != ']') { | |
396 | if (*++pattern == '\\' && !(flags & FNM_NOESCAPE)) | |
397 | if (*pattern != EOS) | |
398 | pattern++; | |
399 | pclen = mbrtowc_l(&c2, pattern, MB_LEN_MAX, patmbs, loc); | |
400 | if (pclen == (size_t)-1 || pclen == (size_t)-2) | |
401 | return (RANGE_ERROR); | |
402 | pattern += pclen; | |
403 | if (c2 == EOS) | |
404 | return (RANGE_ERROR); | |
405 | ||
406 | if ((c2 == '[' && (special = *pattern) == '.') || special == '=' || special == ':') { | |
407 | /* no equivalence classes or character classes as end of range */ | |
408 | if (special == '=' || special == ':') | |
409 | return (RANGE_ERROR); | |
410 | cp = ++pattern; | |
411 | while((cp = strchr(cp, special))) { | |
412 | if (*(cp + 1) == ']') | |
413 | break; | |
414 | cp++; | |
415 | } | |
416 | if (!cp) | |
417 | return (RANGE_ERROR); | |
418 | len = __collate_collating_symbol(buf, STR_LEN, pattern, cp - pattern, patmbs, loc); | |
419 | /* no multi-character collation symbols as end of range */ | |
420 | if (len != 1) | |
421 | return (RANGE_ERROR); | |
422 | pattern = cp + 2; | |
423 | c2 = *buf; | |
424 | } | |
425 | ||
426 | if (flags & FNM_CASEFOLD) | |
427 | c2 = towlower_l(c2, loc); | |
428 | ||
429 | if (loc->__collate_load_error ? | |
430 | c <= test && test <= c2 : | |
431 | __collate_range_cmp(c, test, loc) <= 0 | |
432 | && __collate_range_cmp(test, c2, loc) <= 0 | |
433 | ) { | |
434 | ok = 1; | |
435 | break; | |
436 | } | |
437 | } else if (c == test) { | |
438 | ok = 1; | |
439 | break; | |
440 | } | |
441 | } | |
442 | /* go to end of bracket expression */ | |
443 | special = 0; | |
444 | while(*pattern != ']') { | |
445 | if (*pattern == 0) | |
446 | return (RANGE_ERROR); | |
447 | if (*pattern == special) { | |
448 | if (*++pattern == ']') { | |
449 | special = 0; | |
450 | pattern++; | |
451 | } | |
452 | continue; | |
453 | } | |
454 | if (!special && *pattern == '[') { | |
455 | special = *++pattern; | |
456 | if (special != '.' && special != '=' && special != ':') | |
457 | special = 0; | |
458 | else | |
459 | pattern++; | |
460 | continue; | |
461 | } | |
462 | pclen = mbrtowc_l(&c, pattern, MB_LEN_MAX, patmbs, loc); | |
463 | if (pclen == (size_t)-1 || pclen == (size_t)-2) | |
464 | return (RANGE_ERROR); | |
465 | pattern += pclen; | |
466 | } | |
467 | ||
468 | *newp = (char *)++pattern; | |
469 | *news = (char *)string; | |
470 | return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH); | |
471 | } | |
472 | #endif /* BUILDING_VARIANT */ |