]> git.saurik.com Git - apple/libc.git/blob - gen/fnmatch-fbsd.c
Libc-498.1.7.tar.gz
[apple/libc.git] / gen / fnmatch-fbsd.c
1 /*
2 * Copyright (c) 1989, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Guido van Rossum.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37 #if defined(LIBC_SCCS) && !defined(lint)
38 static char sccsid[] = "@(#)fnmatch.c 8.2 (Berkeley) 4/16/94";
39 #endif /* LIBC_SCCS and not lint */
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD: src/lib/libc/gen/fnmatch.c,v 1.16 2004/07/29 03:13:10 tjr Exp $");
42
43 #include "xlocale_private.h"
44
45 /*
46 * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6.
47 * Compares a filename or pathname to a pattern.
48 */
49
50 /*
51 * Some notes on multibyte character support:
52 * 1. Patterns with illegal byte sequences match nothing.
53 * 2. Illegal byte sequences in the "string" argument are handled by treating
54 * them as single-byte characters with a value of the first byte of the
55 * sequence cast to wchar_t.
56 * 3. Multibyte conversion state objects (mbstate_t) are passed around and
57 * used for most, but not all, conversions. Further work will be required
58 * to support state-dependent encodings.
59 */
60
61 #include <fnmatch.h>
62 #include <limits.h>
63 #include <string.h>
64 #include <wchar.h>
65 #include <wctype.h>
66
67 #include "collate.h"
68
69 #define EOS '\0'
70
71 #if __DARWIN_UNIX03
72 #define RETURN_ERROR 2 /* neither 0 or FNM_NOMATCH */
73 #endif /* __DARWIN_UNIX03 */
74 #define RANGE_MATCH 1
75 #define RANGE_NOMATCH 0
76 #define RANGE_ERROR (-1)
77
78 __private_extern__ int rangematch(const char *, wchar_t, const char *, int, char **, char **, mbstate_t *, mbstate_t *, locale_t);
79 static int fnmatch1(const char *, const char *, int, mbstate_t, mbstate_t, locale_t);
80
81 int
82 fnmatch(pattern, string, flags)
83 const char *pattern, *string;
84 int flags;
85 {
86 static const mbstate_t initial;
87
88 return (fnmatch1(pattern, string, flags, initial, initial, __current_locale()));
89 }
90
91 static int
92 fnmatch1(pattern, string, flags, patmbs, strmbs, loc)
93 const char *pattern, *string;
94 int flags;
95 mbstate_t patmbs, strmbs;
96 locale_t loc;
97 {
98 const char *stringstart;
99 char *newp, *news;
100 char c;
101 wchar_t pc, sc;
102 size_t pclen, sclen;
103
104 for (stringstart = string;;) {
105 pclen = mbrtowc_l(&pc, pattern, MB_LEN_MAX, &patmbs, loc);
106 if (pclen == (size_t)-1 || pclen == (size_t)-2)
107 #if __DARWIN_UNIX03
108 return (RETURN_ERROR);
109 #else /* !__DARWIN_UNIX03 */
110 return (FNM_NOMATCH);
111 #endif /* __DARWIN_UNIX03 */
112 pattern += pclen;
113 sclen = mbrtowc_l(&sc, string, MB_LEN_MAX, &strmbs, loc);
114 if (sclen == (size_t)-1 || sclen == (size_t)-2) {
115 sc = (unsigned char)*string;
116 sclen = 1;
117 memset(&strmbs, 0, sizeof(strmbs));
118 }
119 switch (pc) {
120 case EOS:
121 if ((flags & FNM_LEADING_DIR) && sc == '/')
122 return (0);
123 return (sc == EOS ? 0 : FNM_NOMATCH);
124 case '?':
125 if (sc == EOS)
126 return (FNM_NOMATCH);
127 if (sc == '/' && (flags & FNM_PATHNAME))
128 return (FNM_NOMATCH);
129 if (sc == '.' && (flags & FNM_PERIOD) &&
130 (string == stringstart ||
131 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
132 return (FNM_NOMATCH);
133 string += sclen;
134 break;
135 case '*':
136 c = *pattern;
137 /* Collapse multiple stars. */
138 while (c == '*')
139 c = *++pattern;
140
141 if (sc == '.' && (flags & FNM_PERIOD) &&
142 (string == stringstart ||
143 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
144 return (FNM_NOMATCH);
145
146 /* Optimize for pattern with * at end or before /. */
147 if (c == EOS)
148 if (flags & FNM_PATHNAME)
149 return ((flags & FNM_LEADING_DIR) ||
150 strchr(string, '/') == NULL ?
151 0 : FNM_NOMATCH);
152 else
153 return (0);
154 else if (c == '/' && flags & FNM_PATHNAME) {
155 if ((string = strchr(string, '/')) == NULL)
156 return (FNM_NOMATCH);
157 break;
158 }
159
160 /* General case, use recursion. */
161 while (sc != EOS) {
162 if (!fnmatch1(pattern, string,
163 flags & ~FNM_PERIOD, patmbs, strmbs, loc))
164 return (0);
165 sclen = mbrtowc_l(&sc, string, MB_LEN_MAX,
166 &strmbs, loc);
167 if (sclen == (size_t)-1 ||
168 sclen == (size_t)-2) {
169 sc = (unsigned char)*string;
170 sclen = 1;
171 memset(&strmbs, 0, sizeof(strmbs));
172 }
173 if (sc == '/' && flags & FNM_PATHNAME)
174 break;
175 string += sclen;
176 }
177 return (FNM_NOMATCH);
178 case '[':
179 if (sc == EOS)
180 return (FNM_NOMATCH);
181 if (sc == '/' && (flags & FNM_PATHNAME))
182 return (FNM_NOMATCH);
183 if (sc == '.' && (flags & FNM_PERIOD) &&
184 (string == stringstart ||
185 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
186 return (FNM_NOMATCH);
187
188 switch (rangematch(pattern, sc, string + sclen, flags,
189 &newp, &news, &patmbs, &strmbs, loc)) {
190 case RANGE_ERROR:
191 #if __DARWIN_UNIX03
192 return (RETURN_ERROR);
193 #else /* !__DARWIN_UNIX03 */
194 goto norm;
195 #endif /* __DARWIN_UNIX03 */
196 case RANGE_MATCH:
197 pattern = newp;
198 string = news;
199 break;
200 case RANGE_NOMATCH:
201 return (FNM_NOMATCH);
202 }
203 break;
204 case '\\':
205 if (!(flags & FNM_NOESCAPE)) {
206 pclen = mbrtowc_l(&pc, pattern, MB_LEN_MAX,
207 &patmbs, loc);
208 if (pclen == (size_t)-1 || pclen == (size_t)-2)
209 #if __DARWIN_UNIX03
210 return (RETURN_ERROR);
211 #else /* !__DARWIN_UNIX03 */
212 return (FNM_NOMATCH);
213 #endif /* __DARWIN_UNIX03 */
214 if (pclen == 0)
215 pc = '\\';
216 pattern += pclen;
217 }
218 /* FALLTHROUGH */
219 default:
220 #if !__DARWIN_UNIX03
221 norm:
222 #endif /* !__DARWIN_UNIX03 */
223 if (pc == sc)
224 ;
225 else if ((flags & FNM_CASEFOLD) &&
226 (towlower_l(pc, loc) == towlower_l(sc, loc)))
227 ;
228 else
229 return (FNM_NOMATCH);
230 string += sclen;
231 break;
232 }
233 }
234 /* NOTREACHED */
235 }
236
237 #ifndef BUILDING_VARIANT
238 __private_extern__ int
239 rangematch(pattern, test, string, flags, newp, news, patmbs, strmbs, loc)
240 const char *pattern, *string;
241 wchar_t test;
242 int flags;
243 char **newp, **news;
244 mbstate_t *patmbs, *strmbs;
245 locale_t loc;
246 {
247 int negate, ok, special;
248 wchar_t c, c2;
249 wchar_t buf[STR_LEN]; /* STR_LEN defined in collate.h */
250 size_t pclen, sclen, len;
251 const char *origpat, *cp, *savestring;
252 mbstate_t save;
253
254 /*
255 * A bracket expression starting with an unquoted circumflex
256 * character produces unspecified results (IEEE 1003.2-1992,
257 * 3.13.2). This implementation treats it like '!', for
258 * consistency with the regular expression syntax.
259 * J.T. Conklin (conklin@ngai.kaleida.com)
260 */
261 if ( (negate = (*pattern == '!' || *pattern == '^')) )
262 ++pattern;
263
264 if (flags & FNM_CASEFOLD)
265 test = towlower_l(test, loc);
266
267 /*
268 * A right bracket shall lose its special meaning and represent
269 * itself in a bracket expression if it occurs first in the list.
270 * -- POSIX.2 2.8.3.2
271 */
272 ok = 0;
273 origpat = pattern;
274 for (;;) {
275 c = 0;
276 if (*pattern == ']' && pattern > origpat) {
277 break;
278 } else if (*pattern == '\0') {
279 return (RANGE_ERROR);
280 } else if (*pattern == '/' && (flags & FNM_PATHNAME)) {
281 pattern++;
282 return (RANGE_NOMATCH);
283 } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE))
284 pattern++;
285 else if (*pattern == '[' && ((special = *(pattern + 1)) == '.' || special == '=' || special == ':')) {
286 cp = (pattern += 2);
287 while(cp = strchr(cp, special)) {
288 if (*(cp + 1) == ']')
289 break;
290 cp++;
291 }
292 if (!cp)
293 return (RANGE_ERROR);
294 if (special == '.') {
295 treat_like_collating_symbol:
296 len = __collate_collating_symbol(buf, STR_LEN, pattern, cp - pattern, patmbs, loc);
297 if (len == (size_t)-1 || len == 0)
298 return (RANGE_ERROR);
299 pattern = cp + 2;
300 if (len > 1) {
301 wchar_t *wp, sc;
302 /* no multi-character collation symbols as start of range */
303 if (*(cp + 2) == '-' && *(cp + 3) != EOS
304 && *(cp + 3) != ']')
305 return (RANGE_ERROR);
306 wp = buf;
307 if (test != *wp++)
308 continue;
309 if (len == 1) {
310 ok = 1;
311 break;
312 }
313 memcpy(&save, strmbs, sizeof(save));
314 savestring = string;
315 while (--len > 0) {
316 sclen = mbrtowc_l(&sc, string, MB_LEN_MAX, strmbs, loc);
317 if (sclen == (size_t)-1 || sclen == (size_t)-2) {
318 sc = (unsigned char)*string;
319 sclen = 1;
320 memset(&strmbs, 0, sizeof(strmbs));
321 }
322 if (sc != *wp++) {
323 memcpy(strmbs, &save, sizeof(save));
324 string = savestring;
325 break;
326 }
327 string += sclen;
328 }
329 if (len == 0) {
330 ok = 1;
331 break;
332 }
333 continue; /* no match */
334 }
335 c = *buf;
336 } else if (special == '=') {
337 int ec;
338 memcpy(&save, patmbs, sizeof(save));
339 ec = __collate_equiv_class(pattern, cp - pattern, patmbs, loc);
340 if (ec < 0)
341 return (RANGE_ERROR);
342 if (ec == 0) {
343 memcpy(patmbs, &save, sizeof(save));
344 goto treat_like_collating_symbol;
345 }
346 pattern = cp + 2;
347 /* no equivalence classes as start of range */
348 if (*(cp + 2) == '-' && *(cp + 3) != EOS &&
349 *(cp + 3) != ']')
350 return (RANGE_ERROR);
351 len = __collate_equiv_match(ec, NULL, 0, test, string, strlen(string), strmbs, &sclen, loc);
352 if (len < 0)
353 return (RANGE_ERROR);
354 if (len > 0) {
355 ok = 1;
356 string += sclen;
357 break;
358 }
359 continue;
360 } else { /* special == ':' */
361 wctype_t charclass;
362 char name[CHARCLASS_NAME_MAX + 1];
363 /* no character classes as start of range */
364 if (*(cp + 2) == '-' && *(cp + 3) != EOS &&
365 *(cp + 3) != ']')
366 return (RANGE_ERROR);
367 /* assume character class names are ascii */
368 if (cp - pattern > CHARCLASS_NAME_MAX)
369 return (RANGE_ERROR);
370 strlcpy(name, pattern, cp - pattern + 1);
371 pattern = cp + 2;
372 if ((charclass = wctype(name)) == 0)
373 return (RANGE_ERROR);
374 if (iswctype_l(test, charclass, loc)) {
375 ok = 1;
376 break;
377 }
378 continue;
379 }
380 }
381 if (!c) {
382 pclen = mbrtowc_l(&c, pattern, MB_LEN_MAX, patmbs, loc);
383 if (pclen == (size_t)-1 || pclen == (size_t)-2)
384 return (RANGE_ERROR);
385 pattern += pclen;
386 }
387
388 if (flags & FNM_CASEFOLD)
389 c = towlower_l(c, loc);
390
391 if (*pattern == '-' && *(pattern + 1) != EOS &&
392 *(pattern + 1) != ']') {
393 if (*++pattern == '\\' && !(flags & FNM_NOESCAPE))
394 if (*pattern != EOS)
395 pattern++;
396 pclen = mbrtowc_l(&c2, pattern, MB_LEN_MAX, patmbs, loc);
397 if (pclen == (size_t)-1 || pclen == (size_t)-2)
398 return (RANGE_ERROR);
399 pattern += pclen;
400 if (c2 == EOS)
401 return (RANGE_ERROR);
402
403 if (c2 == '[' && (special = *pattern) == '.' || special == '=' || special == ':') {
404 /* no equivalence classes or character classes as end of range */
405 if (special == '=' || special == ':')
406 return (RANGE_ERROR);
407 cp = ++pattern;
408 while(cp = strchr(cp, special)) {
409 if (*(cp + 1) == ']')
410 break;
411 cp++;
412 }
413 if (!cp)
414 return (RANGE_ERROR);
415 len = __collate_collating_symbol(buf, STR_LEN, pattern, cp - pattern, patmbs, loc);
416 /* no multi-character collation symbols as end of range */
417 if (len != 1)
418 return (RANGE_ERROR);
419 pattern = cp + 2;
420 c2 = *buf;
421 }
422
423 if (flags & FNM_CASEFOLD)
424 c2 = towlower_l(c2, loc);
425
426 if (loc->__collate_load_error ?
427 c <= test && test <= c2 :
428 __collate_range_cmp(c, test, loc) <= 0
429 && __collate_range_cmp(test, c2, loc) <= 0
430 ) {
431 ok = 1;
432 break;
433 }
434 } else if (c == test) {
435 ok = 1;
436 break;
437 }
438 }
439 /* go to end of bracket expression */
440 special = 0;
441 while(*pattern != ']') {
442 if (*pattern == 0)
443 return (RANGE_ERROR);
444 if (*pattern == special) {
445 if (*++pattern == ']') {
446 special = 0;
447 pattern++;
448 }
449 continue;
450 }
451 if (!special && *pattern == '[') {
452 special = *++pattern;
453 if (special != '.' && special != '=' && special != ':')
454 special = 0;
455 else
456 pattern++;
457 continue;
458 }
459 pclen = mbrtowc_l(&c, pattern, MB_LEN_MAX, patmbs, loc);
460 if (pclen == (size_t)-1 || pclen == (size_t)-2)
461 return (RANGE_ERROR);
462 pattern += pclen;
463 }
464
465 *newp = (char *)++pattern;
466 *news = (char *)string;
467 return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
468 }
469 #endif /* BUILDING_VARIANT */