]>
Commit | Line | Data |
---|---|---|
e9ce8d39 A |
1 | /* |
2 | * Copyright (c) 1989, 1993, 1994 | |
3 | * The Regents of the University of California. All rights reserved. | |
4 | * | |
5 | * This code is derived from software contributed to Berkeley by | |
6 | * Guido van Rossum. | |
7 | * | |
8 | * Redistribution and use in source and binary forms, with or without | |
9 | * modification, are permitted provided that the following conditions | |
10 | * are met: | |
11 | * 1. Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * 2. Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in the | |
15 | * documentation and/or other materials provided with the distribution. | |
e9ce8d39 A |
16 | * 4. Neither the name of the University nor the names of its contributors |
17 | * may be used to endorse or promote products derived from this software | |
18 | * without specific prior written permission. | |
19 | * | |
20 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
21 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
23 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
26 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
27 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
28 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
29 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
30 | * SUCH DAMAGE. | |
31 | */ | |
32 | ||
5b2abdfb A |
33 | #if defined(LIBC_SCCS) && !defined(lint) |
34 | static char sccsid[] = "@(#)fnmatch.c 8.2 (Berkeley) 4/16/94"; | |
35 | #endif /* LIBC_SCCS and not lint */ | |
9385eb3d | 36 | #include <sys/cdefs.h> |
1f2f436a | 37 | __FBSDID("$FreeBSD: src/lib/libc/gen/fnmatch.c,v 1.19 2010/04/16 22:29:24 jilles Exp $"); |
e9ce8d39 A |
38 | |
39 | /* | |
40 | * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6. | |
41 | * Compares a filename or pathname to a pattern. | |
42 | */ | |
43 | ||
3d9156a7 A |
44 | /* |
45 | * Some notes on multibyte character support: | |
46 | * 1. Patterns with illegal byte sequences match nothing. | |
47 | * 2. Illegal byte sequences in the "string" argument are handled by treating | |
48 | * them as single-byte characters with a value of the first byte of the | |
49 | * sequence cast to wchar_t. | |
50 | * 3. Multibyte conversion state objects (mbstate_t) are passed around and | |
51 | * used for most, but not all, conversions. Further work will be required | |
52 | * to support state-dependent encodings. | |
53 | */ | |
54 | ||
e9ce8d39 | 55 | #include <fnmatch.h> |
3d9156a7 | 56 | #include <limits.h> |
e9ce8d39 | 57 | #include <string.h> |
3d9156a7 A |
58 | #include <wchar.h> |
59 | #include <wctype.h> | |
5b2abdfb A |
60 | |
61 | #include "collate.h" | |
e9ce8d39 A |
62 | |
/* String terminator, spelled out for use in character comparisons. */
#define	EOS	'\0'

/* Result codes returned by rangematch(). */
#define	RANGE_MATCH	1	/* bracket expression accepts the character */
#define	RANGE_NOMATCH	0	/* bracket expression rejects the character */
#define	RANGE_ERROR	(-1)	/* no complete bracket expression was found */

/*
 * File-local helpers.  Parameter names are kept in the prototypes so the
 * argument order can be read without looking up the definitions.
 */
static int rangematch(const char *pattern, wchar_t test, int flags,
    char **newp, mbstate_t *patmbs);
static int fnmatch1(const char *pattern, const char *string,
    const char *stringstart, int flags, mbstate_t patmbs, mbstate_t strmbs);
e9ce8d39 A |
72 | |
/*
 * Match `string' against the shell-style pattern `pattern'.
 *
 * Both arguments are decoded as multibyte strings, each starting in the
 * initial conversion state.  `flags' is a bitwise OR of FNM_* options.
 *
 * Returns 0 when the string matches and FNM_NOMATCH when it does not.
 */
int
fnmatch(const char *pattern, const char *string, int flags)
{
	/* Static storage is zero-filled, i.e. the initial conversion state. */
	static const mbstate_t initial;

	/* The subject is passed twice: as the cursor and as its own start. */
	return (fnmatch1(pattern, string, string, flags, initial, initial));
}
82 | ||
83 | static int | |
1f2f436a A |
84 | fnmatch1(pattern, string, stringstart, flags, patmbs, strmbs) |
85 | const char *pattern, *string, *stringstart; | |
3d9156a7 A |
86 | int flags; |
87 | mbstate_t patmbs, strmbs; | |
e9ce8d39 | 88 | { |
5b2abdfb | 89 | char *newp; |
3d9156a7 A |
90 | char c; |
91 | wchar_t pc, sc; | |
92 | size_t pclen, sclen; | |
e9ce8d39 | 93 | |
1f2f436a | 94 | for (;;) { |
3d9156a7 A |
95 | pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs); |
96 | if (pclen == (size_t)-1 || pclen == (size_t)-2) | |
97 | return (FNM_NOMATCH); | |
98 | pattern += pclen; | |
99 | sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs); | |
100 | if (sclen == (size_t)-1 || sclen == (size_t)-2) { | |
101 | sc = (unsigned char)*string; | |
102 | sclen = 1; | |
103 | memset(&strmbs, 0, sizeof(strmbs)); | |
104 | } | |
105 | switch (pc) { | |
e9ce8d39 | 106 | case EOS: |
3d9156a7 | 107 | if ((flags & FNM_LEADING_DIR) && sc == '/') |
5b2abdfb | 108 | return (0); |
3d9156a7 | 109 | return (sc == EOS ? 0 : FNM_NOMATCH); |
e9ce8d39 | 110 | case '?': |
3d9156a7 | 111 | if (sc == EOS) |
e9ce8d39 | 112 | return (FNM_NOMATCH); |
3d9156a7 | 113 | if (sc == '/' && (flags & FNM_PATHNAME)) |
e9ce8d39 | 114 | return (FNM_NOMATCH); |
3d9156a7 | 115 | if (sc == '.' && (flags & FNM_PERIOD) && |
e9ce8d39 A |
116 | (string == stringstart || |
117 | ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) | |
118 | return (FNM_NOMATCH); | |
3d9156a7 | 119 | string += sclen; |
e9ce8d39 A |
120 | break; |
121 | case '*': | |
122 | c = *pattern; | |
123 | /* Collapse multiple stars. */ | |
124 | while (c == '*') | |
125 | c = *++pattern; | |
126 | ||
3d9156a7 | 127 | if (sc == '.' && (flags & FNM_PERIOD) && |
e9ce8d39 A |
128 | (string == stringstart || |
129 | ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) | |
130 | return (FNM_NOMATCH); | |
131 | ||
132 | /* Optimize for pattern with * at end or before /. */ | |
133 | if (c == EOS) | |
134 | if (flags & FNM_PATHNAME) | |
5b2abdfb A |
135 | return ((flags & FNM_LEADING_DIR) || |
136 | strchr(string, '/') == NULL ? | |
e9ce8d39 A |
137 | 0 : FNM_NOMATCH); |
138 | else | |
139 | return (0); | |
140 | else if (c == '/' && flags & FNM_PATHNAME) { | |
141 | if ((string = strchr(string, '/')) == NULL) | |
142 | return (FNM_NOMATCH); | |
143 | break; | |
144 | } | |
145 | ||
146 | /* General case, use recursion. */ | |
3d9156a7 | 147 | while (sc != EOS) { |
1f2f436a A |
148 | if (!fnmatch1(pattern, string, stringstart, |
149 | flags, patmbs, strmbs)) | |
e9ce8d39 | 150 | return (0); |
3d9156a7 A |
151 | sclen = mbrtowc(&sc, string, MB_LEN_MAX, |
152 | &strmbs); | |
153 | if (sclen == (size_t)-1 || | |
154 | sclen == (size_t)-2) { | |
155 | sc = (unsigned char)*string; | |
156 | sclen = 1; | |
157 | memset(&strmbs, 0, sizeof(strmbs)); | |
158 | } | |
159 | if (sc == '/' && flags & FNM_PATHNAME) | |
e9ce8d39 | 160 | break; |
3d9156a7 | 161 | string += sclen; |
e9ce8d39 A |
162 | } |
163 | return (FNM_NOMATCH); | |
164 | case '[': | |
3d9156a7 | 165 | if (sc == EOS) |
e9ce8d39 | 166 | return (FNM_NOMATCH); |
3d9156a7 | 167 | if (sc == '/' && (flags & FNM_PATHNAME)) |
5b2abdfb | 168 | return (FNM_NOMATCH); |
3d9156a7 | 169 | if (sc == '.' && (flags & FNM_PERIOD) && |
5b2abdfb A |
170 | (string == stringstart || |
171 | ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) | |
e9ce8d39 | 172 | return (FNM_NOMATCH); |
5b2abdfb | 173 | |
3d9156a7 A |
174 | switch (rangematch(pattern, sc, flags, &newp, |
175 | &patmbs)) { | |
5b2abdfb A |
176 | case RANGE_ERROR: |
177 | goto norm; | |
178 | case RANGE_MATCH: | |
179 | pattern = newp; | |
180 | break; | |
181 | case RANGE_NOMATCH: | |
e9ce8d39 | 182 | return (FNM_NOMATCH); |
5b2abdfb | 183 | } |
3d9156a7 | 184 | string += sclen; |
e9ce8d39 A |
185 | break; |
186 | case '\\': | |
187 | if (!(flags & FNM_NOESCAPE)) { | |
3d9156a7 A |
188 | pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, |
189 | &patmbs); | |
190 | if (pclen == (size_t)-1 || pclen == (size_t)-2) | |
191 | return (FNM_NOMATCH); | |
192 | if (pclen == 0) | |
193 | pc = '\\'; | |
194 | pattern += pclen; | |
e9ce8d39 A |
195 | } |
196 | /* FALLTHROUGH */ | |
197 | default: | |
5b2abdfb | 198 | norm: |
3d9156a7 | 199 | if (pc == sc) |
5b2abdfb A |
200 | ; |
201 | else if ((flags & FNM_CASEFOLD) && | |
3d9156a7 | 202 | (towlower(pc) == towlower(sc))) |
5b2abdfb A |
203 | ; |
204 | else | |
e9ce8d39 | 205 | return (FNM_NOMATCH); |
3d9156a7 | 206 | string += sclen; |
e9ce8d39 A |
207 | break; |
208 | } | |
3d9156a7 | 209 | } |
e9ce8d39 A |
210 | /* NOTREACHED */ |
211 | } | |
212 | ||
5b2abdfb | 213 | static int |
3d9156a7 | 214 | rangematch(pattern, test, flags, newp, patmbs) |
e9ce8d39 | 215 | const char *pattern; |
3d9156a7 | 216 | wchar_t test; |
5b2abdfb A |
217 | int flags; |
218 | char **newp; | |
3d9156a7 | 219 | mbstate_t *patmbs; |
e9ce8d39 A |
220 | { |
221 | int negate, ok; | |
3d9156a7 A |
222 | wchar_t c, c2; |
223 | size_t pclen; | |
224 | const char *origpat; | |
e9ce8d39 A |
225 | |
226 | /* | |
227 | * A bracket expression starting with an unquoted circumflex | |
228 | * character produces unspecified results (IEEE 1003.2-1992, | |
229 | * 3.13.2). This implementation treats it like '!', for | |
230 | * consistency with the regular expression syntax. | |
231 | * J.T. Conklin (conklin@ngai.kaleida.com) | |
232 | */ | |
5b2abdfb | 233 | if ( (negate = (*pattern == '!' || *pattern == '^')) ) |
e9ce8d39 | 234 | ++pattern; |
5b2abdfb A |
235 | |
236 | if (flags & FNM_CASEFOLD) | |
3d9156a7 | 237 | test = towlower(test); |
5b2abdfb A |
238 | |
239 | /* | |
240 | * A right bracket shall lose its special meaning and represent | |
241 | * itself in a bracket expression if it occurs first in the list. | |
242 | * -- POSIX.2 2.8.3.2 | |
243 | */ | |
244 | ok = 0; | |
3d9156a7 A |
245 | origpat = pattern; |
246 | for (;;) { | |
247 | if (*pattern == ']' && pattern > origpat) { | |
248 | pattern++; | |
249 | break; | |
250 | } else if (*pattern == '\0') { | |
5b2abdfb | 251 | return (RANGE_ERROR); |
3d9156a7 | 252 | } else if (*pattern == '/' && (flags & FNM_PATHNAME)) { |
3d9156a7 A |
253 | return (RANGE_NOMATCH); |
254 | } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE)) | |
255 | pattern++; | |
256 | pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs); | |
257 | if (pclen == (size_t)-1 || pclen == (size_t)-2) | |
5b2abdfb | 258 | return (RANGE_NOMATCH); |
3d9156a7 | 259 | pattern += pclen; |
5b2abdfb A |
260 | |
261 | if (flags & FNM_CASEFOLD) | |
3d9156a7 | 262 | c = towlower(c); |
5b2abdfb | 263 | |
3d9156a7 A |
264 | if (*pattern == '-' && *(pattern + 1) != EOS && |
265 | *(pattern + 1) != ']') { | |
266 | if (*++pattern == '\\' && !(flags & FNM_NOESCAPE)) | |
267 | if (*pattern != EOS) | |
268 | pattern++; | |
269 | pclen = mbrtowc(&c2, pattern, MB_LEN_MAX, patmbs); | |
270 | if (pclen == (size_t)-1 || pclen == (size_t)-2) | |
271 | return (RANGE_NOMATCH); | |
272 | pattern += pclen; | |
e9ce8d39 | 273 | if (c2 == EOS) |
5b2abdfb A |
274 | return (RANGE_ERROR); |
275 | ||
276 | if (flags & FNM_CASEFOLD) | |
3d9156a7 | 277 | c2 = towlower(c2); |
5b2abdfb A |
278 | |
279 | if (__collate_load_error ? | |
280 | c <= test && test <= c2 : | |
281 | __collate_range_cmp(c, test) <= 0 | |
282 | && __collate_range_cmp(test, c2) <= 0 | |
283 | ) | |
e9ce8d39 A |
284 | ok = 1; |
285 | } else if (c == test) | |
286 | ok = 1; | |
3d9156a7 | 287 | } |
5b2abdfb A |
288 | |
289 | *newp = (char *)pattern; | |
290 | return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH); | |
e9ce8d39 | 291 | } |