]>
Commit | Line | Data |
---|---|---|
e9ce8d39 A |
1 | /* |
2 | * Copyright (c) 1989, 1993, 1994 | |
3 | * The Regents of the University of California. All rights reserved. | |
4 | * | |
5 | * This code is derived from software contributed to Berkeley by | |
6 | * Guido van Rossum. | |
7 | * | |
8 | * Redistribution and use in source and binary forms, with or without | |
9 | * modification, are permitted provided that the following conditions | |
10 | * are met: | |
11 | * 1. Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * 2. Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in the | |
15 | * documentation and/or other materials provided with the distribution. | |
16 | * 3. All advertising materials mentioning features or use of this software | |
17 | * must display the following acknowledgement: | |
18 | * This product includes software developed by the University of | |
19 | * California, Berkeley and its contributors. | |
20 | * 4. Neither the name of the University nor the names of its contributors | |
21 | * may be used to endorse or promote products derived from this software | |
22 | * without specific prior written permission. | |
23 | * | |
24 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
25 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
26 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
28 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
29 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
30 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
31 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
32 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
33 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
34 | * SUCH DAMAGE. | |
35 | */ | |
36 | ||
5b2abdfb A |
37 | #if defined(LIBC_SCCS) && !defined(lint) |
38 | static char sccsid[] = "@(#)fnmatch.c 8.2 (Berkeley) 4/16/94"; | |
39 | #endif /* LIBC_SCCS and not lint */ | |
9385eb3d | 40 | #include <sys/cdefs.h> |
3d9156a7 | 41 | __FBSDID("$FreeBSD: src/lib/libc/gen/fnmatch.c,v 1.16 2004/07/29 03:13:10 tjr Exp $"); |
e9ce8d39 A |
42 | |
43 | /* | |
44 | * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6. | |
45 | * Compares a filename or pathname to a pattern. | |
46 | */ | |
47 | ||
3d9156a7 A |
48 | /* |
49 | * Some notes on multibyte character support: | |
50 | * 1. Patterns with illegal byte sequences match nothing. | |
51 | * 2. Illegal byte sequences in the "string" argument are handled by treating | |
52 | * them as single-byte characters with a value of the first byte of the | |
53 | * sequence cast to wchar_t. | |
54 | * 3. Multibyte conversion state objects (mbstate_t) are passed around and | |
55 | * used for most, but not all, conversions. Further work will be required | |
56 | * to support state-dependent encodings. | |
57 | */ | |
58 | ||
e9ce8d39 | 59 | #include <fnmatch.h> |
3d9156a7 | 60 | #include <limits.h> |
e9ce8d39 | 61 | #include <string.h> |
3d9156a7 A |
62 | #include <wchar.h> |
63 | #include <wctype.h> | |
5b2abdfb A |
64 | |
65 | #include "collate.h" | |
e9ce8d39 A |
66 | |
/* String terminator, named for readability in the matchers below. */
#define	EOS	'\0'

/*
 * Result codes of rangematch():
 *	RANGE_MATCH	the character is accepted by the bracket expression
 *	RANGE_NOMATCH	the character is rejected (or the pattern is unusable)
 *	RANGE_ERROR	the text after '[' is not a well-formed bracket
 *			expression
 */
#define	RANGE_MATCH	1
#define	RANGE_NOMATCH	0
#define	RANGE_ERROR	(-1)

static int	 rangematch(const char *pattern, wchar_t test, int flags,
		    char **newp, mbstate_t *patmbs);
static int	 fnmatch1(const char *pattern, const char *string, int flags,
		    mbstate_t patmbs, mbstate_t strmbs);
e9ce8d39 A |
75 | |
/*
 * fnmatch --
 *	Public entry point: test whether `string' matches the shell
 *	wildcard `pattern'.
 *
 *	pattern	NUL-terminated pattern.
 *	string	NUL-terminated string to test against the pattern.
 *	flags	bitwise OR of FNM_* flags, handed to fnmatch1() unchanged.
 *
 *	Returns the result of fnmatch1(): 0 on a match, FNM_NOMATCH
 *	otherwise.
 */
int
fnmatch(const char *pattern, const char *string, int flags)
{
	/*
	 * An object with static storage duration is zero-initialised,
	 * and a zero-valued mbstate_t describes the initial conversion
	 * state.  Both the pattern and the string start from it.
	 */
	static const mbstate_t initial;

	return (fnmatch1(pattern, string, flags, initial, initial));
}
85 | ||
86 | static int | |
87 | fnmatch1(pattern, string, flags, patmbs, strmbs) | |
88 | const char *pattern, *string; | |
89 | int flags; | |
90 | mbstate_t patmbs, strmbs; | |
e9ce8d39 A |
91 | { |
92 | const char *stringstart; | |
5b2abdfb | 93 | char *newp; |
3d9156a7 A |
94 | char c; |
95 | wchar_t pc, sc; | |
96 | size_t pclen, sclen; | |
e9ce8d39 | 97 | |
3d9156a7 A |
98 | for (stringstart = string;;) { |
99 | pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs); | |
100 | if (pclen == (size_t)-1 || pclen == (size_t)-2) | |
101 | return (FNM_NOMATCH); | |
102 | pattern += pclen; | |
103 | sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs); | |
104 | if (sclen == (size_t)-1 || sclen == (size_t)-2) { | |
105 | sc = (unsigned char)*string; | |
106 | sclen = 1; | |
107 | memset(&strmbs, 0, sizeof(strmbs)); | |
108 | } | |
109 | switch (pc) { | |
e9ce8d39 | 110 | case EOS: |
3d9156a7 | 111 | if ((flags & FNM_LEADING_DIR) && sc == '/') |
5b2abdfb | 112 | return (0); |
3d9156a7 | 113 | return (sc == EOS ? 0 : FNM_NOMATCH); |
e9ce8d39 | 114 | case '?': |
3d9156a7 | 115 | if (sc == EOS) |
e9ce8d39 | 116 | return (FNM_NOMATCH); |
3d9156a7 | 117 | if (sc == '/' && (flags & FNM_PATHNAME)) |
e9ce8d39 | 118 | return (FNM_NOMATCH); |
3d9156a7 | 119 | if (sc == '.' && (flags & FNM_PERIOD) && |
e9ce8d39 A |
120 | (string == stringstart || |
121 | ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) | |
122 | return (FNM_NOMATCH); | |
3d9156a7 | 123 | string += sclen; |
e9ce8d39 A |
124 | break; |
125 | case '*': | |
126 | c = *pattern; | |
127 | /* Collapse multiple stars. */ | |
128 | while (c == '*') | |
129 | c = *++pattern; | |
130 | ||
3d9156a7 | 131 | if (sc == '.' && (flags & FNM_PERIOD) && |
e9ce8d39 A |
132 | (string == stringstart || |
133 | ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) | |
134 | return (FNM_NOMATCH); | |
135 | ||
136 | /* Optimize for pattern with * at end or before /. */ | |
137 | if (c == EOS) | |
138 | if (flags & FNM_PATHNAME) | |
5b2abdfb A |
139 | return ((flags & FNM_LEADING_DIR) || |
140 | strchr(string, '/') == NULL ? | |
e9ce8d39 A |
141 | 0 : FNM_NOMATCH); |
142 | else | |
143 | return (0); | |
144 | else if (c == '/' && flags & FNM_PATHNAME) { | |
145 | if ((string = strchr(string, '/')) == NULL) | |
146 | return (FNM_NOMATCH); | |
147 | break; | |
148 | } | |
149 | ||
150 | /* General case, use recursion. */ | |
3d9156a7 A |
151 | while (sc != EOS) { |
152 | if (!fnmatch1(pattern, string, | |
153 | flags & ~FNM_PERIOD, patmbs, strmbs)) | |
e9ce8d39 | 154 | return (0); |
3d9156a7 A |
155 | sclen = mbrtowc(&sc, string, MB_LEN_MAX, |
156 | &strmbs); | |
157 | if (sclen == (size_t)-1 || | |
158 | sclen == (size_t)-2) { | |
159 | sc = (unsigned char)*string; | |
160 | sclen = 1; | |
161 | memset(&strmbs, 0, sizeof(strmbs)); | |
162 | } | |
163 | if (sc == '/' && flags & FNM_PATHNAME) | |
e9ce8d39 | 164 | break; |
3d9156a7 | 165 | string += sclen; |
e9ce8d39 A |
166 | } |
167 | return (FNM_NOMATCH); | |
168 | case '[': | |
3d9156a7 | 169 | if (sc == EOS) |
e9ce8d39 | 170 | return (FNM_NOMATCH); |
3d9156a7 | 171 | if (sc == '/' && (flags & FNM_PATHNAME)) |
5b2abdfb | 172 | return (FNM_NOMATCH); |
3d9156a7 | 173 | if (sc == '.' && (flags & FNM_PERIOD) && |
5b2abdfb A |
174 | (string == stringstart || |
175 | ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) | |
e9ce8d39 | 176 | return (FNM_NOMATCH); |
5b2abdfb | 177 | |
3d9156a7 A |
178 | switch (rangematch(pattern, sc, flags, &newp, |
179 | &patmbs)) { | |
5b2abdfb A |
180 | case RANGE_ERROR: |
181 | goto norm; | |
182 | case RANGE_MATCH: | |
183 | pattern = newp; | |
184 | break; | |
185 | case RANGE_NOMATCH: | |
e9ce8d39 | 186 | return (FNM_NOMATCH); |
5b2abdfb | 187 | } |
3d9156a7 | 188 | string += sclen; |
e9ce8d39 A |
189 | break; |
190 | case '\\': | |
191 | if (!(flags & FNM_NOESCAPE)) { | |
3d9156a7 A |
192 | pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, |
193 | &patmbs); | |
194 | if (pclen == (size_t)-1 || pclen == (size_t)-2) | |
195 | return (FNM_NOMATCH); | |
196 | if (pclen == 0) | |
197 | pc = '\\'; | |
198 | pattern += pclen; | |
e9ce8d39 A |
199 | } |
200 | /* FALLTHROUGH */ | |
201 | default: | |
5b2abdfb | 202 | norm: |
3d9156a7 | 203 | if (pc == sc) |
5b2abdfb A |
204 | ; |
205 | else if ((flags & FNM_CASEFOLD) && | |
3d9156a7 | 206 | (towlower(pc) == towlower(sc))) |
5b2abdfb A |
207 | ; |
208 | else | |
e9ce8d39 | 209 | return (FNM_NOMATCH); |
3d9156a7 | 210 | string += sclen; |
e9ce8d39 A |
211 | break; |
212 | } | |
3d9156a7 | 213 | } |
e9ce8d39 A |
214 | /* NOTREACHED */ |
215 | } | |
216 | ||
5b2abdfb | 217 | static int |
3d9156a7 | 218 | rangematch(pattern, test, flags, newp, patmbs) |
e9ce8d39 | 219 | const char *pattern; |
3d9156a7 | 220 | wchar_t test; |
5b2abdfb A |
221 | int flags; |
222 | char **newp; | |
3d9156a7 | 223 | mbstate_t *patmbs; |
e9ce8d39 A |
224 | { |
225 | int negate, ok; | |
3d9156a7 A |
226 | wchar_t c, c2; |
227 | size_t pclen; | |
228 | const char *origpat; | |
e9ce8d39 A |
229 | |
230 | /* | |
231 | * A bracket expression starting with an unquoted circumflex | |
232 | * character produces unspecified results (IEEE 1003.2-1992, | |
233 | * 3.13.2). This implementation treats it like '!', for | |
234 | * consistency with the regular expression syntax. | |
235 | * J.T. Conklin (conklin@ngai.kaleida.com) | |
236 | */ | |
5b2abdfb | 237 | if ( (negate = (*pattern == '!' || *pattern == '^')) ) |
e9ce8d39 | 238 | ++pattern; |
5b2abdfb A |
239 | |
240 | if (flags & FNM_CASEFOLD) | |
3d9156a7 | 241 | test = towlower(test); |
5b2abdfb A |
242 | |
243 | /* | |
244 | * A right bracket shall lose its special meaning and represent | |
245 | * itself in a bracket expression if it occurs first in the list. | |
246 | * -- POSIX.2 2.8.3.2 | |
247 | */ | |
248 | ok = 0; | |
3d9156a7 A |
249 | origpat = pattern; |
250 | for (;;) { | |
251 | if (*pattern == ']' && pattern > origpat) { | |
252 | pattern++; | |
253 | break; | |
254 | } else if (*pattern == '\0') { | |
5b2abdfb | 255 | return (RANGE_ERROR); |
3d9156a7 A |
256 | } else if (*pattern == '/' && (flags & FNM_PATHNAME)) { |
257 | pattern++; | |
258 | return (RANGE_NOMATCH); | |
259 | } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE)) | |
260 | pattern++; | |
261 | pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs); | |
262 | if (pclen == (size_t)-1 || pclen == (size_t)-2) | |
5b2abdfb | 263 | return (RANGE_NOMATCH); |
3d9156a7 | 264 | pattern += pclen; |
5b2abdfb A |
265 | |
266 | if (flags & FNM_CASEFOLD) | |
3d9156a7 | 267 | c = towlower(c); |
5b2abdfb | 268 | |
3d9156a7 A |
269 | if (*pattern == '-' && *(pattern + 1) != EOS && |
270 | *(pattern + 1) != ']') { | |
271 | if (*++pattern == '\\' && !(flags & FNM_NOESCAPE)) | |
272 | if (*pattern != EOS) | |
273 | pattern++; | |
274 | pclen = mbrtowc(&c2, pattern, MB_LEN_MAX, patmbs); | |
275 | if (pclen == (size_t)-1 || pclen == (size_t)-2) | |
276 | return (RANGE_NOMATCH); | |
277 | pattern += pclen; | |
e9ce8d39 | 278 | if (c2 == EOS) |
5b2abdfb A |
279 | return (RANGE_ERROR); |
280 | ||
281 | if (flags & FNM_CASEFOLD) | |
3d9156a7 | 282 | c2 = towlower(c2); |
5b2abdfb A |
283 | |
284 | if (__collate_load_error ? | |
285 | c <= test && test <= c2 : | |
286 | __collate_range_cmp(c, test) <= 0 | |
287 | && __collate_range_cmp(test, c2) <= 0 | |
288 | ) | |
e9ce8d39 A |
289 | ok = 1; |
290 | } else if (c == test) | |
291 | ok = 1; | |
3d9156a7 | 292 | } |
5b2abdfb A |
293 | |
294 | *newp = (char *)pattern; | |
295 | return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH); | |
e9ce8d39 | 296 | } |