Commit | Line | Data |
---|---|---|
f6bcfd97 BP |
1 | /*--------------------------------------------------------------------------- |
2 | ||
3 | match.c | |
4 | ||
5 | The match() routine recursively compares a string to a "pattern" (regular | |
6 | expression), returning TRUE if a match is found or FALSE if not. This | |
7 | version is specifically for use with unzip.c: as did the previous match() | |
8 | routines from SEA and J. Kercheval, it leaves the case (upper, lower, or | |
9 | mixed) of the string alone, but converts any uppercase characters in the | |
10 | pattern to lowercase if indicated by the global var pInfo->lcflag (which | |
11 | is to say, string is assumed to have been converted to lowercase already, | |
12 | if such was necessary). | |
13 | ||
14 | GRR: reversed order of text, pattern in matche() (now same as match()); | |
15 | added ignore_case/ic flags, Case() macro. | |
16 | ||
17 | PaulK: replaced matche() with recmatch() from Zip, modified to have an | |
18 | ignore_case argument; replaced test frame with simpler one. | |
19 | ||
20 | --------------------------------------------------------------------------- | |
21 | ||
22 | Copyright on recmatch() from Zip's util.c (although recmatch() was almost | |
23 | certainly written by Mark Adler...ask me how I can tell :-) ): | |
24 | ||
25 | Copyright (C) 1990-1992 Mark Adler, Richard B. Wales, Jean-loup Gailly, | |
26 | Kai Uwe Rommel and Igor Mandrichenko. | |
27 | ||
28 | Permission is granted to any individual or institution to use, copy, | |
29 | or redistribute this software so long as all of the original files are | |
30 | included unmodified, that it is not sold for profit, and that this copy- | |
31 | right notice is retained. | |
32 | ||
33 | --------------------------------------------------------------------------- | |
34 | ||
35 | Match the pattern (wildcard) against the string (fixed): | |
36 | ||
37 | match(string, pattern, ignore_case); | |
38 | ||
39 | returns TRUE if string matches pattern, FALSE otherwise. In the pattern: | |
40 | ||
41 | `*' matches any sequence of characters (zero or more) | |
42 | `?' matches any single character | |
43 | [SET] matches any character in the specified set, | |
44 | [!SET] or [^SET] matches any character not in the specified set. | |
45 | ||
46 | A set is composed of characters or ranges; a range looks like ``character | |
47 | hyphen character'' (as in 0-9 or A-Z). [0-9a-zA-Z_] is the minimal set of | |
48 | characters allowed in the [..] pattern construct. Other characters are | |
49 | allowed (i.e., 8-bit characters) if your system will support them. | |
50 | ||
51 | To suppress the special syntactic significance of any of ``[]*?!^-\'', in- | |
52 | side or outside a [..] construct, and match the character exactly, precede | |
53 | it with a ``\'' (backslash). | |
54 | ||
55 | Note that "*.*" and "*." are treated specially under MS-DOS if DOSWILD is | |
56 | defined. See the DOSWILD section below for an explanation. Note also | |
57 | that with VMSWILD defined, '%' is used instead of '?', and sets (ranges) | |
58 | are delimited by () instead of []. | |
59 | ||
60 | ---------------------------------------------------------------------------*/ | |
61 | ||
62 | ||
63 | ||
64 | /* define ToLower() in here (for Unix, define ToLower to be macro (using | |
65 | * isupper()); otherwise just use tolower() */ | |
66 | #define UNZIP_INTERNAL | |
67 | #include "unzip.h" | |
68 | ||
69 | #if 0 /* this is not useful until it matches Amiga names insensitively */ | |
70 | #ifdef AMIGA /* some other platforms might also want to use this */ | |
71 | # define ANSI_CHARSET /* MOVE INTO UNZIP.H EVENTUALLY */ | |
72 | #endif | |
73 | #endif /* 0 */ | |
74 | ||
75 | #ifdef ANSI_CHARSET | |
76 | # ifdef ToLower | |
77 | # undef ToLower | |
78 | # endif | |
79 | /* uppercase letters are values 41 thru 5A, C0 thru D6, and D8 thru DE */ | |
80 | # define IsUpper(c) (c>=0xC0 ? c<=0xDE && c!=0xD7 : c>=0x41 && c<=0x5A) | |
81 | # define ToLower(c) (IsUpper((uch) c) ? (unsigned) c | 0x20 : (unsigned) c) | |
82 | #endif | |
83 | #define Case(x) (ic? ToLower(x) : (x)) | |
84 | ||
85 | #ifdef VMSWILD | |
86 | # define WILDCHAR '%' | |
87 | # define BEG_RANGE '(' | |
88 | # define END_RANGE ')' | |
89 | #else | |
90 | # define WILDCHAR '?' | |
91 | # define BEG_RANGE '[' | |
92 | # define END_RANGE ']' | |
93 | #endif | |
94 | ||
95 | #if 0 /* GRR: add this to unzip.h someday... */ | |
96 | #if !(defined(MSDOS) && defined(DOSWILD)) | |
97 | #define match(s,p,ic) (recmatch((ZCONST uch *)p,(ZCONST uch *)s,ic) == 1) | |
98 | int recmatch OF((ZCONST uch *pattern, ZCONST uch *string, int ignore_case)); | |
99 | #endif | |
100 | #endif /* 0 */ | |
101 | static int recmatch OF((ZCONST uch *pattern, ZCONST uch *string, | |
102 | int ignore_case)); | |
103 | ||
104 | ||
105 | ||
106 | /* match() is a shell to recmatch() to return only Boolean values. */ | |
107 | ||
108 | int match(string, pattern, ignore_case) | |
109 | ZCONST char *string, *pattern; | |
110 | int ignore_case; | |
111 | { | |
112 | #if (defined(MSDOS) && defined(DOSWILD)) | |
113 | char *dospattern; | |
114 | int j = strlen(pattern); | |
115 | ||
116 | /*--------------------------------------------------------------------------- | |
117 | Optional MS-DOS preprocessing section: compare last three chars of the | |
118 | wildcard to "*.*" and translate to "*" if found; else compare the last | |
119 | two characters to "*." and, if found, scan the non-wild string for dots. | |
120 | If in the latter case a dot is found, return failure; else translate the | |
121 | "*." to "*". In either case, continue with the normal (Unix-like) match | |
122 | procedure after translation. (If not enough memory, default to normal | |
123 | match.) This causes "a*.*" and "a*." to behave as MS-DOS users expect. | |
124 | ---------------------------------------------------------------------------*/ | |
125 | ||
126 | if ((dospattern = (char *)malloc(j+1)) != NULL) { | |
127 | strcpy(dospattern, pattern); | |
128 | if (!strcmp(dospattern+j-3, "*.*")) { | |
129 | dospattern[j-2] = '\0'; /* nuke the ".*" */ | |
130 | } else if (!strcmp(dospattern+j-2, "*.")) { | |
131 | char *p = strchr(string, '.'); | |
132 | ||
133 | if (p) { /* found a dot: match fails */ | |
134 | free(dospattern); | |
135 | return 0; | |
136 | } | |
137 | dospattern[j-1] = '\0'; /* nuke the end "." */ | |
138 | } | |
139 | j = recmatch((uch *)dospattern, (uch *)string, ignore_case); | |
140 | free(dospattern); | |
141 | return j == 1; | |
142 | } else | |
143 | #endif /* MSDOS && DOSWILD */ | |
144 | return recmatch((uch *)pattern, (uch *)string, ignore_case) == 1; | |
145 | } | |
146 | ||
147 | ||
148 | ||
149 | static int recmatch(p, s, ic) | |
150 | ZCONST uch *p; /* sh pattern to match */ | |
151 | ZCONST uch *s; /* string to which to match it */ | |
152 | int ic; /* true for case insensitivity */ | |
153 | /* Recursively compare the sh pattern p with the string s and return 1 if | |
154 | * they match, and 0 or 2 if they don't or if there is a syntax error in the | |
155 | * pattern. This routine recurses on itself no more deeply than the number | |
156 | * of characters in the pattern. */ | |
157 | { | |
158 | unsigned int c; /* pattern char or start of range in [-] loop */ | |
159 | ||
160 | /* Get first character, the pattern for new recmatch calls follows */ | |
161 | c = *p++; | |
162 | ||
163 | /* If that was the end of the pattern, match if string empty too */ | |
164 | if (c == 0) | |
165 | return *s == 0; | |
166 | ||
167 | /* '?' (or '%') matches any character (but not an empty string) */ | |
168 | if (c == WILDCHAR) | |
169 | return *s ? recmatch(p, s + 1, ic) : 0; | |
170 | ||
171 | /* '*' matches any number of characters, including zero */ | |
172 | #ifdef AMIGA | |
173 | if (c == '#' && *p == '?') /* "#?" is Amiga-ese for "*" */ | |
174 | c = '*', p++; | |
175 | #endif /* AMIGA */ | |
176 | if (c == '*') { | |
177 | if (*p == 0) | |
178 | return 1; | |
179 | for (; *s; s++) | |
180 | if ((c = recmatch(p, s, ic)) != 0) | |
181 | return (int)c; | |
182 | return 2; /* 2 means give up--match will return false */ | |
183 | } | |
184 | ||
185 | /* Parse and process the list of characters and ranges in brackets */ | |
186 | if (c == BEG_RANGE) { | |
187 | int e; /* flag true if next char to be taken literally */ | |
188 | ZCONST uch *q; /* pointer to end of [-] group */ | |
189 | int r; /* flag true to match anything but the range */ | |
190 | ||
191 | if (*s == 0) /* need a character to match */ | |
192 | return 0; | |
193 | p += (r = (*p == '!' || *p == '^')); /* see if reverse */ | |
194 | for (q = p, e = 0; *q; q++) /* find closing bracket */ | |
195 | if (e) | |
196 | e = 0; | |
197 | else | |
198 | if (*q == '\\') /* GRR: change to ^ for MS-DOS, OS/2? */ | |
199 | e = 1; | |
200 | else if (*q == END_RANGE) | |
201 | break; | |
202 | if (*q != END_RANGE) /* nothing matches if bad syntax */ | |
203 | return 0; | |
204 | for (c = 0, e = *p == '-'; p < q; p++) { /* go through the list */ | |
205 | if (e == 0 && *p == '\\') /* set escape flag if \ */ | |
206 | e = 1; | |
207 | else if (e == 0 && *p == '-') /* set start of range if - */ | |
208 | c = *(p-1); | |
209 | else { | |
210 | unsigned int cc = Case(*s); | |
211 | ||
212 | if (*(p+1) != '-') | |
213 | for (c = c ? c : *p; c <= *p; c++) /* compare range */ | |
214 | if ((unsigned)Case(c) == cc) /* typecast for MSC bug */ | |
215 | return r ? 0 : recmatch(q + 1, s + 1, ic); | |
216 | c = e = 0; /* clear range, escape flags */ | |
217 | } | |
218 | } | |
219 | return r ? recmatch(q + 1, s + 1, ic) : 0; /* bracket match failed */ | |
220 | } | |
221 | ||
222 | /* if escape ('\'), just compare next character */ | |
223 | if (c == '\\' && (c = *p++) == 0) /* if \ at end, then syntax error */ | |
224 | return 0; | |
225 | ||
226 | /* just a character--compare it */ | |
227 | #ifdef QDOS | |
228 | return QMatch(Case((uch)c), Case(*s)) ? recmatch(p, ++s, ic) : 0; | |
229 | #else | |
230 | return Case((uch)c) == Case(*s) ? recmatch(p, ++s, ic) : 0; | |
231 | #endif | |
232 | ||
233 | } /* end function recmatch() */ | |
234 | ||
235 | ||
236 | ||
237 | ||
238 | ||
239 | int iswild(p) /* originally only used for stat()-bug workaround in */ | |
240 | ZCONST char *p; /* VAX C, Turbo/Borland C, Watcom C, Atari MiNT libs; */ | |
241 | { /* now used in process_zipfiles() as well */ | |
242 | for (; *p; ++p) | |
243 | if (*p == '\\' && *(p+1)) | |
244 | ++p; | |
245 | #ifdef VMS | |
246 | else if (*p == '%' || *p == '*') | |
247 | #else /* !VMS */ | |
248 | #ifdef AMIGA | |
249 | else if (*p == '?' || *p == '*' || (*p=='#' && p[1]=='?') || *p == '[') | |
250 | #else /* !AMIGA */ | |
251 | else if (*p == '?' || *p == '*' || *p == '[') | |
252 | #endif /* ?AMIGA */ | |
253 | #endif /* ?VMS */ | |
254 | #ifdef QDOS | |
255 | return (int)p; | |
256 | #else | |
257 | return TRUE; | |
258 | #endif | |
259 | ||
260 | return FALSE; | |
261 | ||
262 | } /* end function iswild() */ | |
263 | ||
264 | ||
265 | ||
266 | ||
267 | ||
#ifdef TEST_MATCH

#define put(s) {fputs(s,stdout); fflush(stdout);}

/*
 * Read one line from stdin into buf, which holds `size' bytes, and strip
 * the trailing newline.  Replaces gets(), which cannot bound its write.
 * On end-of-file or a read error buf is set to the empty string, which
 * the callers below treat like a bare return.  A line longer than the
 * buffer is split: the excess is left for the next read.
 */
static void read_line(buf, size)
    char *buf;
    int size;
{
    char *cp;

    if (fgets(buf, size, stdin) == NULL) {
        buf[0] = '\0';
        return;
    }
    for (cp = buf; *cp != '\0'; cp++) {
        if (*cp == '\n') {
            *cp = '\0';
            break;
        }
    }
}

/*
 * Interactive test frame: repeatedly prompts for a pattern and then for
 * strings, reporting for each string whether match() succeeds with and
 * without case folding.  An empty pattern ends the program; an empty
 * string returns to the pattern prompt.
 */
int main()
{
    char pat[256], str[256];

    for (;;) {
        put("Pattern (return to exit): ");
        read_line(pat, (int)sizeof(pat));
        if (!pat[0])
            break;
        for (;;) {
            put("String (return for new pattern): ");
            read_line(str, (int)sizeof(str));
            if (!str[0])
                break;
            pipeit("Case sensitive: %s insensitive: %s\n",
              match(str, pat, 0) ? "YES" : "NO",
              match(str, pat, 1) ? "YES" : "NO");
        }
    }
    EXIT(0);
    return 0;   /* only reached if EXIT() returns */
}

#endif /* TEST_MATCH */