]> git.saurik.com Git - apple/libc.git/blob - regex/FreeBSD/regcomp.c.patch
65aec8bca4b37e405426777e2a561b3bc3e863a7
[apple/libc.git] / regex / FreeBSD / regcomp.c.patch
1 --- regcomp.c.orig 2004-11-25 11:38:32.000000000 -0800
2 +++ regcomp.c 2005-02-24 13:46:56.000000000 -0800
3 @@ -43,6 +43,8 @@
4 #include <sys/cdefs.h>
5 __FBSDID("$FreeBSD: src/lib/libc/regex/regcomp.c,v 1.34 2004/10/03 15:42:59 stefanf Exp $");
6
7 +#include "xlocale_private.h"
8 +
9 #include <sys/types.h>
10 #include <stdio.h>
11 #include <string.h>
12 @@ -73,6 +75,9 @@
13 sopno ssize; /* malloced strip size (allocated) */
14 sopno slen; /* malloced strip length (used) */
15 int ncsalloc; /* number of csets allocated */
16 +#if __DARWIN_UNIX03
17 + int zerorepeats;
18 +#endif /* __DARWIN_UNIX03 */
19 struct re_guts *g;
20 # define NPAREN 10 /* we need to remember () 1-9 for back refs */
21 sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
22 @@ -97,7 +102,7 @@
23 static void p_b_eclass(struct parse *p, cset *cs);
24 static wint_t p_b_symbol(struct parse *p);
25 static wint_t p_b_coll_elem(struct parse *p, wint_t endc);
26 -static wint_t othercase(wint_t ch);
27 +static wint_t othercase(wint_t ch, locale_t loc);
28 static void bothcases(struct parse *p, wint_t ch);
29 static void ordinary(struct parse *p, wint_t ch);
30 static void nonnewline(struct parse *p);
31 @@ -108,7 +113,7 @@
32 static void CHadd(struct parse *p, cset *cs, wint_t ch);
33 static void CHaddrange(struct parse *p, cset *cs, wint_t min, wint_t max);
34 static void CHaddtype(struct parse *p, cset *cs, wctype_t wct);
35 -static wint_t singleton(cset *cs);
36 +static wint_t singleton(cset *cs, locale_t loc);
37 static sopno dupl(struct parse *p, sopno start, sopno finish);
38 static void doemit(struct parse *p, sop op, size_t opnd);
39 static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
40 @@ -227,10 +232,14 @@
41 p->end = p->next + len;
42 p->error = 0;
43 p->ncsalloc = 0;
44 +#if __DARWIN_UNIX03
45 + p->zerorepeats = 0;
46 +#endif /* __DARWIN_UNIX03 */
47 for (i = 0; i < NPAREN; i++) {
48 p->pbegin[i] = 0;
49 p->pend[i] = 0;
50 }
51 + g->loc = __current_locale();
52 g->sets = NULL;
53 g->ncsets = 0;
54 g->cflags = cflags;
55 @@ -308,8 +317,12 @@
56 conc = HERE();
57 while (MORE() && (c = PEEK()) != '|' && c != stop)
58 p_ere_exp(p);
59 +#if __DARWIN_UNIX03
60 + if (!p->zerorepeats) REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
61 + else p->zerorepeats--;
62 +#else
63 (void)REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
64 -
65 +#endif
66 if (!EAT('|'))
67 break; /* NOTE BREAK OUT */
68
69 @@ -417,7 +430,7 @@
70 ordinary(p, wc);
71 break;
72 case '{': /* okay as ordinary except if digit follows */
73 - (void)REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
74 + (void)REQUIRE(!MORE() || !isdigit_l((uch)PEEK(), p->g->loc), REG_BADRPT);
75 /* FALLTHROUGH */
76 default:
77 p->next--;
78 @@ -431,7 +444,7 @@
79 c = PEEK();
80 /* we call { a repetition if followed by a digit */
81 if (!( c == '*' || c == '+' || c == '?' ||
82 - (c == '{' && MORE2() && isdigit((uch)PEEK2())) ))
83 + (c == '{' && MORE2() && isdigit_l((uch)PEEK2(), p->g->loc)) ))
84 return; /* no repetition, we're done */
85 NEXT();
86
87 @@ -460,7 +473,7 @@
88 case '{':
89 count = p_count(p);
90 if (EAT(',')) {
91 - if (isdigit((uch)PEEK())) {
92 + if (isdigit_l((uch)PEEK(), p->g->loc)) {
93 count2 = p_count(p);
94 (void)REQUIRE(count <= count2, REG_BADBR);
95 } else /* single number with comma */
96 @@ -481,7 +494,7 @@
97 return;
98 c = PEEK();
99 if (!( c == '*' || c == '+' || c == '?' ||
100 - (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) )
101 + (c == '{' && MORE2() && isdigit_l((uch)PEEK2(), p->g->loc)) ) )
102 return;
103 SETERROR(REG_BADRPT);
104 }
105 @@ -494,7 +507,12 @@
106 p_str(p)
107 struct parse *p;
108 {
109 +#if __DARWIN_UNIX03
110 + if (!p->zerorepeats) REQUIRE(MORE(), REG_EMPTY);
111 + else p->zerorepeats--;
112 +#else /* !__DARWIN_UNIX03 */
113 (void)REQUIRE(MORE(), REG_EMPTY);
114 +#endif /* __DARWIN_UNIX03 */
115 while (MORE())
116 ordinary(p, WGETNEXT());
117 }
118 @@ -534,8 +552,12 @@
119 p->g->iflags |= USEEOL;
120 p->g->neol++;
121 }
122 -
123 +#if __DARWIN_UNIX03
124 + if (!p->zerorepeats) REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */
125 + else p->zerorepeats--;
126 +#else /* !__DARWIN_UNIX03 */
127 (void)REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */
128 +#endif /* __DARWIN_UNIX03 */
129 }
130
131 /*
132 @@ -639,7 +661,7 @@
133 } else if (EATTWO('\\', '{')) {
134 count = p_count(p);
135 if (EAT(',')) {
136 - if (MORE() && isdigit((uch)PEEK())) {
137 + if (MORE() && isdigit_l((uch)PEEK(), p->g->loc)) {
138 count2 = p_count(p);
139 (void)REQUIRE(count <= count2, REG_BADBR);
140 } else /* single number with comma */
141 @@ -670,7 +692,7 @@
142 int count = 0;
143 int ndigits = 0;
144
145 - while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
146 + while (MORE() && isdigit_l((uch)PEEK(), p->g->loc) && count <= DUPMAX) {
147 count = count*10 + (GETNEXT() - '0');
148 ndigits++;
149 }
150 @@ -709,10 +731,21 @@
151 cs->icase = 1;
152 if (EAT('^'))
153 cs->invert = 1;
154 +#if __DARWIN_UNIX03
155 + if (PEEK2() != '-') { /* Don't eat '-' or ']' if they're part of ranges */
156 + if (EAT(']'))
157 + CHadd(p, cs, ']');
158 + else if (EAT('-'))
159 + CHadd(p, cs, '-');
160 + }
161 + if (MORE() && !SEETWO('-',']')) /* Parse RE []-'] */
162 + p_b_term(p, cs);
163 +#else /* !__DARWIN_UNIX03 */
164 if (EAT(']'))
165 CHadd(p, cs, ']');
166 else if (EAT('-'))
167 CHadd(p, cs, '-');
168 +#endif /* __DARWIN_UNIX03 */
169 while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
170 p_b_term(p, cs);
171 if (EAT('-'))
172 @@ -725,7 +758,7 @@
173 if (cs->invert && p->g->cflags&REG_NEWLINE)
174 cs->bmp['\n' >> 3] |= 1 << ('\n' & 7);
175
176 - if ((ch = singleton(cs)) != OUT) { /* optimize singleton sets */
177 + if ((ch = singleton(cs, p->g->loc)) != OUT) { /* optimize singleton sets */
178 ordinary(p, ch);
179 freeset(p, cs);
180 } else
181 @@ -751,8 +784,16 @@
182 c = (MORE2()) ? PEEK2() : '\0';
183 break;
184 case '-':
185 +#if __DARWIN_UNIX03
186 + if (PEEK2() != '-') { /* Allow [---] */
187 + SETERROR(REG_ERANGE);
188 + return; /* NOTE RETURN */
189 + } else
190 + c = '-';
191 +#else /* !__DARWIN_UNIX03 */
192 SETERROR(REG_ERANGE);
193 return; /* NOTE RETURN */
194 +#endif /* __DARWIN_UNIX03 */
195 break;
196 default:
197 c = '\0';
198 @@ -773,7 +814,11 @@
199 NEXT2();
200 (void)REQUIRE(MORE(), REG_EBRACK);
201 c = PEEK();
202 +#if __DARWIN_UNIX03
203 + REQUIRE(c != '-', REG_ECOLLATE); /* allow [=]=] */
204 +#else /* !__DARWIN_UNIX03 */
205 (void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
206 +#endif /* __DARWIN_UNIX03 */
207 p_b_eclass(p, cs);
208 (void)REQUIRE(MORE(), REG_EBRACK);
209 (void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
210 @@ -792,14 +837,14 @@
211 if (start == finish)
212 CHadd(p, cs, start);
213 else {
214 - if (__collate_load_error) {
215 + if (p->g->loc->__collate_load_error) {
216 (void)REQUIRE((uch)start <= (uch)finish, REG_ERANGE);
217 CHaddrange(p, cs, start, finish);
218 } else {
219 - (void)REQUIRE(__collate_range_cmp(start, finish) <= 0, REG_ERANGE);
220 + (void)REQUIRE(__collate_range_cmp(start, finish, p->g->loc) <= 0, REG_ERANGE);
221 for (i = 0; i <= UCHAR_MAX; i++) {
222 - if ( __collate_range_cmp(start, i) <= 0
223 - && __collate_range_cmp(i, finish) <= 0
224 + if ( __collate_range_cmp(start, i, p->g->loc) <= 0
225 + && __collate_range_cmp(i, finish, p->g->loc) <= 0
226 )
227 CHadd(p, cs, i);
228 }
229 @@ -823,7 +868,7 @@
230 wctype_t wct;
231 char clname[16];
232
233 - while (MORE() && isalpha((uch)PEEK()))
234 + while (MORE() && isalpha_l((uch)PEEK(), p->g->loc))
235 NEXT();
236 len = p->next - sp;
237 if (len >= sizeof(clname) - 1) {
238 @@ -832,7 +877,7 @@
239 }
240 memcpy(clname, sp, len);
241 clname[len] = '\0';
242 - if ((wct = wctype(clname)) == 0) {
243 + if ((wct = wctype_l(clname, p->g->loc)) == 0) {
244 SETERROR(REG_ECTYPE);
245 return;
246 }
247 @@ -903,7 +948,7 @@
248 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
249 return(cp->code); /* known name */
250 memset(&mbs, 0, sizeof(mbs));
251 - if ((clen = mbrtowc(&wc, sp, len, &mbs)) == len)
252 + if ((clen = mbrtowc_l(&wc, sp, len, &mbs, p->g->loc)) == len)
253 return (wc); /* single character */
254 else if (clen == (size_t)-1 || clen == (size_t)-2)
255 SETERROR(REG_ILLSEQ);
256 @@ -914,17 +959,18 @@
257
258 /*
259 - othercase - return the case counterpart of an alphabetic
260 - == static char othercase(int ch);
261 + == static char othercase(int ch, locale_t loc);
262 */
263 static wint_t /* if no counterpart, return ch */
264 -othercase(ch)
265 +othercase(ch, loc)
266 wint_t ch;
267 +locale_t loc;
268 {
269 - assert(iswalpha(ch));
270 - if (iswupper(ch))
271 - return(towlower(ch));
272 - else if (iswlower(ch))
273 - return(towupper(ch));
274 + assert(iswalpha_l(ch, loc));
275 + if (iswupper_l(ch, loc))
276 + return(towlower_l(ch, loc));
277 + else if (iswlower_l(ch, loc))
278 + return(towupper_l(ch, loc));
279 else /* peculiar, but could happen */
280 return(ch);
281 }
282 @@ -946,10 +992,10 @@
283 size_t n;
284 mbstate_t mbs;
285
286 - assert(othercase(ch) != ch); /* p_bracket() would recurse */
287 + assert(othercase(ch, p->g->loc) != ch); /* p_bracket() would recurse */
288 p->next = bracket;
289 memset(&mbs, 0, sizeof(mbs));
290 - n = wcrtomb(bracket, ch, &mbs);
291 + n = wcrtomb_l(bracket, ch, &mbs, p->g->loc);
292 assert(n != (size_t)-1);
293 bracket[n] = ']';
294 bracket[n + 1] = '\0';
295 @@ -971,7 +1017,7 @@
296 {
297 cset *cs;
298
299 - if ((p->g->cflags&REG_ICASE) && iswalpha(ch) && othercase(ch) != ch)
300 + if ((p->g->cflags&REG_ICASE) && iswalpha_l(ch, p->g->loc) && othercase(ch, p->g->loc) != ch)
301 bothcases(p, ch);
302 else if ((ch & OPDMASK) == ch)
303 EMIT(OCHAR, ch);
304 @@ -1039,6 +1085,9 @@
305 switch (REP(MAP(from), MAP(to))) {
306 case REP(0, 0): /* must be user doing this */
307 DROP(finish-start); /* drop the operand */
308 +#if __DARWIN_UNIX03
309 + p->zerorepeats++;
310 +#endif /* __DARWIN_UNIX03 */
311 break;
312 case REP(0, 1): /* as x{1,1}? */
313 case REP(0, N): /* as x{1,n}? */
314 @@ -1099,7 +1148,7 @@
315 size_t n;
316
317 memset(&mbs, 0, sizeof(mbs));
318 - n = mbrtowc(&wc, p->next, p->end - p->next, &mbs);
319 + n = mbrtowc_l(&wc, p->next, p->end - p->next, &mbs, p->g->loc);
320 if (n == (size_t)-1 || n == (size_t)-2) {
321 SETERROR(REG_ILLSEQ);
322 return (0);
323 @@ -1172,13 +1221,14 @@
324 - returning it if so, otherwise returning OUT.
325 */
326 static wint_t
327 -singleton(cs)
328 +singleton(cs, loc)
329 cset *cs;
330 +locale_t loc;
331 {
332 wint_t i, s, n;
333
334 for (i = n = 0; i < NC; i++)
335 - if (CHIN(cs, i)) {
336 + if (CHIN(cs, i, loc)) {
337 n++;
338 s = i;
339 }
340 @@ -1215,9 +1265,9 @@
341 cs->wides[cs->nwides++] = ch;
342 }
343 if (cs->icase) {
344 - if ((nch = towlower(ch)) < NC)
345 + if ((nch = towlower_l(ch, p->g->loc)) < NC)
346 cs->bmp[nch >> 3] |= 1 << (nch & 7);
347 - if ((nch = towupper(ch)) < NC)
348 + if ((nch = towupper_l(ch, p->g->loc)) < NC)
349 cs->bmp[nch >> 3] |= 1 << (nch & 7);
350 }
351 }
352 @@ -1262,7 +1312,7 @@
353 wctype_t *newtypes;
354
355 for (i = 0; i < NC; i++)
356 - if (iswctype(i, wct))
357 + if (iswctype_l(i, wct, p->g->loc))
358 CHadd(p, cs, i);
359 newtypes = realloc(cs->types, (cs->ntypes + 1) *
360 sizeof(*cs->types));
361 @@ -1451,6 +1501,7 @@
362 char buf[MB_LEN_MAX];
363 size_t clen;
364 mbstate_t mbs;
365 + struct __xlocale_st_runelocale *rl = p->g->loc->__lc_ctype;
366
367 /* avoid making error situations worse */
368 if (p->error != 0)
369 @@ -1461,8 +1512,8 @@
370 * multibyte character strings, but it's safe for at least
371 * UTF-8 (see RFC 3629).
372 */
373 - if (MB_CUR_MAX > 1 &&
374 - strcmp(_CurrentRuneLocale->__encoding, "UTF-8") != 0)
375 + if (rl->__mb_cur_max > 1 &&
376 + strcmp(rl->_CurrentRuneLocale.__encoding, "UTF-8") != 0)
377 return;
378
379 /* find the longest OCHAR sequence in strip */
380 @@ -1478,7 +1529,7 @@
381 memset(&mbs, 0, sizeof(mbs));
382 newstart = scan - 1;
383 }
384 - clen = wcrtomb(buf, OPND(s), &mbs);
385 + clen = wcrtomb_l(buf, OPND(s), &mbs, p->g->loc);
386 if (clen == (size_t)-1)
387 goto toohard;
388 newlen += clen;
389 @@ -1597,7 +1648,7 @@
390 while (cp < g->must + g->mlen) {
391 while (OP(s = *scan++) != OCHAR)
392 continue;
393 - clen = wcrtomb(cp, OPND(s), &mbs);
394 + clen = wcrtomb_l(cp, OPND(s), &mbs, p->g->loc);
395 assert(clen != (size_t)-1);
396 cp += clen;
397 }