]> git.saurik.com Git - apple/libc.git/blob - regex/FreeBSD/regcomp.c.patch
Libc-763.11.tar.gz
[apple/libc.git] / regex / FreeBSD / regcomp.c.patch
1 --- regcomp.c.orig 2010-06-21 14:05:04.000000000 -0700
2 +++ regcomp.c 2010-06-21 14:23:51.000000000 -0700
3 @@ -39,6 +39,8 @@ static char sccsid[] = "@(#)regcomp.c 8.
4 #include <sys/cdefs.h>
5 __FBSDID("$FreeBSD: src/lib/libc/regex/regcomp.c,v 1.36 2007/06/11 03:05:54 delphij Exp $");
6
7 +#include "xlocale_private.h"
8 +
9 #include <sys/types.h>
10 #include <stdio.h>
11 #include <string.h>
12 @@ -69,6 +71,9 @@ struct parse {
13 sopno ssize; /* malloced strip size (allocated) */
14 sopno slen; /* malloced strip length (used) */
15 int ncsalloc; /* number of csets allocated */
16 +#if __DARWIN_UNIX03
17 + int zerorepeats;
18 +#endif /* __DARWIN_UNIX03 */
19 struct re_guts *g;
20 # define NPAREN 10 /* we need to remember () 1-9 for back refs */
21 sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
22 @@ -93,7 +98,7 @@ static void p_b_cclass(struct parse *p,
23 static void p_b_eclass(struct parse *p, cset *cs);
24 static wint_t p_b_symbol(struct parse *p);
25 static wint_t p_b_coll_elem(struct parse *p, wint_t endc);
26 -static wint_t othercase(wint_t ch);
27 +static wint_t othercase(wint_t ch, locale_t loc);
28 static void bothcases(struct parse *p, wint_t ch);
29 static void ordinary(struct parse *p, wint_t ch);
30 static void nonnewline(struct parse *p);
31 @@ -104,7 +109,7 @@ static void freeset(struct parse *p, cse
32 static void CHadd(struct parse *p, cset *cs, wint_t ch);
33 static void CHaddrange(struct parse *p, cset *cs, wint_t min, wint_t max);
34 static void CHaddtype(struct parse *p, cset *cs, wctype_t wct);
35 -static wint_t singleton(cset *cs);
36 +static wint_t singleton(cset *cs, locale_t loc);
37 static sopno dupl(struct parse *p, sopno start, sopno finish);
38 static void doemit(struct parse *p, sop op, size_t opnd);
39 static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
40 @@ -222,10 +227,14 @@ regcomp(regex_t * __restrict preg,
41 p->end = p->next + len;
42 p->error = 0;
43 p->ncsalloc = 0;
44 +#if __DARWIN_UNIX03
45 + p->zerorepeats = 0;
46 +#endif /* __DARWIN_UNIX03 */
47 for (i = 0; i < NPAREN; i++) {
48 p->pbegin[i] = 0;
49 p->pend[i] = 0;
50 }
51 + g->loc = __current_locale();
52 g->sets = NULL;
53 g->ncsets = 0;
54 g->cflags = cflags;
55 @@ -302,8 +311,12 @@ p_ere(struct parse *p,
56 conc = HERE();
57 while (MORE() && (c = PEEK()) != '|' && c != stop)
58 p_ere_exp(p);
59 +#if __DARWIN_UNIX03
60 + if (!p->zerorepeats) REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
61 + else p->zerorepeats--;
62 +#else
63 (void)REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
64 -
65 +#endif
66 if (!EAT('|'))
67 break; /* NOTE BREAK OUT */
68
69 @@ -410,7 +423,7 @@ p_ere_exp(struct parse *p)
70 ordinary(p, wc);
71 break;
72 case '{': /* okay as ordinary except if digit follows */
73 - (void)REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
74 + (void)REQUIRE(!MORE() || !isdigit_l((uch)PEEK(), p->g->loc), REG_BADRPT);
75 /* FALLTHROUGH */
76 default:
77 p->next--;
78 @@ -424,7 +437,7 @@ p_ere_exp(struct parse *p)
79 c = PEEK();
80 /* we call { a repetition if followed by a digit */
81 if (!( c == '*' || c == '+' || c == '?' ||
82 - (c == '{' && MORE2() && isdigit((uch)PEEK2())) ))
83 + (c == '{' && MORE2() && isdigit_l((uch)PEEK2(), p->g->loc)) ))
84 return; /* no repetition, we're done */
85 NEXT();
86
87 @@ -453,7 +466,7 @@ p_ere_exp(struct parse *p)
88 case '{':
89 count = p_count(p);
90 if (EAT(',')) {
91 - if (isdigit((uch)PEEK())) {
92 + if (isdigit_l((uch)PEEK(), p->g->loc)) {
93 count2 = p_count(p);
94 (void)REQUIRE(count <= count2, REG_BADBR);
95 } else /* single number with comma */
96 @@ -474,7 +487,7 @@ p_ere_exp(struct parse *p)
97 return;
98 c = PEEK();
99 if (!( c == '*' || c == '+' || c == '?' ||
100 - (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) )
101 + (c == '{' && MORE2() && isdigit_l((uch)PEEK2(), p->g->loc)) ) )
102 return;
103 SETERROR(REG_BADRPT);
104 }
105 @@ -486,7 +499,12 @@ p_ere_exp(struct parse *p)
106 static void
107 p_str(struct parse *p)
108 {
109 +#if __DARWIN_UNIX03
110 + if (!p->zerorepeats) REQUIRE(MORE(), REG_EMPTY);
111 + else p->zerorepeats--;
112 +#else /* !__DARWIN_UNIX03 */
113 (void)REQUIRE(MORE(), REG_EMPTY);
114 +#endif /* __DARWIN_UNIX03 */
115 while (MORE())
116 ordinary(p, WGETNEXT());
117 }
118 @@ -525,8 +543,12 @@ p_bre(struct parse *p,
119 p->g->iflags |= USEEOL;
120 p->g->neol++;
121 }
122 -
123 +#if __DARWIN_UNIX03
124 + if (!p->zerorepeats) REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */
125 + else p->zerorepeats--;
126 +#else /* !__DARWIN_UNIX03 */
127 (void)REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */
128 +#endif /* __DARWIN_UNIX03 */
129 }
130
131 /*
132 @@ -599,12 +621,22 @@ p_simp_re(struct parse *p,
133 i = (c&~BACKSL) - '0';
134 assert(i < NPAREN);
135 if (p->pend[i] != 0) {
136 +#if __DARWIN_UNIX03
137 + int skip = 1;
138 +#endif /* __DARWIN_UNIX03 */
139 assert(i <= p->g->nsub);
140 EMIT(OBACK_, i);
141 assert(p->pbegin[i] != 0);
142 assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
143 assert(OP(p->strip[p->pend[i]]) == ORPAREN);
144 +#if __DARWIN_UNIX03
145 + if (OP(p->strip[p->pbegin[i]+skip]) == OBOL) {
146 + skip++; /* don't dup anchor in subexp */
147 + }
148 + (void) dupl(p, p->pbegin[i]+skip, p->pend[i]);
149 +#else /* !__DARWIN_UNIX03 */
150 (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
151 +#endif /* __DARWIN_UNIX03 */
152 EMIT(O_BACK, i);
153 } else
154 SETERROR(REG_ESUBREG);
155 @@ -627,9 +659,10 @@ p_simp_re(struct parse *p,
156 INSERT(OQUEST_, pos);
157 ASTERN(O_QUEST, pos);
158 } else if (EATTWO('\\', '{')) {
159 + (void)REQUIRE(MORE(), REG_EBRACE);
160 count = p_count(p);
161 if (EAT(',')) {
162 - if (MORE() && isdigit((uch)PEEK())) {
163 + if (MORE() && isdigit_l((uch)PEEK(), p->g->loc)) {
164 count2 = p_count(p);
165 (void)REQUIRE(count <= count2, REG_BADBR);
166 } else /* single number with comma */
167 @@ -659,7 +692,7 @@ p_count(struct parse *p)
168 int count = 0;
169 int ndigits = 0;
170
171 - while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
172 + while (MORE() && isdigit_l((uch)PEEK(), p->g->loc) && count <= DUPMAX) {
173 count = count*10 + (GETNEXT() - '0');
174 ndigits++;
175 }
176 @@ -697,10 +730,22 @@ p_bracket(struct parse *p)
177 cs->icase = 1;
178 if (EAT('^'))
179 cs->invert = 1;
180 +#if __DARWIN_UNIX03
181 + if (PEEK2() != '-' && PEEK2() != ']') { /* Don't eat '-' or ']' if they're part of ranges
182 + * but do process [^-] */
183 if (EAT(']'))
184 CHadd(p, cs, ']');
185 else if (EAT('-'))
186 CHadd(p, cs, '-');
187 + }
188 + if (MORE() && !SEETWO('-',']')) /* Parse RE []-'] */
189 + p_b_term(p, cs);
190 +#else /* !__DARWIN_UNIX03 */
191 + if (EAT(']'))
192 + CHadd(p, cs, ']');
193 + else if (EAT('-'))
194 + CHadd(p, cs, '-');
195 +#endif /* __DARWIN_UNIX03 */
196 while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
197 p_b_term(p, cs);
198 if (EAT('-'))
199 @@ -713,7 +758,7 @@ p_bracket(struct parse *p)
200 if (cs->invert && p->g->cflags&REG_NEWLINE)
201 cs->bmp['\n' >> 3] |= 1 << ('\n' & 7);
202
203 - if ((ch = singleton(cs)) != OUT) { /* optimize singleton sets */
204 + if ((ch = singleton(cs, p->g->loc)) != OUT) { /* optimize singleton sets */
205 ordinary(p, ch);
206 freeset(p, cs);
207 } else
208 @@ -737,8 +782,16 @@ p_b_term(struct parse *p, cset *cs)
209 c = (MORE2()) ? PEEK2() : '\0';
210 break;
211 case '-':
212 +#if __DARWIN_UNIX03
213 + if (PEEK2() != '-') { /* Allow [---] */
214 + SETERROR(REG_ERANGE);
215 + return; /* NOTE RETURN */
216 + } else
217 + c = '-';
218 +#else /* !__DARWIN_UNIX03 */
219 SETERROR(REG_ERANGE);
220 return; /* NOTE RETURN */
221 +#endif /* __DARWIN_UNIX03 */
222 break;
223 default:
224 c = '\0';
225 @@ -759,7 +812,11 @@ p_b_term(struct parse *p, cset *cs)
226 NEXT2();
227 (void)REQUIRE(MORE(), REG_EBRACK);
228 c = PEEK();
229 +#if __DARWIN_UNIX03
230 + REQUIRE(c != '-', REG_ECOLLATE); /* allow [=]=] */
231 +#else /* !__DARWIN_UNIX03 */
232 (void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
233 +#endif /* __DARWIN_UNIX03 */
234 p_b_eclass(p, cs);
235 (void)REQUIRE(MORE(), REG_EBRACK);
236 (void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
237 @@ -778,14 +835,14 @@ p_b_term(struct parse *p, cset *cs)
238 if (start == finish)
239 CHadd(p, cs, start);
240 else {
241 - if (__collate_load_error) {
242 + if (p->g->loc->__collate_load_error) {
243 (void)REQUIRE((uch)start <= (uch)finish, REG_ERANGE);
244 CHaddrange(p, cs, start, finish);
245 } else {
246 - (void)REQUIRE(__collate_range_cmp(start, finish) <= 0, REG_ERANGE);
247 + (void)REQUIRE(__collate_range_cmp(start, finish, p->g->loc) <= 0, REG_ERANGE);
248 for (i = 0; i <= UCHAR_MAX; i++) {
249 - if ( __collate_range_cmp(start, i) <= 0
250 - && __collate_range_cmp(i, finish) <= 0
251 + if ( __collate_range_cmp(start, i, p->g->loc) <= 0
252 + && __collate_range_cmp(i, finish, p->g->loc) <= 0
253 )
254 CHadd(p, cs, i);
255 }
256 @@ -807,7 +864,7 @@ p_b_cclass(struct parse *p, cset *cs)
257 wctype_t wct;
258 char clname[16];
259
260 - while (MORE() && isalpha((uch)PEEK()))
261 + while (MORE() && isalpha_l((uch)PEEK(), p->g->loc))
262 NEXT();
263 len = p->next - sp;
264 if (len >= sizeof(clname) - 1) {
265 @@ -816,7 +873,7 @@ p_b_cclass(struct parse *p, cset *cs)
266 }
267 memcpy(clname, sp, len);
268 clname[len] = '\0';
269 - if ((wct = wctype(clname)) == 0) {
270 + if ((wct = wctype_l(clname, p->g->loc)) == 0) {
271 SETERROR(REG_ECTYPE);
272 return;
273 }
274 @@ -826,14 +883,38 @@ p_b_cclass(struct parse *p, cset *cs)
275 /*
276 - p_b_eclass - parse an equivalence-class name and deal with it
277 == static void p_b_eclass(struct parse *p, cset *cs);
278 - *
279 - * This implementation is incomplete. xxx
280 */
281 static void
282 p_b_eclass(struct parse *p, cset *cs)
283 {
284 - wint_t c;
285 -
286 + char *sp = p->next;
287 + int len, ec;
288 + mbstate_t mbs;
289 + int *newequiv_classes;
290 + wint_t c;
291 +
292 + while (MORE() && !SEETWO('=', ']'))
293 + NEXT();
294 + if (!MORE()) {
295 + SETERROR(REG_EBRACK);
296 + return;
297 + }
298 + len = p->next - sp;
299 + memset(&mbs, 0, sizeof(mbs));
300 + ec = __collate_equiv_class(sp, len, &mbs, p->g->loc);
301 + if (ec > 0) {
302 + newequiv_classes = realloc(cs->equiv_classes,
303 + (cs->nequiv_classes + 1) * sizeof(*cs->equiv_classes));
304 + if (newequiv_classes == NULL) {
305 + SETERROR(REG_ESPACE);
306 + return;
307 + }
308 + cs->equiv_classes = newequiv_classes;
309 + cs->equiv_classes[cs->nequiv_classes++] = ec;
310 + return;
311 + }
312 + /* not an equivalence class, so fall back to a collating element */
313 + p->next = sp;
314 c = p_b_coll_elem(p, '=');
315 CHadd(p, cs, c);
316 }
317 @@ -866,10 +947,10 @@ p_b_coll_elem(struct parse *p,
318 wint_t endc) /* name ended by endc,']' */
319 {
320 char *sp = p->next;
321 - struct cname *cp;
322 + const struct cname *cp;
323 int len;
324 mbstate_t mbs;
325 - wchar_t wc;
326 + wchar_t wbuf[16];
327 size_t clen;
328
329 while (MORE() && !SEETWO(endc, ']'))
330 @@ -883,9 +964,10 @@ p_b_coll_elem(struct parse *p,
331 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
332 return(cp->code); /* known name */
333 memset(&mbs, 0, sizeof(mbs));
334 - if ((clen = mbrtowc(&wc, sp, len, &mbs)) == len)
335 - return (wc); /* single character */
336 - else if (clen == (size_t)-1 || clen == (size_t)-2)
337 + clen = __collate_collating_symbol(wbuf, 16, sp, len, &mbs, p->g->loc);
338 + if (clen == 1)
339 + return (*wbuf); /* single character */
340 + else if (clen == (size_t)-1)
341 SETERROR(REG_ILLSEQ);
342 else
343 SETERROR(REG_ECOLLATE); /* neither */
344 @@ -894,16 +976,16 @@ p_b_coll_elem(struct parse *p,
345
346 /*
347 - othercase - return the case counterpart of an alphabetic
348 - == static char othercase(int ch);
349 + == static wint_t othercase(wint_t ch, locale_t loc);
350 */
351 static wint_t /* if no counterpart, return ch */
352 -othercase(wint_t ch)
353 +othercase(wint_t ch, locale_t loc)
354 {
355 - assert(iswalpha(ch));
356 - if (iswupper(ch))
357 - return(towlower(ch));
358 - else if (iswlower(ch))
359 - return(towupper(ch));
360 + assert(iswalpha_l(ch, loc));
361 + if (iswupper_l(ch, loc))
362 + return(towlower_l(ch, loc));
363 + else if (iswlower_l(ch, loc))
364 + return(towupper_l(ch, loc));
365 else /* peculiar, but could happen */
366 return(ch);
367 }
368 @@ -923,10 +1005,10 @@ bothcases(struct parse *p, wint_t ch)
369 size_t n;
370 mbstate_t mbs;
371
372 - assert(othercase(ch) != ch); /* p_bracket() would recurse */
373 + assert(othercase(ch, p->g->loc) != ch); /* p_bracket() would recurse */
374 p->next = bracket;
375 memset(&mbs, 0, sizeof(mbs));
376 - n = wcrtomb(bracket, ch, &mbs);
377 + n = wcrtomb_l(bracket, ch, &mbs, p->g->loc);
378 assert(n != (size_t)-1);
379 bracket[n] = ']';
380 bracket[n + 1] = '\0';
381 @@ -946,7 +1028,7 @@ ordinary(struct parse *p, wint_t ch)
382 {
383 cset *cs;
384
385 - if ((p->g->cflags&REG_ICASE) && iswalpha(ch) && othercase(ch) != ch)
386 + if ((p->g->cflags&REG_ICASE) && iswalpha_l(ch, p->g->loc) && othercase(ch, p->g->loc) != ch)
387 bothcases(p, ch);
388 else if ((ch & OPDMASK) == ch)
389 EMIT(OCHAR, ch);
390 @@ -1012,10 +1094,22 @@ repeat(struct parse *p,
391 switch (REP(MAP(from), MAP(to))) {
392 case REP(0, 0): /* must be user doing this */
393 DROP(finish-start); /* drop the operand */
394 +#if __DARWIN_UNIX03
395 + p->zerorepeats++;
396 +#endif /* __DARWIN_UNIX03 */
397 break;
398 + case REP(0, INF): /* as x{1,}? */
399 +#if __DARWIN_UNIX03
400 + /* this case does not require the (y|) trick, so no KLUDGE */
401 + /* x{0,} is just like x*: emit it as (x+)? */
402 + INSERT(OPLUS_, start);
403 + ASTERN(O_PLUS, start);
404 + INSERT(OQUEST_, start);
405 + ASTERN(O_QUEST, start);
406 + break;
407 +#endif /* __DARWIN_UNIX03 */
408 case REP(0, 1): /* as x{1,1}? */
409 case REP(0, N): /* as x{1,n}? */
410 - case REP(0, INF): /* as x{1,}? */
411 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
412 INSERT(OCH_, start); /* offset is wrong... */
413 repeat(p, start+1, 1, to);
414 @@ -1029,6 +1123,10 @@ repeat(struct parse *p,
415 /* done */
416 break;
417 case REP(1, N): /* as x?x{1,n-1} */
418 +#if __DARWIN_UNIX03
419 + INSERT(OQUEST_, start);
420 + ASTERN(O_QUEST, start);
421 +#else /* !__DARWIN_UNIX03 */
422 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
423 INSERT(OCH_, start);
424 ASTERN(OOR1, start);
425 @@ -1036,6 +1134,7 @@ repeat(struct parse *p,
426 EMIT(OOR2, 0); /* offset very wrong... */
427 AHEAD(THERE()); /* ...so fix it */
428 ASTERN(O_CH, THERETHERE());
429 +#endif /* __DARWIN_UNIX03 */
430 copy = dupl(p, start+1, finish+1);
431 assert(copy == finish+4);
432 repeat(p, copy, 1, to-1);
433 @@ -1071,7 +1170,7 @@ wgetnext(struct parse *p)
434 size_t n;
435
436 memset(&mbs, 0, sizeof(mbs));
437 - n = mbrtowc(&wc, p->next, p->end - p->next, &mbs);
438 + n = mbrtowc_l(&wc, p->next, p->end - p->next, &mbs, p->g->loc);
439 if (n == (size_t)-1 || n == (size_t)-2) {
440 SETERROR(REG_ILLSEQ);
441 return (0);
442 @@ -1139,12 +1238,12 @@ freeset(struct parse *p, cset *cs)
443 - returning it if so, otherwise returning OUT.
444 */
445 static wint_t
446 -singleton(cset *cs)
447 +singleton(cset *cs, locale_t loc)
448 {
449 wint_t i, s, n;
450
451 for (i = n = 0; i < NC; i++)
452 - if (CHIN(cs, i)) {
453 + if (CHIN(cs, i, loc)) {
454 n++;
455 s = i;
456 }
457 @@ -1178,9 +1277,9 @@ CHadd(struct parse *p, cset *cs, wint_t
458 cs->wides[cs->nwides++] = ch;
459 }
460 if (cs->icase) {
461 - if ((nch = towlower(ch)) < NC)
462 + if ((nch = towlower_l(ch, p->g->loc)) < NC)
463 cs->bmp[nch >> 3] |= 1 << (nch & 7);
464 - if ((nch = towupper(ch)) < NC)
465 + if ((nch = towupper_l(ch, p->g->loc)) < NC)
466 cs->bmp[nch >> 3] |= 1 << (nch & 7);
467 }
468 }
469 @@ -1219,7 +1318,7 @@ CHaddtype(struct parse *p, cset *cs, wct
470 wctype_t *newtypes;
471
472 for (i = 0; i < NC; i++)
473 - if (iswctype(i, wct))
474 + if (iswctype_l(i, wct, p->g->loc))
475 CHadd(p, cs, i);
476 newtypes = realloc(cs->types, (cs->ntypes + 1) *
477 sizeof(*cs->types));
478 @@ -1391,6 +1490,7 @@ findmust(struct parse *p, struct re_guts
479 char buf[MB_LEN_MAX];
480 size_t clen;
481 mbstate_t mbs;
482 + struct __xlocale_st_runelocale *rl = p->g->loc->__lc_ctype;
483
484 /* avoid making error situations worse */
485 if (p->error != 0)
486 @@ -1401,8 +1501,8 @@ findmust(struct parse *p, struct re_guts
487 * multibyte character strings, but it's safe for at least
488 * UTF-8 (see RFC 3629).
489 */
490 - if (MB_CUR_MAX > 1 &&
491 - strcmp(_CurrentRuneLocale->__encoding, "UTF-8") != 0)
492 + if (rl->__mb_cur_max > 1 &&
493 + strcmp(rl->_CurrentRuneLocale.__encoding, "UTF-8") != 0)
494 return;
495
496 /* find the longest OCHAR sequence in strip */
497 @@ -1418,7 +1518,7 @@ findmust(struct parse *p, struct re_guts
498 memset(&mbs, 0, sizeof(mbs));
499 newstart = scan - 1;
500 }
501 - clen = wcrtomb(buf, OPND(s), &mbs);
502 + clen = wcrtomb_l(buf, OPND(s), &mbs, p->g->loc);
503 if (clen == (size_t)-1)
504 goto toohard;
505 newlen += clen;
506 @@ -1537,7 +1637,7 @@ findmust(struct parse *p, struct re_guts
507 while (cp < g->must + g->mlen) {
508 while (OP(s = *scan++) != OCHAR)
509 continue;
510 - clen = wcrtomb(cp, OPND(s), &mbs);
511 + clen = wcrtomb_l(cp, OPND(s), &mbs, p->g->loc);
512 assert(clen != (size_t)-1);
513 cp += clen;
514 }