]> git.saurik.com Git - apple/libc.git/blob - locale/FreeBSD/collate.c.patch
8a08e1641b8e97c03fa935d9a833d17c1c491c3c
[apple/libc.git] / locale / FreeBSD / collate.c.patch
1 --- collate.c.bsdnew 2009-11-09 15:05:25.000000000 -0800
2 +++ collate.c 2009-11-09 15:20:20.000000000 -0800
3 @@ -28,14 +28,26 @@
4 #include <sys/cdefs.h>
5 __FBSDID("$FreeBSD: src/lib/libc/locale/collate.c,v 1.35 2005/02/27 20:31:13 ru Exp $");
6
7 +#include "xlocale_private.h"
8 +/* assumes the locale_t variable is named loc */
9 +#define __collate_chain_equiv_table (loc->__lc_collate->__chain_equiv_table)
10 +#define __collate_chain_pri_table (loc->__lc_collate->__chain_pri_table)
11 +#define __collate_char_pri_table (loc->__lc_collate->__char_pri_table)
12 +#define __collate_info (&loc->__lc_collate->__info)
13 +#define __collate_large_char_pri_table (loc->__lc_collate->__large_char_pri_table)
14 +#define __collate_substitute_table (loc->__lc_collate->__substitute_table)
15 +
16 #include "namespace.h"
17 #include <arpa/inet.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 +#include <stddef.h>
21 #include <string.h>
22 +#include <wchar.h>
23 #include <errno.h>
24 #include <unistd.h>
25 #include <sysexits.h>
26 +#include <ctype.h>
27 #include "un-namespace.h"
28
29 #include "collate.h"
30 @@ -44,36 +56,50 @@ __FBSDID("$FreeBSD: src/lib/libc/locale/
31
32 #include "libc_private.h"
33
34 -int __collate_load_error = 1;
35 -int __collate_substitute_nontrivial;
36 -
37 -u_char __collate_substitute_table[UCHAR_MAX + 1][STR_LEN];
38 -struct __collate_st_char_pri __collate_char_pri_table[UCHAR_MAX + 1];
39 -struct __collate_st_chain_pri *__collate_chain_pri_table;
40 -
41 +#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
42 +static void wntohl(wchar_t *, int);
43 +#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
44 void __collate_err(int ex, const char *f) __dead2;
45
46 -int
47 -__collate_load_tables(const char *encoding)
48 +/*
49 + * Normally, the __collate_* routines should all be __private_extern__,
50 + * but grep is using them (3715846). Until we can provide an alternative,
51 + * we leave them public, and provide a read-only __collate_load_error variable
52 + */
53 +#undef __collate_load_error
54 +int __collate_load_error = 1;
55 +
56 +__private_extern__ int
57 +__collate_load_tables(const char *encoding, locale_t loc)
58 {
59 FILE *fp;
60 - int i, saverr, chains;
61 - uint32_t u32;
62 + int i, saverr, chains, z;
63 char strbuf[STR_LEN], buf[PATH_MAX];
64 - void *TMP_substitute_table, *TMP_char_pri_table, *TMP_chain_pri_table;
65 - static char collate_encoding[ENCODING_LEN + 1];
66 + struct __xlocale_st_collate *TMP;
67 + static struct __xlocale_st_collate *cache = NULL;
68 + struct __collate_st_info info;
69 + void *vp;
70
71 /* 'encoding' must be already checked. */
72 if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
73 - __collate_load_error = 1;
74 + loc->__collate_load_error = 1;
75 + if (loc == &__global_locale)
76 + __collate_load_error = 1;
77 + XL_RELEASE(loc->__lc_collate);
78 + loc->__lc_collate = NULL;
79 return (_LDP_CACHE);
80 }
81
82 /*
83 * If the locale name is the same as our cache, use the cache.
84 */
85 - if (strcmp(encoding, collate_encoding) == 0) {
86 - __collate_load_error = 0;
87 + if (cache && strcmp(encoding, cache->__encoding) == 0) {
88 + loc->__collate_load_error = 0;
89 + if (loc == &__global_locale)
90 + __collate_load_error = 0;
91 + XL_RELEASE(loc->__lc_collate);
92 + loc->__lc_collate = cache;
93 + XL_RETAIN(loc->__lc_collate);
94 return (_LDP_CACHE);
95 }
96
97 @@ -97,9 +123,7 @@ __collate_load_tables(const char *encodi
98 return (_LDP_ERROR);
99 }
100 chains = -1;
101 - if (strcmp(strbuf, COLLATE_VERSION) == 0)
102 - chains = 0;
103 - else if (strcmp(strbuf, COLLATE_VERSION1_2) == 0)
104 + if (strcmp(strbuf, COLLATE_VERSION1_1A) == 0)
105 chains = 1;
106 if (chains < 0) {
107 (void)fclose(fp);
108 @@ -107,13 +131,21 @@ __collate_load_tables(const char *encodi
109 return (_LDP_ERROR);
110 }
111 if (chains) {
112 - if (fread(&u32, sizeof(u32), 1, fp) != 1) {
113 + if (fread(&info, sizeof(info), 1, fp) != 1) {
114 saverr = errno;
115 (void)fclose(fp);
116 errno = saverr;
117 return (_LDP_ERROR);
118 }
119 - if ((chains = (int)ntohl(u32)) < 1) {
120 +#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
121 + for(z = 0; z < info.directive_count; z++) {
122 + info.undef_pri[z] = ntohl(info.undef_pri[z]);
123 + info.subst_count[z] = ntohl(info.subst_count[z]);
124 + }
125 + info.chain_count = ntohl(info.chain_count);
126 + info.large_pri_count = ntohl(info.large_pri_count);
127 +#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
128 + if ((chains = info.chain_count) < 0) {
129 (void)fclose(fp);
130 errno = EFTYPE;
131 return (_LDP_ERROR);
132 @@ -121,143 +153,446 @@ __collate_load_tables(const char *encodi
133 } else
134 chains = TABLE_SIZE;
135
136 - if ((TMP_substitute_table =
137 - malloc(sizeof(__collate_substitute_table))) == NULL) {
138 + i = sizeof(struct __xlocale_st_collate)
139 + + sizeof(struct __collate_st_chain_pri) * chains
140 + + sizeof(struct __collate_st_large_char_pri) * info.large_pri_count;
141 + for(z = 0; z < info.directive_count; z++)
142 + i += sizeof(struct __collate_st_subst) * info.subst_count[z];
143 + if ((TMP = (struct __xlocale_st_collate *)malloc(i)) == NULL) {
144 saverr = errno;
145 (void)fclose(fp);
146 errno = saverr;
147 return (_LDP_ERROR);
148 }
149 - if ((TMP_char_pri_table =
150 - malloc(sizeof(__collate_char_pri_table))) == NULL) {
151 - saverr = errno;
152 - free(TMP_substitute_table);
153 - (void)fclose(fp);
154 - errno = saverr;
155 - return (_LDP_ERROR);
156 - }
157 - if ((TMP_chain_pri_table =
158 - malloc(sizeof(*__collate_chain_pri_table) * chains)) == NULL) {
159 - saverr = errno;
160 - free(TMP_substitute_table);
161 - free(TMP_char_pri_table);
162 - (void)fclose(fp);
163 - errno = saverr;
164 - return (_LDP_ERROR);
165 - }
166 + TMP->__refcount = 2; /* one for the locale, one for the cache */
167 + TMP->__free_extra = NULL;
168
169 #define FREAD(a, b, c, d) \
170 { \
171 if (fread(a, b, c, d) != c) { \
172 saverr = errno; \
173 - free(TMP_substitute_table); \
174 - free(TMP_char_pri_table); \
175 - free(TMP_chain_pri_table); \
176 + free(TMP); \
177 (void)fclose(d); \
178 errno = saverr; \
179 return (_LDP_ERROR); \
180 } \
181 }
182
183 - FREAD(TMP_substitute_table, sizeof(__collate_substitute_table), 1, fp);
184 - FREAD(TMP_char_pri_table, sizeof(__collate_char_pri_table), 1, fp);
185 - FREAD(TMP_chain_pri_table,
186 - sizeof(*__collate_chain_pri_table), chains, fp);
187 + /* adjust size to read the remaining in one chunk */
188 + i -= offsetof(struct __xlocale_st_collate, __char_pri_table);
189 + FREAD(TMP->__char_pri_table, i, 1, fp);
190 (void)fclose(fp);
191
192 - (void)strcpy(collate_encoding, encoding);
193 - if (__collate_substitute_table_ptr != NULL)
194 - free(__collate_substitute_table_ptr);
195 - __collate_substitute_table_ptr = TMP_substitute_table;
196 - if (__collate_char_pri_table_ptr != NULL)
197 - free(__collate_char_pri_table_ptr);
198 - __collate_char_pri_table_ptr = TMP_char_pri_table;
199 - for (i = 0; i < UCHAR_MAX + 1; i++) {
200 - __collate_char_pri_table[i].prim =
201 - ntohl(__collate_char_pri_table[i].prim);
202 - __collate_char_pri_table[i].sec =
203 - ntohl(__collate_char_pri_table[i].sec);
204 - }
205 - if (__collate_chain_pri_table != NULL)
206 - free(__collate_chain_pri_table);
207 - __collate_chain_pri_table = TMP_chain_pri_table;
208 - for (i = 0; i < chains; i++) {
209 - __collate_chain_pri_table[i].prim =
210 - ntohl(__collate_chain_pri_table[i].prim);
211 - __collate_chain_pri_table[i].sec =
212 - ntohl(__collate_chain_pri_table[i].sec);
213 - }
214 - __collate_substitute_nontrivial = 0;
215 - for (i = 0; i < UCHAR_MAX + 1; i++) {
216 - if (__collate_substitute_table[i][0] != i ||
217 - __collate_substitute_table[i][1] != 0) {
218 - __collate_substitute_nontrivial = 1;
219 - break;
220 + vp = (void *)(TMP + 1);
221 +
222 + /* the COLLATE_SUBST_DUP optimization relies on COLL_WEIGHTS_MAX == 2 */
223 + if (info.subst_count[0] > 0) {
224 + TMP->__substitute_table[0] = (struct __collate_st_subst *)vp;
225 + vp += info.subst_count[0] * sizeof(struct __collate_st_subst);
226 + } else
227 + TMP->__substitute_table[0] = NULL;
228 + if (info.flags & COLLATE_SUBST_DUP)
229 + TMP->__substitute_table[1] = TMP->__substitute_table[0];
230 + else if (info.subst_count[1] > 0) {
231 + TMP->__substitute_table[1] = (struct __collate_st_subst *)vp;
232 + vp += info.subst_count[1] * sizeof(struct __collate_st_subst);
233 + } else
234 + TMP->__substitute_table[1] = NULL;
235 +
236 + if (chains > 0) {
237 + TMP->__chain_pri_table = (struct __collate_st_chain_pri *)vp;
238 + vp += chains * sizeof(struct __collate_st_chain_pri);
239 + } else
240 + TMP->__chain_pri_table = NULL;
241 + if (info.large_pri_count > 0)
242 + TMP->__large_char_pri_table = (struct __collate_st_large_char_pri *)vp;
243 + else
244 + TMP->__large_char_pri_table = NULL;
245 +
246 +#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
247 + {
248 + struct __collate_st_char_pri *p = TMP->__char_pri_table;
249 + for(i = UCHAR_MAX + 1; i-- > 0; p++) {
250 + for(z = 0; z < info.directive_count; z++)
251 + p->pri[z] = ntohl(p->pri[z]);
252 }
253 }
254 - __collate_load_error = 0;
255 + for(z = 0; z < info.directive_count; z++)
256 + if (info.subst_count[z] > 0) {
257 + struct __collate_st_subst *p = TMP->__substitute_table[z];
258 + for(i = info.subst_count[z]; i-- > 0; p++) {
259 + p->val = ntohl(p->val);
260 + wntohl(p->str, STR_LEN);
261 + }
262 + }
263 + {
264 + struct __collate_st_chain_pri *p = TMP->__chain_pri_table;
265 + for(i = chains; i-- > 0; p++) {
266 + wntohl(p->str, STR_LEN);
267 + for(z = 0; z < info.directive_count; z++)
268 + p->pri[z] = ntohl(p->pri[z]);
269 + }
270 + }
271 + if (info.large_pri_count > 0) {
272 + struct __collate_st_large_char_pri *p = TMP->__large_char_pri_table;
273 + for(i = info.large_pri_count; i-- > 0; p++) {
274 + p->val = ntohl(p->val);
275 + for(z = 0; z < info.directive_count; z++)
276 + p->pri.pri[z] = ntohl(p->pri.pri[z]);
277 + }
278 + }
279 +#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
280 + (void)strcpy(TMP->__encoding, encoding);
281 + (void)memcpy(&TMP->__info, &info, sizeof(info));
282 + XL_RELEASE(cache);
283 + cache = TMP;
284 + XL_RELEASE(loc->__lc_collate);
285 + loc->__lc_collate = cache;
286 + /* no need to retain, since we set __refcount to 2 above */
287 +
288 + loc->__collate_substitute_nontrivial = (info.subst_count[0] > 0 || info.subst_count[1] > 0);
289 + loc->__collate_load_error = 0;
290 + if (loc == &__global_locale)
291 + __collate_load_error = 0;
292
293 return (_LDP_LOADED);
294 }
295
296 -u_char *
297 -__collate_substitute(const u_char *s)
298 +static int
299 +__collate_wcsnlen(const wchar_t *s, int len)
300 +{ /* returns the length of s, counting at most len wide characters */
301 + int n = 0;
302 + while (n < len && *s) { /* test the bound first so s[len] is never read */
303 + s++;
304 + n++;
305 + }
306 + return n;
307 +}
308 +
309 +static struct __collate_st_subst *
310 +substsearch(const wchar_t key, struct __collate_st_subst *tab, int n)
311 +{
312 + int low = 0;
313 + int high = n - 1;
314 + int next, compar;
315 + struct __collate_st_subst *p;
316 + /* binary search of the n entries of tab for val == key; relies on ascending val order */
317 + while (low <= high) {
318 + next = (low + high) / 2;
319 + p = tab + next;
320 + compar = key - p->val;
321 + if (compar == 0)
322 + return p;
323 + if (compar > 0)
324 + low = next + 1;
325 + else
326 + high = next - 1;
327 + }
328 + return NULL; /* key has no entry in tab */
329 +}
330 +
331 +__private_extern__ wchar_t *
332 +__collate_substitute(const wchar_t *s, int which, locale_t loc)
333 {
334 int dest_len, len, nlen;
335 - int delta = strlen(s);
336 - u_char *dest_str = NULL;
337 + int n, delta, nsubst;
338 + wchar_t *dest_str = NULL;
339 + const wchar_t *fp;
340 + struct __collate_st_subst *subst, *match;
341
342 if (s == NULL || *s == '\0')
343 - return (__collate_strdup(""));
344 - delta += delta / 8;
345 - dest_str = malloc(dest_len = delta);
346 + return (__collate_wcsdup(L""));
347 + dest_len = wcslen(s);
348 + nsubst = __collate_info->subst_count[which];
349 + if (nsubst <= 0)
350 + return __collate_wcsdup(s);
351 + subst = __collate_substitute_table[which];
352 + delta = dest_len / 4;
353 + if (delta < 2)
354 + delta = 2;
355 + dest_str = (wchar_t *)malloc((dest_len += delta) * sizeof(wchar_t));
356 if (dest_str == NULL)
357 __collate_err(EX_OSERR, __func__);
358 len = 0;
359 while (*s) {
360 - nlen = len + strlen(__collate_substitute_table[*s]);
361 + if ((match = substsearch(*s, subst, nsubst)) != NULL) {
362 + fp = match->str;
363 + n = __collate_wcsnlen(fp, STR_LEN);
364 + } else {
365 + fp = s;
366 + n = 1;
367 + }
368 + nlen = len + n;
369 if (dest_len <= nlen) {
370 - dest_str = reallocf(dest_str, dest_len = nlen + delta);
371 + dest_str = reallocf(dest_str, (dest_len = nlen + delta) * sizeof(wchar_t));
372 if (dest_str == NULL)
373 __collate_err(EX_OSERR, __func__);
374 }
375 - (void)strcpy(dest_str + len, __collate_substitute_table[*s++]);
376 - len = nlen;
377 + wcsncpy(dest_str + len, fp, n);
378 + len += n;
379 + s++;
380 }
381 + dest_str[len] = 0;
382 return (dest_str);
383 }
384
385 -void
386 -__collate_lookup(const u_char *t, int *len, int *prim, int *sec)
387 +static struct __collate_st_chain_pri *
388 +chainsearch(const wchar_t *key, int *len, locale_t loc)
389 +{
390 + int low = 0;
391 + int high = __collate_info->chain_count - 1;
392 + int next, compar, l;
393 + struct __collate_st_chain_pri *p;
394 + struct __collate_st_chain_pri *tab = __collate_chain_pri_table;
395 + /* binary search of loc's chain table for an entry whose str is a prefix of key */
396 + while (low <= high) {
397 + next = (low + high) / 2;
398 + p = tab + next;
399 + compar = *key - *p->str; /* order by first character first */
400 + if (compar == 0) {
401 + l = __collate_wcsnlen(p->str, STR_LEN);
402 + compar = wcsncmp(key, p->str, l); /* compare only the chain's own length */
403 + if (compar == 0) {
404 + *len = l; /* *len is written only on a match */
405 + return p;
406 + }
407 + }
408 + if (compar > 0)
409 + low = next + 1;
410 + else
411 + high = next - 1;
412 + }
413 + return NULL; /* no chain matches the start of key */
414 +}
415 +
416 +static struct __collate_st_large_char_pri *
417 +largesearch(const wchar_t key, locale_t loc)
418 +{
419 + int low = 0;
420 + int high = __collate_info->large_pri_count - 1;
421 + int next, compar;
422 + struct __collate_st_large_char_pri *p;
423 + struct __collate_st_large_char_pri *tab = __collate_large_char_pri_table;
424 + /* binary search of loc's large-character table for val == key; relies on ascending val */
425 + while (low <= high) {
426 + next = (low + high) / 2;
427 + p = tab + next;
428 + compar = key - p->val;
429 + if (compar == 0)
430 + return p;
431 + if (compar > 0)
432 + low = next + 1;
433 + else
434 + high = next - 1;
435 + }
436 + return NULL; /* key has no entry in the table */
437 +}
438 +
439 +__private_extern__ void
440 +__collate_lookup_l(const wchar_t *t, int *len, int *prim, int *sec, locale_t loc)
441 {
442 struct __collate_st_chain_pri *p2;
443 + int l;
444
445 *len = 1;
446 *prim = *sec = 0;
447 - for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++) {
448 - if (*t == p2->str[0] &&
449 - strncmp(t, p2->str, strlen(p2->str)) == 0) {
450 - *len = strlen(p2->str);
451 - *prim = p2->prim;
452 - *sec = p2->sec;
453 + p2 = chainsearch(t, &l, loc);
454 + /* use the chain if prim >= 0 */
455 + if (p2 && p2->pri[0] >= 0) {
456 + *len = l;
457 + *prim = p2->pri[0];
458 + *sec = p2->pri[1];
459 + return;
460 + }
461 + if (*t <= UCHAR_MAX) {
462 + *prim = __collate_char_pri_table[*t].pri[0];
463 + *sec = __collate_char_pri_table[*t].pri[1];
464 + return;
465 + }
466 + if (__collate_info->large_pri_count > 0) {
467 + struct __collate_st_large_char_pri *match;
468 + match = largesearch(*t, loc);
469 + if (match) {
470 + *prim = match->pri.pri[0];
471 + *sec = match->pri.pri[1];
472 + return;
473 + }
474 + }
475 + *prim = (l = __collate_info->undef_pri[0]) >= 0 ? l : *t - l;
476 + *sec = (l = __collate_info->undef_pri[1]) >= 0 ? l : *t - l;
477 +}
478 +
479 +/*
480 + * Only provided for programs (like grep) that call this private function;
481 + * it will go away eventually. On conversion failure: *len=1, *prim=*sec=0.
482 + */
483 +void
484 +__collate_lookup(const unsigned char *t, int *len, int *prim, int *sec)
485 +{
486 + locale_t loc = __current_locale();
487 + wchar_t *w = __collate_mbstowcs((const char *)t, loc);
488 + int sverrno;
489 + if (w == NULL) { *len = 1; *prim = *sec = 0; return; } /* t could not be converted; never pass NULL on */
490 + __collate_lookup_l(w, len, prim, sec, loc);
491 + sverrno = errno; /* free() must not disturb the caller-visible errno */
492 + free(w);
493 + errno = sverrno;
494 +}
495 +
496 +__private_extern__ void
497 +__collate_lookup_which(const wchar_t *t, int *len, int *pri, int which, locale_t loc)
498 +{
499 + struct __collate_st_chain_pri *p2;
500 + int p, l;
501 +
502 + *len = 1;
503 + *pri = 0;
504 + p2 = chainsearch(t, &l, loc);
505 + if (p2) {
506 + p = p2->pri[which];
507 + /* use the chain if pri >= 0 */
508 + if (p >= 0) {
509 + *len = l;
510 + *pri = p;
511 return;
512 }
513 }
514 - *prim = __collate_char_pri_table[*t].prim;
515 - *sec = __collate_char_pri_table[*t].sec;
516 + if (*t <= UCHAR_MAX) {
517 + *pri = __collate_char_pri_table[*t].pri[which];
518 + return;
519 + }
520 + if (__collate_info->large_pri_count > 0) {
521 + struct __collate_st_large_char_pri *match;
522 + match = largesearch(*t, loc);
523 + if (match) {
524 + *pri = match->pri.pri[which];
525 + return;
526 + }
527 + }
528 + *pri = (l = __collate_info->undef_pri[which]) >= 0 ? l : *t - l;
529 +}
530 +
531 +__private_extern__ wchar_t *
532 +__collate_mbstowcs(const char *s, locale_t loc)
533 +{
534 + static const mbstate_t initial;
535 + mbstate_t st;
536 + size_t len;
537 + const char *ss;
538 + wchar_t *wcs;
539 +
540 + ss = s;
541 + st = initial;
542 + if ((len = mbsrtowcs_l(NULL, &ss, 0, &st, loc)) == (size_t)-1)
543 + return NULL;
544 + if ((wcs = (wchar_t *)malloc((len + 1) * sizeof(wchar_t))) == NULL)
545 + __collate_err(EX_OSERR, __func__);
546 + st = initial;
547 + mbsrtowcs_l(wcs, &s, len, &st, loc);
548 + wcs[len] = 0;
549 +
550 + return (wcs);
551 }
552
553 -u_char *
554 -__collate_strdup(u_char *s)
555 +__private_extern__ wchar_t *
556 +__collate_wcsdup(const wchar_t *s)
557 {
558 - u_char *t = strdup(s);
559 + size_t len = wcslen(s) + 1;
560 + wchar_t *wcs;
561
562 - if (t == NULL)
563 + if ((wcs = (wchar_t *)malloc(len * sizeof(wchar_t))) == NULL)
564 __collate_err(EX_OSERR, __func__);
565 - return (t);
566 + wcscpy(wcs, s);
567 + return (wcs);
568 }
569
570 -void
571 +__private_extern__ void
572 +__collate_xfrm(const wchar_t *src, wchar_t **xf, locale_t loc)
573 +{
574 + int pri, len;
575 + size_t slen;
576 + const wchar_t *t;
577 + wchar_t *tt = NULL, *tr = NULL;
578 + int direc, pass;
579 + wchar_t *xfp;
580 + struct __collate_st_info *info = __collate_info;
581 + int sverrno;
582 + /* fills xf[pass] with a malloc'ed, 0-terminated priority string for each directive pass */
583 + for(pass = 0; pass < COLL_WEIGHTS_MAX; pass++)
584 + xf[pass] = NULL;
585 + for(pass = 0; pass < info->directive_count; pass++) {
586 + direc = info->directive[pass];
587 + if (pass == 0 || !(info->flags & COLLATE_SUBST_DUP)) { /* with SUBST_DUP the pass-0 substitution is reused */
588 + sverrno = errno;
589 + free(tt);
590 + errno = sverrno;
591 + tt = __collate_substitute(src, pass, loc);
592 + }
593 + if (direc & DIRECTIVE_BACKWARD) {
594 + wchar_t *bp, *fp, c;
595 + sverrno = errno;
596 + free(tr);
597 + errno = sverrno;
598 + tr = __collate_wcsdup(tt ? tt : src);
599 + bp = tr;
600 + fp = tr + wcslen(tr) - 1;
601 + while(bp < fp) { /* reverse the copy in place */
602 + c = *bp;
603 + *bp++ = *fp;
604 + *fp-- = c;
605 + }
606 + t = (const wchar_t *)tr;
607 + } else if (tt)
608 + t = (const wchar_t *)tt;
609 + else
610 + t = (const wchar_t *)src;
611 + sverrno = errno;
612 + if ((xf[pass] = (wchar_t *)malloc(sizeof(wchar_t) * (wcslen(t) + 1))) == NULL) {
613 + errno = sverrno;
614 + slen = 0;
615 + goto end; /* xf[pass] stays NULL; earlier passes stay allocated */
616 + }
617 + errno = sverrno;
618 + xfp = xf[pass];
619 + if (direc & DIRECTIVE_POSITION) {
620 + while(*t) {
621 + __collate_lookup_which(t, &len, &pri, pass, loc);
622 + t += len;
623 + if (pri <= 0) {
624 + if (pri < 0) {
625 + errno = EINVAL;
626 + slen = 0;
627 + goto end;
628 + }
629 + pri = COLLATE_MAX_PRIORITY; /* zero priority still occupies a slot in this branch */
630 + }
631 + *xfp++ = pri;
632 + }
633 + } else {
634 + while(*t) {
635 + __collate_lookup_which(t, &len, &pri, pass, loc);
636 + t += len;
637 + if (pri <= 0) {
638 + if (pri < 0) {
639 + errno = EINVAL;
640 + slen = 0;
641 + goto end;
642 + }
643 + continue; /* zero priority emits nothing in this branch */
644 + }
645 + *xfp++ = pri;
646 + }
647 + }
648 + *xfp = 0;
649 + }
650 + end:
651 + sverrno = errno; /* keep errno as set above across the frees */
652 + free(tt);
653 + free(tr);
654 + errno = sverrno;
655 +}
656 +
657 +__private_extern__ void
658 __collate_err(int ex, const char *f)
659 {
660 const char *s;
661 @@ -275,24 +610,345 @@ __collate_err(int ex, const char *f)
662 exit(ex);
663 }
664
665 +/*
666 + * __collate_collating_symbol takes the multibyte string specified by
667 + * src and slen, and using ps, converts that to a wide character. Then
668 + * it is checked to verify it is a collating symbol, and then copies
669 + * it to the wide character string specified by dst and dlen (the
670 + * results are not null terminated). The length of the wide characters
671 + * copied to dst is returned if successful. Zero is returned if no such
672 + * collating symbol exists. (size_t)-1 is returned if there are wide-character
673 + * conversion errors, if the length of the converted string is greater than
674 + * STR_LEN or if dlen is too small. It is up to the calling routine to
675 + * preserve the mbstate_t structure as needed.
676 + */
677 +__private_extern__ size_t
678 +__collate_collating_symbol(wchar_t *dst, size_t dlen, const char *src, size_t slen, mbstate_t *ps, locale_t loc)
679 +{
680 + wchar_t wname[STR_LEN + 1]; /* up to STR_LEN characters plus the terminator stored below */
681 + wchar_t w, *wp;
682 + size_t len, l;
683 +
684 + /* POSIX locale */
685 + if (loc->__collate_load_error) {
686 + if (dlen < 1)
687 + return (size_t)-1;
688 + if (slen != 1 || !isascii(*src))
689 + return 0;
690 + *dst = *src;
691 + return 1;
692 + }
693 + for(wp = wname, len = 0; slen > 0; len++) {
694 + l = mbrtowc_l(&w, src, slen, ps, loc);
695 + if (l == (size_t)-1 || l == (size_t)-2)
696 + return (size_t)-1;
697 + if (l == 0)
698 + break;
699 + if (len >= STR_LEN)
700 + return (size_t)-1;
701 + *wp++ = w;
702 + src += l;
703 + slen = (long)slen - (long)l;
704 + }
705 + if (len == 0 || len > dlen)
706 + return (size_t)-1;
707 + if (len == 1) {
708 + if (*wname <= UCHAR_MAX) {
709 + if (__collate_char_pri_table[*wname].pri[0] >= 0) {
710 + if (dlen > 0)
711 + *dst = *wname;
712 + return 1;
713 + }
714 + return 0;
715 + } else if (__collate_info->large_pri_count > 0) {
716 + struct __collate_st_large_char_pri *match;
717 + match = largesearch(*wname, loc);
718 + if (match && match->pri.pri[0] >= 0) {
719 + if (dlen > 0)
720 + *dst = *wname;
721 + return 1;
722 + }
723 + }
724 + return 0;
725 + }
726 + *wp = 0; /* lands on wname[STR_LEN] when the symbol is exactly STR_LEN characters long */
727 + if (__collate_info->chain_count > 0) {
728 + struct __collate_st_chain_pri *match;
729 + int ll;
730 + match = chainsearch(wname, &ll, loc);
731 + if (match) {
732 + if (ll < dlen)
733 + dlen = ll;
734 + wcsncpy(dst, wname, dlen);
735 + return ll;
736 + }
737 + }
738 + return 0;
739 +}
740 +
741 +/*
742 + * __collate_equiv_class returns the equivalence class number for the symbol
743 + * specified by src and slen, using ps to convert from multi-byte to wide
744 + * character. Zero is returned if the symbol is not in an equivalence
745 + * class. -1 is returned if there are wide character conversion errors,
746 + * if there are any greater-than-8-bit characters or if a multi-byte symbol
747 + * is greater or equal to STR_LEN in length. It is up to the calling
748 + * routine to preserve the mbstate_t structure as needed.
749 + */
750 +__private_extern__ int
751 +__collate_equiv_class(const char *src, size_t slen, mbstate_t *ps, locale_t loc)
752 +{
753 + wchar_t wname[STR_LEN + 1]; /* up to STR_LEN characters plus the terminator stored below */
754 + wchar_t w, *wp;
755 + size_t len, l;
756 + int e;
757 +
758 + /* POSIX locale */
759 + if (loc->__collate_load_error)
760 + return 0;
761 + for(wp = wname, len = 0; slen > 0; len++) {
762 + l = mbrtowc_l(&w, src, slen, ps, loc);
763 + if (l == (size_t)-1 || l == (size_t)-2)
764 + return -1;
765 + if (l == 0)
766 + break;
767 + if (len >= STR_LEN)
768 + return -1;
769 + *wp++ = w;
770 + src += l;
771 + slen = (long)slen - (long)l;
772 + }
773 + if (len == 0)
774 + return -1;
775 + if (len == 1) {
776 + e = -1;
777 + if (*wname <= UCHAR_MAX)
778 + e = __collate_char_pri_table[*wname].pri[0];
779 + else if (__collate_info->large_pri_count > 0) {
780 + struct __collate_st_large_char_pri *match;
781 + match = largesearch(*wname, loc);
782 + if (match)
783 + e = match->pri.pri[0];
784 + }
785 + if (e == 0)
786 + return IGNORE_EQUIV_CLASS;
787 + return e > 0 ? e : 0;
788 + }
789 + *wp = 0; /* lands on wname[STR_LEN] when the symbol is exactly STR_LEN characters long */
790 + if (__collate_info->chain_count > 0) {
791 + struct __collate_st_chain_pri *match;
792 + int ll;
793 + match = chainsearch(wname, &ll, loc);
794 + if (match) {
795 + e = match->pri[0];
796 + if (e == 0)
797 + return IGNORE_EQUIV_CLASS;
798 + return e < 0 ? -e : e;
799 + }
800 + }
801 + return 0;
802 +}
803 +
804 +/*
805 + * __collate_equiv_match tries to match any single or multi-character symbol
806 + * in equivalence class equiv_class in the multi-byte string specified by src
807 + * and slen. If start is non-zero, it is taken to be the first (pre-converted)
808 + * wide character. Subsequent wide characters, if needed, will use ps in
809 + * the conversion. On a successful match, the length of the matched string
810 + * is returned (including the start character). If dst is non-NULL, the
811 + * matched wide-character string is copied to dst, a wide character array of
812 + * length dlen (the results are not zero-terminated). If rlen is non-NULL,
813 + * the number of characters in src actually used is returned. Zero is
814 + * returned by __collate_equiv_match if there is no match. (size_t)-1 is
815 + * returned on error: if there were conversion errors or if dlen is too small
816 + * to accept the results. On no match or error, ps is restored to its incoming
817 + * state.
818 + */
819 +size_t
820 +__collate_equiv_match(int equiv_class, wchar_t *dst, size_t dlen, wchar_t start, const char *src, size_t slen, mbstate_t *ps, size_t *rlen, locale_t loc)
821 +{
822 + wchar_t w;
823 + size_t len, l, clen;
824 + int i;
825 + wchar_t buf[STR_LEN + 1], *wp; /* room for STR_LEN characters plus the terminator */
826 + mbstate_t save;
827 + const char *s = src;
828 + size_t sl = slen;
829 + struct __collate_st_chain_pri *ch = NULL;
830 +
831 + /* POSIX locale */
832 + if (loc->__collate_load_error)
833 + return (size_t)-1;
834 + if (equiv_class == IGNORE_EQUIV_CLASS)
835 + equiv_class = 0;
836 + if (ps)
837 + save = *ps;
838 + wp = buf;
839 + len = clen = 0;
840 + if (start) {
841 + *wp++ = start;
842 + len = 1;
843 + }
844 + /* convert up to the max chain length, but never more than buf can hold */
845 + while(sl > 0 && len < STR_LEN && len < __collate_info->chain_max_len) {
846 + l = mbrtowc_l(&w, s, sl, ps, loc);
847 + if (l == (size_t)-1 || l == (size_t)-2 || l == 0)
848 + break;
849 + *wp++ = w;
850 + s += l;
851 + clen += l;
852 + sl -= l;
853 + len++;
854 + }
855 + *wp = 0;
856 + if (len > 1 && (ch = chainsearch(buf, &i, loc)) != NULL) {
857 + int e = ch->pri[0];
858 + if (e < 0)
859 + e = -e;
860 + if (e == equiv_class)
861 + goto found;
862 + }
863 + /* try single character */
864 + i = 1;
865 + if (*buf <= UCHAR_MAX) {
866 + if (equiv_class == __collate_char_pri_table[*buf].pri[0])
867 + goto found;
868 + } else if (__collate_info->large_pri_count > 0) {
869 + struct __collate_st_large_char_pri *match;
870 + match = largesearch(*buf, loc);
871 + if (match && equiv_class == match->pri.pri[0])
872 + goto found;
873 + }
874 + /* no match */
875 + if (ps)
876 + *ps = save;
877 + return 0;
878 +found:
879 + /* if we converted more than we used, restore to initial and reconvert
880 + * up to what did match */
881 + if (i < len) {
882 + len = i;
883 + if (ps)
884 + *ps = save;
885 + if (start)
886 + i--;
887 + clen = 0;
888 + while(i-- > 0) {
889 + l = mbrtowc_l(&w, src, slen, ps, loc);
890 + src += l;
891 + clen += l;
892 + slen -= l;
893 + }
894 + }
895 + if (dst) {
896 + if (dlen < len) {
897 + if (ps)
898 + *ps = save;
899 + return (size_t)-1;
900 + }
901 + for(wp = buf, l = 0; l < len; l++) /* keep len intact: it is the return value */
902 + *dst++ = *wp++;
903 + }
904 + if (rlen)
905 + *rlen = clen;
906 + return len;
907 +}
908 +
909 +#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
910 +static void
911 +wntohl(wchar_t *str, int len)
912 +{ /* byte-swaps up to len wide characters in place, stopping at the first 0 */
913 + for(; len > 0 && *str; str++, len--) /* test len first so str[len] is never read */
914 + *str = ntohl(*str);
915 +}
916 +#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
917 +
918 #ifdef COLLATE_DEBUG
919 +static char *
920 +show(int c)
921 +{
922 + static char buf[16]; /* "\x{" + up to 8 hex digits + "}" + NUL needs 13 bytes */
923 + /* debug helper: printable form of c, returned in a static (non-reentrant) buffer */
924 + if (c >=32 && c <= 126)
925 + snprintf(buf, sizeof(buf), "'%c' ", c);
926 + else
927 + snprintf(buf, sizeof(buf), "\\x{%02x}", c);
928 + return buf;
929 +}
930 +
931 +static char *
932 +showwcs(const wchar_t *t, int len)
933 +{
934 + static char buf[64];
935 + char *cp = buf;
936 + /* debug helper: printable form of up to len characters of t; output is cut short if buf is full */
937 + for(; len > 0 && *t && cp + 13 <= buf + sizeof(buf); len--, t++) { /* 12-byte escape + NUL must fit */
938 + if (*t >=32 && *t <= 126)
939 + *cp++ = *t;
940 + else {
941 + sprintf(cp, "\\x{%02x}", *t);
942 + cp += strlen(cp);
943 + }
944 + }
945 + *cp = 0;
946 + return buf;
947 +}
948 +
949 void
950 __collate_print_tables()
951 {
952 - int i;
953 - struct __collate_st_chain_pri *p2;
954 + int i, z;
955 + locale_t loc = __current_locale();
956
957 - printf("Substitute table:\n");
958 - for (i = 0; i < UCHAR_MAX + 1; i++)
959 - if (i != *__collate_substitute_table[i])
960 - printf("\t'%c' --> \"%s\"\n", i,
961 - __collate_substitute_table[i]);
962 - printf("Chain priority table:\n");
963 - for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++)
964 - printf("\t\"%s\" : %d %d\n", p2->str, p2->prim, p2->sec);
965 + printf("Info: p=%d s=%d f=0x%02x m=%d dc=%d up=%d us=%d pc=%d sc=%d cc=%d lc=%d\n",
966 + __collate_info->directive[0], __collate_info->directive[1],
967 + __collate_info->flags, __collate_info->chain_max_len,
968 + __collate_info->directive_count,
969 + __collate_info->undef_pri[0], __collate_info->undef_pri[1],
970 + __collate_info->subst_count[0], __collate_info->subst_count[1],
971 + __collate_info->chain_count, __collate_info->large_pri_count);
972 + for(z = 0; z < __collate_info->directive_count; z++) {
973 + if (__collate_info->subst_count[z] > 0) {
974 + struct __collate_st_subst *p2 = __collate_substitute_table[z];
975 + if (z == 0 && (__collate_info->flags & COLLATE_SUBST_DUP))
976 + printf("Both substitute tables:\n");
977 + else
978 + printf("Substitute table %d:\n", z);
979 + for (i = __collate_info->subst_count[z]; i-- > 0; p2++)
980 + printf("\t%s --> \"%s\"\n",
981 + show(p2->val),
982 + showwcs(p2->str, STR_LEN));
983 + }
984 + }
985 + if (__collate_info->chain_count > 0) {
986 + printf("Chain priority table:\n");
987 + struct __collate_st_chain_pri *p2 = __collate_chain_pri_table;
988 + for (i = __collate_info->chain_count; i-- > 0; p2++) {
989 + printf("\t\"%s\" :", showwcs(p2->str, STR_LEN));
990 + for(z = 0; z < __collate_info->directive_count; z++)
991 + printf(" %d", p2->pri[z]);
992 + putchar('\n');
993 + }
994 + }
995 printf("Char priority table:\n");
996 - for (i = 0; i < UCHAR_MAX + 1; i++)
997 - printf("\t'%c' : %d %d\n", i, __collate_char_pri_table[i].prim,
998 - __collate_char_pri_table[i].sec);
999 + {
1000 + struct __collate_st_char_pri *p2 = __collate_char_pri_table;
1001 + for (i = 0; i < UCHAR_MAX + 1; i++, p2++) {
1002 + printf("\t%s :", show(i));
1003 + for(z = 0; z < __collate_info->directive_count; z++)
1004 + printf(" %d", p2->pri[z]);
1005 + putchar('\n');
1006 + }
1007 + }
1008 + if (__collate_info->large_pri_count > 0) {
1009 + struct __collate_st_large_char_pri *p2 = __collate_large_char_pri_table;
1010 + printf("Large priority table:\n");
1011 + for (i = __collate_info->large_pri_count; i-- > 0; p2++) {
1012 + printf("\t%s :", show(p2->val));
1013 + for(z = 0; z < __collate_info->directive_count; z++)
1014 + printf(" %d", p2->pri.pri[z]);
1015 + putchar('\n');
1016 + }
1017 + }
1018 }
1019 #endif