]> git.saurik.com Git - apple/libc.git/blob - locale/FreeBSD/collate.c.patch
Libc-594.9.5.tar.gz
[apple/libc.git] / locale / FreeBSD / collate.c.patch
1 --- collate.c.orig 2004-11-25 11:38:16.000000000 -0800
2 +++ collate.c 2005-10-20 01:00:19.000000000 -0700
3 @@ -28,14 +28,26 @@
4 #include <sys/cdefs.h>
5 __FBSDID("$FreeBSD: src/lib/libc/locale/collate.c,v 1.33 2004/09/22 16:56:48 stefanf Exp $");
6
7 +#include "xlocale_private.h"
8 +/* assumes the locale_t variable is named loc */
9 +#define __collate_chain_equiv_table (loc->__lc_collate->__chain_equiv_table)
10 +#define __collate_chain_pri_table (loc->__lc_collate->__chain_pri_table)
11 +#define __collate_char_pri_table (loc->__lc_collate->__char_pri_table)
12 +#define __collate_info (&loc->__lc_collate->__info)
13 +#define __collate_large_char_pri_table (loc->__lc_collate->__large_char_pri_table)
14 +#define __collate_substitute_table (loc->__lc_collate->__substitute_table)
15 +
16 #include "namespace.h"
17 #include <arpa/inet.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 +#include <stddef.h>
21 #include <string.h>
22 +#include <wchar.h>
23 #include <errno.h>
24 #include <unistd.h>
25 #include <sysexits.h>
26 +#include <ctype.h>
27 #include "un-namespace.h"
28
29 #include "collate.h"
30 @@ -44,36 +56,50 @@
31
32 #include "libc_private.h"
33
34 -int __collate_load_error = 1;
35 -int __collate_substitute_nontrivial;
36 -
37 -u_char __collate_substitute_table[UCHAR_MAX + 1][STR_LEN];
38 -struct __collate_st_char_pri __collate_char_pri_table[UCHAR_MAX + 1];
39 -struct __collate_st_chain_pri *__collate_chain_pri_table;
40 -
41 +#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
42 +static void wntohl(wchar_t *, int);
43 +#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
44 void __collate_err(int ex, const char *f) __dead2;
45
46 -int
47 -__collate_load_tables(const char *encoding)
48 +/*
49 + * Normally, the __collate_* routines should all be __private_extern__,
50 + * but grep is using them (3715846). Until we can provide an alternative,
51 + * we leave them public, and provide a read-only __collate_load_error variable
52 + */
53 +#undef __collate_load_error
54 +int __collate_load_error = 1;
55 +
56 +__private_extern__ int
57 +__collate_load_tables(const char *encoding, locale_t loc)
58 {
59 FILE *fp;
60 - int i, saverr, chains;
61 - uint32_t u32;
62 + int i, saverr, chains, z;
63 char strbuf[STR_LEN], buf[PATH_MAX];
64 - void *TMP_substitute_table, *TMP_char_pri_table, *TMP_chain_pri_table;
65 - static char collate_encoding[ENCODING_LEN + 1];
66 + struct __xlocale_st_collate *TMP;
67 + static struct __xlocale_st_collate *cache = NULL;
68 + struct __collate_st_info info;
69 + void *vp;
70
71 /* 'encoding' must be already checked. */
72 if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
73 - __collate_load_error = 1;
74 + loc->__collate_load_error = 1;
75 + if (loc == &__global_locale)
76 + __collate_load_error = 1;
77 + XL_RELEASE(loc->__lc_collate);
78 + loc->__lc_collate = NULL;
79 return (_LDP_CACHE);
80 }
81
82 /*
83 * If the locale name is the same as our cache, use the cache.
84 */
85 - if (strcmp(encoding, collate_encoding) == 0) {
86 - __collate_load_error = 0;
87 + if (cache && strcmp(encoding, cache->__encoding) == 0) {
88 + loc->__collate_load_error = 0;
89 + if (loc == &__global_locale)
90 + __collate_load_error = 0;
91 + XL_RELEASE(loc->__lc_collate);
92 + loc->__lc_collate = cache;
93 + XL_RETAIN(loc->__lc_collate);
94 return (_LDP_CACHE);
95 }
96
97 @@ -97,9 +123,7 @@
98 return (_LDP_ERROR);
99 }
100 chains = -1;
101 - if (strcmp(strbuf, COLLATE_VERSION) == 0)
102 - chains = 0;
103 - else if (strcmp(strbuf, COLLATE_VERSION1_1) == 0)
104 + if (strcmp(strbuf, COLLATE_VERSION1_1A) == 0)
105 chains = 1;
106 if (chains < 0) {
107 (void)fclose(fp);
108 @@ -107,13 +131,21 @@
109 return (_LDP_ERROR);
110 }
111 if (chains) {
112 - if (fread(&u32, sizeof(u32), 1, fp) != 1) {
113 + if (fread(&info, sizeof(info), 1, fp) != 1) {
114 saverr = errno;
115 (void)fclose(fp);
116 errno = saverr;
117 return (_LDP_ERROR);
118 }
119 - if ((chains = (int)ntohl(u32)) < 1) {
120 +#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
121 + for(z = 0; z < info.directive_count; z++) {
122 + info.undef_pri[z] = ntohl(info.undef_pri[z]);
123 + info.subst_count[z] = ntohl(info.subst_count[z]);
124 + }
125 + info.chain_count = ntohl(info.chain_count);
126 + info.large_pri_count = ntohl(info.large_pri_count);
127 +#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
128 + if ((chains = info.chain_count) < 0) {
129 (void)fclose(fp);
130 errno = EFTYPE;
131 return (_LDP_ERROR);
132 @@ -121,136 +153,446 @@
133 } else
134 chains = TABLE_SIZE;
135
136 - if ((TMP_substitute_table =
137 - malloc(sizeof(__collate_substitute_table))) == NULL) {
138 - saverr = errno;
139 - (void)fclose(fp);
140 - errno = saverr;
141 - return (_LDP_ERROR);
142 - }
143 - if ((TMP_char_pri_table =
144 - malloc(sizeof(__collate_char_pri_table))) == NULL) {
145 - saverr = errno;
146 - free(TMP_substitute_table);
147 - (void)fclose(fp);
148 - errno = saverr;
149 - return (_LDP_ERROR);
150 - }
151 - if ((TMP_chain_pri_table =
152 - malloc(sizeof(*__collate_chain_pri_table) * chains)) == NULL) {
153 + i = sizeof(struct __xlocale_st_collate)
154 + + sizeof(struct __collate_st_chain_pri) * chains
155 + + sizeof(struct __collate_st_large_char_pri) * info.large_pri_count;
156 + for(z = 0; z < info.directive_count; z++)
157 + i += sizeof(struct __collate_st_subst) * info.subst_count[z];
158 + if ((TMP = (struct __xlocale_st_collate *)malloc(i)) == NULL) {
159 saverr = errno;
160 - free(TMP_substitute_table);
161 - free(TMP_char_pri_table);
162 (void)fclose(fp);
163 errno = saverr;
164 return (_LDP_ERROR);
165 }
166 + TMP->__refcount = 2; /* one for the locale, one for the cache */
167 + TMP->__free_extra = NULL;
168
169 #define FREAD(a, b, c, d) \
170 { \
171 if (fread(a, b, c, d) != c) { \
172 saverr = errno; \
173 - free(TMP_substitute_table); \
174 - free(TMP_char_pri_table); \
175 - free(TMP_chain_pri_table); \
176 + free(TMP); \
177 (void)fclose(d); \
178 errno = saverr; \
179 return (_LDP_ERROR); \
180 } \
181 }
182
183 - FREAD(TMP_substitute_table, sizeof(__collate_substitute_table), 1, fp);
184 - FREAD(TMP_char_pri_table, sizeof(__collate_char_pri_table), 1, fp);
185 - FREAD(TMP_chain_pri_table,
186 - sizeof(*__collate_chain_pri_table), chains, fp);
187 + /* adjust size to read the remaining data in one chunk */
188 + i -= offsetof(struct __xlocale_st_collate, __char_pri_table);
189 + FREAD(TMP->__char_pri_table, i, 1, fp);
190 (void)fclose(fp);
191
192 - (void)strcpy(collate_encoding, encoding);
193 - if (__collate_substitute_table_ptr != NULL)
194 - free(__collate_substitute_table_ptr);
195 - __collate_substitute_table_ptr = TMP_substitute_table;
196 - if (__collate_char_pri_table_ptr != NULL)
197 - free(__collate_char_pri_table_ptr);
198 - __collate_char_pri_table_ptr = TMP_char_pri_table;
199 - if (__collate_chain_pri_table != NULL)
200 - free(__collate_chain_pri_table);
201 - __collate_chain_pri_table = TMP_chain_pri_table;
202 -
203 - __collate_substitute_nontrivial = 0;
204 - for (i = 0; i < UCHAR_MAX + 1; i++) {
205 - if (__collate_substitute_table[i][0] != i ||
206 - __collate_substitute_table[i][1] != 0) {
207 - __collate_substitute_nontrivial = 1;
208 - break;
209 + vp = (void *)(TMP + 1);
210 +
211 + /* the COLLATE_SUBST_DUP optimization relies on COLL_WEIGHTS_MAX == 2 */
212 + if (info.subst_count[0] > 0) {
213 + TMP->__substitute_table[0] = (struct __collate_st_subst *)vp;
214 + vp += info.subst_count[0] * sizeof(struct __collate_st_subst);
215 + } else
216 + TMP->__substitute_table[0] = NULL;
217 + if (info.flags & COLLATE_SUBST_DUP)
218 + TMP->__substitute_table[1] = TMP->__substitute_table[0];
219 + else if (info.subst_count[1] > 0) {
220 + TMP->__substitute_table[1] = (struct __collate_st_subst *)vp;
221 + vp += info.subst_count[1] * sizeof(struct __collate_st_subst);
222 + } else
223 + TMP->__substitute_table[1] = NULL;
224 +
225 + if (chains > 0) {
226 + TMP->__chain_pri_table = (struct __collate_st_chain_pri *)vp;
227 + vp += chains * sizeof(struct __collate_st_chain_pri);
228 + } else
229 + TMP->__chain_pri_table = NULL;
230 + if (info.large_pri_count > 0)
231 + TMP->__large_char_pri_table = (struct __collate_st_large_char_pri *)vp;
232 + else
233 + TMP->__large_char_pri_table = NULL;
234 +
235 +#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
236 + {
237 + struct __collate_st_char_pri *p = TMP->__char_pri_table;
238 + for(i = UCHAR_MAX + 1; i-- > 0; p++) {
239 + for(z = 0; z < info.directive_count; z++)
240 + p->pri[z] = ntohl(p->pri[z]);
241 }
242 }
243 - __collate_load_error = 0;
244 + for(z = 0; z < info.directive_count; z++)
245 + if (info.subst_count[z] > 0) {
246 + struct __collate_st_subst *p = TMP->__substitute_table[z];
247 + for(i = info.subst_count[z]; i-- > 0; p++) {
248 + p->val = ntohl(p->val);
249 + wntohl(p->str, STR_LEN);
250 + }
251 + }
252 + {
253 + struct __collate_st_chain_pri *p = TMP->__chain_pri_table;
254 + for(i = chains; i-- > 0; p++) {
255 + wntohl(p->str, STR_LEN);
256 + for(z = 0; z < info.directive_count; z++)
257 + p->pri[z] = ntohl(p->pri[z]);
258 + }
259 + }
260 + if (info.large_pri_count > 0) {
261 + struct __collate_st_large_char_pri *p = TMP->__large_char_pri_table;
262 + for(i = info.large_pri_count; i-- > 0; p++) {
263 + p->val = ntohl(p->val);
264 + for(z = 0; z < info.directive_count; z++)
265 + p->pri.pri[z] = ntohl(p->pri.pri[z]);
266 + }
267 + }
268 +#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
269 + (void)strcpy(TMP->__encoding, encoding);
270 + (void)memcpy(&TMP->__info, &info, sizeof(info));
271 + XL_RELEASE(cache);
272 + cache = TMP;
273 + XL_RELEASE(loc->__lc_collate);
274 + loc->__lc_collate = cache;
275 + /* no need to retain, since we set __refcount to 2 above */
276 +
277 + loc->__collate_substitute_nontrivial = (info.subst_count[0] > 0 || info.subst_count[1] > 0);
278 + loc->__collate_load_error = 0;
279 + if (loc == &__global_locale)
280 + __collate_load_error = 0;
281
282 return (_LDP_LOADED);
283 }
284
285 -u_char *
286 -__collate_substitute(s)
287 - const u_char *s;
288 +static int
289 +__collate_wcsnlen(const wchar_t *s, int len)
290 +{
291 + int n = 0;
292 + while (*s && n < len) {
293 + s++;
294 + n++;
295 + }
296 + return n;
297 +}
298 +
299 +static struct __collate_st_subst *
300 +substsearch(const wchar_t key, struct __collate_st_subst *tab, int n)
301 +{
302 + int low = 0;
303 + int high = n - 1;
304 + int next, compar;
305 + struct __collate_st_subst *p;
306 +
307 + while (low <= high) {
308 + next = (low + high) / 2;
309 + p = tab + next;
310 + compar = key - p->val;
311 + if (compar == 0)
312 + return p;
313 + if (compar > 0)
314 + low = next + 1;
315 + else
316 + high = next - 1;
317 + }
318 + return NULL;
319 +}
320 +
321 +__private_extern__ wchar_t *
322 +__collate_substitute(const wchar_t *s, int which, locale_t loc)
323 {
324 int dest_len, len, nlen;
325 - int delta = strlen(s);
326 - u_char *dest_str = NULL;
327 + int n, delta, nsubst;
328 + wchar_t *dest_str = NULL;
329 + const wchar_t *fp;
330 + struct __collate_st_subst *subst, *match;
331
332 if (s == NULL || *s == '\0')
333 - return (__collate_strdup(""));
334 - delta += delta / 8;
335 - dest_str = malloc(dest_len = delta);
336 + return (__collate_wcsdup(L""));
337 + dest_len = wcslen(s);
338 + nsubst = __collate_info->subst_count[which];
339 + if (nsubst <= 0)
340 + return __collate_wcsdup(s);
341 + subst = __collate_substitute_table[which];
342 + delta = dest_len / 4;
343 + if (delta < 2)
344 + delta = 2;
345 + dest_str = (wchar_t *)malloc((dest_len += delta) * sizeof(wchar_t));
346 if (dest_str == NULL)
347 __collate_err(EX_OSERR, __func__);
348 len = 0;
349 while (*s) {
350 - nlen = len + strlen(__collate_substitute_table[*s]);
351 + if ((match = substsearch(*s, subst, nsubst)) != NULL) {
352 + fp = match->str;
353 + n = __collate_wcsnlen(fp, STR_LEN);
354 + } else {
355 + fp = s;
356 + n = 1;
357 + }
358 + nlen = len + n;
359 if (dest_len <= nlen) {
360 - dest_str = reallocf(dest_str, dest_len = nlen + delta);
361 + dest_str = reallocf(dest_str, (dest_len = nlen + delta) * sizeof(wchar_t));
362 if (dest_str == NULL)
363 __collate_err(EX_OSERR, __func__);
364 }
365 - (void)strcpy(dest_str + len, __collate_substitute_table[*s++]);
366 - len = nlen;
367 + wcsncpy(dest_str + len, fp, n);
368 + len += n;
369 + s++;
370 }
371 + dest_str[len] = 0;
372 return (dest_str);
373 }
374
375 -void
376 -__collate_lookup(t, len, prim, sec)
377 - const u_char *t;
378 - int *len, *prim, *sec;
379 +static struct __collate_st_chain_pri *
380 +chainsearch(const wchar_t *key, int *len, locale_t loc)
381 +{
382 + int low = 0;
383 + int high = __collate_info->chain_count - 1;
384 + int next, compar, l;
385 + struct __collate_st_chain_pri *p;
386 + struct __collate_st_chain_pri *tab = __collate_chain_pri_table;
387 +
388 + while (low <= high) {
389 + next = (low + high) / 2;
390 + p = tab + next;
391 + compar = *key - *p->str;
392 + if (compar == 0) {
393 + l = __collate_wcsnlen(p->str, STR_LEN);
394 + compar = wcsncmp(key, p->str, l);
395 + if (compar == 0) {
396 + *len = l;
397 + return p;
398 + }
399 + }
400 + if (compar > 0)
401 + low = next + 1;
402 + else
403 + high = next - 1;
404 + }
405 + return NULL;
406 +}
407 +
408 +static struct __collate_st_large_char_pri *
409 +largesearch(const wchar_t key, locale_t loc)
410 +{
411 + int low = 0;
412 + int high = __collate_info->large_pri_count - 1;
413 + int next, compar;
414 + struct __collate_st_large_char_pri *p;
415 + struct __collate_st_large_char_pri *tab = __collate_large_char_pri_table;
416 +
417 + while (low <= high) {
418 + next = (low + high) / 2;
419 + p = tab + next;
420 + compar = key - p->val;
421 + if (compar == 0)
422 + return p;
423 + if (compar > 0)
424 + low = next + 1;
425 + else
426 + high = next - 1;
427 + }
428 + return NULL;
429 +}
430 +
431 +__private_extern__ void
432 +__collate_lookup_l(const wchar_t *t, int *len, int *prim, int *sec, locale_t loc)
433 {
434 struct __collate_st_chain_pri *p2;
435 + int l;
436
437 *len = 1;
438 *prim = *sec = 0;
439 - for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++) {
440 - if (*t == p2->str[0] &&
441 - strncmp(t, p2->str, strlen(p2->str)) == 0) {
442 - *len = strlen(p2->str);
443 - *prim = p2->prim;
444 - *sec = p2->sec;
445 + p2 = chainsearch(t, &l, loc);
446 + /* use the chain if prim >= 0 */
447 + if (p2 && p2->pri[0] >= 0) {
448 + *len = l;
449 + *prim = p2->pri[0];
450 + *sec = p2->pri[1];
451 + return;
452 + }
453 + if (*t <= UCHAR_MAX) {
454 + *prim = __collate_char_pri_table[*t].pri[0];
455 + *sec = __collate_char_pri_table[*t].pri[1];
456 + return;
457 + }
458 + if (__collate_info->large_pri_count > 0) {
459 + struct __collate_st_large_char_pri *match;
460 + match = largesearch(*t, loc);
461 + if (match) {
462 + *prim = match->pri.pri[0];
463 + *sec = match->pri.pri[1];
464 + return;
465 + }
466 + }
467 + *prim = (l = __collate_info->undef_pri[0]) >= 0 ? l : *t - l;
468 + *sec = (l = __collate_info->undef_pri[1]) >= 0 ? l : *t - l;
469 +}
470 +
471 +/*
472 + * This is only provided for programs (like grep) that are calling this
473 + * private function. This will go away eventually.
474 + */
475 +void
476 +__collate_lookup(const unsigned char *t, int *len, int *prim, int *sec)
477 +{
478 + locale_t loc = __current_locale();
479 + wchar_t *w = __collate_mbstowcs((const char *)t, loc);
480 + int sverrno;
481 +
482 + __collate_lookup_l(w, len, prim, sec, loc);
483 + sverrno = errno;
484 + free(w);
485 + errno = sverrno;
486 +}
487 +
488 +__private_extern__ void
489 +__collate_lookup_which(const wchar_t *t, int *len, int *pri, int which, locale_t loc)
490 +{
491 + struct __collate_st_chain_pri *p2;
492 + int p, l;
493 +
494 + *len = 1;
495 + *pri = 0;
496 + p2 = chainsearch(t, &l, loc);
497 + if (p2) {
498 + p = p2->pri[which];
499 + /* use the chain if pri >= 0 */
500 + if (p >= 0) {
501 + *len = l;
502 + *pri = p;
503 + return;
504 + }
505 + }
506 + if (*t <= UCHAR_MAX) {
507 + *pri = __collate_char_pri_table[*t].pri[which];
508 + return;
509 + }
510 + if (__collate_info->large_pri_count > 0) {
511 + struct __collate_st_large_char_pri *match;
512 + match = largesearch(*t, loc);
513 + if (match) {
514 + *pri = match->pri.pri[which];
515 return;
516 }
517 }
518 - *prim = __collate_char_pri_table[*t].prim;
519 - *sec = __collate_char_pri_table[*t].sec;
520 + *pri = (l = __collate_info->undef_pri[which]) >= 0 ? l : *t - l;
521 }
522
523 -u_char *
524 -__collate_strdup(s)
525 - u_char *s;
526 +__private_extern__ wchar_t *
527 +__collate_mbstowcs(const char *s, locale_t loc)
528 {
529 - u_char *t = strdup(s);
530 + static const mbstate_t initial;
531 + mbstate_t st;
532 + size_t len;
533 + const char *ss;
534 + wchar_t *wcs;
535 +
536 + ss = s;
537 + st = initial;
538 + if ((len = mbsrtowcs_l(NULL, &ss, 0, &st, loc)) == (size_t)-1)
539 + return NULL;
540 + if ((wcs = (wchar_t *)malloc((len + 1) * sizeof(wchar_t))) == NULL)
541 + __collate_err(EX_OSERR, __func__);
542 + st = initial;
543 + mbsrtowcs_l(wcs, &s, len, &st, loc);
544 + wcs[len] = 0;
545
546 - if (t == NULL)
547 + return (wcs);
548 +}
549 +
550 +__private_extern__ wchar_t *
551 +__collate_wcsdup(const wchar_t *s)
552 +{
553 + size_t len = wcslen(s) + 1;
554 + wchar_t *wcs;
555 +
556 + if ((wcs = (wchar_t *)malloc(len * sizeof(wchar_t))) == NULL)
557 __collate_err(EX_OSERR, __func__);
558 - return (t);
559 + wcscpy(wcs, s);
560 + return (wcs);
561 }
562
563 -void
564 +__private_extern__ void
565 +__collate_xfrm(const wchar_t *src, wchar_t **xf, locale_t loc)
566 +{
567 + int pri, len;
568 + size_t slen;
569 + const wchar_t *t;
570 + wchar_t *tt = NULL, *tr = NULL;
571 + int direc, pass;
572 + wchar_t *xfp;
573 + struct __collate_st_info *info = __collate_info;
574 + int sverrno;
575 +
576 + for(pass = 0; pass < COLL_WEIGHTS_MAX; pass++)
577 + xf[pass] = NULL;
578 + for(pass = 0; pass < info->directive_count; pass++) {
579 + direc = info->directive[pass];
580 + if (pass == 0 || !(info->flags & COLLATE_SUBST_DUP)) {
581 + sverrno = errno;
582 + free(tt);
583 + errno = sverrno;
584 + tt = __collate_substitute(src, pass, loc);
585 + }
586 + if (direc & DIRECTIVE_BACKWARD) {
587 + wchar_t *bp, *fp, c;
588 + sverrno = errno;
589 + free(tr);
590 + errno = sverrno;
591 + tr = __collate_wcsdup(tt ? tt : src);
592 + bp = tr;
593 + fp = tr + wcslen(tr) - 1;
594 + while(bp < fp) {
595 + c = *bp;
596 + *bp++ = *fp;
597 + *fp-- = c;
598 + }
599 + t = (const wchar_t *)tr;
600 + } else if (tt)
601 + t = (const wchar_t *)tt;
602 + else
603 + t = (const wchar_t *)src;
604 + sverrno = errno;
605 + if ((xf[pass] = (wchar_t *)malloc(sizeof(wchar_t) * (wcslen(t) + 1))) == NULL) {
606 + errno = sverrno;
607 + slen = 0;
608 + goto end;
609 + }
610 + errno = sverrno;
611 + xfp = xf[pass];
612 + if (direc & DIRECTIVE_POSITION) {
613 + while(*t) {
614 + __collate_lookup_which(t, &len, &pri, pass, loc);
615 + t += len;
616 + if (pri <= 0) {
617 + if (pri < 0) {
618 + errno = EINVAL;
619 + slen = 0;
620 + goto end;
621 + }
622 + pri = COLLATE_MAX_PRIORITY;
623 + }
624 + *xfp++ = pri;
625 + }
626 + } else {
627 + while(*t) {
628 + __collate_lookup_which(t, &len, &pri, pass, loc);
629 + t += len;
630 + if (pri <= 0) {
631 + if (pri < 0) {
632 + errno = EINVAL;
633 + slen = 0;
634 + goto end;
635 + }
636 + continue;
637 + }
638 + *xfp++ = pri;
639 + }
640 + }
641 + *xfp = 0;
642 + }
643 + end:
644 + sverrno = errno;
645 + free(tt);
646 + free(tr);
647 + errno = sverrno;
648 +}
649 +
650 +__private_extern__ void
651 __collate_err(int ex, const char *f)
652 {
653 const char *s;
654 @@ -268,24 +610,345 @@
655 exit(ex);
656 }
657
658 +/*
659 + * __collate_collating_symbol takes the multibyte string specified by
660 + * src and slen and, using ps, converts it to a wide-character string. The
661 + * result is checked to verify that it is a collating symbol and is then
662 + * copied to the wide-character string specified by dst and dlen (the
663 + * results are not null-terminated). The number of wide characters
664 + * copied to dst is returned if successful. Zero is returned if no such
665 + * collating symbol exists. (size_t)-1 is returned if there are wide-character
666 + * conversion errors, if the length of the converted string is greater than
667 + * STR_LEN or if dlen is too small. It is up to the calling routine to
668 + * preserve the mbstate_t structure as needed.
669 + */
670 +__private_extern__ size_t
671 +__collate_collating_symbol(wchar_t *dst, size_t dlen, const char *src, size_t slen, mbstate_t *ps, locale_t loc)
672 +{
673 + wchar_t wname[STR_LEN];
674 + wchar_t w, *wp;
675 + size_t len, l;
676 +
677 + /* POSIX locale */
678 + if (loc->__collate_load_error) {
679 + if (dlen < 1)
680 + return (size_t)-1;
681 + if (slen != 1 || !isascii(*src))
682 + return 0;
683 + *dst = *src;
684 + return 1;
685 + }
686 + for(wp = wname, len = 0; slen > 0; len++) {
687 + l = mbrtowc_l(&w, src, slen, ps, loc);
688 + if (l == (size_t)-1 || l == (size_t)-2)
689 + return (size_t)-1;
690 + if (l == 0)
691 + break;
692 + if (len >= STR_LEN)
693 + return -1;
694 + *wp++ = w;
695 + src += l;
696 + slen = (long)slen - (long)l;
697 + }
698 + if (len == 0 || len > dlen)
699 + return (size_t)-1;
700 + if (len == 1) {
701 + if (*wname <= UCHAR_MAX) {
702 + if (__collate_char_pri_table[*wname].pri[0] >= 0) {
703 + if (dlen > 0)
704 + *dst = *wname;
705 + return 1;
706 + }
707 + return 0;
708 + } else if (__collate_info->large_pri_count > 0) {
709 + struct __collate_st_large_char_pri *match;
710 + match = largesearch(*wname, loc);
711 + if (match && match->pri.pri[0] >= 0) {
712 + if (dlen > 0)
713 + *dst = *wname;
714 + return 1;
715 + }
716 + }
717 + return 0;
718 + }
719 + *wp = 0;
720 + if (__collate_info->chain_count > 0) {
721 + struct __collate_st_chain_pri *match;
722 + int ll;
723 + match = chainsearch(wname, &ll, loc);
724 + if (match) {
725 + if (ll < dlen)
726 + dlen = ll;
727 + wcsncpy(dst, wname, dlen);
728 + return ll;
729 + }
730 + }
731 + return 0;
732 +}
733 +
734 +/*
735 + * __collate_equiv_class returns the equivalence class number for the symbol
736 + * specified by src and slen, using ps to convert from multi-byte to wide
737 + * character. Zero is returned if the symbol is not in an equivalence
738 + * class. -1 is returned if there are wide-character conversion errors,
739 + * if there are any greater-than-8-bit characters or if a multi-byte symbol
740 + * is greater than or equal to STR_LEN in length. It is up to the calling
741 + * routine to preserve the mbstate_t structure as needed.
742 + */
743 +__private_extern__ int
744 +__collate_equiv_class(const char *src, size_t slen, mbstate_t *ps, locale_t loc)
745 +{
746 + wchar_t wname[STR_LEN];
747 + wchar_t w, *wp;
748 + size_t len, l;
749 + int e;
750 +
751 + /* POSIX locale */
752 + if (loc->__collate_load_error)
753 + return 0;
754 + for(wp = wname, len = 0; slen > 0; len++) {
755 + l = mbrtowc_l(&w, src, slen, ps, loc);
756 + if (l == (size_t)-1 || l == (size_t)-2)
757 + return -1;
758 + if (l == 0)
759 + break;
760 + if (len >= STR_LEN)
761 + return -1;
762 + *wp++ = w;
763 + src += l;
764 + slen = (long)slen - (long)l;
765 + }
766 + if (len == 0)
767 + return -1;
768 + if (len == 1) {
769 + e = -1;
770 + if (*wname <= UCHAR_MAX)
771 + e = __collate_char_pri_table[*wname].pri[0];
772 + else if (__collate_info->large_pri_count > 0) {
773 + struct __collate_st_large_char_pri *match;
774 + match = largesearch(*wname, loc);
775 + if (match)
776 + e = match->pri.pri[0];
777 + }
778 + if (e == 0)
779 + return IGNORE_EQUIV_CLASS;
780 + return e > 0 ? e : 0;
781 + }
782 + *wp = 0;
783 + if (__collate_info->chain_count > 0) {
784 + struct __collate_st_chain_pri *match;
785 + int ll;
786 + match = chainsearch(wname, &ll, loc);
787 + if (match) {
788 + e = match->pri[0];
789 + if (e == 0)
790 + return IGNORE_EQUIV_CLASS;
791 + return e < 0 ? -e : e;
792 + }
793 + }
794 + return 0;
795 +}
796 +
797 +/*
798 + * __collate_equiv_match tries to match any single or multi-character symbol
799 + * in equivalence class equiv_class in the multi-byte string specified by src
800 + * and slen. If start is non-zero, it is taken to be the first (pre-converted)
801 + * wide character. Subsequent wide characters, if needed, will use ps in
802 + * the conversion. On a successful match, the length of the matched string
803 + * is returned (including the start character). If dst is non-NULL, the
804 + * matched wide-character string is copied to dst, a wide character array of
805 + * length dlen (the results are not zero-terminated). If rlen is non-NULL,
806 + * the number of characters in src actually used is returned. Zero is
807 + * returned by __collate_equiv_match if there is no match. (size_t)-1 is
808 + * returned on error: if there were conversion errors or if dlen is too small
809 + * to accept the results. On no match or error, ps is restored to its incoming
810 + * state.
811 + */
812 +size_t
813 +__collate_equiv_match(int equiv_class, wchar_t *dst, size_t dlen, wchar_t start, const char *src, size_t slen, mbstate_t *ps, size_t *rlen, locale_t loc)
814 +{
815 + wchar_t w;
816 + size_t len, l, clen;
817 + int i;
818 + wchar_t buf[STR_LEN], *wp;
819 + mbstate_t save;
820 + const char *s = src;
821 + size_t sl = slen;
822 + struct __collate_st_chain_pri *ch = NULL;
823 +
824 + /* POSIX locale */
825 + if (loc->__collate_load_error)
826 + return (size_t)-1;
827 + if (equiv_class == IGNORE_EQUIV_CLASS)
828 + equiv_class = 0;
829 + if (ps)
830 + save = *ps;
831 + wp = buf;
832 + len = clen = 0;
833 + if (start) {
834 + *wp++ = start;
835 + len = 1;
836 + }
837 + /* convert up to the max chain length */
838 + while(sl > 0 && len < __collate_info->chain_max_len) {
839 + l = mbrtowc_l(&w, s, sl, ps, loc);
840 + if (l == (size_t)-1 || l == (size_t)-2 || l == 0)
841 + break;
842 + *wp++ = w;
843 + s += l;
844 + clen += l;
845 + sl -= l;
846 + len++;
847 + }
848 + *wp = 0;
849 + if (len > 1 && (ch = chainsearch(buf, &i, loc)) != NULL) {
850 + int e = ch->pri[0];
851 + if (e < 0)
852 + e = -e;
853 + if (e == equiv_class)
854 + goto found;
855 + }
856 + /* try single character */
857 + i = 1;
858 + if (*buf <= UCHAR_MAX) {
859 + if (equiv_class == __collate_char_pri_table[*buf].pri[0])
860 + goto found;
861 + } else if (__collate_info->large_pri_count > 0) {
862 + struct __collate_st_large_char_pri *match;
863 + match = largesearch(*buf, loc);
864 + if (match && equiv_class == match->pri.pri[0])
865 + goto found;
866 + }
867 + /* no match */
868 + if (ps)
869 + *ps = save;
870 + return 0;
871 +found:
872 + /* if we converted more than we used, restore ps to its saved state and
873 + * reconvert only up to what did match */
874 + if (i < len) {
875 + len = i;
876 + if (ps)
877 + *ps = save;
878 + if (start)
879 + i--;
880 + clen = 0;
881 + while(i-- > 0) {
882 + l = mbrtowc_l(&w, src, slen, ps, loc);
883 + src += l;
884 + clen += l;
885 + slen -= l;
886 + }
887 + }
888 + if (dst) {
889 + if (dlen < len) {
890 + if (ps)
891 + *ps = save;
892 + return (size_t)-1;
893 + }
894 + for(wp = buf; len > 0; len--)
895 + *dst++ = *wp++;
896 + }
897 + if (rlen)
898 + *rlen = clen;
899 + return len;
900 +}
901 +
902 +#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
903 +static void
904 +wntohl(wchar_t *str, int len)
905 +{
906 + for(; *str && len > 0; str++, len--)
907 + *str = ntohl(*str);
908 +}
909 +#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
910 +
911 #ifdef COLLATE_DEBUG
912 +static char *
913 +show(int c)
914 +{
915 + static char buf[5];
916 +
917 + if (c >=32 && c <= 126)
918 + sprintf(buf, "'%c' ", c);
919 + else
920 + sprintf(buf, "\\x{%02x}", c);
921 + return buf;
922 +}
923 +
924 +static char *
925 +showwcs(const wchar_t *t, int len)
926 +{
927 + static char buf[64];
928 + char *cp = buf;
929 +
930 + for(; *t && len > 0; len--, t++) {
931 + if (*t >=32 && *t <= 126)
932 + *cp++ = *t;
933 + else {
934 + sprintf(cp, "\\x{%02x}", *t);
935 + cp += strlen(cp);
936 + }
937 + }
938 + *cp = 0;
939 + return buf;
940 +}
941 +
942 void
943 __collate_print_tables()
944 {
945 - int i;
946 - struct __collate_st_chain_pri *p2;
947 + int i, z;
948 + locale_t loc = __current_locale();
949
950 - printf("Substitute table:\n");
951 - for (i = 0; i < UCHAR_MAX + 1; i++)
952 - if (i != *__collate_substitute_table[i])
953 - printf("\t'%c' --> \"%s\"\n", i,
954 - __collate_substitute_table[i]);
955 - printf("Chain priority table:\n");
956 - for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++)
957 - printf("\t\"%s\" : %d %d\n", p2->str, p2->prim, p2->sec);
958 + printf("Info: p=%d s=%d f=0x%02x m=%d dc=%d up=%d us=%d pc=%d sc=%d cc=%d lc=%d\n",
959 + __collate_info->directive[0], __collate_info->directive[1],
960 + __collate_info->flags, __collate_info->chain_max_len,
961 + __collate_info->directive_count,
962 + __collate_info->undef_pri[0], __collate_info->undef_pri[1],
963 + __collate_info->subst_count[0], __collate_info->subst_count[1],
964 + __collate_info->chain_count, __collate_info->large_pri_count);
965 + for(z = 0; z < __collate_info->directive_count; z++) {
966 + if (__collate_info->subst_count[z] > 0) {
967 + struct __collate_st_subst *p2 = __collate_substitute_table[z];
968 + if (z == 0 && (__collate_info->flags & COLLATE_SUBST_DUP))
969 + printf("Both substitute tables:\n");
970 + else
971 + printf("Substitute table %d:\n", z);
972 + for (i = __collate_info->subst_count[z]; i-- > 0; p2++)
973 + printf("\t%s --> \"%s\"\n",
974 + show(p2->val),
975 + showwcs(p2->str, STR_LEN));
976 + }
977 + }
978 + if (__collate_info->chain_count > 0) {
979 + printf("Chain priority table:\n");
980 + struct __collate_st_chain_pri *p2 = __collate_chain_pri_table;
981 + for (i = __collate_info->chain_count; i-- > 0; p2++) {
982 + printf("\t\"%s\" :", showwcs(p2->str, STR_LEN));
983 + for(z = 0; z < __collate_info->directive_count; z++)
984 + printf(" %d", p2->pri[z]);
985 + putchar('\n');
986 + }
987 + }
988 printf("Char priority table:\n");
989 - for (i = 0; i < UCHAR_MAX + 1; i++)
990 - printf("\t'%c' : %d %d\n", i, __collate_char_pri_table[i].prim,
991 - __collate_char_pri_table[i].sec);
992 + {
993 + struct __collate_st_char_pri *p2 = __collate_char_pri_table;
994 + for (i = 0; i < UCHAR_MAX + 1; i++, p2++) {
995 + printf("\t%s :", show(i));
996 + for(z = 0; z < __collate_info->directive_count; z++)
997 + printf(" %d", p2->pri[z]);
998 + putchar('\n');
999 + }
1000 + }
1001 + if (__collate_info->large_pri_count > 0) {
1002 + struct __collate_st_large_char_pri *p2 = __collate_large_char_pri_table;
1003 + printf("Large priority table:\n");
1004 + for (i = __collate_info->large_pri_count; i-- > 0; p2++) {
1005 + printf("\t%s :", show(p2->val));
1006 + for(z = 0; z < __collate_info->directive_count; z++)
1007 + printf(" %d", p2->pri.pri[z]);
1008 + putchar('\n');
1009 + }
1010 + }
1011 }
1012 #endif