]> git.saurik.com Git - apple/libc.git/blob - locale/FreeBSD/collate.c
Libc-1082.50.1.tar.gz
[apple/libc.git] / locale / FreeBSD / collate.c
1 /*-
2 * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
3 * at Electronni Visti IA, Kiev, Ukraine.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD: src/lib/libc/locale/collate.c,v 1.35 2005/02/27 20:31:13 ru Exp $");
30
31 #include "xlocale_private.h"
32 /* assumes the locale_t variable is named loc */
33 #define __collate_chain_equiv_table (loc->__lc_collate->__chain_equiv_table)
34 #define __collate_chain_pri_table (loc->__lc_collate->__chain_pri_table)
35 #define __collate_char_pri_table (loc->__lc_collate->__char_pri_table)
36 #define __collate_info (&loc->__lc_collate->__info)
37 #define __collate_large_char_pri_table (loc->__lc_collate->__large_char_pri_table)
38 #define __collate_substitute_table (loc->__lc_collate->__substitute_table)
39
40 #include "namespace.h"
41 #include <arpa/inet.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <stddef.h>
45 #include <string.h>
46 #include <wchar.h>
47 #include <errno.h>
48 #include <unistd.h>
49 #include <sysexits.h>
50 #include <ctype.h>
51 #include "un-namespace.h"
52
53 #include "collate.h"
54 #include "setlocale.h"
55 #include "ldpart.h"
56
57 #include "libc_private.h"
58
59 #if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
60 static void wntohl(wchar_t *, int);
61 #endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
62 void __collate_err(int ex, const char *f) __dead2;
63
64 /*
65 * Normally, the __collate_* routines should all be __private_extern__,
66 * but grep is using them (3715846). Until we can provide an alternative,
67 * we leave them public, and provide a read-only __collate_load_error variable
68 */
69 #undef __collate_load_error
70 int __collate_load_error = 1;
71
72 __private_extern__ int
73 __collate_load_tables(const char *encoding, locale_t loc)
74 {
75 FILE *fp;
76 int i, saverr, chains, z;
77 char strbuf[STR_LEN], buf[PATH_MAX];
78 struct __xlocale_st_collate *TMP;
79 static struct __xlocale_st_collate *cache = NULL;
80 struct __collate_st_info info;
81 void *vp;
82
83 /* 'encoding' must be already checked. */
84 if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
85 loc->__collate_load_error = 1;
86 if (loc == &__global_locale)
87 __collate_load_error = 1;
88 XL_RELEASE(loc->__lc_collate);
89 loc->__lc_collate = NULL;
90 return (_LDP_CACHE);
91 }
92
93 /*
94 * If the locale name is the same as our cache, use the cache.
95 */
96 if (cache && strcmp(encoding, cache->__encoding) == 0) {
97 loc->__collate_load_error = 0;
98 if (loc == &__global_locale)
99 __collate_load_error = 0;
100 XL_RELEASE(loc->__lc_collate);
101 loc->__lc_collate = cache;
102 XL_RETAIN(loc->__lc_collate);
103 return (_LDP_CACHE);
104 }
105
106 /*
107 * Slurp the locale file into the cache.
108 */
109
110 /* 'PathLocale' must be already set & checked. */
111 /* Range checking not needed, encoding has fixed size */
112 (void)strcpy(buf, _PathLocale);
113 (void)strcat(buf, "/");
114 (void)strcat(buf, encoding);
115 (void)strcat(buf, "/LC_COLLATE");
116 if ((fp = fopen(buf, "r")) == NULL)
117 return (_LDP_ERROR);
118
119 if (fread(strbuf, sizeof(strbuf), 1, fp) != 1) {
120 saverr = errno;
121 (void)fclose(fp);
122 errno = saverr;
123 return (_LDP_ERROR);
124 }
125 chains = -1;
126 if (strcmp(strbuf, COLLATE_VERSION1_1A) == 0)
127 chains = 1;
128 if (chains < 0) {
129 (void)fclose(fp);
130 errno = EFTYPE;
131 return (_LDP_ERROR);
132 }
133 if (chains) {
134 if (fread(&info, sizeof(info), 1, fp) != 1) {
135 saverr = errno;
136 (void)fclose(fp);
137 errno = saverr;
138 return (_LDP_ERROR);
139 }
140 #if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
141 for(z = 0; z < info.directive_count; z++) {
142 info.undef_pri[z] = ntohl(info.undef_pri[z]);
143 info.subst_count[z] = ntohl(info.subst_count[z]);
144 }
145 info.chain_count = ntohl(info.chain_count);
146 info.large_pri_count = ntohl(info.large_pri_count);
147 #endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
148 if ((chains = info.chain_count) < 0) {
149 (void)fclose(fp);
150 errno = EFTYPE;
151 return (_LDP_ERROR);
152 }
153 } else
154 chains = TABLE_SIZE;
155
156 i = sizeof(struct __xlocale_st_collate)
157 + sizeof(struct __collate_st_chain_pri) * chains
158 + sizeof(struct __collate_st_large_char_pri) * info.large_pri_count;
159 for(z = 0; z < info.directive_count; z++)
160 i += sizeof(struct __collate_st_subst) * info.subst_count[z];
161 if ((TMP = (struct __xlocale_st_collate *)malloc(i)) == NULL) {
162 saverr = errno;
163 (void)fclose(fp);
164 errno = saverr;
165 return (_LDP_ERROR);
166 }
167 TMP->__refcount = 2; /* one for the locale, one for the cache */
168 TMP->__free_extra = NULL;
169
170 #define FREAD(a, b, c, d) \
171 { \
172 if (fread(a, b, c, d) != c) { \
173 saverr = errno; \
174 free(TMP); \
175 (void)fclose(d); \
176 errno = saverr; \
177 return (_LDP_ERROR); \
178 } \
179 }
180
181 /* adjust size to read the remaining in one chunk */
182 i -= offsetof(struct __xlocale_st_collate, __char_pri_table);
183 FREAD(TMP->__char_pri_table, i, 1, fp);
184 (void)fclose(fp);
185
186 vp = (void *)(TMP + 1);
187
188 /* the COLLATE_SUBST_DUP optimization relies on COLL_WEIGHTS_MAX == 2 */
189 if (info.subst_count[0] > 0) {
190 TMP->__substitute_table[0] = (struct __collate_st_subst *)vp;
191 vp += info.subst_count[0] * sizeof(struct __collate_st_subst);
192 } else
193 TMP->__substitute_table[0] = NULL;
194 if (info.flags & COLLATE_SUBST_DUP)
195 TMP->__substitute_table[1] = TMP->__substitute_table[0];
196 else if (info.subst_count[1] > 0) {
197 TMP->__substitute_table[1] = (struct __collate_st_subst *)vp;
198 vp += info.subst_count[1] * sizeof(struct __collate_st_subst);
199 } else
200 TMP->__substitute_table[1] = NULL;
201
202 if (chains > 0) {
203 TMP->__chain_pri_table = (struct __collate_st_chain_pri *)vp;
204 vp += chains * sizeof(struct __collate_st_chain_pri);
205 } else
206 TMP->__chain_pri_table = NULL;
207 if (info.large_pri_count > 0)
208 TMP->__large_char_pri_table = (struct __collate_st_large_char_pri *)vp;
209 else
210 TMP->__large_char_pri_table = NULL;
211
212 #if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
213 {
214 struct __collate_st_char_pri *p = TMP->__char_pri_table;
215 for(i = UCHAR_MAX + 1; i-- > 0; p++) {
216 for(z = 0; z < info.directive_count; z++)
217 p->pri[z] = ntohl(p->pri[z]);
218 }
219 }
220 for(z = 0; z < info.directive_count; z++)
221 if (info.subst_count[z] > 0) {
222 struct __collate_st_subst *p = TMP->__substitute_table[z];
223 for(i = info.subst_count[z]; i-- > 0; p++) {
224 p->val = ntohl(p->val);
225 wntohl(p->str, STR_LEN);
226 }
227 }
228 {
229 struct __collate_st_chain_pri *p = TMP->__chain_pri_table;
230 for(i = chains; i-- > 0; p++) {
231 wntohl(p->str, STR_LEN);
232 for(z = 0; z < info.directive_count; z++)
233 p->pri[z] = ntohl(p->pri[z]);
234 }
235 }
236 if (info.large_pri_count > 0) {
237 struct __collate_st_large_char_pri *p = TMP->__large_char_pri_table;
238 for(i = info.large_pri_count; i-- > 0; p++) {
239 p->val = ntohl(p->val);
240 for(z = 0; z < info.directive_count; z++)
241 p->pri.pri[z] = ntohl(p->pri.pri[z]);
242 }
243 }
244 #endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
245 (void)strcpy(TMP->__encoding, encoding);
246 (void)memcpy(&TMP->__info, &info, sizeof(info));
247 XL_RELEASE(cache);
248 cache = TMP;
249 XL_RELEASE(loc->__lc_collate);
250 loc->__lc_collate = cache;
251 /* no need to retain, since we set __refcount to 2 above */
252
253 loc->__collate_substitute_nontrivial = (info.subst_count[0] > 0 || info.subst_count[1] > 0);
254 loc->__collate_load_error = 0;
255 if (loc == &__global_locale)
256 __collate_load_error = 0;
257
258 return (_LDP_LOADED);
259 }
260
/*
 * Return the number of wide characters in s before the first zero
 * element, examining at most len elements.
 *
 * The bound is tested before each element is read, so an array of
 * exactly len elements with no terminator is never read past its end.
 */
static int
__collate_wcsnlen(const wchar_t *s, int len)
{
	int n = 0;

	while (n < len && s[n] != 0)
		n++;
	return n;
}
271
272 static struct __collate_st_subst *
273 substsearch(const wchar_t key, struct __collate_st_subst *tab, int n)
274 {
275 int low = 0;
276 int high = n - 1;
277 int next, compar;
278 struct __collate_st_subst *p;
279
280 while (low <= high) {
281 next = (low + high) / 2;
282 p = tab + next;
283 compar = key - p->val;
284 if (compar == 0)
285 return p;
286 if (compar > 0)
287 low = next + 1;
288 else
289 high = next - 1;
290 }
291 return NULL;
292 }
293
294 __private_extern__ wchar_t *
295 __collate_substitute(const wchar_t *s, int which, locale_t loc)
296 {
297 int dest_len, len, nlen;
298 int n, delta, nsubst;
299 wchar_t *dest_str = NULL;
300 const wchar_t *fp;
301 struct __collate_st_subst *subst, *match;
302
303 if (s == NULL || *s == '\0')
304 return (__collate_wcsdup(L""));
305 dest_len = wcslen(s);
306 nsubst = __collate_info->subst_count[which];
307 if (nsubst <= 0)
308 return __collate_wcsdup(s);
309 subst = __collate_substitute_table[which];
310 delta = dest_len / 4;
311 if (delta < 2)
312 delta = 2;
313 dest_str = (wchar_t *)malloc((dest_len += delta) * sizeof(wchar_t));
314 if (dest_str == NULL)
315 __collate_err(EX_OSERR, __func__);
316 len = 0;
317 while (*s) {
318 if ((match = substsearch(*s, subst, nsubst)) != NULL) {
319 fp = match->str;
320 n = __collate_wcsnlen(fp, STR_LEN);
321 } else {
322 fp = s;
323 n = 1;
324 }
325 nlen = len + n;
326 if (dest_len <= nlen) {
327 dest_str = reallocf(dest_str, (dest_len = nlen + delta) * sizeof(wchar_t));
328 if (dest_str == NULL)
329 __collate_err(EX_OSERR, __func__);
330 }
331 wcsncpy(dest_str + len, fp, n);
332 len += n;
333 s++;
334 }
335 dest_str[len] = 0;
336 return (dest_str);
337 }
338
339 static struct __collate_st_chain_pri *
340 chainsearch(const wchar_t *key, int *len, locale_t loc)
341 {
342 int low = 0;
343 int high = __collate_info->chain_count - 1;
344 int next, compar, l;
345 struct __collate_st_chain_pri *p;
346 struct __collate_st_chain_pri *tab = __collate_chain_pri_table;
347
348 while (low <= high) {
349 next = (low + high) / 2;
350 p = tab + next;
351 compar = *key - *p->str;
352 if (compar == 0) {
353 l = __collate_wcsnlen(p->str, STR_LEN);
354 compar = wcsncmp(key, p->str, l);
355 if (compar == 0) {
356 *len = l;
357 return p;
358 }
359 }
360 if (compar > 0)
361 low = next + 1;
362 else
363 high = next - 1;
364 }
365 return NULL;
366 }
367
368 static struct __collate_st_large_char_pri *
369 largesearch(const wchar_t key, locale_t loc)
370 {
371 int low = 0;
372 int high = __collate_info->large_pri_count - 1;
373 int next, compar;
374 struct __collate_st_large_char_pri *p;
375 struct __collate_st_large_char_pri *tab = __collate_large_char_pri_table;
376
377 while (low <= high) {
378 next = (low + high) / 2;
379 p = tab + next;
380 compar = key - p->val;
381 if (compar == 0)
382 return p;
383 if (compar > 0)
384 low = next + 1;
385 else
386 high = next - 1;
387 }
388 return NULL;
389 }
390
391 __private_extern__ void
392 __collate_lookup_l(const wchar_t *t, int *len, int *prim, int *sec, locale_t loc)
393 {
394 struct __collate_st_chain_pri *p2;
395 int l;
396
397 *len = 1;
398 *prim = *sec = 0;
399 p2 = chainsearch(t, &l, loc);
400 /* use the chain if prim >= 0 */
401 if (p2 && p2->pri[0] >= 0) {
402 *len = l;
403 *prim = p2->pri[0];
404 *sec = p2->pri[1];
405 return;
406 }
407 if (*t <= UCHAR_MAX) {
408 *prim = __collate_char_pri_table[*t].pri[0];
409 *sec = __collate_char_pri_table[*t].pri[1];
410 return;
411 }
412 if (__collate_info->large_pri_count > 0) {
413 struct __collate_st_large_char_pri *match;
414 match = largesearch(*t, loc);
415 if (match) {
416 *prim = match->pri.pri[0];
417 *sec = match->pri.pri[1];
418 return;
419 }
420 }
421 *prim = (l = __collate_info->undef_pri[0]) >= 0 ? l : *t - l;
422 *sec = (l = __collate_info->undef_pri[1]) >= 0 ? l : *t - l;
423 }
424
425 /*
426 * This is only provided for programs (like grep) that are calling this
427 * private function. This will go away eventually.
428 */
429 void
430 __collate_lookup(const unsigned char *t, int *len, int *prim, int *sec)
431 {
432 locale_t loc = __current_locale();
433 wchar_t *w = __collate_mbstowcs((const char *)t, loc);
434 int sverrno;
435
436 __collate_lookup_l(w, len, prim, sec, loc);
437 sverrno = errno;
438 free(w);
439 errno = sverrno;
440 }
441
442 __private_extern__ void
443 __collate_lookup_which(const wchar_t *t, int *len, int *pri, int which, locale_t loc)
444 {
445 struct __collate_st_chain_pri *p2;
446 int p, l;
447
448 *len = 1;
449 *pri = 0;
450 p2 = chainsearch(t, &l, loc);
451 if (p2) {
452 p = p2->pri[which];
453 /* use the chain if pri >= 0 */
454 if (p >= 0) {
455 *len = l;
456 *pri = p;
457 return;
458 }
459 }
460 if (*t <= UCHAR_MAX) {
461 *pri = __collate_char_pri_table[*t].pri[which];
462 return;
463 }
464 if (__collate_info->large_pri_count > 0) {
465 struct __collate_st_large_char_pri *match;
466 match = largesearch(*t, loc);
467 if (match) {
468 *pri = match->pri.pri[which];
469 return;
470 }
471 }
472 *pri = (l = __collate_info->undef_pri[which]) >= 0 ? l : *t - l;
473 }
474
475 __private_extern__ wchar_t *
476 __collate_mbstowcs(const char *s, locale_t loc)
477 {
478 static const mbstate_t initial;
479 mbstate_t st;
480 size_t len;
481 const char *ss;
482 wchar_t *wcs;
483
484 ss = s;
485 st = initial;
486 if ((len = mbsrtowcs_l(NULL, &ss, 0, &st, loc)) == (size_t)-1)
487 return NULL;
488 if ((wcs = (wchar_t *)malloc((len + 1) * sizeof(wchar_t))) == NULL)
489 __collate_err(EX_OSERR, __func__);
490 st = initial;
491 mbsrtowcs_l(wcs, &s, len, &st, loc);
492 wcs[len] = 0;
493
494 return (wcs);
495 }
496
497 __private_extern__ wchar_t *
498 __collate_wcsdup(const wchar_t *s)
499 {
500 size_t len = wcslen(s) + 1;
501 wchar_t *wcs;
502
503 if ((wcs = (wchar_t *)malloc(len * sizeof(wchar_t))) == NULL)
504 __collate_err(EX_OSERR, __func__);
505 wcscpy(wcs, s);
506 return (wcs);
507 }
508
509 __private_extern__ void
510 __collate_xfrm(const wchar_t *src, wchar_t **xf, locale_t loc)
511 {
512 int pri, len;
513 size_t slen;
514 const wchar_t *t;
515 wchar_t *tt = NULL, *tr = NULL;
516 int direc, pass;
517 wchar_t *xfp;
518 struct __collate_st_info *info = __collate_info;
519 int sverrno;
520
521 for(pass = 0; pass < COLL_WEIGHTS_MAX; pass++)
522 xf[pass] = NULL;
523 for(pass = 0; pass < info->directive_count; pass++) {
524 direc = info->directive[pass];
525 if (pass == 0 || !(info->flags & COLLATE_SUBST_DUP)) {
526 sverrno = errno;
527 free(tt);
528 errno = sverrno;
529 tt = __collate_substitute(src, pass, loc);
530 }
531 if (direc & DIRECTIVE_BACKWARD) {
532 wchar_t *bp, *fp, c;
533 sverrno = errno;
534 free(tr);
535 errno = sverrno;
536 tr = __collate_wcsdup(tt ? tt : src);
537 bp = tr;
538 fp = tr + wcslen(tr) - 1;
539 while(bp < fp) {
540 c = *bp;
541 *bp++ = *fp;
542 *fp-- = c;
543 }
544 t = (const wchar_t *)tr;
545 } else if (tt)
546 t = (const wchar_t *)tt;
547 else
548 t = (const wchar_t *)src;
549 sverrno = errno;
550 if ((xf[pass] = (wchar_t *)malloc(sizeof(wchar_t) * (wcslen(t) + 1))) == NULL) {
551 errno = sverrno;
552 slen = 0;
553 goto end;
554 }
555 errno = sverrno;
556 xfp = xf[pass];
557 if (direc & DIRECTIVE_POSITION) {
558 while(*t) {
559 __collate_lookup_which(t, &len, &pri, pass, loc);
560 t += len;
561 if (pri <= 0) {
562 if (pri < 0) {
563 errno = EINVAL;
564 slen = 0;
565 goto end;
566 }
567 pri = COLLATE_MAX_PRIORITY;
568 }
569 *xfp++ = pri;
570 }
571 } else {
572 while(*t) {
573 __collate_lookup_which(t, &len, &pri, pass, loc);
574 t += len;
575 if (pri <= 0) {
576 if (pri < 0) {
577 errno = EINVAL;
578 slen = 0;
579 goto end;
580 }
581 continue;
582 }
583 *xfp++ = pri;
584 }
585 }
586 *xfp = 0;
587 }
588 end:
589 sverrno = errno;
590 free(tt);
591 free(tr);
592 errno = sverrno;
593 }
594
595 __private_extern__ void
596 __collate_err(int ex, const char *f)
597 {
598 const char *s;
599 int serrno = errno;
600
601 s = _getprogname();
602 _write(STDERR_FILENO, s, strlen(s));
603 _write(STDERR_FILENO, ": ", 2);
604 s = f;
605 _write(STDERR_FILENO, s, strlen(s));
606 _write(STDERR_FILENO, ": ", 2);
607 s = strerror(serrno);
608 _write(STDERR_FILENO, s, strlen(s));
609 _write(STDERR_FILENO, "\n", 1);
610 exit(ex);
611 }
612
613 /*
614 * __collate_collating_symbol takes the multibyte string specified by
615 * src and slen, and using ps, converts that to a wide character. Then
616 * it is checked to verify it is a collating symbol, and then copies
617 * it to the wide character string specified by dst and dlen (the
618 * results are not null terminated). The length of the wide characters
619 * copied to dst is returned if successful. Zero is returned if no such
620 * collating symbol exists. (size_t)-1 is returned if there are wide-character
621 * conversion errors, if the length of the converted string is greater that
622 * STR_LEN or if dlen is too small. It is up to the calling routine to
623 * preserve the mbstate_t structure as needed.
624 */
625 __private_extern__ size_t
626 __collate_collating_symbol(wchar_t *dst, size_t dlen, const char *src, size_t slen, mbstate_t *ps, locale_t loc)
627 {
628 wchar_t wname[STR_LEN];
629 wchar_t w, *wp;
630 size_t len, l;
631
632 /* POSIX locale */
633 if (loc->__collate_load_error) {
634 if (dlen < 1)
635 return (size_t)-1;
636 if (slen != 1 || !isascii(*src))
637 return 0;
638 *dst = *src;
639 return 1;
640 }
641 for(wp = wname, len = 0; slen > 0; len++) {
642 l = mbrtowc_l(&w, src, slen, ps, loc);
643 if (l == (size_t)-1 || l == (size_t)-2)
644 return (size_t)-1;
645 if (l == 0)
646 break;
647 if (len >= STR_LEN)
648 return -1;
649 *wp++ = w;
650 src += l;
651 slen = (long)slen - (long)l;
652 }
653 if (len == 0 || len > dlen)
654 return (size_t)-1;
655 if (len == 1) {
656 if (*wname <= UCHAR_MAX) {
657 if (__collate_char_pri_table[*wname].pri[0] >= 0) {
658 if (dlen > 0)
659 *dst = *wname;
660 return 1;
661 }
662 return 0;
663 } else if (__collate_info->large_pri_count > 0) {
664 struct __collate_st_large_char_pri *match;
665 match = largesearch(*wname, loc);
666 if (match && match->pri.pri[0] >= 0) {
667 if (dlen > 0)
668 *dst = *wname;
669 return 1;
670 }
671 }
672 return 0;
673 }
674 *wp = 0;
675 if (__collate_info->chain_count > 0) {
676 struct __collate_st_chain_pri *match;
677 int ll;
678 match = chainsearch(wname, &ll, loc);
679 if (match) {
680 if (ll < dlen)
681 dlen = ll;
682 wcsncpy(dst, wname, dlen);
683 return ll;
684 }
685 }
686 return 0;
687 }
688
689 /*
690 * __collate_equiv_class returns the equivalence class number for the symbol
691 * specified by src and slen, using ps to convert from multi-byte to wide
692 * character. Zero is returned if the symbol is not in an equivalence
693 * class. -1 is returned if there are wide character conversion error,
694 * if there are any greater-than-8-bit characters or if a multi-byte symbol
695 * is greater or equal to STR_LEN in length. It is up to the calling
696 * routine to preserve the mbstate_t structure as needed.
697 */
698 __private_extern__ int
699 __collate_equiv_class(const char *src, size_t slen, mbstate_t *ps, locale_t loc)
700 {
701 wchar_t wname[STR_LEN];
702 wchar_t w, *wp;
703 size_t len, l;
704 int e;
705
706 /* POSIX locale */
707 if (loc->__collate_load_error)
708 return 0;
709 for(wp = wname, len = 0; slen > 0; len++) {
710 l = mbrtowc_l(&w, src, slen, ps, loc);
711 if (l == (size_t)-1 || l == (size_t)-2)
712 return -1;
713 if (l == 0)
714 break;
715 if (len >= STR_LEN)
716 return -1;
717 *wp++ = w;
718 src += l;
719 slen = (long)slen - (long)l;
720 }
721 if (len == 0)
722 return -1;
723 if (len == 1) {
724 e = -1;
725 if (*wname <= UCHAR_MAX)
726 e = __collate_char_pri_table[*wname].pri[0];
727 else if (__collate_info->large_pri_count > 0) {
728 struct __collate_st_large_char_pri *match;
729 match = largesearch(*wname, loc);
730 if (match)
731 e = match->pri.pri[0];
732 }
733 if (e == 0)
734 return IGNORE_EQUIV_CLASS;
735 return e > 0 ? e : 0;
736 }
737 *wp = 0;
738 if (__collate_info->chain_count > 0) {
739 struct __collate_st_chain_pri *match;
740 int ll;
741 match = chainsearch(wname, &ll, loc);
742 if (match) {
743 e = match->pri[0];
744 if (e == 0)
745 return IGNORE_EQUIV_CLASS;
746 return e < 0 ? -e : e;
747 }
748 }
749 return 0;
750 }
751
752 /*
753 * __collate_equiv_match tries to match any single or multi-character symbol
754 * in equivalence class equiv_class in the multi-byte string specified by src
755 * and slen. If start is non-zero, it is taken to be the first (pre-converted)
756 * wide character. Subsequence wide characters, if needed, will use ps in
757 * the conversion. On a successful match, the length of the matched string
758 * is returned (including the start character). If dst is non-NULL, the
759 * matched wide-character string is copied to dst, a wide character array of
760 * length dlen (the results are not zero-terminated). If rlen is non-NULL,
761 * the number of character in src actually used is returned. Zero is
762 * returned by __collate_equiv_match if there is no match. (size_t)-1 is
763 * returned on error: if there were conversion errors or if dlen is too small
764 * to accept the results. On no match or error, ps is restored to its incoming
765 * state.
766 */
767 size_t
768 __collate_equiv_match(int equiv_class, wchar_t *dst, size_t dlen, wchar_t start, const char *src, size_t slen, mbstate_t *ps, size_t *rlen, locale_t loc)
769 {
770 wchar_t w;
771 size_t len, l, clen;
772 int i;
773 wchar_t buf[STR_LEN], *wp;
774 mbstate_t save;
775 const char *s = src;
776 size_t sl = slen;
777 struct __collate_st_chain_pri *ch = NULL;
778
779 /* POSIX locale */
780 if (loc->__collate_load_error)
781 return (size_t)-1;
782 if (equiv_class == IGNORE_EQUIV_CLASS)
783 equiv_class = 0;
784 if (ps)
785 save = *ps;
786 wp = buf;
787 len = clen = 0;
788 if (start) {
789 *wp++ = start;
790 len = 1;
791 }
792 /* convert up to the max chain length */
793 while(sl > 0 && len < __collate_info->chain_max_len) {
794 l = mbrtowc_l(&w, s, sl, ps, loc);
795 if (l == (size_t)-1 || l == (size_t)-2 || l == 0)
796 break;
797 *wp++ = w;
798 s += l;
799 clen += l;
800 sl -= l;
801 len++;
802 }
803 *wp = 0;
804 if (len > 1 && (ch = chainsearch(buf, &i, loc)) != NULL) {
805 int e = ch->pri[0];
806 if (e < 0)
807 e = -e;
808 if (e == equiv_class)
809 goto found;
810 }
811 /* try single character */
812 i = 1;
813 if (*buf <= UCHAR_MAX) {
814 if (equiv_class == __collate_char_pri_table[*buf].pri[0])
815 goto found;
816 } else if (__collate_info->large_pri_count > 0) {
817 struct __collate_st_large_char_pri *match;
818 match = largesearch(*buf, loc);
819 if (match && equiv_class == match->pri.pri[0])
820 goto found;
821 }
822 /* no match */
823 if (ps)
824 *ps = save;
825 return 0;
826 found:
827 /* if we converted more than we used, restore to initial and reconvert
828 * up to what did match */
829 if (i < len) {
830 len = i;
831 if (ps)
832 *ps = save;
833 if (start)
834 i--;
835 clen = 0;
836 while(i-- > 0) {
837 l = mbrtowc_l(&w, src, slen, ps, loc);
838 src += l;
839 clen += l;
840 slen -= l;
841 }
842 }
843 if (dst) {
844 if (dlen < len) {
845 if (ps)
846 *ps = save;
847 return (size_t)-1;
848 }
849 for(wp = buf; len > 0; len--)
850 *dst++ = *wp++;
851 }
852 if (rlen)
853 *rlen = clen;
854 return len;
855 }
856
857 /*
858 * __collate_equiv_value returns the primary collation value for the given
859 * collating symbol specified by str and len. Zero or negative is return
860 * if the collating symbol was not found. (Use by the bracket code in TRE.)
861 */
862 __private_extern__ int
863 __collate_equiv_value(locale_t loc, const wchar_t *str, size_t len)
864 {
865 int e;
866
867 if (len < 1 || len >= STR_LEN)
868 return -1;
869
870 /* POSIX locale */
871 if (loc->__collate_load_error)
872 return (len == 1 && *str <= UCHAR_MAX) ? *str : -1;
873
874 if (len == 1) {
875 e = -1;
876 if (*str <= UCHAR_MAX)
877 e = __collate_char_pri_table[*str].pri[0];
878 else if (__collate_info->large_pri_count > 0) {
879 struct __collate_st_large_char_pri *match;
880 match = largesearch(*str, loc);
881 if (match)
882 e = match->pri.pri[0];
883 }
884 if (e == 0)
885 return IGNORE_EQUIV_CLASS;
886 return e > 0 ? e : 0;
887 }
888 if (__collate_info->chain_count > 0) {
889 wchar_t name[STR_LEN];
890 struct __collate_st_chain_pri *match;
891 int ll;
892
893 wcsncpy(name, str, len);
894 name[len] = 0;
895 match = chainsearch(name, &ll, loc);
896 if (match) {
897 e = match->pri[0];
898 if (e == 0)
899 return IGNORE_EQUIV_CLASS;
900 return e < 0 ? -e : e;
901 }
902 }
903 return 0;
904 }
905
906 #if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
/*
 * Convert the wide characters of str from network to host byte order
 * in place, stopping at the first zero element or after len elements,
 * whichever comes first.  The bound is tested before each element is
 * read, so a full array of len elements with no terminator is never
 * read past its end.
 */
static void
wntohl(wchar_t *str, int len)
{
	for (; len > 0 && *str; str++, len--)
		*str = ntohl(*str);
}
913 #endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
914
915 #ifdef COLLATE_DEBUG
/*
 * Format the character code c for the debug dump: printable ASCII as
 * 'c' followed by a space, anything else as \x{hex}.  Returns a pointer
 * to a static buffer that the next call overwrites.
 */
static char *
show(int c)
{
	/* "\x{" + up to 8 hex digits + "}" + NUL needs 13 bytes */
	static char buf[16];

	if (c >= 32 && c <= 126)
		snprintf(buf, sizeof(buf), "'%c' ", c);
	else
		snprintf(buf, sizeof(buf), "\\x{%02x}", (unsigned int)c);
	return buf;
}
927
/*
 * Format at most len wide characters of t (stopping at a zero element)
 * for the debug dump: printable ASCII is copied as is, anything else is
 * written as \x{hex}.  Output that does not fit the static buffer is
 * dropped at an element boundary.  Returns a pointer to a static buffer
 * that the next call overwrites.
 */
static char *
showwcs(const wchar_t *t, int len)
{
	static char buf[64];
	char *cp = buf;
	char *const end = buf + sizeof(buf);

	/* test the bound first so a full, unterminated array is not overread */
	for (; len > 0 && *t; len--, t++) {
		const size_t room = (size_t)(end - cp);

		if (*t >= 32 && *t <= 126) {
			/* need one byte for the character and one for the NUL */
			if (room < 2)
				break;
			*cp++ = (char)*t;
		} else {
			const int n = snprintf(cp, room, "\\x{%02x}",
			    (unsigned int)*t);

			if (n < 0 || (size_t)n >= room) {
				/* discard the partially written escape */
				*cp = 0;
				break;
			}
			cp += n;
		}
	}
	*cp = 0;
	return buf;
}
945
946 void
947 __collate_print_tables()
948 {
949 int i, z;
950 locale_t loc = __current_locale();
951
952 printf("Info: p=%d s=%d f=0x%02x m=%d dc=%d up=%d us=%d pc=%d sc=%d cc=%d lc=%d\n",
953 __collate_info->directive[0], __collate_info->directive[1],
954 __collate_info->flags, __collate_info->chain_max_len,
955 __collate_info->directive_count,
956 __collate_info->undef_pri[0], __collate_info->undef_pri[1],
957 __collate_info->subst_count[0], __collate_info->subst_count[1],
958 __collate_info->chain_count, __collate_info->large_pri_count);
959 for(z = 0; z < __collate_info->directive_count; z++) {
960 if (__collate_info->subst_count[z] > 0) {
961 struct __collate_st_subst *p2 = __collate_substitute_table[z];
962 if (z == 0 && (__collate_info->flags & COLLATE_SUBST_DUP))
963 printf("Both substitute tables:\n");
964 else
965 printf("Substitute table %d:\n", z);
966 for (i = __collate_info->subst_count[z]; i-- > 0; p2++)
967 printf("\t%s --> \"%s\"\n",
968 show(p2->val),
969 showwcs(p2->str, STR_LEN));
970 }
971 }
972 if (__collate_info->chain_count > 0) {
973 printf("Chain priority table:\n");
974 struct __collate_st_chain_pri *p2 = __collate_chain_pri_table;
975 for (i = __collate_info->chain_count; i-- > 0; p2++) {
976 printf("\t\"%s\" :", showwcs(p2->str, STR_LEN));
977 for(z = 0; z < __collate_info->directive_count; z++)
978 printf(" %d", p2->pri[z]);
979 putchar('\n');
980 }
981 }
982 printf("Char priority table:\n");
983 {
984 struct __collate_st_char_pri *p2 = __collate_char_pri_table;
985 for (i = 0; i < UCHAR_MAX + 1; i++, p2++) {
986 printf("\t%s :", show(i));
987 for(z = 0; z < __collate_info->directive_count; z++)
988 printf(" %d", p2->pri[z]);
989 putchar('\n');
990 }
991 }
992 if (__collate_info->large_pri_count > 0) {
993 struct __collate_st_large_char_pri *p2 = __collate_large_char_pri_table;
994 printf("Large priority table:\n");
995 for (i = __collate_info->large_pri_count; i-- > 0; p2++) {
996 printf("\t%s :", show(p2->val));
997 for(z = 0; z < __collate_info->directive_count; z++)
998 printf(" %d", p2->pri.pri[z]);
999 putchar('\n');
1000 }
1001 }
1002 }
1003 #endif