]> git.saurik.com Git - apple/libc.git/blame - locale/FreeBSD/collate.c
Libc-1158.50.2.tar.gz
[apple/libc.git] / locale / FreeBSD / collate.c
CommitLineData
9385eb3d
A
1/*-
2 * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
3 * at Electronni Visti IA, Kiev, Ukraine.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
1f2f436a 29__FBSDID("$FreeBSD: src/lib/libc/locale/collate.c,v 1.35 2005/02/27 20:31:13 ru Exp $");
9385eb3d 30
ad3c9f2a
A
31#include "xlocale_private.h"
32/* assumes the locale_t variable is named loc */
33#define __collate_chain_equiv_table (loc->__lc_collate->__chain_equiv_table)
34#define __collate_chain_pri_table (loc->__lc_collate->__chain_pri_table)
35#define __collate_char_pri_table (loc->__lc_collate->__char_pri_table)
36#define __collate_info (&loc->__lc_collate->__info)
37#define __collate_large_char_pri_table (loc->__lc_collate->__large_char_pri_table)
38#define __collate_substitute_table (loc->__lc_collate->__substitute_table)
39
9385eb3d
A
40#include "namespace.h"
41#include <arpa/inet.h>
42#include <stdio.h>
43#include <stdlib.h>
ad3c9f2a 44#include <stddef.h>
9385eb3d 45#include <string.h>
ad3c9f2a 46#include <wchar.h>
9385eb3d
A
47#include <errno.h>
48#include <unistd.h>
49#include <sysexits.h>
ad3c9f2a 50#include <ctype.h>
9385eb3d
A
51#include "un-namespace.h"
52
53#include "collate.h"
54#include "setlocale.h"
55#include "ldpart.h"
56
57#include "libc_private.h"
58
ad3c9f2a
A
59#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
60static void wntohl(wchar_t *, int);
61#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
9385eb3d
A
62void __collate_err(int ex, const char *f) __dead2;
63
ad3c9f2a
A
64/*
65 * Normally, the __collate_* routines should all be __private_extern__,
66 * but grep is using them (3715846). Until we can provide an alternative,
67 * we leave them public, and provide a read-only __collate_load_error variable
68 */
69#undef __collate_load_error
70int __collate_load_error = 1;
71
72__private_extern__ int
73__collate_load_tables(const char *encoding, locale_t loc)
9385eb3d
A
74{
75 FILE *fp;
ad3c9f2a 76 int i, saverr, chains, z;
9385eb3d 77 char strbuf[STR_LEN], buf[PATH_MAX];
ad3c9f2a
A
78 struct __xlocale_st_collate *TMP;
79 static struct __xlocale_st_collate *cache = NULL;
80 struct __collate_st_info info;
81 void *vp;
9385eb3d
A
82
83 /* 'encoding' must be already checked. */
84 if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
ad3c9f2a
A
85 loc->__collate_load_error = 1;
86 if (loc == &__global_locale)
87 __collate_load_error = 1;
88 XL_RELEASE(loc->__lc_collate);
89 loc->__lc_collate = NULL;
9385eb3d
A
90 return (_LDP_CACHE);
91 }
92
93 /*
94 * If the locale name is the same as our cache, use the cache.
95 */
ad3c9f2a
A
96 if (cache && strcmp(encoding, cache->__encoding) == 0) {
97 loc->__collate_load_error = 0;
98 if (loc == &__global_locale)
99 __collate_load_error = 0;
100 XL_RELEASE(loc->__lc_collate);
101 loc->__lc_collate = cache;
102 XL_RETAIN(loc->__lc_collate);
9385eb3d
A
103 return (_LDP_CACHE);
104 }
105
106 /*
107 * Slurp the locale file into the cache.
108 */
109
110 /* 'PathLocale' must be already set & checked. */
111 /* Range checking not needed, encoding has fixed size */
974e3884 112 (void)strcpy(buf, encoding);
9385eb3d 113 (void)strcat(buf, "/LC_COLLATE");
974e3884 114 if ((fp = fdopen(__open_path_locale(buf), "r")) == NULL) {
9385eb3d 115 return (_LDP_ERROR);
974e3884 116 }
9385eb3d
A
117
118 if (fread(strbuf, sizeof(strbuf), 1, fp) != 1) {
119 saverr = errno;
120 (void)fclose(fp);
121 errno = saverr;
122 return (_LDP_ERROR);
123 }
124 chains = -1;
ad3c9f2a 125 if (strcmp(strbuf, COLLATE_VERSION1_1A) == 0)
9385eb3d
A
126 chains = 1;
127 if (chains < 0) {
128 (void)fclose(fp);
129 errno = EFTYPE;
130 return (_LDP_ERROR);
131 }
132 if (chains) {
ad3c9f2a 133 if (fread(&info, sizeof(info), 1, fp) != 1) {
9385eb3d
A
134 saverr = errno;
135 (void)fclose(fp);
136 errno = saverr;
137 return (_LDP_ERROR);
138 }
ad3c9f2a
A
139#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
140 for(z = 0; z < info.directive_count; z++) {
141 info.undef_pri[z] = ntohl(info.undef_pri[z]);
142 info.subst_count[z] = ntohl(info.subst_count[z]);
143 }
144 info.chain_count = ntohl(info.chain_count);
145 info.large_pri_count = ntohl(info.large_pri_count);
146#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
147 if ((chains = info.chain_count) < 0) {
9385eb3d
A
148 (void)fclose(fp);
149 errno = EFTYPE;
150 return (_LDP_ERROR);
151 }
152 } else
153 chains = TABLE_SIZE;
154
ad3c9f2a
A
155 i = sizeof(struct __xlocale_st_collate)
156 + sizeof(struct __collate_st_chain_pri) * chains
157 + sizeof(struct __collate_st_large_char_pri) * info.large_pri_count;
158 for(z = 0; z < info.directive_count; z++)
159 i += sizeof(struct __collate_st_subst) * info.subst_count[z];
160 if ((TMP = (struct __xlocale_st_collate *)malloc(i)) == NULL) {
9385eb3d
A
161 saverr = errno;
162 (void)fclose(fp);
163 errno = saverr;
164 return (_LDP_ERROR);
165 }
ad3c9f2a
A
166 TMP->__refcount = 2; /* one for the locale, one for the cache */
167 TMP->__free_extra = NULL;
9385eb3d
A
168
169#define FREAD(a, b, c, d) \
170{ \
171 if (fread(a, b, c, d) != c) { \
172 saverr = errno; \
ad3c9f2a 173 free(TMP); \
9385eb3d
A
174 (void)fclose(d); \
175 errno = saverr; \
176 return (_LDP_ERROR); \
177 } \
178}
179
ad3c9f2a
A
180 /* adjust size to read the remaining in one chunk */
181 i -= offsetof(struct __xlocale_st_collate, __char_pri_table);
182 FREAD(TMP->__char_pri_table, i, 1, fp);
9385eb3d
A
183 (void)fclose(fp);
184
ad3c9f2a
A
185 vp = (void *)(TMP + 1);
186
187 /* the COLLATE_SUBST_DUP optimization relies on COLL_WEIGHTS_MAX == 2 */
188 if (info.subst_count[0] > 0) {
189 TMP->__substitute_table[0] = (struct __collate_st_subst *)vp;
190 vp += info.subst_count[0] * sizeof(struct __collate_st_subst);
191 } else
192 TMP->__substitute_table[0] = NULL;
193 if (info.flags & COLLATE_SUBST_DUP)
194 TMP->__substitute_table[1] = TMP->__substitute_table[0];
195 else if (info.subst_count[1] > 0) {
196 TMP->__substitute_table[1] = (struct __collate_st_subst *)vp;
197 vp += info.subst_count[1] * sizeof(struct __collate_st_subst);
198 } else
199 TMP->__substitute_table[1] = NULL;
200
201 if (chains > 0) {
202 TMP->__chain_pri_table = (struct __collate_st_chain_pri *)vp;
203 vp += chains * sizeof(struct __collate_st_chain_pri);
204 } else
205 TMP->__chain_pri_table = NULL;
206 if (info.large_pri_count > 0)
207 TMP->__large_char_pri_table = (struct __collate_st_large_char_pri *)vp;
208 else
209 TMP->__large_char_pri_table = NULL;
210
211#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
212 {
213 struct __collate_st_char_pri *p = TMP->__char_pri_table;
214 for(i = UCHAR_MAX + 1; i-- > 0; p++) {
215 for(z = 0; z < info.directive_count; z++)
216 p->pri[z] = ntohl(p->pri[z]);
217 }
218 }
219 for(z = 0; z < info.directive_count; z++)
220 if (info.subst_count[z] > 0) {
221 struct __collate_st_subst *p = TMP->__substitute_table[z];
222 for(i = info.subst_count[z]; i-- > 0; p++) {
223 p->val = ntohl(p->val);
224 wntohl(p->str, STR_LEN);
225 }
226 }
227 {
228 struct __collate_st_chain_pri *p = TMP->__chain_pri_table;
229 for(i = chains; i-- > 0; p++) {
230 wntohl(p->str, STR_LEN);
231 for(z = 0; z < info.directive_count; z++)
232 p->pri[z] = ntohl(p->pri[z]);
233 }
234 }
235 if (info.large_pri_count > 0) {
236 struct __collate_st_large_char_pri *p = TMP->__large_char_pri_table;
237 for(i = info.large_pri_count; i-- > 0; p++) {
238 p->val = ntohl(p->val);
239 for(z = 0; z < info.directive_count; z++)
240 p->pri.pri[z] = ntohl(p->pri.pri[z]);
9385eb3d
A
241 }
242 }
ad3c9f2a
A
243#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
244 (void)strcpy(TMP->__encoding, encoding);
245 (void)memcpy(&TMP->__info, &info, sizeof(info));
246 XL_RELEASE(cache);
247 cache = TMP;
248 XL_RELEASE(loc->__lc_collate);
249 loc->__lc_collate = cache;
250 /* no need to retain, since we set __refcount to 2 above */
251
252 loc->__collate_substitute_nontrivial = (info.subst_count[0] > 0 || info.subst_count[1] > 0);
253 loc->__collate_load_error = 0;
254 if (loc == &__global_locale)
255 __collate_load_error = 0;
9385eb3d
A
256
257 return (_LDP_LOADED);
258}
259
ad3c9f2a
A
/*
 * Return the number of wide characters in s before the first zero element,
 * looking at no more than len elements.
 *
 * The bound is tested before the element is read, so a completely filled,
 * unterminated array of exactly len elements is never read past its end.
 */
static int
__collate_wcsnlen(const wchar_t *s, int len)
{
	int n = 0;

	while (n < len && s[n] != 0)
		n++;
	return n;
}
270
271static struct __collate_st_subst *
272substsearch(const wchar_t key, struct __collate_st_subst *tab, int n)
273{
274 int low = 0;
275 int high = n - 1;
276 int next, compar;
277 struct __collate_st_subst *p;
278
279 while (low <= high) {
280 next = (low + high) / 2;
281 p = tab + next;
282 compar = key - p->val;
283 if (compar == 0)
284 return p;
285 if (compar > 0)
286 low = next + 1;
287 else
288 high = next - 1;
289 }
290 return NULL;
291}
292
293__private_extern__ wchar_t *
294__collate_substitute(const wchar_t *s, int which, locale_t loc)
9385eb3d
A
295{
296 int dest_len, len, nlen;
ad3c9f2a
A
297 int n, delta, nsubst;
298 wchar_t *dest_str = NULL;
299 const wchar_t *fp;
300 struct __collate_st_subst *subst, *match;
9385eb3d
A
301
302 if (s == NULL || *s == '\0')
ad3c9f2a
A
303 return (__collate_wcsdup(L""));
304 dest_len = wcslen(s);
305 nsubst = __collate_info->subst_count[which];
306 if (nsubst <= 0)
307 return __collate_wcsdup(s);
308 subst = __collate_substitute_table[which];
309 delta = dest_len / 4;
310 if (delta < 2)
311 delta = 2;
312 dest_str = (wchar_t *)malloc((dest_len += delta) * sizeof(wchar_t));
9385eb3d 313 if (dest_str == NULL)
3d9156a7 314 __collate_err(EX_OSERR, __func__);
9385eb3d
A
315 len = 0;
316 while (*s) {
ad3c9f2a
A
317 if ((match = substsearch(*s, subst, nsubst)) != NULL) {
318 fp = match->str;
319 n = __collate_wcsnlen(fp, STR_LEN);
320 } else {
321 fp = s;
322 n = 1;
323 }
324 nlen = len + n;
9385eb3d 325 if (dest_len <= nlen) {
ad3c9f2a 326 dest_str = reallocf(dest_str, (dest_len = nlen + delta) * sizeof(wchar_t));
9385eb3d 327 if (dest_str == NULL)
3d9156a7 328 __collate_err(EX_OSERR, __func__);
9385eb3d 329 }
ad3c9f2a
A
330 wcsncpy(dest_str + len, fp, n);
331 len += n;
332 s++;
9385eb3d 333 }
ad3c9f2a 334 dest_str[len] = 0;
9385eb3d
A
335 return (dest_str);
336}
337
ad3c9f2a
A
338static struct __collate_st_chain_pri *
339chainsearch(const wchar_t *key, int *len, locale_t loc)
340{
341 int low = 0;
342 int high = __collate_info->chain_count - 1;
343 int next, compar, l;
344 struct __collate_st_chain_pri *p;
345 struct __collate_st_chain_pri *tab = __collate_chain_pri_table;
346
347 while (low <= high) {
348 next = (low + high) / 2;
349 p = tab + next;
350 compar = *key - *p->str;
351 if (compar == 0) {
352 l = __collate_wcsnlen(p->str, STR_LEN);
353 compar = wcsncmp(key, p->str, l);
354 if (compar == 0) {
355 *len = l;
356 return p;
357 }
358 }
359 if (compar > 0)
360 low = next + 1;
361 else
362 high = next - 1;
363 }
364 return NULL;
365}
366
367static struct __collate_st_large_char_pri *
368largesearch(const wchar_t key, locale_t loc)
369{
370 int low = 0;
371 int high = __collate_info->large_pri_count - 1;
372 int next, compar;
373 struct __collate_st_large_char_pri *p;
374 struct __collate_st_large_char_pri *tab = __collate_large_char_pri_table;
375
376 while (low <= high) {
377 next = (low + high) / 2;
378 p = tab + next;
379 compar = key - p->val;
380 if (compar == 0)
381 return p;
382 if (compar > 0)
383 low = next + 1;
384 else
385 high = next - 1;
386 }
387 return NULL;
388}
389
390__private_extern__ void
391__collate_lookup_l(const wchar_t *t, int *len, int *prim, int *sec, locale_t loc)
9385eb3d
A
392{
393 struct __collate_st_chain_pri *p2;
ad3c9f2a 394 int l;
9385eb3d
A
395
396 *len = 1;
397 *prim = *sec = 0;
ad3c9f2a
A
398 p2 = chainsearch(t, &l, loc);
399 /* use the chain if prim >= 0 */
400 if (p2 && p2->pri[0] >= 0) {
401 *len = l;
402 *prim = p2->pri[0];
403 *sec = p2->pri[1];
404 return;
405 }
406 if (*t <= UCHAR_MAX) {
407 *prim = __collate_char_pri_table[*t].pri[0];
408 *sec = __collate_char_pri_table[*t].pri[1];
409 return;
410 }
411 if (__collate_info->large_pri_count > 0) {
412 struct __collate_st_large_char_pri *match;
413 match = largesearch(*t, loc);
414 if (match) {
415 *prim = match->pri.pri[0];
416 *sec = match->pri.pri[1];
417 return;
418 }
419 }
420 *prim = (l = __collate_info->undef_pri[0]) >= 0 ? l : *t - l;
421 *sec = (l = __collate_info->undef_pri[1]) >= 0 ? l : *t - l;
422}
423
424/*
425 * This is only provided for programs (like grep) that are calling this
426 * private function. This will go away eventually.
427 */
428void
429__collate_lookup(const unsigned char *t, int *len, int *prim, int *sec)
430{
431 locale_t loc = __current_locale();
432 wchar_t *w = __collate_mbstowcs((const char *)t, loc);
433 int sverrno;
434
435 __collate_lookup_l(w, len, prim, sec, loc);
436 sverrno = errno;
437 free(w);
438 errno = sverrno;
439}
440
441__private_extern__ void
442__collate_lookup_which(const wchar_t *t, int *len, int *pri, int which, locale_t loc)
443{
444 struct __collate_st_chain_pri *p2;
445 int p, l;
446
447 *len = 1;
448 *pri = 0;
449 p2 = chainsearch(t, &l, loc);
450 if (p2) {
451 p = p2->pri[which];
452 /* use the chain if pri >= 0 */
453 if (p >= 0) {
454 *len = l;
455 *pri = p;
456 return;
457 }
458 }
459 if (*t <= UCHAR_MAX) {
460 *pri = __collate_char_pri_table[*t].pri[which];
461 return;
462 }
463 if (__collate_info->large_pri_count > 0) {
464 struct __collate_st_large_char_pri *match;
465 match = largesearch(*t, loc);
466 if (match) {
467 *pri = match->pri.pri[which];
9385eb3d
A
468 return;
469 }
470 }
ad3c9f2a 471 *pri = (l = __collate_info->undef_pri[which]) >= 0 ? l : *t - l;
9385eb3d
A
472}
473
ad3c9f2a
A
474__private_extern__ wchar_t *
475__collate_mbstowcs(const char *s, locale_t loc)
9385eb3d 476{
ad3c9f2a
A
477 static const mbstate_t initial;
478 mbstate_t st;
479 size_t len;
480 const char *ss;
481 wchar_t *wcs;
9385eb3d 482
ad3c9f2a
A
483 ss = s;
484 st = initial;
485 if ((len = mbsrtowcs_l(NULL, &ss, 0, &st, loc)) == (size_t)-1)
486 return NULL;
487 if ((wcs = (wchar_t *)malloc((len + 1) * sizeof(wchar_t))) == NULL)
3d9156a7 488 __collate_err(EX_OSERR, __func__);
ad3c9f2a
A
489 st = initial;
490 mbsrtowcs_l(wcs, &s, len, &st, loc);
491 wcs[len] = 0;
492
493 return (wcs);
9385eb3d
A
494}
495
ad3c9f2a
A
496__private_extern__ wchar_t *
497__collate_wcsdup(const wchar_t *s)
498{
499 size_t len = wcslen(s) + 1;
500 wchar_t *wcs;
501
502 if ((wcs = (wchar_t *)malloc(len * sizeof(wchar_t))) == NULL)
503 __collate_err(EX_OSERR, __func__);
504 wcscpy(wcs, s);
505 return (wcs);
506}
507
508__private_extern__ void
509__collate_xfrm(const wchar_t *src, wchar_t **xf, locale_t loc)
510{
511 int pri, len;
512 size_t slen;
513 const wchar_t *t;
514 wchar_t *tt = NULL, *tr = NULL;
515 int direc, pass;
516 wchar_t *xfp;
517 struct __collate_st_info *info = __collate_info;
518 int sverrno;
519
520 for(pass = 0; pass < COLL_WEIGHTS_MAX; pass++)
521 xf[pass] = NULL;
522 for(pass = 0; pass < info->directive_count; pass++) {
523 direc = info->directive[pass];
524 if (pass == 0 || !(info->flags & COLLATE_SUBST_DUP)) {
525 sverrno = errno;
526 free(tt);
527 errno = sverrno;
528 tt = __collate_substitute(src, pass, loc);
529 }
530 if (direc & DIRECTIVE_BACKWARD) {
531 wchar_t *bp, *fp, c;
532 sverrno = errno;
533 free(tr);
534 errno = sverrno;
535 tr = __collate_wcsdup(tt ? tt : src);
536 bp = tr;
537 fp = tr + wcslen(tr) - 1;
538 while(bp < fp) {
539 c = *bp;
540 *bp++ = *fp;
541 *fp-- = c;
542 }
543 t = (const wchar_t *)tr;
544 } else if (tt)
545 t = (const wchar_t *)tt;
546 else
547 t = (const wchar_t *)src;
548 sverrno = errno;
549 if ((xf[pass] = (wchar_t *)malloc(sizeof(wchar_t) * (wcslen(t) + 1))) == NULL) {
550 errno = sverrno;
551 slen = 0;
552 goto end;
553 }
554 errno = sverrno;
555 xfp = xf[pass];
556 if (direc & DIRECTIVE_POSITION) {
557 while(*t) {
558 __collate_lookup_which(t, &len, &pri, pass, loc);
559 t += len;
560 if (pri <= 0) {
561 if (pri < 0) {
562 errno = EINVAL;
563 slen = 0;
564 goto end;
565 }
566 pri = COLLATE_MAX_PRIORITY;
567 }
568 *xfp++ = pri;
569 }
570 } else {
571 while(*t) {
572 __collate_lookup_which(t, &len, &pri, pass, loc);
573 t += len;
574 if (pri <= 0) {
575 if (pri < 0) {
576 errno = EINVAL;
577 slen = 0;
578 goto end;
579 }
580 continue;
581 }
582 *xfp++ = pri;
583 }
584 }
585 *xfp = 0;
586 }
587 end:
588 sverrno = errno;
589 free(tt);
590 free(tr);
591 errno = sverrno;
592}
593
594__private_extern__ void
9385eb3d
A
595__collate_err(int ex, const char *f)
596{
597 const char *s;
598 int serrno = errno;
599
600 s = _getprogname();
601 _write(STDERR_FILENO, s, strlen(s));
602 _write(STDERR_FILENO, ": ", 2);
603 s = f;
604 _write(STDERR_FILENO, s, strlen(s));
605 _write(STDERR_FILENO, ": ", 2);
606 s = strerror(serrno);
607 _write(STDERR_FILENO, s, strlen(s));
608 _write(STDERR_FILENO, "\n", 1);
609 exit(ex);
610}
611
ad3c9f2a
A
612/*
613 * __collate_collating_symbol takes the multibyte string specified by
614 * src and slen, and using ps, converts that to a wide character. Then
615 * it is checked to verify it is a collating symbol, and then copies
616 * it to the wide character string specified by dst and dlen (the
617 * results are not null terminated). The length of the wide characters
618 * copied to dst is returned if successful. Zero is returned if no such
619 * collating symbol exists. (size_t)-1 is returned if there are wide-character
620 * conversion errors, if the length of the converted string is greater that
621 * STR_LEN or if dlen is too small. It is up to the calling routine to
622 * preserve the mbstate_t structure as needed.
623 */
624__private_extern__ size_t
625__collate_collating_symbol(wchar_t *dst, size_t dlen, const char *src, size_t slen, mbstate_t *ps, locale_t loc)
626{
627 wchar_t wname[STR_LEN];
628 wchar_t w, *wp;
629 size_t len, l;
630
631 /* POSIX locale */
632 if (loc->__collate_load_error) {
633 if (dlen < 1)
634 return (size_t)-1;
635 if (slen != 1 || !isascii(*src))
636 return 0;
637 *dst = *src;
638 return 1;
639 }
640 for(wp = wname, len = 0; slen > 0; len++) {
641 l = mbrtowc_l(&w, src, slen, ps, loc);
642 if (l == (size_t)-1 || l == (size_t)-2)
643 return (size_t)-1;
644 if (l == 0)
645 break;
646 if (len >= STR_LEN)
647 return -1;
648 *wp++ = w;
649 src += l;
650 slen = (long)slen - (long)l;
651 }
652 if (len == 0 || len > dlen)
653 return (size_t)-1;
654 if (len == 1) {
655 if (*wname <= UCHAR_MAX) {
656 if (__collate_char_pri_table[*wname].pri[0] >= 0) {
657 if (dlen > 0)
658 *dst = *wname;
659 return 1;
660 }
661 return 0;
662 } else if (__collate_info->large_pri_count > 0) {
663 struct __collate_st_large_char_pri *match;
664 match = largesearch(*wname, loc);
665 if (match && match->pri.pri[0] >= 0) {
666 if (dlen > 0)
667 *dst = *wname;
668 return 1;
669 }
670 }
671 return 0;
672 }
673 *wp = 0;
674 if (__collate_info->chain_count > 0) {
675 struct __collate_st_chain_pri *match;
676 int ll;
677 match = chainsearch(wname, &ll, loc);
678 if (match) {
679 if (ll < dlen)
680 dlen = ll;
681 wcsncpy(dst, wname, dlen);
682 return ll;
683 }
684 }
685 return 0;
686}
687
688/*
689 * __collate_equiv_class returns the equivalence class number for the symbol
690 * specified by src and slen, using ps to convert from multi-byte to wide
691 * character. Zero is returned if the symbol is not in an equivalence
692 * class. -1 is returned if there are wide character conversion error,
693 * if there are any greater-than-8-bit characters or if a multi-byte symbol
694 * is greater or equal to STR_LEN in length. It is up to the calling
695 * routine to preserve the mbstate_t structure as needed.
696 */
697__private_extern__ int
698__collate_equiv_class(const char *src, size_t slen, mbstate_t *ps, locale_t loc)
699{
700 wchar_t wname[STR_LEN];
701 wchar_t w, *wp;
702 size_t len, l;
703 int e;
704
705 /* POSIX locale */
706 if (loc->__collate_load_error)
707 return 0;
708 for(wp = wname, len = 0; slen > 0; len++) {
709 l = mbrtowc_l(&w, src, slen, ps, loc);
710 if (l == (size_t)-1 || l == (size_t)-2)
711 return -1;
712 if (l == 0)
713 break;
714 if (len >= STR_LEN)
715 return -1;
716 *wp++ = w;
717 src += l;
718 slen = (long)slen - (long)l;
719 }
720 if (len == 0)
721 return -1;
722 if (len == 1) {
723 e = -1;
724 if (*wname <= UCHAR_MAX)
725 e = __collate_char_pri_table[*wname].pri[0];
726 else if (__collate_info->large_pri_count > 0) {
727 struct __collate_st_large_char_pri *match;
728 match = largesearch(*wname, loc);
729 if (match)
730 e = match->pri.pri[0];
731 }
732 if (e == 0)
733 return IGNORE_EQUIV_CLASS;
734 return e > 0 ? e : 0;
735 }
736 *wp = 0;
737 if (__collate_info->chain_count > 0) {
738 struct __collate_st_chain_pri *match;
739 int ll;
740 match = chainsearch(wname, &ll, loc);
741 if (match) {
742 e = match->pri[0];
743 if (e == 0)
744 return IGNORE_EQUIV_CLASS;
745 return e < 0 ? -e : e;
746 }
747 }
748 return 0;
749}
750
751/*
752 * __collate_equiv_match tries to match any single or multi-character symbol
753 * in equivalence class equiv_class in the multi-byte string specified by src
754 * and slen. If start is non-zero, it is taken to be the first (pre-converted)
755 * wide character. Subsequence wide characters, if needed, will use ps in
756 * the conversion. On a successful match, the length of the matched string
757 * is returned (including the start character). If dst is non-NULL, the
758 * matched wide-character string is copied to dst, a wide character array of
759 * length dlen (the results are not zero-terminated). If rlen is non-NULL,
760 * the number of character in src actually used is returned. Zero is
761 * returned by __collate_equiv_match if there is no match. (size_t)-1 is
762 * returned on error: if there were conversion errors or if dlen is too small
763 * to accept the results. On no match or error, ps is restored to its incoming
764 * state.
765 */
766size_t
767__collate_equiv_match(int equiv_class, wchar_t *dst, size_t dlen, wchar_t start, const char *src, size_t slen, mbstate_t *ps, size_t *rlen, locale_t loc)
768{
769 wchar_t w;
770 size_t len, l, clen;
771 int i;
772 wchar_t buf[STR_LEN], *wp;
773 mbstate_t save;
774 const char *s = src;
775 size_t sl = slen;
776 struct __collate_st_chain_pri *ch = NULL;
777
778 /* POSIX locale */
779 if (loc->__collate_load_error)
780 return (size_t)-1;
781 if (equiv_class == IGNORE_EQUIV_CLASS)
782 equiv_class = 0;
783 if (ps)
784 save = *ps;
785 wp = buf;
786 len = clen = 0;
787 if (start) {
788 *wp++ = start;
789 len = 1;
790 }
791 /* convert up to the max chain length */
792 while(sl > 0 && len < __collate_info->chain_max_len) {
793 l = mbrtowc_l(&w, s, sl, ps, loc);
794 if (l == (size_t)-1 || l == (size_t)-2 || l == 0)
795 break;
796 *wp++ = w;
797 s += l;
798 clen += l;
799 sl -= l;
800 len++;
801 }
802 *wp = 0;
803 if (len > 1 && (ch = chainsearch(buf, &i, loc)) != NULL) {
804 int e = ch->pri[0];
805 if (e < 0)
806 e = -e;
807 if (e == equiv_class)
808 goto found;
809 }
810 /* try single character */
811 i = 1;
812 if (*buf <= UCHAR_MAX) {
813 if (equiv_class == __collate_char_pri_table[*buf].pri[0])
814 goto found;
815 } else if (__collate_info->large_pri_count > 0) {
816 struct __collate_st_large_char_pri *match;
817 match = largesearch(*buf, loc);
818 if (match && equiv_class == match->pri.pri[0])
819 goto found;
820 }
821 /* no match */
822 if (ps)
823 *ps = save;
824 return 0;
825found:
826 /* if we converted more than we used, restore to initial and reconvert
827 * up to what did match */
828 if (i < len) {
829 len = i;
830 if (ps)
831 *ps = save;
832 if (start)
833 i--;
834 clen = 0;
835 while(i-- > 0) {
836 l = mbrtowc_l(&w, src, slen, ps, loc);
837 src += l;
838 clen += l;
839 slen -= l;
840 }
841 }
842 if (dst) {
843 if (dlen < len) {
844 if (ps)
845 *ps = save;
846 return (size_t)-1;
847 }
848 for(wp = buf; len > 0; len--)
849 *dst++ = *wp++;
850 }
851 if (rlen)
852 *rlen = clen;
853 return len;
854}
855
856/*
857 * __collate_equiv_value returns the primary collation value for the given
858 * collating symbol specified by str and len. Zero or negative is return
859 * if the collating symbol was not found. (Use by the bracket code in TRE.)
860 */
861__private_extern__ int
862__collate_equiv_value(locale_t loc, const wchar_t *str, size_t len)
863{
864 int e;
865
866 if (len < 1 || len >= STR_LEN)
867 return -1;
868
869 /* POSIX locale */
870 if (loc->__collate_load_error)
871 return (len == 1 && *str <= UCHAR_MAX) ? *str : -1;
872
873 if (len == 1) {
874 e = -1;
875 if (*str <= UCHAR_MAX)
876 e = __collate_char_pri_table[*str].pri[0];
877 else if (__collate_info->large_pri_count > 0) {
878 struct __collate_st_large_char_pri *match;
879 match = largesearch(*str, loc);
880 if (match)
881 e = match->pri.pri[0];
882 }
883 if (e == 0)
884 return IGNORE_EQUIV_CLASS;
885 return e > 0 ? e : 0;
886 }
887 if (__collate_info->chain_count > 0) {
888 wchar_t name[STR_LEN];
889 struct __collate_st_chain_pri *match;
890 int ll;
891
892 wcsncpy(name, str, len);
893 name[len] = 0;
894 match = chainsearch(name, &ll, loc);
895 if (match) {
896 e = match->pri[0];
897 if (e == 0)
898 return IGNORE_EQUIV_CLASS;
899 return e < 0 ? -e : e;
900 }
901 }
902 return 0;
903}
904
#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
/*
 * Convert a wide string from network to host byte order in place.
 * Conversion stops at the first zero element or after len elements,
 * whichever comes first.  The bound is checked before the element is read,
 * so a full, unterminated array of len elements is never read past its end.
 */
static void
wntohl(wchar_t *str, int len)
{
	for (; len > 0 && *str; str++, len--)
		*str = ntohl(*str);
}
#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
913
9385eb3d 914#ifdef COLLATE_DEBUG
ad3c9f2a
A
/*
 * Format a character code for the debug dump: printable ASCII (32..126) as
 * 'c' followed by a space, anything else as \x{hex}.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call.  The buffer holds the longest possible output ("\x{" + 8 hex digits
 * + "}" + terminator) and the write is bounded.
 */
static char *
show(int c)
{
	static char buf[16];

	if (c >= 32 && c <= 126)
		snprintf(buf, sizeof(buf), "'%c' ", c);
	else
		snprintf(buf, sizeof(buf), "\\x{%02x}", (unsigned int)c);
	return buf;
}
926
/*
 * Format up to len characters of the wide string t for the debug dump,
 * stopping at the first zero element.  Printable ASCII is copied as is,
 * everything else is written as \x{hex}.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call.  Output that would not fit is dropped at a whole-character
 * boundary, so the buffer is never overrun.
 */
static char *
showwcs(const wchar_t *t, int len)
{
	static char buf[256];
	size_t used = 0;

	for (; len > 0 && *t; len--, t++) {
		size_t room = sizeof(buf) - used;

		if (*t >= 32 && *t <= 126) {
			if (room < 2)	/* this character plus the terminator */
				break;
			buf[used++] = (char)*t;
		} else {
			int n = snprintf(buf + used, room, "\\x{%02x}", (unsigned int)*t);

			if (n < 0 || (size_t)n >= room)
				break;	/* did not fit; terminator below discards it */
			used += (size_t)n;
		}
	}
	buf[used] = '\0';
	return buf;
}
944
9385eb3d
A
945void
946__collate_print_tables()
947{
ad3c9f2a
A
948 int i, z;
949 locale_t loc = __current_locale();
9385eb3d 950
ad3c9f2a
A
951 printf("Info: p=%d s=%d f=0x%02x m=%d dc=%d up=%d us=%d pc=%d sc=%d cc=%d lc=%d\n",
952 __collate_info->directive[0], __collate_info->directive[1],
953 __collate_info->flags, __collate_info->chain_max_len,
954 __collate_info->directive_count,
955 __collate_info->undef_pri[0], __collate_info->undef_pri[1],
956 __collate_info->subst_count[0], __collate_info->subst_count[1],
957 __collate_info->chain_count, __collate_info->large_pri_count);
958 for(z = 0; z < __collate_info->directive_count; z++) {
959 if (__collate_info->subst_count[z] > 0) {
960 struct __collate_st_subst *p2 = __collate_substitute_table[z];
961 if (z == 0 && (__collate_info->flags & COLLATE_SUBST_DUP))
962 printf("Both substitute tables:\n");
963 else
964 printf("Substitute table %d:\n", z);
965 for (i = __collate_info->subst_count[z]; i-- > 0; p2++)
966 printf("\t%s --> \"%s\"\n",
967 show(p2->val),
968 showwcs(p2->str, STR_LEN));
969 }
970 }
971 if (__collate_info->chain_count > 0) {
972 printf("Chain priority table:\n");
973 struct __collate_st_chain_pri *p2 = __collate_chain_pri_table;
974 for (i = __collate_info->chain_count; i-- > 0; p2++) {
975 printf("\t\"%s\" :", showwcs(p2->str, STR_LEN));
976 for(z = 0; z < __collate_info->directive_count; z++)
977 printf(" %d", p2->pri[z]);
978 putchar('\n');
979 }
980 }
9385eb3d 981 printf("Char priority table:\n");
ad3c9f2a
A
982 {
983 struct __collate_st_char_pri *p2 = __collate_char_pri_table;
984 for (i = 0; i < UCHAR_MAX + 1; i++, p2++) {
985 printf("\t%s :", show(i));
986 for(z = 0; z < __collate_info->directive_count; z++)
987 printf(" %d", p2->pri[z]);
988 putchar('\n');
989 }
990 }
991 if (__collate_info->large_pri_count > 0) {
992 struct __collate_st_large_char_pri *p2 = __collate_large_char_pri_table;
993 printf("Large priority table:\n");
994 for (i = __collate_info->large_pri_count; i-- > 0; p2++) {
995 printf("\t%s :", show(p2->val));
996 for(z = 0; z < __collate_info->directive_count; z++)
997 printf(" %d", p2->pri.pri[z]);
998 putchar('\n');
999 }
1000 }
9385eb3d
A
1001}
1002#endif