]> git.saurik.com Git - apple/libc.git/blame - locale/FreeBSD/collate.c
Libc-1353.100.2.tar.gz
[apple/libc.git] / locale / FreeBSD / collate.c
CommitLineData
9385eb3d
A
1/*-
2 * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
3 * at Electronni Visti IA, Kiev, Ukraine.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
1f2f436a 29__FBSDID("$FreeBSD: src/lib/libc/locale/collate.c,v 1.35 2005/02/27 20:31:13 ru Exp $");
9385eb3d 30
ad3c9f2a
A
31#include "xlocale_private.h"
32/* assumes the locale_t variable is named loc */
33#define __collate_chain_equiv_table (loc->__lc_collate->__chain_equiv_table)
34#define __collate_chain_pri_table (loc->__lc_collate->__chain_pri_table)
35#define __collate_char_pri_table (loc->__lc_collate->__char_pri_table)
36#define __collate_info (&loc->__lc_collate->__info)
37#define __collate_large_char_pri_table (loc->__lc_collate->__large_char_pri_table)
38#define __collate_substitute_table (loc->__lc_collate->__substitute_table)
39
9385eb3d
A
40#include "namespace.h"
41#include <arpa/inet.h>
42#include <stdio.h>
43#include <stdlib.h>
ad3c9f2a 44#include <stddef.h>
9385eb3d 45#include <string.h>
ad3c9f2a 46#include <wchar.h>
9385eb3d
A
47#include <errno.h>
48#include <unistd.h>
49#include <sysexits.h>
ad3c9f2a 50#include <ctype.h>
9385eb3d
A
51#include "un-namespace.h"
52
53#include "collate.h"
54#include "setlocale.h"
55#include "ldpart.h"
56
57#include "libc_private.h"
58
ad3c9f2a
A
59#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
60static void wntohl(wchar_t *, int);
61#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
9385eb3d
A
62void __collate_err(int ex, const char *f) __dead2;
63
ad3c9f2a
A
64/*
65 * Normally, the __collate_* routines should all be __private_extern__,
66 * but grep is using them (3715846). Until we can provide an alternative,
67 * we leave them public, and provide a read-only __collate_load_error variable
68 */
69#undef __collate_load_error
70int __collate_load_error = 1;
71
72__private_extern__ int
73__collate_load_tables(const char *encoding, locale_t loc)
9385eb3d 74{
b061a43b 75 int fd;
9385eb3d 76 FILE *fp;
ad3c9f2a 77 int i, saverr, chains, z;
9385eb3d 78 char strbuf[STR_LEN], buf[PATH_MAX];
ad3c9f2a
A
79 struct __xlocale_st_collate *TMP;
80 static struct __xlocale_st_collate *cache = NULL;
81 struct __collate_st_info info;
82 void *vp;
9385eb3d
A
83
84 /* 'encoding' must be already checked. */
85 if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
ad3c9f2a
A
86 loc->__collate_load_error = 1;
87 if (loc == &__global_locale)
88 __collate_load_error = 1;
89 XL_RELEASE(loc->__lc_collate);
90 loc->__lc_collate = NULL;
9385eb3d
A
91 return (_LDP_CACHE);
92 }
93
94 /*
95 * If the locale name is the same as our cache, use the cache.
96 */
ad3c9f2a
A
97 if (cache && strcmp(encoding, cache->__encoding) == 0) {
98 loc->__collate_load_error = 0;
99 if (loc == &__global_locale)
100 __collate_load_error = 0;
101 XL_RELEASE(loc->__lc_collate);
102 loc->__lc_collate = cache;
103 XL_RETAIN(loc->__lc_collate);
9385eb3d
A
104 return (_LDP_CACHE);
105 }
106
107 /*
108 * Slurp the locale file into the cache.
109 */
110
111 /* 'PathLocale' must be already set & checked. */
112 /* Range checking not needed, encoding has fixed size */
974e3884 113 (void)strcpy(buf, encoding);
9385eb3d 114 (void)strcat(buf, "/LC_COLLATE");
b061a43b
A
115 fd = __open_path_locale(buf);
116 if (fd == -1) {
117 return (_LDP_ERROR);
118 }
119 if ((fp = fdopen(fd, "r")) == NULL) {
120 close(fd);
9385eb3d 121 return (_LDP_ERROR);
974e3884 122 }
9385eb3d
A
123
124 if (fread(strbuf, sizeof(strbuf), 1, fp) != 1) {
125 saverr = errno;
126 (void)fclose(fp);
127 errno = saverr;
128 return (_LDP_ERROR);
129 }
130 chains = -1;
ad3c9f2a 131 if (strcmp(strbuf, COLLATE_VERSION1_1A) == 0)
9385eb3d
A
132 chains = 1;
133 if (chains < 0) {
134 (void)fclose(fp);
135 errno = EFTYPE;
136 return (_LDP_ERROR);
137 }
138 if (chains) {
ad3c9f2a 139 if (fread(&info, sizeof(info), 1, fp) != 1) {
9385eb3d
A
140 saverr = errno;
141 (void)fclose(fp);
142 errno = saverr;
143 return (_LDP_ERROR);
144 }
ad3c9f2a
A
145#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
146 for(z = 0; z < info.directive_count; z++) {
147 info.undef_pri[z] = ntohl(info.undef_pri[z]);
148 info.subst_count[z] = ntohl(info.subst_count[z]);
149 }
150 info.chain_count = ntohl(info.chain_count);
151 info.large_pri_count = ntohl(info.large_pri_count);
152#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
153 if ((chains = info.chain_count) < 0) {
9385eb3d
A
154 (void)fclose(fp);
155 errno = EFTYPE;
156 return (_LDP_ERROR);
157 }
158 } else
159 chains = TABLE_SIZE;
160
ad3c9f2a
A
161 i = sizeof(struct __xlocale_st_collate)
162 + sizeof(struct __collate_st_chain_pri) * chains
163 + sizeof(struct __collate_st_large_char_pri) * info.large_pri_count;
164 for(z = 0; z < info.directive_count; z++)
165 i += sizeof(struct __collate_st_subst) * info.subst_count[z];
166 if ((TMP = (struct __xlocale_st_collate *)malloc(i)) == NULL) {
9385eb3d
A
167 saverr = errno;
168 (void)fclose(fp);
169 errno = saverr;
170 return (_LDP_ERROR);
171 }
ad3c9f2a
A
172 TMP->__refcount = 2; /* one for the locale, one for the cache */
173 TMP->__free_extra = NULL;
9385eb3d
A
174
175#define FREAD(a, b, c, d) \
176{ \
177 if (fread(a, b, c, d) != c) { \
178 saverr = errno; \
ad3c9f2a 179 free(TMP); \
9385eb3d
A
180 (void)fclose(d); \
181 errno = saverr; \
182 return (_LDP_ERROR); \
183 } \
184}
185
ad3c9f2a
A
186 /* adjust size to read the remaining in one chunk */
187 i -= offsetof(struct __xlocale_st_collate, __char_pri_table);
188 FREAD(TMP->__char_pri_table, i, 1, fp);
9385eb3d
A
189 (void)fclose(fp);
190
ad3c9f2a
A
191 vp = (void *)(TMP + 1);
192
193 /* the COLLATE_SUBST_DUP optimization relies on COLL_WEIGHTS_MAX == 2 */
194 if (info.subst_count[0] > 0) {
195 TMP->__substitute_table[0] = (struct __collate_st_subst *)vp;
196 vp += info.subst_count[0] * sizeof(struct __collate_st_subst);
197 } else
198 TMP->__substitute_table[0] = NULL;
199 if (info.flags & COLLATE_SUBST_DUP)
200 TMP->__substitute_table[1] = TMP->__substitute_table[0];
201 else if (info.subst_count[1] > 0) {
202 TMP->__substitute_table[1] = (struct __collate_st_subst *)vp;
203 vp += info.subst_count[1] * sizeof(struct __collate_st_subst);
204 } else
205 TMP->__substitute_table[1] = NULL;
206
207 if (chains > 0) {
208 TMP->__chain_pri_table = (struct __collate_st_chain_pri *)vp;
209 vp += chains * sizeof(struct __collate_st_chain_pri);
210 } else
211 TMP->__chain_pri_table = NULL;
212 if (info.large_pri_count > 0)
213 TMP->__large_char_pri_table = (struct __collate_st_large_char_pri *)vp;
214 else
215 TMP->__large_char_pri_table = NULL;
216
217#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
218 {
219 struct __collate_st_char_pri *p = TMP->__char_pri_table;
220 for(i = UCHAR_MAX + 1; i-- > 0; p++) {
221 for(z = 0; z < info.directive_count; z++)
222 p->pri[z] = ntohl(p->pri[z]);
223 }
224 }
225 for(z = 0; z < info.directive_count; z++)
226 if (info.subst_count[z] > 0) {
227 struct __collate_st_subst *p = TMP->__substitute_table[z];
228 for(i = info.subst_count[z]; i-- > 0; p++) {
229 p->val = ntohl(p->val);
230 wntohl(p->str, STR_LEN);
231 }
232 }
233 {
234 struct __collate_st_chain_pri *p = TMP->__chain_pri_table;
235 for(i = chains; i-- > 0; p++) {
236 wntohl(p->str, STR_LEN);
237 for(z = 0; z < info.directive_count; z++)
238 p->pri[z] = ntohl(p->pri[z]);
239 }
240 }
241 if (info.large_pri_count > 0) {
242 struct __collate_st_large_char_pri *p = TMP->__large_char_pri_table;
243 for(i = info.large_pri_count; i-- > 0; p++) {
244 p->val = ntohl(p->val);
245 for(z = 0; z < info.directive_count; z++)
246 p->pri.pri[z] = ntohl(p->pri.pri[z]);
9385eb3d
A
247 }
248 }
ad3c9f2a
A
249#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
250 (void)strcpy(TMP->__encoding, encoding);
251 (void)memcpy(&TMP->__info, &info, sizeof(info));
252 XL_RELEASE(cache);
253 cache = TMP;
254 XL_RELEASE(loc->__lc_collate);
255 loc->__lc_collate = cache;
256 /* no need to retain, since we set __refcount to 2 above */
257
258 loc->__collate_substitute_nontrivial = (info.subst_count[0] > 0 || info.subst_count[1] > 0);
259 loc->__collate_load_error = 0;
260 if (loc == &__global_locale)
261 __collate_load_error = 0;
9385eb3d
A
262
263 return (_LDP_LOADED);
264}
265
ad3c9f2a
A
/*
 * Length of the wide string s, looking at no more than len elements.
 * The bound is tested before each element is read, so a buffer of exactly
 * len elements with no terminator is never read past its end.
 */
static int
__collate_wcsnlen(const wchar_t *s, int len)
{
	int n = 0;

	while (n < len && s[n] != 0)
		n++;
	return n;
}
276
277static struct __collate_st_subst *
278substsearch(const wchar_t key, struct __collate_st_subst *tab, int n)
279{
280 int low = 0;
281 int high = n - 1;
282 int next, compar;
283 struct __collate_st_subst *p;
284
285 while (low <= high) {
286 next = (low + high) / 2;
287 p = tab + next;
288 compar = key - p->val;
289 if (compar == 0)
290 return p;
291 if (compar > 0)
292 low = next + 1;
293 else
294 high = next - 1;
295 }
296 return NULL;
297}
298
299__private_extern__ wchar_t *
300__collate_substitute(const wchar_t *s, int which, locale_t loc)
9385eb3d
A
301{
302 int dest_len, len, nlen;
ad3c9f2a
A
303 int n, delta, nsubst;
304 wchar_t *dest_str = NULL;
305 const wchar_t *fp;
306 struct __collate_st_subst *subst, *match;
9385eb3d
A
307
308 if (s == NULL || *s == '\0')
ad3c9f2a
A
309 return (__collate_wcsdup(L""));
310 dest_len = wcslen(s);
311 nsubst = __collate_info->subst_count[which];
312 if (nsubst <= 0)
313 return __collate_wcsdup(s);
314 subst = __collate_substitute_table[which];
315 delta = dest_len / 4;
316 if (delta < 2)
317 delta = 2;
318 dest_str = (wchar_t *)malloc((dest_len += delta) * sizeof(wchar_t));
9385eb3d 319 if (dest_str == NULL)
3d9156a7 320 __collate_err(EX_OSERR, __func__);
9385eb3d
A
321 len = 0;
322 while (*s) {
ad3c9f2a
A
323 if ((match = substsearch(*s, subst, nsubst)) != NULL) {
324 fp = match->str;
325 n = __collate_wcsnlen(fp, STR_LEN);
326 } else {
327 fp = s;
328 n = 1;
329 }
330 nlen = len + n;
9385eb3d 331 if (dest_len <= nlen) {
ad3c9f2a 332 dest_str = reallocf(dest_str, (dest_len = nlen + delta) * sizeof(wchar_t));
9385eb3d 333 if (dest_str == NULL)
3d9156a7 334 __collate_err(EX_OSERR, __func__);
9385eb3d 335 }
ad3c9f2a
A
336 wcsncpy(dest_str + len, fp, n);
337 len += n;
338 s++;
9385eb3d 339 }
ad3c9f2a 340 dest_str[len] = 0;
9385eb3d
A
341 return (dest_str);
342}
343
ad3c9f2a
A
344static struct __collate_st_chain_pri *
345chainsearch(const wchar_t *key, int *len, locale_t loc)
346{
347 int low = 0;
348 int high = __collate_info->chain_count - 1;
349 int next, compar, l;
350 struct __collate_st_chain_pri *p;
351 struct __collate_st_chain_pri *tab = __collate_chain_pri_table;
352
353 while (low <= high) {
354 next = (low + high) / 2;
355 p = tab + next;
356 compar = *key - *p->str;
357 if (compar == 0) {
358 l = __collate_wcsnlen(p->str, STR_LEN);
359 compar = wcsncmp(key, p->str, l);
360 if (compar == 0) {
361 *len = l;
362 return p;
363 }
364 }
365 if (compar > 0)
366 low = next + 1;
367 else
368 high = next - 1;
369 }
370 return NULL;
371}
372
373static struct __collate_st_large_char_pri *
374largesearch(const wchar_t key, locale_t loc)
375{
376 int low = 0;
377 int high = __collate_info->large_pri_count - 1;
378 int next, compar;
379 struct __collate_st_large_char_pri *p;
380 struct __collate_st_large_char_pri *tab = __collate_large_char_pri_table;
381
382 while (low <= high) {
383 next = (low + high) / 2;
384 p = tab + next;
385 compar = key - p->val;
386 if (compar == 0)
387 return p;
388 if (compar > 0)
389 low = next + 1;
390 else
391 high = next - 1;
392 }
393 return NULL;
394}
395
396__private_extern__ void
397__collate_lookup_l(const wchar_t *t, int *len, int *prim, int *sec, locale_t loc)
9385eb3d
A
398{
399 struct __collate_st_chain_pri *p2;
ad3c9f2a 400 int l;
9385eb3d
A
401
402 *len = 1;
403 *prim = *sec = 0;
ad3c9f2a
A
404 p2 = chainsearch(t, &l, loc);
405 /* use the chain if prim >= 0 */
406 if (p2 && p2->pri[0] >= 0) {
407 *len = l;
408 *prim = p2->pri[0];
409 *sec = p2->pri[1];
410 return;
411 }
412 if (*t <= UCHAR_MAX) {
413 *prim = __collate_char_pri_table[*t].pri[0];
414 *sec = __collate_char_pri_table[*t].pri[1];
415 return;
416 }
417 if (__collate_info->large_pri_count > 0) {
418 struct __collate_st_large_char_pri *match;
419 match = largesearch(*t, loc);
420 if (match) {
421 *prim = match->pri.pri[0];
422 *sec = match->pri.pri[1];
423 return;
424 }
425 }
426 *prim = (l = __collate_info->undef_pri[0]) >= 0 ? l : *t - l;
427 *sec = (l = __collate_info->undef_pri[1]) >= 0 ? l : *t - l;
428}
429
430/*
431 * This is only provided for programs (like grep) that are calling this
432 * private function. This will go away eventually.
433 */
434void
435__collate_lookup(const unsigned char *t, int *len, int *prim, int *sec)
436{
437 locale_t loc = __current_locale();
438 wchar_t *w = __collate_mbstowcs((const char *)t, loc);
439 int sverrno;
440
441 __collate_lookup_l(w, len, prim, sec, loc);
442 sverrno = errno;
443 free(w);
444 errno = sverrno;
445}
446
447__private_extern__ void
448__collate_lookup_which(const wchar_t *t, int *len, int *pri, int which, locale_t loc)
449{
450 struct __collate_st_chain_pri *p2;
451 int p, l;
452
453 *len = 1;
454 *pri = 0;
455 p2 = chainsearch(t, &l, loc);
456 if (p2) {
457 p = p2->pri[which];
458 /* use the chain if pri >= 0 */
459 if (p >= 0) {
460 *len = l;
461 *pri = p;
462 return;
463 }
464 }
465 if (*t <= UCHAR_MAX) {
466 *pri = __collate_char_pri_table[*t].pri[which];
467 return;
468 }
469 if (__collate_info->large_pri_count > 0) {
470 struct __collate_st_large_char_pri *match;
471 match = largesearch(*t, loc);
472 if (match) {
473 *pri = match->pri.pri[which];
9385eb3d
A
474 return;
475 }
476 }
ad3c9f2a 477 *pri = (l = __collate_info->undef_pri[which]) >= 0 ? l : *t - l;
9385eb3d
A
478}
479
ad3c9f2a
A
480__private_extern__ wchar_t *
481__collate_mbstowcs(const char *s, locale_t loc)
9385eb3d 482{
ad3c9f2a
A
483 static const mbstate_t initial;
484 mbstate_t st;
485 size_t len;
486 const char *ss;
487 wchar_t *wcs;
9385eb3d 488
ad3c9f2a
A
489 ss = s;
490 st = initial;
491 if ((len = mbsrtowcs_l(NULL, &ss, 0, &st, loc)) == (size_t)-1)
492 return NULL;
493 if ((wcs = (wchar_t *)malloc((len + 1) * sizeof(wchar_t))) == NULL)
3d9156a7 494 __collate_err(EX_OSERR, __func__);
ad3c9f2a
A
495 st = initial;
496 mbsrtowcs_l(wcs, &s, len, &st, loc);
497 wcs[len] = 0;
498
499 return (wcs);
9385eb3d
A
500}
501
ad3c9f2a
A
502__private_extern__ wchar_t *
503__collate_wcsdup(const wchar_t *s)
504{
505 size_t len = wcslen(s) + 1;
506 wchar_t *wcs;
507
508 if ((wcs = (wchar_t *)malloc(len * sizeof(wchar_t))) == NULL)
509 __collate_err(EX_OSERR, __func__);
510 wcscpy(wcs, s);
511 return (wcs);
512}
513
514__private_extern__ void
515__collate_xfrm(const wchar_t *src, wchar_t **xf, locale_t loc)
516{
517 int pri, len;
518 size_t slen;
519 const wchar_t *t;
520 wchar_t *tt = NULL, *tr = NULL;
521 int direc, pass;
522 wchar_t *xfp;
523 struct __collate_st_info *info = __collate_info;
524 int sverrno;
525
526 for(pass = 0; pass < COLL_WEIGHTS_MAX; pass++)
527 xf[pass] = NULL;
528 for(pass = 0; pass < info->directive_count; pass++) {
529 direc = info->directive[pass];
530 if (pass == 0 || !(info->flags & COLLATE_SUBST_DUP)) {
531 sverrno = errno;
532 free(tt);
533 errno = sverrno;
534 tt = __collate_substitute(src, pass, loc);
535 }
536 if (direc & DIRECTIVE_BACKWARD) {
537 wchar_t *bp, *fp, c;
538 sverrno = errno;
539 free(tr);
540 errno = sverrno;
541 tr = __collate_wcsdup(tt ? tt : src);
542 bp = tr;
543 fp = tr + wcslen(tr) - 1;
544 while(bp < fp) {
545 c = *bp;
546 *bp++ = *fp;
547 *fp-- = c;
548 }
549 t = (const wchar_t *)tr;
550 } else if (tt)
551 t = (const wchar_t *)tt;
552 else
553 t = (const wchar_t *)src;
554 sverrno = errno;
555 if ((xf[pass] = (wchar_t *)malloc(sizeof(wchar_t) * (wcslen(t) + 1))) == NULL) {
556 errno = sverrno;
557 slen = 0;
558 goto end;
559 }
560 errno = sverrno;
561 xfp = xf[pass];
562 if (direc & DIRECTIVE_POSITION) {
563 while(*t) {
564 __collate_lookup_which(t, &len, &pri, pass, loc);
565 t += len;
566 if (pri <= 0) {
567 if (pri < 0) {
568 errno = EINVAL;
569 slen = 0;
570 goto end;
571 }
572 pri = COLLATE_MAX_PRIORITY;
573 }
574 *xfp++ = pri;
575 }
576 } else {
577 while(*t) {
578 __collate_lookup_which(t, &len, &pri, pass, loc);
579 t += len;
580 if (pri <= 0) {
581 if (pri < 0) {
582 errno = EINVAL;
583 slen = 0;
584 goto end;
585 }
586 continue;
587 }
588 *xfp++ = pri;
589 }
590 }
591 *xfp = 0;
592 }
593 end:
594 sverrno = errno;
595 free(tt);
596 free(tr);
597 errno = sverrno;
598}
599
600__private_extern__ void
9385eb3d
A
601__collate_err(int ex, const char *f)
602{
603 const char *s;
604 int serrno = errno;
605
606 s = _getprogname();
607 _write(STDERR_FILENO, s, strlen(s));
608 _write(STDERR_FILENO, ": ", 2);
609 s = f;
610 _write(STDERR_FILENO, s, strlen(s));
611 _write(STDERR_FILENO, ": ", 2);
612 s = strerror(serrno);
613 _write(STDERR_FILENO, s, strlen(s));
614 _write(STDERR_FILENO, "\n", 1);
615 exit(ex);
616}
617
ad3c9f2a
A
618/*
619 * __collate_collating_symbol takes the multibyte string specified by
620 * src and slen, and using ps, converts that to a wide character. Then
621 * it is checked to verify it is a collating symbol, and then copies
622 * it to the wide character string specified by dst and dlen (the
623 * results are not null terminated). The length of the wide characters
624 * copied to dst is returned if successful. Zero is returned if no such
625 * collating symbol exists. (size_t)-1 is returned if there are wide-character
626 * conversion errors, if the length of the converted string is greater that
627 * STR_LEN or if dlen is too small. It is up to the calling routine to
628 * preserve the mbstate_t structure as needed.
629 */
630__private_extern__ size_t
631__collate_collating_symbol(wchar_t *dst, size_t dlen, const char *src, size_t slen, mbstate_t *ps, locale_t loc)
632{
633 wchar_t wname[STR_LEN];
634 wchar_t w, *wp;
635 size_t len, l;
636
637 /* POSIX locale */
638 if (loc->__collate_load_error) {
639 if (dlen < 1)
640 return (size_t)-1;
641 if (slen != 1 || !isascii(*src))
642 return 0;
643 *dst = *src;
644 return 1;
645 }
646 for(wp = wname, len = 0; slen > 0; len++) {
647 l = mbrtowc_l(&w, src, slen, ps, loc);
648 if (l == (size_t)-1 || l == (size_t)-2)
649 return (size_t)-1;
650 if (l == 0)
651 break;
652 if (len >= STR_LEN)
653 return -1;
654 *wp++ = w;
655 src += l;
656 slen = (long)slen - (long)l;
657 }
658 if (len == 0 || len > dlen)
659 return (size_t)-1;
660 if (len == 1) {
661 if (*wname <= UCHAR_MAX) {
662 if (__collate_char_pri_table[*wname].pri[0] >= 0) {
663 if (dlen > 0)
664 *dst = *wname;
665 return 1;
666 }
667 return 0;
668 } else if (__collate_info->large_pri_count > 0) {
669 struct __collate_st_large_char_pri *match;
670 match = largesearch(*wname, loc);
671 if (match && match->pri.pri[0] >= 0) {
672 if (dlen > 0)
673 *dst = *wname;
674 return 1;
675 }
676 }
677 return 0;
678 }
679 *wp = 0;
680 if (__collate_info->chain_count > 0) {
681 struct __collate_st_chain_pri *match;
682 int ll;
683 match = chainsearch(wname, &ll, loc);
684 if (match) {
685 if (ll < dlen)
686 dlen = ll;
687 wcsncpy(dst, wname, dlen);
688 return ll;
689 }
690 }
691 return 0;
692}
693
694/*
695 * __collate_equiv_class returns the equivalence class number for the symbol
696 * specified by src and slen, using ps to convert from multi-byte to wide
697 * character. Zero is returned if the symbol is not in an equivalence
698 * class. -1 is returned if there are wide character conversion error,
699 * if there are any greater-than-8-bit characters or if a multi-byte symbol
700 * is greater or equal to STR_LEN in length. It is up to the calling
701 * routine to preserve the mbstate_t structure as needed.
702 */
703__private_extern__ int
704__collate_equiv_class(const char *src, size_t slen, mbstate_t *ps, locale_t loc)
705{
706 wchar_t wname[STR_LEN];
707 wchar_t w, *wp;
708 size_t len, l;
709 int e;
710
711 /* POSIX locale */
712 if (loc->__collate_load_error)
713 return 0;
714 for(wp = wname, len = 0; slen > 0; len++) {
715 l = mbrtowc_l(&w, src, slen, ps, loc);
716 if (l == (size_t)-1 || l == (size_t)-2)
717 return -1;
718 if (l == 0)
719 break;
720 if (len >= STR_LEN)
721 return -1;
722 *wp++ = w;
723 src += l;
724 slen = (long)slen - (long)l;
725 }
726 if (len == 0)
727 return -1;
728 if (len == 1) {
729 e = -1;
730 if (*wname <= UCHAR_MAX)
731 e = __collate_char_pri_table[*wname].pri[0];
732 else if (__collate_info->large_pri_count > 0) {
733 struct __collate_st_large_char_pri *match;
734 match = largesearch(*wname, loc);
735 if (match)
736 e = match->pri.pri[0];
737 }
738 if (e == 0)
739 return IGNORE_EQUIV_CLASS;
740 return e > 0 ? e : 0;
741 }
742 *wp = 0;
743 if (__collate_info->chain_count > 0) {
744 struct __collate_st_chain_pri *match;
745 int ll;
746 match = chainsearch(wname, &ll, loc);
747 if (match) {
748 e = match->pri[0];
749 if (e == 0)
750 return IGNORE_EQUIV_CLASS;
751 return e < 0 ? -e : e;
752 }
753 }
754 return 0;
755}
756
757/*
758 * __collate_equiv_match tries to match any single or multi-character symbol
759 * in equivalence class equiv_class in the multi-byte string specified by src
760 * and slen. If start is non-zero, it is taken to be the first (pre-converted)
761 * wide character. Subsequence wide characters, if needed, will use ps in
762 * the conversion. On a successful match, the length of the matched string
763 * is returned (including the start character). If dst is non-NULL, the
764 * matched wide-character string is copied to dst, a wide character array of
765 * length dlen (the results are not zero-terminated). If rlen is non-NULL,
766 * the number of character in src actually used is returned. Zero is
767 * returned by __collate_equiv_match if there is no match. (size_t)-1 is
768 * returned on error: if there were conversion errors or if dlen is too small
769 * to accept the results. On no match or error, ps is restored to its incoming
770 * state.
771 */
772size_t
773__collate_equiv_match(int equiv_class, wchar_t *dst, size_t dlen, wchar_t start, const char *src, size_t slen, mbstate_t *ps, size_t *rlen, locale_t loc)
774{
775 wchar_t w;
776 size_t len, l, clen;
777 int i;
778 wchar_t buf[STR_LEN], *wp;
779 mbstate_t save;
780 const char *s = src;
781 size_t sl = slen;
782 struct __collate_st_chain_pri *ch = NULL;
783
784 /* POSIX locale */
785 if (loc->__collate_load_error)
786 return (size_t)-1;
787 if (equiv_class == IGNORE_EQUIV_CLASS)
788 equiv_class = 0;
789 if (ps)
790 save = *ps;
791 wp = buf;
792 len = clen = 0;
793 if (start) {
794 *wp++ = start;
795 len = 1;
796 }
797 /* convert up to the max chain length */
798 while(sl > 0 && len < __collate_info->chain_max_len) {
799 l = mbrtowc_l(&w, s, sl, ps, loc);
800 if (l == (size_t)-1 || l == (size_t)-2 || l == 0)
801 break;
802 *wp++ = w;
803 s += l;
804 clen += l;
805 sl -= l;
806 len++;
807 }
808 *wp = 0;
809 if (len > 1 && (ch = chainsearch(buf, &i, loc)) != NULL) {
810 int e = ch->pri[0];
811 if (e < 0)
812 e = -e;
813 if (e == equiv_class)
814 goto found;
815 }
816 /* try single character */
817 i = 1;
818 if (*buf <= UCHAR_MAX) {
819 if (equiv_class == __collate_char_pri_table[*buf].pri[0])
820 goto found;
821 } else if (__collate_info->large_pri_count > 0) {
822 struct __collate_st_large_char_pri *match;
823 match = largesearch(*buf, loc);
824 if (match && equiv_class == match->pri.pri[0])
825 goto found;
826 }
827 /* no match */
828 if (ps)
829 *ps = save;
830 return 0;
831found:
832 /* if we converted more than we used, restore to initial and reconvert
833 * up to what did match */
834 if (i < len) {
835 len = i;
836 if (ps)
837 *ps = save;
838 if (start)
839 i--;
840 clen = 0;
841 while(i-- > 0) {
842 l = mbrtowc_l(&w, src, slen, ps, loc);
843 src += l;
844 clen += l;
845 slen -= l;
846 }
847 }
848 if (dst) {
849 if (dlen < len) {
850 if (ps)
851 *ps = save;
852 return (size_t)-1;
853 }
854 for(wp = buf; len > 0; len--)
855 *dst++ = *wp++;
856 }
857 if (rlen)
858 *rlen = clen;
859 return len;
860}
861
862/*
863 * __collate_equiv_value returns the primary collation value for the given
864 * collating symbol specified by str and len. Zero or negative is return
865 * if the collating symbol was not found. (Use by the bracket code in TRE.)
866 */
867__private_extern__ int
868__collate_equiv_value(locale_t loc, const wchar_t *str, size_t len)
869{
870 int e;
871
872 if (len < 1 || len >= STR_LEN)
873 return -1;
874
875 /* POSIX locale */
876 if (loc->__collate_load_error)
877 return (len == 1 && *str <= UCHAR_MAX) ? *str : -1;
878
879 if (len == 1) {
880 e = -1;
881 if (*str <= UCHAR_MAX)
882 e = __collate_char_pri_table[*str].pri[0];
883 else if (__collate_info->large_pri_count > 0) {
884 struct __collate_st_large_char_pri *match;
885 match = largesearch(*str, loc);
886 if (match)
887 e = match->pri.pri[0];
888 }
889 if (e == 0)
890 return IGNORE_EQUIV_CLASS;
891 return e > 0 ? e : 0;
892 }
893 if (__collate_info->chain_count > 0) {
894 wchar_t name[STR_LEN];
895 struct __collate_st_chain_pri *match;
896 int ll;
897
898 wcsncpy(name, str, len);
899 name[len] = 0;
900 match = chainsearch(name, &ll, loc);
901 if (match) {
902 e = match->pri[0];
903 if (e == 0)
904 return IGNORE_EQUIV_CLASS;
905 return e < 0 ? -e : e;
906 }
907 }
908 return 0;
909}
910
#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
/*
 * Convert a wide string of at most len elements from network to host byte
 * order in place, stopping at the first zero element.  The length is
 * checked before each element is read, so an unterminated array of exactly
 * len elements is never read past its end.
 */
static void
wntohl(wchar_t *str, int len)
{
	for (; len > 0 && *str; str++, len--)
		*str = ntohl(*str);
}
#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
919
9385eb3d 920#ifdef COLLATE_DEBUG
ad3c9f2a
A
/*
 * Format c for debug output: printable ASCII as 'c' followed by a space,
 * anything else as \x{hh...}.  Returns a pointer to a static buffer that is
 * overwritten by the next call.
 */
static char *
show(int c)
{
	/* Large enough for "\x{ffffffff}" plus the terminator. */
	static char buf[16];

	if (c >= 32 && c <= 126)
		snprintf(buf, sizeof(buf), "'%c' ", c);
	else
		snprintf(buf, sizeof(buf), "\\x{%02x}", (unsigned int)c);
	return buf;
}
932
/*
 * Format at most len characters of the wide string t for debug output:
 * printable ASCII is copied through, everything else becomes \x{hh...}.
 * Output that would not fit in the static buffer is dropped at a character
 * boundary.  Returns a pointer to a static buffer that is overwritten by the
 * next call.
 */
static char *
showwcs(const wchar_t *t, int len)
{
	static char buf[128];
	char *cp = buf;
	char *const end = buf + sizeof(buf);

	for (; len > 0 && *t; len--, t++) {
		if (*t >= 32 && *t <= 126) {
			/* Need room for this character and the terminator. */
			if (end - cp < 2)
				break;
			*cp++ = (char)*t;
		} else {
			int w = snprintf(cp, (size_t)(end - cp), "\\x{%02x}",
			    (unsigned int)*t);

			if (w < 0 || w >= end - cp) {
				/* Discard the truncated escape. */
				*cp = 0;
				break;
			}
			cp += w;
		}
	}
	*cp = 0;
	return buf;
}
950
9385eb3d
A
951void
952__collate_print_tables()
953{
ad3c9f2a
A
954 int i, z;
955 locale_t loc = __current_locale();
9385eb3d 956
ad3c9f2a
A
957 printf("Info: p=%d s=%d f=0x%02x m=%d dc=%d up=%d us=%d pc=%d sc=%d cc=%d lc=%d\n",
958 __collate_info->directive[0], __collate_info->directive[1],
959 __collate_info->flags, __collate_info->chain_max_len,
960 __collate_info->directive_count,
961 __collate_info->undef_pri[0], __collate_info->undef_pri[1],
962 __collate_info->subst_count[0], __collate_info->subst_count[1],
963 __collate_info->chain_count, __collate_info->large_pri_count);
964 for(z = 0; z < __collate_info->directive_count; z++) {
965 if (__collate_info->subst_count[z] > 0) {
966 struct __collate_st_subst *p2 = __collate_substitute_table[z];
967 if (z == 0 && (__collate_info->flags & COLLATE_SUBST_DUP))
968 printf("Both substitute tables:\n");
969 else
970 printf("Substitute table %d:\n", z);
971 for (i = __collate_info->subst_count[z]; i-- > 0; p2++)
972 printf("\t%s --> \"%s\"\n",
973 show(p2->val),
974 showwcs(p2->str, STR_LEN));
975 }
976 }
977 if (__collate_info->chain_count > 0) {
978 printf("Chain priority table:\n");
979 struct __collate_st_chain_pri *p2 = __collate_chain_pri_table;
980 for (i = __collate_info->chain_count; i-- > 0; p2++) {
981 printf("\t\"%s\" :", showwcs(p2->str, STR_LEN));
982 for(z = 0; z < __collate_info->directive_count; z++)
983 printf(" %d", p2->pri[z]);
984 putchar('\n');
985 }
986 }
9385eb3d 987 printf("Char priority table:\n");
ad3c9f2a
A
988 {
989 struct __collate_st_char_pri *p2 = __collate_char_pri_table;
990 for (i = 0; i < UCHAR_MAX + 1; i++, p2++) {
991 printf("\t%s :", show(i));
992 for(z = 0; z < __collate_info->directive_count; z++)
993 printf(" %d", p2->pri[z]);
994 putchar('\n');
995 }
996 }
997 if (__collate_info->large_pri_count > 0) {
998 struct __collate_st_large_char_pri *p2 = __collate_large_char_pri_table;
999 printf("Large priority table:\n");
1000 for (i = __collate_info->large_pri_count; i-- > 0; p2++) {
1001 printf("\t%s :", show(p2->val));
1002 for(z = 0; z < __collate_info->directive_count; z++)
1003 printf(" %d", p2->pri.pri[z]);
1004 putchar('\n');
1005 }
1006 }
9385eb3d
A
1007}
1008#endif