]> git.saurik.com Git - apple/libc.git/blame - locale/FreeBSD/collate.c
Libc-1439.100.3.tar.gz
[apple/libc.git] / locale / FreeBSD / collate.c
CommitLineData
9385eb3d
A
1/*-
2 * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
3 * at Electronni Visti IA, Kiev, Ukraine.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
1f2f436a 29__FBSDID("$FreeBSD: src/lib/libc/locale/collate.c,v 1.35 2005/02/27 20:31:13 ru Exp $");
9385eb3d 30
ad3c9f2a
A
31#include "xlocale_private.h"
32/* assumes the locale_t variable is named loc */
33#define __collate_chain_equiv_table (loc->__lc_collate->__chain_equiv_table)
34#define __collate_chain_pri_table (loc->__lc_collate->__chain_pri_table)
35#define __collate_char_pri_table (loc->__lc_collate->__char_pri_table)
36#define __collate_info (&loc->__lc_collate->__info)
37#define __collate_large_char_pri_table (loc->__lc_collate->__large_char_pri_table)
38#define __collate_substitute_table (loc->__lc_collate->__substitute_table)
39
9385eb3d
A
40#include "namespace.h"
41#include <arpa/inet.h>
42#include <stdio.h>
43#include <stdlib.h>
ad3c9f2a 44#include <stddef.h>
9385eb3d 45#include <string.h>
ad3c9f2a 46#include <wchar.h>
9385eb3d
A
47#include <errno.h>
48#include <unistd.h>
49#include <sysexits.h>
ad3c9f2a 50#include <ctype.h>
9385eb3d
A
51#include "un-namespace.h"
52
53#include "collate.h"
54#include "setlocale.h"
55#include "ldpart.h"
56
57#include "libc_private.h"
58
ad3c9f2a
A
59#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
60static void wntohl(wchar_t *, int);
61#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
9385eb3d
A
62void __collate_err(int ex, const char *f) __dead2;
63
ad3c9f2a
A
64/*
65 * Normally, the __collate_* routines should all be __private_extern__,
66 * but grep is using them (3715846). Until we can provide an alternative,
67 * we leave them public, and provide a read-only __collate_load_error variable
68 */
69#undef __collate_load_error
70int __collate_load_error = 1;
71
72__private_extern__ int
73__collate_load_tables(const char *encoding, locale_t loc)
9385eb3d 74{
b061a43b 75 int fd;
9385eb3d 76 FILE *fp;
ad3c9f2a 77 int i, saverr, chains, z;
9385eb3d 78 char strbuf[STR_LEN], buf[PATH_MAX];
ad3c9f2a
A
79 struct __xlocale_st_collate *TMP;
80 static struct __xlocale_st_collate *cache = NULL;
81 struct __collate_st_info info;
82 void *vp;
9385eb3d
A
83
84 /* 'encoding' must be already checked. */
85 if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
ad3c9f2a
A
86 loc->__collate_load_error = 1;
87 if (loc == &__global_locale)
88 __collate_load_error = 1;
89 XL_RELEASE(loc->__lc_collate);
90 loc->__lc_collate = NULL;
9385eb3d
A
91 return (_LDP_CACHE);
92 }
93
94 /*
95 * If the locale name is the same as our cache, use the cache.
96 */
ad3c9f2a
A
97 if (cache && strcmp(encoding, cache->__encoding) == 0) {
98 loc->__collate_load_error = 0;
99 if (loc == &__global_locale)
100 __collate_load_error = 0;
101 XL_RELEASE(loc->__lc_collate);
102 loc->__lc_collate = cache;
103 XL_RETAIN(loc->__lc_collate);
9385eb3d
A
104 return (_LDP_CACHE);
105 }
106
107 /*
108 * Slurp the locale file into the cache.
109 */
110
111 /* 'PathLocale' must be already set & checked. */
112 /* Range checking not needed, encoding has fixed size */
974e3884 113 (void)strcpy(buf, encoding);
9385eb3d 114 (void)strcat(buf, "/LC_COLLATE");
b061a43b
A
115 fd = __open_path_locale(buf);
116 if (fd == -1) {
117 return (_LDP_ERROR);
118 }
119 if ((fp = fdopen(fd, "r")) == NULL) {
120 close(fd);
9385eb3d 121 return (_LDP_ERROR);
974e3884 122 }
9385eb3d
A
123
124 if (fread(strbuf, sizeof(strbuf), 1, fp) != 1) {
125 saverr = errno;
126 (void)fclose(fp);
127 errno = saverr;
128 return (_LDP_ERROR);
129 }
130 chains = -1;
ad3c9f2a 131 if (strcmp(strbuf, COLLATE_VERSION1_1A) == 0)
9385eb3d
A
132 chains = 1;
133 if (chains < 0) {
134 (void)fclose(fp);
135 errno = EFTYPE;
136 return (_LDP_ERROR);
137 }
138 if (chains) {
ad3c9f2a 139 if (fread(&info, sizeof(info), 1, fp) != 1) {
9385eb3d
A
140 saverr = errno;
141 (void)fclose(fp);
142 errno = saverr;
143 return (_LDP_ERROR);
144 }
ad3c9f2a
A
145#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
146 for(z = 0; z < info.directive_count; z++) {
147 info.undef_pri[z] = ntohl(info.undef_pri[z]);
148 info.subst_count[z] = ntohl(info.subst_count[z]);
149 }
150 info.chain_count = ntohl(info.chain_count);
151 info.large_pri_count = ntohl(info.large_pri_count);
152#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
153 if ((chains = info.chain_count) < 0) {
9385eb3d
A
154 (void)fclose(fp);
155 errno = EFTYPE;
156 return (_LDP_ERROR);
157 }
158 } else
159 chains = TABLE_SIZE;
160
ad3c9f2a
A
161 i = sizeof(struct __xlocale_st_collate)
162 + sizeof(struct __collate_st_chain_pri) * chains
163 + sizeof(struct __collate_st_large_char_pri) * info.large_pri_count;
164 for(z = 0; z < info.directive_count; z++)
165 i += sizeof(struct __collate_st_subst) * info.subst_count[z];
166 if ((TMP = (struct __xlocale_st_collate *)malloc(i)) == NULL) {
9385eb3d
A
167 saverr = errno;
168 (void)fclose(fp);
169 errno = saverr;
170 return (_LDP_ERROR);
171 }
ad3c9f2a
A
172 TMP->__refcount = 2; /* one for the locale, one for the cache */
173 TMP->__free_extra = NULL;
9385eb3d
A
174
175#define FREAD(a, b, c, d) \
176{ \
177 if (fread(a, b, c, d) != c) { \
178 saverr = errno; \
ad3c9f2a 179 free(TMP); \
9385eb3d
A
180 (void)fclose(d); \
181 errno = saverr; \
182 return (_LDP_ERROR); \
183 } \
184}
185
ad3c9f2a
A
186 /* adjust size to read the remaining in one chunk */
187 i -= offsetof(struct __xlocale_st_collate, __char_pri_table);
188 FREAD(TMP->__char_pri_table, i, 1, fp);
9385eb3d
A
189 (void)fclose(fp);
190
ad3c9f2a
A
191 vp = (void *)(TMP + 1);
192
193 /* the COLLATE_SUBST_DUP optimization relies on COLL_WEIGHTS_MAX == 2 */
194 if (info.subst_count[0] > 0) {
195 TMP->__substitute_table[0] = (struct __collate_st_subst *)vp;
196 vp += info.subst_count[0] * sizeof(struct __collate_st_subst);
197 } else
198 TMP->__substitute_table[0] = NULL;
199 if (info.flags & COLLATE_SUBST_DUP)
200 TMP->__substitute_table[1] = TMP->__substitute_table[0];
201 else if (info.subst_count[1] > 0) {
202 TMP->__substitute_table[1] = (struct __collate_st_subst *)vp;
203 vp += info.subst_count[1] * sizeof(struct __collate_st_subst);
204 } else
205 TMP->__substitute_table[1] = NULL;
206
207 if (chains > 0) {
208 TMP->__chain_pri_table = (struct __collate_st_chain_pri *)vp;
209 vp += chains * sizeof(struct __collate_st_chain_pri);
210 } else
211 TMP->__chain_pri_table = NULL;
212 if (info.large_pri_count > 0)
213 TMP->__large_char_pri_table = (struct __collate_st_large_char_pri *)vp;
214 else
215 TMP->__large_char_pri_table = NULL;
216
217#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
218 {
219 struct __collate_st_char_pri *p = TMP->__char_pri_table;
220 for(i = UCHAR_MAX + 1; i-- > 0; p++) {
221 for(z = 0; z < info.directive_count; z++)
222 p->pri[z] = ntohl(p->pri[z]);
223 }
224 }
225 for(z = 0; z < info.directive_count; z++)
226 if (info.subst_count[z] > 0) {
227 struct __collate_st_subst *p = TMP->__substitute_table[z];
228 for(i = info.subst_count[z]; i-- > 0; p++) {
229 p->val = ntohl(p->val);
230 wntohl(p->str, STR_LEN);
231 }
232 }
233 {
234 struct __collate_st_chain_pri *p = TMP->__chain_pri_table;
235 for(i = chains; i-- > 0; p++) {
236 wntohl(p->str, STR_LEN);
237 for(z = 0; z < info.directive_count; z++)
238 p->pri[z] = ntohl(p->pri[z]);
239 }
240 }
241 if (info.large_pri_count > 0) {
242 struct __collate_st_large_char_pri *p = TMP->__large_char_pri_table;
243 for(i = info.large_pri_count; i-- > 0; p++) {
244 p->val = ntohl(p->val);
245 for(z = 0; z < info.directive_count; z++)
246 p->pri.pri[z] = ntohl(p->pri.pri[z]);
9385eb3d
A
247 }
248 }
ad3c9f2a
A
249#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
250 (void)strcpy(TMP->__encoding, encoding);
251 (void)memcpy(&TMP->__info, &info, sizeof(info));
252 XL_RELEASE(cache);
253 cache = TMP;
254 XL_RELEASE(loc->__lc_collate);
255 loc->__lc_collate = cache;
256 /* no need to retain, since we set __refcount to 2 above */
257
258 loc->__collate_substitute_nontrivial = (info.subst_count[0] > 0 || info.subst_count[1] > 0);
259 loc->__collate_load_error = 0;
260 if (loc == &__global_locale)
261 __collate_load_error = 0;
9385eb3d
A
262
263 return (_LDP_LOADED);
264}
265
ad3c9f2a
A
/*
 * Length of the wide string 's', looking at no more than 'len' elements.
 * Returns 'len' when no terminator is found within the first 'len'
 * elements, and 0 when 'len' is not positive.
 */
static int
__collate_wcsnlen(const wchar_t *s, int len)
{
	int n = 0;

	/*
	 * Test the bound before dereferencing: the table strings this is
	 * used on may fill their array completely, with no terminator.
	 */
	while (n < len && s[n] != 0)
		n++;
	return n;
}
276
277static struct __collate_st_subst *
278substsearch(const wchar_t key, struct __collate_st_subst *tab, int n)
279{
280 int low = 0;
281 int high = n - 1;
282 int next, compar;
283 struct __collate_st_subst *p;
284
285 while (low <= high) {
286 next = (low + high) / 2;
287 p = tab + next;
288 compar = key - p->val;
289 if (compar == 0)
290 return p;
291 if (compar > 0)
292 low = next + 1;
293 else
294 high = next - 1;
295 }
296 return NULL;
297}
298
299__private_extern__ wchar_t *
300__collate_substitute(const wchar_t *s, int which, locale_t loc)
9385eb3d
A
301{
302 int dest_len, len, nlen;
ad3c9f2a
A
303 int n, delta, nsubst;
304 wchar_t *dest_str = NULL;
305 const wchar_t *fp;
306 struct __collate_st_subst *subst, *match;
9385eb3d
A
307
308 if (s == NULL || *s == '\0')
ad3c9f2a
A
309 return (__collate_wcsdup(L""));
310 dest_len = wcslen(s);
311 nsubst = __collate_info->subst_count[which];
312 if (nsubst <= 0)
313 return __collate_wcsdup(s);
314 subst = __collate_substitute_table[which];
315 delta = dest_len / 4;
316 if (delta < 2)
317 delta = 2;
318 dest_str = (wchar_t *)malloc((dest_len += delta) * sizeof(wchar_t));
9385eb3d 319 if (dest_str == NULL)
3d9156a7 320 __collate_err(EX_OSERR, __func__);
9385eb3d
A
321 len = 0;
322 while (*s) {
ad3c9f2a
A
323 if ((match = substsearch(*s, subst, nsubst)) != NULL) {
324 fp = match->str;
325 n = __collate_wcsnlen(fp, STR_LEN);
326 } else {
327 fp = s;
328 n = 1;
329 }
330 nlen = len + n;
9385eb3d 331 if (dest_len <= nlen) {
ad3c9f2a 332 dest_str = reallocf(dest_str, (dest_len = nlen + delta) * sizeof(wchar_t));
9385eb3d 333 if (dest_str == NULL)
3d9156a7 334 __collate_err(EX_OSERR, __func__);
9385eb3d 335 }
ad3c9f2a
A
336 wcsncpy(dest_str + len, fp, n);
337 len += n;
338 s++;
9385eb3d 339 }
ad3c9f2a 340 dest_str[len] = 0;
9385eb3d
A
341 return (dest_str);
342}
343
ad3c9f2a
A
344static struct __collate_st_chain_pri *
345chainsearch(const wchar_t *key, int *len, locale_t loc)
346{
347 int low = 0;
348 int high = __collate_info->chain_count - 1;
349 int next, compar, l;
350 struct __collate_st_chain_pri *p;
351 struct __collate_st_chain_pri *tab = __collate_chain_pri_table;
352
353 while (low <= high) {
354 next = (low + high) / 2;
355 p = tab + next;
356 compar = *key - *p->str;
357 if (compar == 0) {
358 l = __collate_wcsnlen(p->str, STR_LEN);
359 compar = wcsncmp(key, p->str, l);
360 if (compar == 0) {
361 *len = l;
362 return p;
363 }
364 }
365 if (compar > 0)
366 low = next + 1;
367 else
368 high = next - 1;
369 }
370 return NULL;
371}
372
373static struct __collate_st_large_char_pri *
374largesearch(const wchar_t key, locale_t loc)
375{
376 int low = 0;
377 int high = __collate_info->large_pri_count - 1;
378 int next, compar;
379 struct __collate_st_large_char_pri *p;
380 struct __collate_st_large_char_pri *tab = __collate_large_char_pri_table;
381
382 while (low <= high) {
383 next = (low + high) / 2;
384 p = tab + next;
385 compar = key - p->val;
386 if (compar == 0)
387 return p;
388 if (compar > 0)
389 low = next + 1;
390 else
391 high = next - 1;
392 }
393 return NULL;
394}
395
a9aaacca
A
396/*
397* This is provided for programs (like grep) that are calling this
398* private function. This is also used by wcscoll()
399*/
400void
ad3c9f2a 401__collate_lookup_l(const wchar_t *t, int *len, int *prim, int *sec, locale_t loc)
9385eb3d
A
402{
403 struct __collate_st_chain_pri *p2;
ad3c9f2a 404 int l;
9385eb3d 405
a9aaacca
A
406 if (!*t) {
407 *len = 0;
408 *prim = 0;
409 *sec = 0;
410 return;
411 }
412
413 NORMALIZE_LOCALE(loc);
414 if (loc->__collate_load_error) {
415 *len = 1;
416 *prim = *t;
417 *sec = 0;
418 return;
419 }
420
9385eb3d
A
421 *len = 1;
422 *prim = *sec = 0;
ad3c9f2a
A
423 p2 = chainsearch(t, &l, loc);
424 /* use the chain if prim >= 0 */
425 if (p2 && p2->pri[0] >= 0) {
426 *len = l;
427 *prim = p2->pri[0];
428 *sec = p2->pri[1];
429 return;
430 }
431 if (*t <= UCHAR_MAX) {
432 *prim = __collate_char_pri_table[*t].pri[0];
433 *sec = __collate_char_pri_table[*t].pri[1];
434 return;
435 }
436 if (__collate_info->large_pri_count > 0) {
437 struct __collate_st_large_char_pri *match;
438 match = largesearch(*t, loc);
439 if (match) {
440 *prim = match->pri.pri[0];
441 *sec = match->pri.pri[1];
442 return;
443 }
444 }
445 *prim = (l = __collate_info->undef_pri[0]) >= 0 ? l : *t - l;
446 *sec = (l = __collate_info->undef_pri[1]) >= 0 ? l : *t - l;
447}
448
449/*
a9aaacca
A
450 * This is also provided for programs (like grep) that are calling this
451 * private function - that do not perform their own multi-byte handling.
452 * This will go away eventually.
ad3c9f2a
A
453 */
454void
455__collate_lookup(const unsigned char *t, int *len, int *prim, int *sec)
456{
457 locale_t loc = __current_locale();
a9aaacca 458 wchar_t *w = NULL;
ad3c9f2a
A
459 int sverrno;
460
a9aaacca
A
461 if (!*t) {
462 *len = 0;
463 *prim = 0;
464 *sec = 0;
465 return;
466 }
467
468 if (loc->__collate_load_error || (w = __collate_mbstowcs((const char *)t, loc)) == NULL) {
469 *len = 1;
470 *prim = (int)*t;
471 *sec = 0;
472
473 sverrno = errno;
474 free((void*)w);
475 errno = sverrno;
476 return;
477 }
478
ad3c9f2a
A
479 __collate_lookup_l(w, len, prim, sec, loc);
480 sverrno = errno;
481 free(w);
482 errno = sverrno;
483}
a9aaacca 484
ad3c9f2a
A
485__private_extern__ void
486__collate_lookup_which(const wchar_t *t, int *len, int *pri, int which, locale_t loc)
487{
488 struct __collate_st_chain_pri *p2;
489 int p, l;
490
491 *len = 1;
492 *pri = 0;
493 p2 = chainsearch(t, &l, loc);
494 if (p2) {
495 p = p2->pri[which];
496 /* use the chain if pri >= 0 */
497 if (p >= 0) {
498 *len = l;
499 *pri = p;
500 return;
501 }
502 }
503 if (*t <= UCHAR_MAX) {
504 *pri = __collate_char_pri_table[*t].pri[which];
505 return;
506 }
507 if (__collate_info->large_pri_count > 0) {
508 struct __collate_st_large_char_pri *match;
509 match = largesearch(*t, loc);
510 if (match) {
511 *pri = match->pri.pri[which];
9385eb3d
A
512 return;
513 }
514 }
ad3c9f2a 515 *pri = (l = __collate_info->undef_pri[which]) >= 0 ? l : *t - l;
9385eb3d
A
516}
517
ad3c9f2a
A
518__private_extern__ wchar_t *
519__collate_mbstowcs(const char *s, locale_t loc)
9385eb3d 520{
ad3c9f2a
A
521 static const mbstate_t initial;
522 mbstate_t st;
523 size_t len;
524 const char *ss;
525 wchar_t *wcs;
9385eb3d 526
ad3c9f2a
A
527 ss = s;
528 st = initial;
529 if ((len = mbsrtowcs_l(NULL, &ss, 0, &st, loc)) == (size_t)-1)
530 return NULL;
531 if ((wcs = (wchar_t *)malloc((len + 1) * sizeof(wchar_t))) == NULL)
3d9156a7 532 __collate_err(EX_OSERR, __func__);
ad3c9f2a
A
533 st = initial;
534 mbsrtowcs_l(wcs, &s, len, &st, loc);
535 wcs[len] = 0;
536
537 return (wcs);
9385eb3d
A
538}
539
ad3c9f2a
A
540__private_extern__ wchar_t *
541__collate_wcsdup(const wchar_t *s)
542{
543 size_t len = wcslen(s) + 1;
544 wchar_t *wcs;
545
546 if ((wcs = (wchar_t *)malloc(len * sizeof(wchar_t))) == NULL)
547 __collate_err(EX_OSERR, __func__);
548 wcscpy(wcs, s);
549 return (wcs);
550}
551
552__private_extern__ void
553__collate_xfrm(const wchar_t *src, wchar_t **xf, locale_t loc)
554{
555 int pri, len;
556 size_t slen;
557 const wchar_t *t;
558 wchar_t *tt = NULL, *tr = NULL;
559 int direc, pass;
560 wchar_t *xfp;
561 struct __collate_st_info *info = __collate_info;
562 int sverrno;
563
564 for(pass = 0; pass < COLL_WEIGHTS_MAX; pass++)
565 xf[pass] = NULL;
566 for(pass = 0; pass < info->directive_count; pass++) {
567 direc = info->directive[pass];
568 if (pass == 0 || !(info->flags & COLLATE_SUBST_DUP)) {
569 sverrno = errno;
570 free(tt);
571 errno = sverrno;
572 tt = __collate_substitute(src, pass, loc);
573 }
574 if (direc & DIRECTIVE_BACKWARD) {
575 wchar_t *bp, *fp, c;
576 sverrno = errno;
577 free(tr);
578 errno = sverrno;
579 tr = __collate_wcsdup(tt ? tt : src);
580 bp = tr;
581 fp = tr + wcslen(tr) - 1;
582 while(bp < fp) {
583 c = *bp;
584 *bp++ = *fp;
585 *fp-- = c;
586 }
587 t = (const wchar_t *)tr;
588 } else if (tt)
589 t = (const wchar_t *)tt;
590 else
591 t = (const wchar_t *)src;
592 sverrno = errno;
593 if ((xf[pass] = (wchar_t *)malloc(sizeof(wchar_t) * (wcslen(t) + 1))) == NULL) {
594 errno = sverrno;
595 slen = 0;
596 goto end;
597 }
598 errno = sverrno;
599 xfp = xf[pass];
600 if (direc & DIRECTIVE_POSITION) {
601 while(*t) {
602 __collate_lookup_which(t, &len, &pri, pass, loc);
603 t += len;
604 if (pri <= 0) {
605 if (pri < 0) {
606 errno = EINVAL;
607 slen = 0;
608 goto end;
609 }
610 pri = COLLATE_MAX_PRIORITY;
611 }
612 *xfp++ = pri;
613 }
614 } else {
615 while(*t) {
616 __collate_lookup_which(t, &len, &pri, pass, loc);
617 t += len;
618 if (pri <= 0) {
619 if (pri < 0) {
620 errno = EINVAL;
621 slen = 0;
622 goto end;
623 }
624 continue;
625 }
626 *xfp++ = pri;
627 }
628 }
629 *xfp = 0;
630 }
631 end:
632 sverrno = errno;
633 free(tt);
634 free(tr);
635 errno = sverrno;
636}
637
638__private_extern__ void
9385eb3d
A
639__collate_err(int ex, const char *f)
640{
641 const char *s;
642 int serrno = errno;
643
644 s = _getprogname();
645 _write(STDERR_FILENO, s, strlen(s));
646 _write(STDERR_FILENO, ": ", 2);
647 s = f;
648 _write(STDERR_FILENO, s, strlen(s));
649 _write(STDERR_FILENO, ": ", 2);
650 s = strerror(serrno);
651 _write(STDERR_FILENO, s, strlen(s));
652 _write(STDERR_FILENO, "\n", 1);
653 exit(ex);
654}
655
ad3c9f2a
A
656/*
657 * __collate_collating_symbol takes the multibyte string specified by
658 * src and slen, and using ps, converts that to a wide character. Then
659 * it is checked to verify it is a collating symbol, and then copies
660 * it to the wide character string specified by dst and dlen (the
661 * results are not null terminated). The length of the wide characters
662 * copied to dst is returned if successful. Zero is returned if no such
663 * collating symbol exists. (size_t)-1 is returned if there are wide-character
664 * conversion errors, if the length of the converted string is greater that
665 * STR_LEN or if dlen is too small. It is up to the calling routine to
666 * preserve the mbstate_t structure as needed.
667 */
668__private_extern__ size_t
669__collate_collating_symbol(wchar_t *dst, size_t dlen, const char *src, size_t slen, mbstate_t *ps, locale_t loc)
670{
671 wchar_t wname[STR_LEN];
672 wchar_t w, *wp;
673 size_t len, l;
674
675 /* POSIX locale */
676 if (loc->__collate_load_error) {
677 if (dlen < 1)
678 return (size_t)-1;
679 if (slen != 1 || !isascii(*src))
680 return 0;
681 *dst = *src;
682 return 1;
683 }
684 for(wp = wname, len = 0; slen > 0; len++) {
685 l = mbrtowc_l(&w, src, slen, ps, loc);
686 if (l == (size_t)-1 || l == (size_t)-2)
687 return (size_t)-1;
688 if (l == 0)
689 break;
690 if (len >= STR_LEN)
691 return -1;
692 *wp++ = w;
693 src += l;
694 slen = (long)slen - (long)l;
695 }
696 if (len == 0 || len > dlen)
697 return (size_t)-1;
698 if (len == 1) {
699 if (*wname <= UCHAR_MAX) {
700 if (__collate_char_pri_table[*wname].pri[0] >= 0) {
701 if (dlen > 0)
702 *dst = *wname;
703 return 1;
704 }
705 return 0;
706 } else if (__collate_info->large_pri_count > 0) {
707 struct __collate_st_large_char_pri *match;
708 match = largesearch(*wname, loc);
709 if (match && match->pri.pri[0] >= 0) {
710 if (dlen > 0)
711 *dst = *wname;
712 return 1;
713 }
714 }
715 return 0;
716 }
717 *wp = 0;
718 if (__collate_info->chain_count > 0) {
719 struct __collate_st_chain_pri *match;
720 int ll;
721 match = chainsearch(wname, &ll, loc);
722 if (match) {
723 if (ll < dlen)
724 dlen = ll;
725 wcsncpy(dst, wname, dlen);
726 return ll;
727 }
728 }
729 return 0;
730}
731
732/*
733 * __collate_equiv_class returns the equivalence class number for the symbol
734 * specified by src and slen, using ps to convert from multi-byte to wide
735 * character. Zero is returned if the symbol is not in an equivalence
736 * class. -1 is returned if there are wide character conversion error,
737 * if there are any greater-than-8-bit characters or if a multi-byte symbol
738 * is greater or equal to STR_LEN in length. It is up to the calling
739 * routine to preserve the mbstate_t structure as needed.
740 */
741__private_extern__ int
742__collate_equiv_class(const char *src, size_t slen, mbstate_t *ps, locale_t loc)
743{
744 wchar_t wname[STR_LEN];
745 wchar_t w, *wp;
746 size_t len, l;
747 int e;
748
749 /* POSIX locale */
750 if (loc->__collate_load_error)
751 return 0;
752 for(wp = wname, len = 0; slen > 0; len++) {
753 l = mbrtowc_l(&w, src, slen, ps, loc);
754 if (l == (size_t)-1 || l == (size_t)-2)
755 return -1;
756 if (l == 0)
757 break;
758 if (len >= STR_LEN)
759 return -1;
760 *wp++ = w;
761 src += l;
762 slen = (long)slen - (long)l;
763 }
764 if (len == 0)
765 return -1;
766 if (len == 1) {
767 e = -1;
768 if (*wname <= UCHAR_MAX)
769 e = __collate_char_pri_table[*wname].pri[0];
770 else if (__collate_info->large_pri_count > 0) {
771 struct __collate_st_large_char_pri *match;
772 match = largesearch(*wname, loc);
773 if (match)
774 e = match->pri.pri[0];
775 }
776 if (e == 0)
777 return IGNORE_EQUIV_CLASS;
778 return e > 0 ? e : 0;
779 }
780 *wp = 0;
781 if (__collate_info->chain_count > 0) {
782 struct __collate_st_chain_pri *match;
783 int ll;
784 match = chainsearch(wname, &ll, loc);
785 if (match) {
786 e = match->pri[0];
787 if (e == 0)
788 return IGNORE_EQUIV_CLASS;
789 return e < 0 ? -e : e;
790 }
791 }
792 return 0;
793}
794
795/*
796 * __collate_equiv_match tries to match any single or multi-character symbol
797 * in equivalence class equiv_class in the multi-byte string specified by src
798 * and slen. If start is non-zero, it is taken to be the first (pre-converted)
799 * wide character. Subsequence wide characters, if needed, will use ps in
800 * the conversion. On a successful match, the length of the matched string
801 * is returned (including the start character). If dst is non-NULL, the
802 * matched wide-character string is copied to dst, a wide character array of
803 * length dlen (the results are not zero-terminated). If rlen is non-NULL,
804 * the number of character in src actually used is returned. Zero is
805 * returned by __collate_equiv_match if there is no match. (size_t)-1 is
806 * returned on error: if there were conversion errors or if dlen is too small
807 * to accept the results. On no match or error, ps is restored to its incoming
808 * state.
809 */
810size_t
811__collate_equiv_match(int equiv_class, wchar_t *dst, size_t dlen, wchar_t start, const char *src, size_t slen, mbstate_t *ps, size_t *rlen, locale_t loc)
812{
813 wchar_t w;
814 size_t len, l, clen;
815 int i;
816 wchar_t buf[STR_LEN], *wp;
817 mbstate_t save;
818 const char *s = src;
819 size_t sl = slen;
820 struct __collate_st_chain_pri *ch = NULL;
821
822 /* POSIX locale */
823 if (loc->__collate_load_error)
824 return (size_t)-1;
825 if (equiv_class == IGNORE_EQUIV_CLASS)
826 equiv_class = 0;
827 if (ps)
828 save = *ps;
829 wp = buf;
830 len = clen = 0;
831 if (start) {
832 *wp++ = start;
833 len = 1;
834 }
835 /* convert up to the max chain length */
836 while(sl > 0 && len < __collate_info->chain_max_len) {
837 l = mbrtowc_l(&w, s, sl, ps, loc);
838 if (l == (size_t)-1 || l == (size_t)-2 || l == 0)
839 break;
840 *wp++ = w;
841 s += l;
842 clen += l;
843 sl -= l;
844 len++;
845 }
846 *wp = 0;
847 if (len > 1 && (ch = chainsearch(buf, &i, loc)) != NULL) {
848 int e = ch->pri[0];
849 if (e < 0)
850 e = -e;
851 if (e == equiv_class)
852 goto found;
853 }
854 /* try single character */
855 i = 1;
856 if (*buf <= UCHAR_MAX) {
857 if (equiv_class == __collate_char_pri_table[*buf].pri[0])
858 goto found;
859 } else if (__collate_info->large_pri_count > 0) {
860 struct __collate_st_large_char_pri *match;
861 match = largesearch(*buf, loc);
862 if (match && equiv_class == match->pri.pri[0])
863 goto found;
864 }
865 /* no match */
866 if (ps)
867 *ps = save;
868 return 0;
869found:
870 /* if we converted more than we used, restore to initial and reconvert
871 * up to what did match */
872 if (i < len) {
873 len = i;
874 if (ps)
875 *ps = save;
876 if (start)
877 i--;
878 clen = 0;
879 while(i-- > 0) {
880 l = mbrtowc_l(&w, src, slen, ps, loc);
881 src += l;
882 clen += l;
883 slen -= l;
884 }
885 }
886 if (dst) {
887 if (dlen < len) {
888 if (ps)
889 *ps = save;
890 return (size_t)-1;
891 }
892 for(wp = buf; len > 0; len--)
893 *dst++ = *wp++;
894 }
895 if (rlen)
896 *rlen = clen;
897 return len;
898}
899
900/*
901 * __collate_equiv_value returns the primary collation value for the given
902 * collating symbol specified by str and len. Zero or negative is return
903 * if the collating symbol was not found. (Use by the bracket code in TRE.)
904 */
905__private_extern__ int
906__collate_equiv_value(locale_t loc, const wchar_t *str, size_t len)
907{
908 int e;
909
910 if (len < 1 || len >= STR_LEN)
911 return -1;
912
913 /* POSIX locale */
914 if (loc->__collate_load_error)
915 return (len == 1 && *str <= UCHAR_MAX) ? *str : -1;
916
917 if (len == 1) {
918 e = -1;
919 if (*str <= UCHAR_MAX)
920 e = __collate_char_pri_table[*str].pri[0];
921 else if (__collate_info->large_pri_count > 0) {
922 struct __collate_st_large_char_pri *match;
923 match = largesearch(*str, loc);
924 if (match)
925 e = match->pri.pri[0];
926 }
927 if (e == 0)
928 return IGNORE_EQUIV_CLASS;
929 return e > 0 ? e : 0;
930 }
931 if (__collate_info->chain_count > 0) {
932 wchar_t name[STR_LEN];
933 struct __collate_st_chain_pri *match;
934 int ll;
935
936 wcsncpy(name, str, len);
937 name[len] = 0;
938 match = chainsearch(name, &ll, loc);
939 if (match) {
940 e = match->pri[0];
941 if (e == 0)
942 return IGNORE_EQUIV_CLASS;
943 return e < 0 ? -e : e;
944 }
945 }
946 return 0;
947}
948
949#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
/*
 * Convert up to 'len' elements of 'str' from network to host byte order in
 * place, stopping early at a zero element.
 */
static void
wntohl(wchar_t *str, int len)
{
	/*
	 * Check the bound before dereferencing: the strings this is used on
	 * may fill all 'len' slots with no terminator.
	 */
	for (; len > 0 && *str; str++, len--)
		*str = ntohl(*str);
}
956#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
957
9385eb3d 958#ifdef COLLATE_DEBUG
ad3c9f2a
A
/*
 * Debug helper: format character code 'c' for display.  Printable ASCII is
 * shown quoted, anything else as \x{hex}.  The result lives in a static
 * buffer that the next call overwrites.
 */
static char *
show(int c)
{
	/* "\x{" + up to 8 hex digits + "}" + NUL is 13 bytes */
	static char buf[16];

	if (c >= 32 && c <= 126)
		snprintf(buf, sizeof(buf), "'%c' ", c);
	else
		snprintf(buf, sizeof(buf), "\\x{%02x}", (unsigned int)c);
	return buf;
}
970
/*
 * Debug helper: format up to 'len' characters of the wide string 't',
 * stopping at a zero element.  Printable ASCII is copied through, anything
 * else is written as \x{hex}.  Output that would not fit in the static
 * result buffer is dropped.  The buffer is overwritten by the next call.
 */
static char *
showwcs(const wchar_t *t, int len)
{
	static char buf[128];
	size_t used = 0;

	/* check the bound first: 't' may fill all 'len' slots unterminated */
	for (; len > 0 && *t; len--, t++) {
		size_t room = sizeof(buf) - used;

		if (*t >= 32 && *t <= 126) {
			if (room < 2)	/* this character plus the NUL */
				break;
			buf[used++] = (char)*t;
		} else {
			int n = snprintf(buf + used, room, "\\x{%02x}",
			    (unsigned int)*t);
			if (n < 0 || (size_t)n >= room)
				break;	/* truncated; cut back below */
			used += (size_t)n;
		}
	}
	buf[used] = 0;
	return buf;
}
988
9385eb3d
A
989void
990__collate_print_tables()
991{
ad3c9f2a
A
992 int i, z;
993 locale_t loc = __current_locale();
9385eb3d 994
ad3c9f2a
A
995 printf("Info: p=%d s=%d f=0x%02x m=%d dc=%d up=%d us=%d pc=%d sc=%d cc=%d lc=%d\n",
996 __collate_info->directive[0], __collate_info->directive[1],
997 __collate_info->flags, __collate_info->chain_max_len,
998 __collate_info->directive_count,
999 __collate_info->undef_pri[0], __collate_info->undef_pri[1],
1000 __collate_info->subst_count[0], __collate_info->subst_count[1],
1001 __collate_info->chain_count, __collate_info->large_pri_count);
1002 for(z = 0; z < __collate_info->directive_count; z++) {
1003 if (__collate_info->subst_count[z] > 0) {
1004 struct __collate_st_subst *p2 = __collate_substitute_table[z];
1005 if (z == 0 && (__collate_info->flags & COLLATE_SUBST_DUP))
1006 printf("Both substitute tables:\n");
1007 else
1008 printf("Substitute table %d:\n", z);
1009 for (i = __collate_info->subst_count[z]; i-- > 0; p2++)
1010 printf("\t%s --> \"%s\"\n",
1011 show(p2->val),
1012 showwcs(p2->str, STR_LEN));
1013 }
1014 }
1015 if (__collate_info->chain_count > 0) {
1016 printf("Chain priority table:\n");
1017 struct __collate_st_chain_pri *p2 = __collate_chain_pri_table;
1018 for (i = __collate_info->chain_count; i-- > 0; p2++) {
1019 printf("\t\"%s\" :", showwcs(p2->str, STR_LEN));
1020 for(z = 0; z < __collate_info->directive_count; z++)
1021 printf(" %d", p2->pri[z]);
1022 putchar('\n');
1023 }
1024 }
9385eb3d 1025 printf("Char priority table:\n");
ad3c9f2a
A
1026 {
1027 struct __collate_st_char_pri *p2 = __collate_char_pri_table;
1028 for (i = 0; i < UCHAR_MAX + 1; i++, p2++) {
1029 printf("\t%s :", show(i));
1030 for(z = 0; z < __collate_info->directive_count; z++)
1031 printf(" %d", p2->pri[z]);
1032 putchar('\n');
1033 }
1034 }
1035 if (__collate_info->large_pri_count > 0) {
1036 struct __collate_st_large_char_pri *p2 = __collate_large_char_pri_table;
1037 printf("Large priority table:\n");
1038 for (i = __collate_info->large_pri_count; i-- > 0; p2++) {
1039 printf("\t%s :", show(p2->val));
1040 for(z = 0; z < __collate_info->directive_count; z++)
1041 printf(" %d", p2->pri.pri[z]);
1042 putchar('\n');
1043 }
1044 }
9385eb3d
A
1045}
1046#endif