]> git.saurik.com Git - apple/libc.git/blame - locale/FreeBSD/collate.c.patch
Libc-498.tar.gz
[apple/libc.git] / locale / FreeBSD / collate.c.patch
CommitLineData
3d9156a7 1--- collate.c.orig 2004-11-25 11:38:16.000000000 -0800
224c7076
A
2+++ collate.c 2005-10-20 01:00:19.000000000 -0700
3@@ -28,14 +28,26 @@
3d9156a7
A
4 #include <sys/cdefs.h>
5 __FBSDID("$FreeBSD: src/lib/libc/locale/collate.c,v 1.33 2004/09/22 16:56:48 stefanf Exp $");
9385eb3d 6
3d9156a7
A
7+#include "xlocale_private.h"
8+/* assumes the locale_t variable is named loc */
224c7076
A
9+#define __collate_chain_equiv_table (loc->__lc_collate->__chain_equiv_table)
10+#define __collate_chain_pri_table (loc->__lc_collate->__chain_pri_table)
3d9156a7 11+#define __collate_char_pri_table (loc->__lc_collate->__char_pri_table)
224c7076
A
12+#define __collate_info (&loc->__lc_collate->__info)
13+#define __collate_large_char_pri_table (loc->__lc_collate->__large_char_pri_table)
14+#define __collate_substitute_table (loc->__lc_collate->__substitute_table)
3d9156a7 15+
9385eb3d 16 #include "namespace.h"
3d9156a7 17 #include <arpa/inet.h>
9385eb3d 18 #include <stdio.h>
224c7076
A
19 #include <stdlib.h>
20+#include <stddef.h>
21 #include <string.h>
22+#include <wchar.h>
23 #include <errno.h>
24 #include <unistd.h>
25 #include <sysexits.h>
26+#include <ctype.h>
27 #include "un-namespace.h"
28
29 #include "collate.h"
30@@ -44,36 +56,50 @@
3d9156a7
A
31
32 #include "libc_private.h"
33
34-int __collate_load_error = 1;
35-int __collate_substitute_nontrivial;
36-
37-u_char __collate_substitute_table[UCHAR_MAX + 1][STR_LEN];
38-struct __collate_st_char_pri __collate_char_pri_table[UCHAR_MAX + 1];
39-struct __collate_st_chain_pri *__collate_chain_pri_table;
40-
224c7076
A
41+#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
42+static void wntohl(wchar_t *, int);
43+#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
3d9156a7
A
44 void __collate_err(int ex, const char *f) __dead2;
45
46-int
47-__collate_load_tables(const char *encoding)
48+/*
49+ * Normally, the __collate_* routines should all be __private_extern__,
50+ * but grep is using them (3715846). Until we can provide an alternative,
51+ * we leave them public, and provide a read-only __collate_load_error variable
52+ */
53+#undef __collate_load_error
54+int __collate_load_error = 1;
55+
56+__private_extern__ int
57+__collate_load_tables(const char *encoding, locale_t loc)
58 {
59 FILE *fp;
224c7076
A
60- int i, saverr, chains;
61- uint32_t u32;
62+ int i, saverr, chains, z;
3d9156a7
A
63 char strbuf[STR_LEN], buf[PATH_MAX];
64- void *TMP_substitute_table, *TMP_char_pri_table, *TMP_chain_pri_table;
65- static char collate_encoding[ENCODING_LEN + 1];
66+ struct __xlocale_st_collate *TMP;
67+ static struct __xlocale_st_collate *cache = NULL;
224c7076
A
68+ struct __collate_st_info info;
69+ void *vp;
3d9156a7
A
70
71 /* 'encoding' must be already checked. */
72 if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
73- __collate_load_error = 1;
74+ loc->__collate_load_error = 1;
75+ if (loc == &__global_locale)
76+ __collate_load_error = 1;
77+ XL_RELEASE(loc->__lc_collate);
78+ loc->__lc_collate = NULL;
79 return (_LDP_CACHE);
80 }
81
82 /*
83 * If the locale name is the same as our cache, use the cache.
84 */
85- if (strcmp(encoding, collate_encoding) == 0) {
86- __collate_load_error = 0;
87+ if (cache && strcmp(encoding, cache->__encoding) == 0) {
88+ loc->__collate_load_error = 0;
89+ if (loc == &__global_locale)
90+ __collate_load_error = 0;
91+ XL_RELEASE(loc->__lc_collate);
92+ loc->__lc_collate = cache;
93+ XL_RETAIN(loc->__lc_collate);
94 return (_LDP_CACHE);
95 }
96
224c7076
A
97@@ -97,9 +123,7 @@
98 return (_LDP_ERROR);
99 }
100 chains = -1;
101- if (strcmp(strbuf, COLLATE_VERSION) == 0)
102- chains = 0;
103- else if (strcmp(strbuf, COLLATE_VERSION1_1) == 0)
104+ if (strcmp(strbuf, COLLATE_VERSION1_1A) == 0)
105 chains = 1;
106 if (chains < 0) {
107 (void)fclose(fp);
108@@ -107,13 +131,21 @@
109 return (_LDP_ERROR);
110 }
111 if (chains) {
112- if (fread(&u32, sizeof(u32), 1, fp) != 1) {
113+ if (fread(&info, sizeof(info), 1, fp) != 1) {
114 saverr = errno;
115 (void)fclose(fp);
116 errno = saverr;
117 return (_LDP_ERROR);
118 }
119- if ((chains = (int)ntohl(u32)) < 1) {
120+#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
121+ for(z = 0; z < info.directive_count; z++) {
122+ info.undef_pri[z] = ntohl(info.undef_pri[z]);
123+ info.subst_count[z] = ntohl(info.subst_count[z]);
124+ }
125+ info.chain_count = ntohl(info.chain_count);
126+ info.large_pri_count = ntohl(info.large_pri_count);
127+#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
128+ if ((chains = info.chain_count) < 0) {
129 (void)fclose(fp);
130 errno = EFTYPE;
131 return (_LDP_ERROR);
132@@ -121,136 +153,446 @@
3d9156a7
A
133 } else
134 chains = TABLE_SIZE;
135
136- if ((TMP_substitute_table =
137- malloc(sizeof(__collate_substitute_table))) == NULL) {
138- saverr = errno;
139- (void)fclose(fp);
140- errno = saverr;
141- return (_LDP_ERROR);
142- }
143- if ((TMP_char_pri_table =
144- malloc(sizeof(__collate_char_pri_table))) == NULL) {
224c7076 145- saverr = errno;
3d9156a7
A
146- free(TMP_substitute_table);
147- (void)fclose(fp);
148- errno = saverr;
149- return (_LDP_ERROR);
150- }
151- if ((TMP_chain_pri_table =
152- malloc(sizeof(*__collate_chain_pri_table) * chains)) == NULL) {
224c7076
A
153+ i = sizeof(struct __xlocale_st_collate)
154+ + sizeof(struct __collate_st_chain_pri) * chains
155+ + sizeof(struct __collate_st_large_char_pri) * info.large_pri_count;
156+ for(z = 0; z < info.directive_count; z++)
157+ i += sizeof(struct __collate_st_subst) * info.subst_count[z];
158+ if ((TMP = (struct __xlocale_st_collate *)malloc(i)) == NULL) {
159 saverr = errno;
3d9156a7
A
160- free(TMP_substitute_table);
161- free(TMP_char_pri_table);
162 (void)fclose(fp);
163 errno = saverr;
164 return (_LDP_ERROR);
165 }
166+ TMP->__refcount = 2; /* one for the locale, one for the cache */
167+ TMP->__free_extra = NULL;
3d9156a7
A
168
169 #define FREAD(a, b, c, d) \
170 { \
171 if (fread(a, b, c, d) != c) { \
172 saverr = errno; \
173- free(TMP_substitute_table); \
174- free(TMP_char_pri_table); \
175- free(TMP_chain_pri_table); \
176+ free(TMP); \
177 (void)fclose(d); \
178 errno = saverr; \
179 return (_LDP_ERROR); \
180 } \
181 }
182
183- FREAD(TMP_substitute_table, sizeof(__collate_substitute_table), 1, fp);
184- FREAD(TMP_char_pri_table, sizeof(__collate_char_pri_table), 1, fp);
185- FREAD(TMP_chain_pri_table,
186- sizeof(*__collate_chain_pri_table), chains, fp);
224c7076
A
187+ /* adjust size to read the remaining in one chunk */
188+ i -= offsetof(struct __xlocale_st_collate, __char_pri_table);
189+ FREAD(TMP->__char_pri_table, i, 1, fp);
3d9156a7
A
190 (void)fclose(fp);
191
192- (void)strcpy(collate_encoding, encoding);
193- if (__collate_substitute_table_ptr != NULL)
194- free(__collate_substitute_table_ptr);
195- __collate_substitute_table_ptr = TMP_substitute_table;
196- if (__collate_char_pri_table_ptr != NULL)
197- free(__collate_char_pri_table_ptr);
198- __collate_char_pri_table_ptr = TMP_char_pri_table;
199- if (__collate_chain_pri_table != NULL)
200- free(__collate_chain_pri_table);
201- __collate_chain_pri_table = TMP_chain_pri_table;
224c7076
A
202-
203- __collate_substitute_nontrivial = 0;
204- for (i = 0; i < UCHAR_MAX + 1; i++) {
205- if (__collate_substitute_table[i][0] != i ||
206- __collate_substitute_table[i][1] != 0) {
207- __collate_substitute_nontrivial = 1;
208- break;
209+ vp = (void *)(TMP + 1);
210+
211+ /* the COLLATE_SUBST_DUP optimization relies on COLL_WEIGHTS_MAX == 2 */
212+ if (info.subst_count[0] > 0) {
213+ TMP->__substitute_table[0] = (struct __collate_st_subst *)vp;
214+ vp += info.subst_count[0] * sizeof(struct __collate_st_subst);
215+ } else
216+ TMP->__substitute_table[0] = NULL;
217+ if (info.flags & COLLATE_SUBST_DUP)
218+ TMP->__substitute_table[1] = TMP->__substitute_table[0];
219+ else if (info.subst_count[1] > 0) {
220+ TMP->__substitute_table[1] = (struct __collate_st_subst *)vp;
221+ vp += info.subst_count[1] * sizeof(struct __collate_st_subst);
222+ } else
223+ TMP->__substitute_table[1] = NULL;
224+
225+ if (chains > 0) {
226+ TMP->__chain_pri_table = (struct __collate_st_chain_pri *)vp;
227+ vp += chains * sizeof(struct __collate_st_chain_pri);
228+ } else
229+ TMP->__chain_pri_table = NULL;
230+ if (info.large_pri_count > 0)
231+ TMP->__large_char_pri_table = (struct __collate_st_large_char_pri *)vp;
232+ else
233+ TMP->__large_char_pri_table = NULL;
234+
235+#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
236+ {
237+ struct __collate_st_char_pri *p = TMP->__char_pri_table;
238+ for(i = UCHAR_MAX + 1; i-- > 0; p++) {
239+ for(z = 0; z < info.directive_count; z++)
240+ p->pri[z] = ntohl(p->pri[z]);
241 }
242 }
243- __collate_load_error = 0;
244+ for(z = 0; z < info.directive_count; z++)
245+ if (info.subst_count[z] > 0) {
246+ struct __collate_st_subst *p = TMP->__substitute_table[z];
247+ for(i = info.subst_count[z]; i-- > 0; p++) {
248+ p->val = ntohl(p->val);
249+ wntohl(p->str, STR_LEN);
250+ }
251+ }
252+ {
253+ struct __collate_st_chain_pri *p = TMP->__chain_pri_table;
254+ for(i = chains; i-- > 0; p++) {
255+ wntohl(p->str, STR_LEN);
256+ for(z = 0; z < info.directive_count; z++)
257+ p->pri[z] = ntohl(p->pri[z]);
258+ }
259+ }
260+ if (info.large_pri_count > 0) {
261+ struct __collate_st_large_char_pri *p = TMP->__large_char_pri_table;
262+ for(i = info.large_pri_count; i-- > 0; p++) {
263+ p->val = ntohl(p->val);
264+ for(z = 0; z < info.directive_count; z++)
265+ p->pri.pri[z] = ntohl(p->pri.pri[z]);
266+ }
267+ }
268+#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
3d9156a7 269+ (void)strcpy(TMP->__encoding, encoding);
224c7076 270+ (void)memcpy(&TMP->__info, &info, sizeof(info));
3d9156a7
A
271+ XL_RELEASE(cache);
272+ cache = TMP;
273+ XL_RELEASE(loc->__lc_collate);
274+ loc->__lc_collate = cache;
275+ /* no need to retain, since we set __refcount to 2 above */
224c7076
A
276+
277+ loc->__collate_substitute_nontrivial = (info.subst_count[0] > 0 || info.subst_count[1] > 0);
3d9156a7
A
278+ loc->__collate_load_error = 0;
279+ if (loc == &__global_locale)
280+ __collate_load_error = 0;
281
282 return (_LDP_LOADED);
283 }
284
285-u_char *
286-__collate_substitute(s)
224c7076
A
287- const u_char *s;
288+static int
289+__collate_wcsnlen(const wchar_t *s, int len)
290+{
291+ int n = 0;
292+ while (*s && n < len) {
293+ s++;
294+ n++;
295+ }
296+ return n;
297+}
298+
299+static struct __collate_st_subst *
300+substsearch(const wchar_t key, struct __collate_st_subst *tab, int n)
301+{
302+ int low = 0;
303+ int high = n - 1;
304+ int next, compar;
305+ struct __collate_st_subst *p;
306+
307+ while (low <= high) {
308+ next = (low + high) / 2;
309+ p = tab + next;
310+ compar = key - p->val;
311+ if (compar == 0)
312+ return p;
313+ if (compar > 0)
314+ low = next + 1;
315+ else
316+ high = next - 1;
317+ }
318+ return NULL;
319+}
320+
321+__private_extern__ wchar_t *
322+__collate_substitute(const wchar_t *s, int which, locale_t loc)
3d9156a7
A
323 {
324 int dest_len, len, nlen;
325- int delta = strlen(s);
224c7076
A
326- u_char *dest_str = NULL;
327+ int n, delta, nsubst;
328+ wchar_t *dest_str = NULL;
329+ const wchar_t *fp;
330+ struct __collate_st_subst *subst, *match;
3d9156a7
A
331
332 if (s == NULL || *s == '\0')
333- return (__collate_strdup(""));
224c7076
A
334- delta += delta / 8;
335- dest_str = malloc(dest_len = delta);
336+ return (__collate_wcsdup(L""));
337+ dest_len = wcslen(s);
338+ nsubst = __collate_info->subst_count[which];
339+ if (nsubst <= 0)
340+ return __collate_wcsdup(s);
341+ subst = __collate_substitute_table[which];
342+ delta = dest_len / 4;
343+ if (delta < 2)
344+ delta = 2;
345+ dest_str = (wchar_t *)malloc((dest_len += delta) * sizeof(wchar_t));
3d9156a7
A
346 if (dest_str == NULL)
347 __collate_err(EX_OSERR, __func__);
348 len = 0;
349 while (*s) {
350- nlen = len + strlen(__collate_substitute_table[*s]);
224c7076
A
351+ if ((match = substsearch(*s, subst, nsubst)) != NULL) {
352+ fp = match->str;
353+ n = __collate_wcsnlen(fp, STR_LEN);
354+ } else {
355+ fp = s;
356+ n = 1;
357+ }
358+ nlen = len + n;
3d9156a7 359 if (dest_len <= nlen) {
224c7076
A
360- dest_str = reallocf(dest_str, dest_len = nlen + delta);
361+ dest_str = reallocf(dest_str, (dest_len = nlen + delta) * sizeof(wchar_t));
3d9156a7
A
362 if (dest_str == NULL)
363 __collate_err(EX_OSERR, __func__);
364 }
365- (void)strcpy(dest_str + len, __collate_substitute_table[*s++]);
224c7076
A
366- len = nlen;
367+ wcsncpy(dest_str + len, fp, n);
368+ len += n;
369+ s++;
3d9156a7 370 }
224c7076 371+ dest_str[len] = 0;
3d9156a7
A
372 return (dest_str);
373 }
374
375-void
376-__collate_lookup(t, len, prim, sec)
224c7076
A
377- const u_char *t;
378- int *len, *prim, *sec;
379+static struct __collate_st_chain_pri *
380+chainsearch(const wchar_t *key, int *len, locale_t loc)
3d9156a7 381+{
224c7076
A
382+ int low = 0;
383+ int high = __collate_info->chain_count - 1;
384+ int next, compar, l;
385+ struct __collate_st_chain_pri *p;
386+ struct __collate_st_chain_pri *tab = __collate_chain_pri_table;
387+
388+ while (low <= high) {
389+ next = (low + high) / 2;
390+ p = tab + next;
391+ compar = *key - *p->str;
392+ if (compar == 0) {
393+ l = __collate_wcsnlen(p->str, STR_LEN);
394+ compar = wcsncmp(key, p->str, l);
395+ if (compar == 0) {
396+ *len = l;
397+ return p;
398+ }
399+ }
400+ if (compar > 0)
401+ low = next + 1;
402+ else
403+ high = next - 1;
404+ }
405+ return NULL;
406+}
407+
408+static struct __collate_st_large_char_pri *
409+largesearch(const wchar_t key, locale_t loc)
410+{
411+ int low = 0;
412+ int high = __collate_info->large_pri_count - 1;
413+ int next, compar;
414+ struct __collate_st_large_char_pri *p;
415+ struct __collate_st_large_char_pri *tab = __collate_large_char_pri_table;
416+
417+ while (low <= high) {
418+ next = (low + high) / 2;
419+ p = tab + next;
420+ compar = key - p->val;
421+ if (compar == 0)
422+ return p;
423+ if (compar > 0)
424+ low = next + 1;
425+ else
426+ high = next - 1;
427+ }
428+ return NULL;
3d9156a7
A
429+}
430+
431+__private_extern__ void
224c7076 432+__collate_lookup_l(const wchar_t *t, int *len, int *prim, int *sec, locale_t loc)
3d9156a7
A
433 {
434 struct __collate_st_chain_pri *p2;
224c7076 435+ int l;
3d9156a7
A
436
437 *len = 1;
438 *prim = *sec = 0;
439- for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++) {
224c7076 440- if (*t == p2->str[0] &&
3d9156a7
A
441- strncmp(t, p2->str, strlen(p2->str)) == 0) {
442- *len = strlen(p2->str);
224c7076
A
443- *prim = p2->prim;
444- *sec = p2->sec;
445+ p2 = chainsearch(t, &l, loc);
446+ /* use the chain if prim >= 0 */
447+ if (p2 && p2->pri[0] >= 0) {
448+ *len = l;
449+ *prim = p2->pri[0];
450+ *sec = p2->pri[1];
451+ return;
452+ }
453+ if (*t <= UCHAR_MAX) {
454+ *prim = __collate_char_pri_table[*t].pri[0];
455+ *sec = __collate_char_pri_table[*t].pri[1];
456+ return;
457+ }
458+ if (__collate_info->large_pri_count > 0) {
459+ struct __collate_st_large_char_pri *match;
460+ match = largesearch(*t, loc);
461+ if (match) {
462+ *prim = match->pri.pri[0];
463+ *sec = match->pri.pri[1];
464+ return;
465+ }
466+ }
467+ *prim = (l = __collate_info->undef_pri[0]) >= 0 ? l : *t - l;
468+ *sec = (l = __collate_info->undef_pri[1]) >= 0 ? l : *t - l;
469+}
470+
471+/*
472+ * This is only provided for programs (like grep) that are calling this
473+ * private function. This will go away eventually.
474+ */
3d9156a7 475+void
224c7076 476+__collate_lookup(const unsigned char *t, int *len, int *prim, int *sec)
3d9156a7 477+{
224c7076
A
478+ locale_t loc = __current_locale();
479+ wchar_t *w = __collate_mbstowcs((const char *)t, loc);
480+ int sverrno;
481+
482+ __collate_lookup_l(w, len, prim, sec, loc);
483+ sverrno = errno;
484+ free(w);
485+ errno = sverrno;
3d9156a7 486+}
224c7076
A
487+
488+__private_extern__ void
489+__collate_lookup_which(const wchar_t *t, int *len, int *pri, int which, locale_t loc)
490+{
491+ struct __collate_st_chain_pri *p2;
492+ int p, l;
3d9156a7 493+
224c7076
A
494+ *len = 1;
495+ *pri = 0;
496+ p2 = chainsearch(t, &l, loc);
497+ if (p2) {
498+ p = p2->pri[which];
499+ /* use the chain if pri >= 0 */
500+ if (p >= 0) {
501+ *len = l;
502+ *pri = p;
503+ return;
504+ }
505+ }
506+ if (*t <= UCHAR_MAX) {
507+ *pri = __collate_char_pri_table[*t].pri[which];
508+ return;
509+ }
510+ if (__collate_info->large_pri_count > 0) {
511+ struct __collate_st_large_char_pri *match;
512+ match = largesearch(*t, loc);
513+ if (match) {
514+ *pri = match->pri.pri[which];
515 return;
516 }
517 }
518- *prim = __collate_char_pri_table[*t].prim;
519- *sec = __collate_char_pri_table[*t].sec;
520+ *pri = (l = __collate_info->undef_pri[which]) >= 0 ? l : *t - l;
521 }
522
523-u_char *
524-__collate_strdup(s)
525- u_char *s;
526+__private_extern__ wchar_t *
527+__collate_mbstowcs(const char *s, locale_t loc)
3d9156a7
A
528 {
529- u_char *t = strdup(s);
224c7076
A
530+ static const mbstate_t initial;
531+ mbstate_t st;
532+ size_t len;
533+ const char *ss;
534+ wchar_t *wcs;
535+
536+ ss = s;
537+ st = initial;
538+ if ((len = mbsrtowcs_l(NULL, &ss, 0, &st, loc)) == (size_t)-1)
539+ return NULL;
540+ if ((wcs = (wchar_t *)malloc((len + 1) * sizeof(wchar_t))) == NULL)
541+ __collate_err(EX_OSERR, __func__);
542+ st = initial;
543+ mbsrtowcs_l(wcs, &s, len, &st, loc);
544+ wcs[len] = 0;
3d9156a7 545
224c7076
A
546- if (t == NULL)
547+ return (wcs);
548+}
549+
550+__private_extern__ wchar_t *
551+__collate_wcsdup(const wchar_t *s)
552+{
553+ size_t len = wcslen(s) + 1;
554+ wchar_t *wcs;
555+
556+ if ((wcs = (wchar_t *)malloc(len * sizeof(wchar_t))) == NULL)
3d9156a7 557 __collate_err(EX_OSERR, __func__);
224c7076
A
558- return (t);
559+ wcscpy(wcs, s);
560+ return (wcs);
561 }
562
563-void
564+__private_extern__ void
565+__collate_xfrm(const wchar_t *src, wchar_t **xf, locale_t loc)
566+{
567+ int pri, len;
568+ size_t slen;
569+ const wchar_t *t;
570+ wchar_t *tt = NULL, *tr = NULL;
571+ int direc, pass;
572+ wchar_t *xfp;
573+ struct __collate_st_info *info = __collate_info;
574+ int sverrno;
575+
576+ for(pass = 0; pass < COLL_WEIGHTS_MAX; pass++)
577+ xf[pass] = NULL;
578+ for(pass = 0; pass < info->directive_count; pass++) {
579+ direc = info->directive[pass];
580+ if (pass == 0 || !(info->flags & COLLATE_SUBST_DUP)) {
581+ sverrno = errno;
582+ free(tt);
583+ errno = sverrno;
584+ tt = __collate_substitute(src, pass, loc);
585+ }
586+ if (direc & DIRECTIVE_BACKWARD) {
587+ wchar_t *bp, *fp, c;
588+ sverrno = errno;
589+ free(tr);
590+ errno = sverrno;
591+ tr = __collate_wcsdup(tt ? tt : src);
592+ bp = tr;
593+ fp = tr + wcslen(tr) - 1;
594+ while(bp < fp) {
595+ c = *bp;
596+ *bp++ = *fp;
597+ *fp-- = c;
598+ }
599+ t = (const wchar_t *)tr;
600+ } else if (tt)
601+ t = (const wchar_t *)tt;
602+ else
603+ t = (const wchar_t *)src;
604+ sverrno = errno;
605+ if ((xf[pass] = (wchar_t *)malloc(sizeof(wchar_t) * (wcslen(t) + 1))) == NULL) {
606+ errno = sverrno;
607+ slen = 0;
608+ goto end;
609+ }
610+ errno = sverrno;
611+ xfp = xf[pass];
612+ if (direc & DIRECTIVE_POSITION) {
613+ while(*t) {
614+ __collate_lookup_which(t, &len, &pri, pass, loc);
615+ t += len;
616+ if (pri <= 0) {
617+ if (pri < 0) {
618+ errno = EINVAL;
619+ slen = 0;
620+ goto end;
621+ }
622+ pri = COLLATE_MAX_PRIORITY;
623+ }
624+ *xfp++ = pri;
625+ }
626+ } else {
627+ while(*t) {
628+ __collate_lookup_which(t, &len, &pri, pass, loc);
629+ t += len;
630+ if (pri <= 0) {
631+ if (pri < 0) {
632+ errno = EINVAL;
633+ slen = 0;
634+ goto end;
635+ }
636+ continue;
637+ }
638+ *xfp++ = pri;
639+ }
640+ }
641+ *xfp = 0;
642+ }
643+ end:
644+ sverrno = errno;
645+ free(tt);
646+ free(tr);
647+ errno = sverrno;
648+}
649+
650+__private_extern__ void
651 __collate_err(int ex, const char *f)
3d9156a7 652 {
224c7076
A
653 const char *s;
654@@ -268,24 +610,345 @@
655 exit(ex);
656 }
657
658+/*
659+ * __collate_collating_symbol takes the multibyte string specified by
660+ * src and slen, and using ps, converts that to a wide character. Then
661+ * it is checked to verify it is a collating symbol, and then copies
662+ * it to the wide character string specified by dst and dlen (the
663+ * results are not null terminated). The length of the wide characters
664+ * copied to dst is returned if successful. Zero is returned if no such
665+ * collating symbol exists. (size_t)-1 is returned if there are wide-character
666+ * conversion errors, if the length of the converted string is greater than
667+ * STR_LEN or if dlen is too small. It is up to the calling routine to
668+ * preserve the mbstate_t structure as needed.
669+ */
670+__private_extern__ size_t
671+__collate_collating_symbol(wchar_t *dst, size_t dlen, const char *src, size_t slen, mbstate_t *ps, locale_t loc)
672+{
673+ wchar_t wname[STR_LEN];
674+ wchar_t w, *wp;
675+ size_t len, l;
676+
677+ /* POSIX locale */
678+ if (loc->__collate_load_error) {
679+ if (dlen < 1)
680+ return (size_t)-1;
681+ if (slen != 1 || !isascii(*src))
682+ return 0;
683+ *dst = *src;
684+ return 1;
685+ }
686+ for(wp = wname, len = 0; slen > 0; len++) {
687+ l = mbrtowc_l(&w, src, slen, ps, loc);
688+ if (l == (size_t)-1 || l == (size_t)-2)
689+ return (size_t)-1;
690+ if (l == 0)
691+ break;
692+ if (len >= STR_LEN)
693+ return -1;
694+ *wp++ = w;
695+ src += l;
696+ slen = (long)slen - (long)l;
697+ }
698+ if (len == 0 || len > dlen)
699+ return (size_t)-1;
700+ if (len == 1) {
701+ if (*wname <= UCHAR_MAX) {
702+ if (__collate_char_pri_table[*wname].pri[0] >= 0) {
703+ if (dlen > 0)
704+ *dst = *wname;
705+ return 1;
706+ }
707+ return 0;
708+ } else if (__collate_info->large_pri_count > 0) {
709+ struct __collate_st_large_char_pri *match;
710+ match = largesearch(*wname, loc);
711+ if (match && match->pri.pri[0] >= 0) {
712+ if (dlen > 0)
713+ *dst = *wname;
714+ return 1;
715+ }
716+ }
717+ return 0;
718+ }
719+ *wp = 0;
720+ if (__collate_info->chain_count > 0) {
721+ struct __collate_st_chain_pri *match;
722+ int ll;
723+ match = chainsearch(wname, &ll, loc);
724+ if (match) {
725+ if (ll < dlen)
726+ dlen = ll;
727+ wcsncpy(dst, wname, dlen);
728+ return ll;
729+ }
730+ }
731+ return 0;
732+}
733+
734+/*
735+ * __collate_equiv_class returns the equivalence class number for the symbol
736+ * specified by src and slen, using ps to convert from multi-byte to wide
737+ * character. Zero is returned if the symbol is not in an equivalence
738+ * class. -1 is returned if there are wide character conversion errors,
739+ * if there are any greater-than-8-bit characters or if a multi-byte symbol
740+ * is greater or equal to STR_LEN in length. It is up to the calling
741+ * routine to preserve the mbstate_t structure as needed.
742+ */
743+__private_extern__ int
744+__collate_equiv_class(const char *src, size_t slen, mbstate_t *ps, locale_t loc)
745+{
746+ wchar_t wname[STR_LEN];
747+ wchar_t w, *wp;
748+ size_t len, l;
749+ int e;
750+
751+ /* POSIX locale */
752+ if (loc->__collate_load_error)
753+ return 0;
754+ for(wp = wname, len = 0; slen > 0; len++) {
755+ l = mbrtowc_l(&w, src, slen, ps, loc);
756+ if (l == (size_t)-1 || l == (size_t)-2)
757+ return -1;
758+ if (l == 0)
759+ break;
760+ if (len >= STR_LEN)
761+ return -1;
762+ *wp++ = w;
763+ src += l;
764+ slen = (long)slen - (long)l;
765+ }
766+ if (len == 0)
767+ return -1;
768+ if (len == 1) {
769+ e = -1;
770+ if (*wname <= UCHAR_MAX)
771+ e = __collate_char_pri_table[*wname].pri[0];
772+ else if (__collate_info->large_pri_count > 0) {
773+ struct __collate_st_large_char_pri *match;
774+ match = largesearch(*wname, loc);
775+ if (match)
776+ e = match->pri.pri[0];
777+ }
778+ if (e == 0)
779+ return IGNORE_EQUIV_CLASS;
780+ return e > 0 ? e : 0;
781+ }
782+ *wp = 0;
783+ if (__collate_info->chain_count > 0) {
784+ struct __collate_st_chain_pri *match;
785+ int ll;
786+ match = chainsearch(wname, &ll, loc);
787+ if (match) {
788+ e = match->pri[0];
789+ if (e == 0)
790+ return IGNORE_EQUIV_CLASS;
791+ return e < 0 ? -e : e;
792+ }
793+ }
794+ return 0;
795+}
796+
797+/*
798+ * __collate_equiv_match tries to match any single or multi-character symbol
799+ * in equivalence class equiv_class in the multi-byte string specified by src
800+ * and slen. If start is non-zero, it is taken to be the first (pre-converted)
801+ * wide character. Subsequent wide characters, if needed, will use ps in
802+ * the conversion. On a successful match, the length of the matched string
803+ * is returned (including the start character). If dst is non-NULL, the
804+ * matched wide-character string is copied to dst, a wide character array of
805+ * length dlen (the results are not zero-terminated). If rlen is non-NULL,
806+ * the number of bytes of src actually used is stored in *rlen. Zero is
807+ * returned by __collate_equiv_match if there is no match. (size_t)-1 is
808+ * returned on error: if there were conversion errors or if dlen is too small
809+ * to accept the results. On no match or error, ps is restored to its incoming
810+ * state.
811+ */
812+size_t
813+__collate_equiv_match(int equiv_class, wchar_t *dst, size_t dlen, wchar_t start, const char *src, size_t slen, mbstate_t *ps, size_t *rlen, locale_t loc)
814+{
815+ wchar_t w;
816+ size_t len, l, clen;
817+ int i;
818+ wchar_t buf[STR_LEN], *wp;
819+ mbstate_t save;
820+ const char *s = src;
821+ size_t sl = slen;
822+ struct __collate_st_chain_pri *ch = NULL;
823+
824+ /* POSIX locale */
825+ if (loc->__collate_load_error)
826+ return (size_t)-1;
827+ if (equiv_class == IGNORE_EQUIV_CLASS)
828+ equiv_class = 0;
829+ if (ps)
830+ save = *ps;
831+ wp = buf;
832+ len = clen = 0;
833+ if (start) {
834+ *wp++ = start;
835+ len = 1;
836+ }
837+ /* convert up to the max chain length */
838+ while(sl > 0 && len < __collate_info->chain_max_len) {
839+ l = mbrtowc_l(&w, s, sl, ps, loc);
840+ if (l == (size_t)-1 || l == (size_t)-2 || l == 0)
841+ break;
842+ *wp++ = w;
843+ s += l;
844+ clen += l;
845+ sl -= l;
846+ len++;
847+ }
848+ *wp = 0;
849+ if (len > 1 && (ch = chainsearch(buf, &i, loc)) != NULL) {
850+ int e = ch->pri[0];
851+ if (e < 0)
852+ e = -e;
853+ if (e == equiv_class)
854+ goto found;
855+ }
856+ /* try single character */
857+ i = 1;
858+ if (*buf <= UCHAR_MAX) {
859+ if (equiv_class == __collate_char_pri_table[*buf].pri[0])
860+ goto found;
861+ } else if (__collate_info->large_pri_count > 0) {
862+ struct __collate_st_large_char_pri *match;
863+ match = largesearch(*buf, loc);
864+ if (match && equiv_class == match->pri.pri[0])
865+ goto found;
866+ }
867+ /* no match */
868+ if (ps)
869+ *ps = save;
870+ return 0;
871+found:
872+ /* if we converted more than we used, restore to initial and reconvert
873+ * up to what did match */
874+ if (i < len) {
875+ len = i;
876+ if (ps)
877+ *ps = save;
878+ if (start)
879+ i--;
880+ clen = 0;
881+ while(i-- > 0) {
882+ l = mbrtowc_l(&w, src, slen, ps, loc);
883+ src += l;
884+ clen += l;
885+ slen -= l;
886+ }
887+ }
888+ if (dst) {
889+ if (dlen < len) {
890+ if (ps)
891+ *ps = save;
892+ return (size_t)-1;
893+ }
894+ for(wp = buf; len > 0; len--)
895+ *dst++ = *wp++;
896+ }
897+ if (rlen)
898+ *rlen = clen;
899+ return len;
900+}
901+
902+#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
903+static void
904+wntohl(wchar_t *str, int len)
905+{
906+ for(; *str && len > 0; str++, len--)
907+ *str = ntohl(*str);
908+}
909+#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
910+
911 #ifdef COLLATE_DEBUG
912+static char *
913+show(int c)
914+{
915+ static char buf[5];
916+
917+ if (c >=32 && c <= 126)
918+ sprintf(buf, "'%c' ", c);
919+ else
920+ sprintf(buf, "\\x{%02x}", c);
921+ return buf;
922+}
923+
924+static char *
925+showwcs(const wchar_t *t, int len)
926+{
927+ static char buf[64];
928+ char *cp = buf;
929+
930+ for(; *t && len > 0; len--, t++) {
931+ if (*t >=32 && *t <= 126)
932+ *cp++ = *t;
933+ else {
934+ sprintf(cp, "\\x{%02x}", *t);
935+ cp += strlen(cp);
936+ }
937+ }
938+ *cp = 0;
939+ return buf;
940+}
941+
942 void
943 __collate_print_tables()
944 {
945- int i;
946- struct __collate_st_chain_pri *p2;
947+ int i, z;
3d9156a7
A
948+ locale_t loc = __current_locale();
949
224c7076
A
950- printf("Substitute table:\n");
951- for (i = 0; i < UCHAR_MAX + 1; i++)
952- if (i != *__collate_substitute_table[i])
953- printf("\t'%c' --> \"%s\"\n", i,
954- __collate_substitute_table[i]);
955- printf("Chain priority table:\n");
3d9156a7 956- for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++)
224c7076
A
957- printf("\t\"%s\" : %d %d\n", p2->str, p2->prim, p2->sec);
958+ printf("Info: p=%d s=%d f=0x%02x m=%d dc=%d up=%d us=%d pc=%d sc=%d cc=%d lc=%d\n",
959+ __collate_info->directive[0], __collate_info->directive[1],
960+ __collate_info->flags, __collate_info->chain_max_len,
961+ __collate_info->directive_count,
962+ __collate_info->undef_pri[0], __collate_info->undef_pri[1],
963+ __collate_info->subst_count[0], __collate_info->subst_count[1],
964+ __collate_info->chain_count, __collate_info->large_pri_count);
965+ for(z = 0; z < __collate_info->directive_count; z++) {
966+ if (__collate_info->subst_count[z] > 0) {
967+ struct __collate_st_subst *p2 = __collate_substitute_table[z];
968+ if (z == 0 && (__collate_info->flags & COLLATE_SUBST_DUP))
969+ printf("Both substitute tables:\n");
970+ else
971+ printf("Substitute table %d:\n", z);
972+ for (i = __collate_info->subst_count[z]; i-- > 0; p2++)
973+ printf("\t%s --> \"%s\"\n",
974+ show(p2->val),
975+ showwcs(p2->str, STR_LEN));
976+ }
977+ }
978+ if (__collate_info->chain_count > 0) {
979+ printf("Chain priority table:\n");
980+ struct __collate_st_chain_pri *p2 = __collate_chain_pri_table;
981+ for (i = __collate_info->chain_count; i-- > 0; p2++) {
982+ printf("\t\"%s\" :", showwcs(p2->str, STR_LEN));
983+ for(z = 0; z < __collate_info->directive_count; z++)
984+ printf(" %d", p2->pri[z]);
985+ putchar('\n');
986+ }
987+ }
3d9156a7 988 printf("Char priority table:\n");
224c7076
A
989- for (i = 0; i < UCHAR_MAX + 1; i++)
990- printf("\t'%c' : %d %d\n", i, __collate_char_pri_table[i].prim,
991- __collate_char_pri_table[i].sec);
992+ {
993+ struct __collate_st_char_pri *p2 = __collate_char_pri_table;
994+ for (i = 0; i < UCHAR_MAX + 1; i++, p2++) {
995+ printf("\t%s :", show(i));
996+ for(z = 0; z < __collate_info->directive_count; z++)
997+ printf(" %d", p2->pri[z]);
998+ putchar('\n');
999+ }
1000+ }
1001+ if (__collate_info->large_pri_count > 0) {
1002+ struct __collate_st_large_char_pri *p2 = __collate_large_char_pri_table;
1003+ printf("Large priority table:\n");
1004+ for (i = __collate_info->large_pri_count; i-- > 0; p2++) {
1005+ printf("\t%s :", show(p2->val));
1006+ for(z = 0; z < __collate_info->directive_count; z++)
1007+ printf(" %d", p2->pri.pri[z]);
1008+ putchar('\n');
1009+ }
1010+ }
1011 }
1012 #endif