]>
Commit | Line | Data |
---|---|---|
3d9156a7 | 1 | --- collate.c.orig 2004-11-25 11:38:16.000000000 -0800 |
224c7076 A |
2 | +++ collate.c 2005-10-20 01:00:19.000000000 -0700 |
3 | @@ -28,14 +28,26 @@ | |
3d9156a7 A |
4 | #include <sys/cdefs.h> |
5 | __FBSDID("$FreeBSD: src/lib/libc/locale/collate.c,v 1.33 2004/09/22 16:56:48 stefanf Exp $"); | |
9385eb3d | 6 | |
3d9156a7 A |
7 | +#include "xlocale_private.h" |
8 | +/* assumes the locale_t variable is named loc */ | |
224c7076 A |
9 | +#define __collate_chain_equiv_table (loc->__lc_collate->__chain_equiv_table) |
10 | +#define __collate_chain_pri_table (loc->__lc_collate->__chain_pri_table) | |
3d9156a7 | 11 | +#define __collate_char_pri_table (loc->__lc_collate->__char_pri_table) |
224c7076 A |
12 | +#define __collate_info (&loc->__lc_collate->__info) |
13 | +#define __collate_large_char_pri_table (loc->__lc_collate->__large_char_pri_table) | |
14 | +#define __collate_substitute_table (loc->__lc_collate->__substitute_table) | |
3d9156a7 | 15 | + |
9385eb3d | 16 | #include "namespace.h" |
3d9156a7 | 17 | #include <arpa/inet.h> |
9385eb3d | 18 | #include <stdio.h> |
224c7076 A |
19 | #include <stdlib.h> |
20 | +#include <stddef.h> | |
21 | #include <string.h> | |
22 | +#include <wchar.h> | |
23 | #include <errno.h> | |
24 | #include <unistd.h> | |
25 | #include <sysexits.h> | |
26 | +#include <ctype.h> | |
27 | #include "un-namespace.h" | |
28 | ||
29 | #include "collate.h" | |
30 | @@ -44,36 +56,50 @@ | |
3d9156a7 A |
31 | |
32 | #include "libc_private.h" | |
33 | ||
34 | -int __collate_load_error = 1; | |
35 | -int __collate_substitute_nontrivial; | |
36 | - | |
37 | -u_char __collate_substitute_table[UCHAR_MAX + 1][STR_LEN]; | |
38 | -struct __collate_st_char_pri __collate_char_pri_table[UCHAR_MAX + 1]; | |
39 | -struct __collate_st_chain_pri *__collate_chain_pri_table; | |
40 | - | |
224c7076 A |
41 | +#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN |
42 | +static void wntohl(wchar_t *, int); | |
43 | +#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ | |
3d9156a7 A |
44 | void __collate_err(int ex, const char *f) __dead2; |
45 | ||
46 | -int | |
47 | -__collate_load_tables(const char *encoding) | |
48 | +/* | |
49 | + * Normally, the __collate_* routines should all be __private_extern__, | |
50 | + * but grep is using them (3715846). Until we can provide an alternative, | |
51 | + * we leave them public, and provide a read-only __collate_load_error variable | |
52 | + */ | |
53 | +#undef __collate_load_error | |
54 | +int __collate_load_error = 1; | |
55 | + | |
56 | +__private_extern__ int | |
57 | +__collate_load_tables(const char *encoding, locale_t loc) | |
58 | { | |
59 | FILE *fp; | |
224c7076 A |
60 | - int i, saverr, chains; |
61 | - uint32_t u32; | |
62 | + int i, saverr, chains, z; | |
3d9156a7 A |
63 | char strbuf[STR_LEN], buf[PATH_MAX]; |
64 | - void *TMP_substitute_table, *TMP_char_pri_table, *TMP_chain_pri_table; | |
65 | - static char collate_encoding[ENCODING_LEN + 1]; | |
66 | + struct __xlocale_st_collate *TMP; | |
67 | + static struct __xlocale_st_collate *cache = NULL; | |
224c7076 A |
68 | + struct __collate_st_info info; |
69 | + void *vp; | |
3d9156a7 A |
70 | |
71 | /* 'encoding' must be already checked. */ | |
72 | if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { | |
73 | - __collate_load_error = 1; | |
74 | + loc->__collate_load_error = 1; | |
75 | + if (loc == &__global_locale) | |
76 | + __collate_load_error = 1; | |
77 | + XL_RELEASE(loc->__lc_collate); | |
78 | + loc->__lc_collate = NULL; | |
79 | return (_LDP_CACHE); | |
80 | } | |
81 | ||
82 | /* | |
83 | * If the locale name is the same as our cache, use the cache. | |
84 | */ | |
85 | - if (strcmp(encoding, collate_encoding) == 0) { | |
86 | - __collate_load_error = 0; | |
87 | + if (cache && strcmp(encoding, cache->__encoding) == 0) { | |
88 | + loc->__collate_load_error = 0; | |
89 | + if (loc == &__global_locale) | |
90 | + __collate_load_error = 0; | |
91 | + XL_RELEASE(loc->__lc_collate); | |
92 | + loc->__lc_collate = cache; | |
93 | + XL_RETAIN(loc->__lc_collate); | |
94 | return (_LDP_CACHE); | |
95 | } | |
96 | ||
224c7076 A |
97 | @@ -97,9 +123,7 @@ |
98 | return (_LDP_ERROR); | |
99 | } | |
100 | chains = -1; | |
101 | - if (strcmp(strbuf, COLLATE_VERSION) == 0) | |
102 | - chains = 0; | |
103 | - else if (strcmp(strbuf, COLLATE_VERSION1_1) == 0) | |
104 | + if (strcmp(strbuf, COLLATE_VERSION1_1A) == 0) | |
105 | chains = 1; | |
106 | if (chains < 0) { | |
107 | (void)fclose(fp); | |
108 | @@ -107,13 +131,21 @@ | |
109 | return (_LDP_ERROR); | |
110 | } | |
111 | if (chains) { | |
112 | - if (fread(&u32, sizeof(u32), 1, fp) != 1) { | |
113 | + if (fread(&info, sizeof(info), 1, fp) != 1) { | |
114 | saverr = errno; | |
115 | (void)fclose(fp); | |
116 | errno = saverr; | |
117 | return (_LDP_ERROR); | |
118 | } | |
119 | - if ((chains = (int)ntohl(u32)) < 1) { | |
120 | +#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN | |
121 | + for(z = 0; z < info.directive_count; z++) { | |
122 | + info.undef_pri[z] = ntohl(info.undef_pri[z]); | |
123 | + info.subst_count[z] = ntohl(info.subst_count[z]); | |
124 | + } | |
125 | + info.chain_count = ntohl(info.chain_count); | |
126 | + info.large_pri_count = ntohl(info.large_pri_count); | |
127 | +#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ | |
128 | + if ((chains = info.chain_count) < 0) { | |
129 | (void)fclose(fp); | |
130 | errno = EFTYPE; | |
131 | return (_LDP_ERROR); | |
132 | @@ -121,136 +153,446 @@ | |
3d9156a7 A |
133 | } else |
134 | chains = TABLE_SIZE; | |
135 | ||
136 | - if ((TMP_substitute_table = | |
137 | - malloc(sizeof(__collate_substitute_table))) == NULL) { | |
138 | - saverr = errno; | |
139 | - (void)fclose(fp); | |
140 | - errno = saverr; | |
141 | - return (_LDP_ERROR); | |
142 | - } | |
143 | - if ((TMP_char_pri_table = | |
144 | - malloc(sizeof(__collate_char_pri_table))) == NULL) { | |
224c7076 | 145 | - saverr = errno; |
3d9156a7 A |
146 | - free(TMP_substitute_table); |
147 | - (void)fclose(fp); | |
148 | - errno = saverr; | |
149 | - return (_LDP_ERROR); | |
150 | - } | |
151 | - if ((TMP_chain_pri_table = | |
152 | - malloc(sizeof(*__collate_chain_pri_table) * chains)) == NULL) { | |
224c7076 A |
153 | + i = sizeof(struct __xlocale_st_collate) |
154 | + + sizeof(struct __collate_st_chain_pri) * chains | |
155 | + + sizeof(struct __collate_st_large_char_pri) * info.large_pri_count; | |
156 | + for(z = 0; z < info.directive_count; z++) | |
157 | + i += sizeof(struct __collate_st_subst) * info.subst_count[z]; | |
158 | + if ((TMP = (struct __xlocale_st_collate *)malloc(i)) == NULL) { | |
159 | saverr = errno; | |
3d9156a7 A |
160 | - free(TMP_substitute_table); |
161 | - free(TMP_char_pri_table); | |
162 | (void)fclose(fp); | |
163 | errno = saverr; | |
164 | return (_LDP_ERROR); | |
165 | } | |
166 | + TMP->__refcount = 2; /* one for the locale, one for the cache */ | |
167 | + TMP->__free_extra = NULL; | |
3d9156a7 A |
168 | |
169 | #define FREAD(a, b, c, d) \ | |
170 | { \ | |
171 | if (fread(a, b, c, d) != c) { \ | |
172 | saverr = errno; \ | |
173 | - free(TMP_substitute_table); \ | |
174 | - free(TMP_char_pri_table); \ | |
175 | - free(TMP_chain_pri_table); \ | |
176 | + free(TMP); \ | |
177 | (void)fclose(d); \ | |
178 | errno = saverr; \ | |
179 | return (_LDP_ERROR); \ | |
180 | } \ | |
181 | } | |
182 | ||
183 | - FREAD(TMP_substitute_table, sizeof(__collate_substitute_table), 1, fp); | |
184 | - FREAD(TMP_char_pri_table, sizeof(__collate_char_pri_table), 1, fp); | |
185 | - FREAD(TMP_chain_pri_table, | |
186 | - sizeof(*__collate_chain_pri_table), chains, fp); | |
224c7076 A |
187 | + /* adjust size to read the remaining in one chunk */ |
188 | + i -= offsetof(struct __xlocale_st_collate, __char_pri_table); | |
189 | + FREAD(TMP->__char_pri_table, i, 1, fp); | |
3d9156a7 A |
190 | (void)fclose(fp); |
191 | ||
192 | - (void)strcpy(collate_encoding, encoding); | |
193 | - if (__collate_substitute_table_ptr != NULL) | |
194 | - free(__collate_substitute_table_ptr); | |
195 | - __collate_substitute_table_ptr = TMP_substitute_table; | |
196 | - if (__collate_char_pri_table_ptr != NULL) | |
197 | - free(__collate_char_pri_table_ptr); | |
198 | - __collate_char_pri_table_ptr = TMP_char_pri_table; | |
199 | - if (__collate_chain_pri_table != NULL) | |
200 | - free(__collate_chain_pri_table); | |
201 | - __collate_chain_pri_table = TMP_chain_pri_table; | |
224c7076 A |
202 | - |
203 | - __collate_substitute_nontrivial = 0; | |
204 | - for (i = 0; i < UCHAR_MAX + 1; i++) { | |
205 | - if (__collate_substitute_table[i][0] != i || | |
206 | - __collate_substitute_table[i][1] != 0) { | |
207 | - __collate_substitute_nontrivial = 1; | |
208 | - break; | |
209 | + vp = (void *)(TMP + 1); | |
210 | + | |
211 | + /* the COLLATE_SUBST_DUP optimization relies on COLL_WEIGHTS_MAX == 2 */ | |
212 | + if (info.subst_count[0] > 0) { | |
213 | + TMP->__substitute_table[0] = (struct __collate_st_subst *)vp; | |
214 | + vp += info.subst_count[0] * sizeof(struct __collate_st_subst); | |
215 | + } else | |
216 | + TMP->__substitute_table[0] = NULL; | |
217 | + if (info.flags & COLLATE_SUBST_DUP) | |
218 | + TMP->__substitute_table[1] = TMP->__substitute_table[0]; | |
219 | + else if (info.subst_count[1] > 0) { | |
220 | + TMP->__substitute_table[1] = (struct __collate_st_subst *)vp; | |
221 | + vp += info.subst_count[1] * sizeof(struct __collate_st_subst); | |
222 | + } else | |
223 | + TMP->__substitute_table[1] = NULL; | |
224 | + | |
225 | + if (chains > 0) { | |
226 | + TMP->__chain_pri_table = (struct __collate_st_chain_pri *)vp; | |
227 | + vp += chains * sizeof(struct __collate_st_chain_pri); | |
228 | + } else | |
229 | + TMP->__chain_pri_table = NULL; | |
230 | + if (info.large_pri_count > 0) | |
231 | + TMP->__large_char_pri_table = (struct __collate_st_large_char_pri *)vp; | |
232 | + else | |
233 | + TMP->__large_char_pri_table = NULL; | |
234 | + | |
235 | +#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN | |
236 | + { | |
237 | + struct __collate_st_char_pri *p = TMP->__char_pri_table; | |
238 | + for(i = UCHAR_MAX + 1; i-- > 0; p++) { | |
239 | + for(z = 0; z < info.directive_count; z++) | |
240 | + p->pri[z] = ntohl(p->pri[z]); | |
241 | } | |
242 | } | |
243 | - __collate_load_error = 0; | |
244 | + for(z = 0; z < info.directive_count; z++) | |
245 | + if (info.subst_count[z] > 0) { | |
246 | + struct __collate_st_subst *p = TMP->__substitute_table[z]; | |
247 | + for(i = info.subst_count[z]; i-- > 0; p++) { | |
248 | + p->val = ntohl(p->val); | |
249 | + wntohl(p->str, STR_LEN); | |
250 | + } | |
251 | + } | |
252 | + { | |
253 | + struct __collate_st_chain_pri *p = TMP->__chain_pri_table; | |
254 | + for(i = chains; i-- > 0; p++) { | |
255 | + wntohl(p->str, STR_LEN); | |
256 | + for(z = 0; z < info.directive_count; z++) | |
257 | + p->pri[z] = ntohl(p->pri[z]); | |
258 | + } | |
259 | + } | |
260 | + if (info.large_pri_count > 0) { | |
261 | + struct __collate_st_large_char_pri *p = TMP->__large_char_pri_table; | |
262 | + for(i = info.large_pri_count; i-- > 0; p++) { | |
263 | + p->val = ntohl(p->val); | |
264 | + for(z = 0; z < info.directive_count; z++) | |
265 | + p->pri.pri[z] = ntohl(p->pri.pri[z]); | |
266 | + } | |
267 | + } | |
268 | +#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ | |
3d9156a7 | 269 | + (void)strcpy(TMP->__encoding, encoding); |
224c7076 | 270 | + (void)memcpy(&TMP->__info, &info, sizeof(info)); |
3d9156a7 A |
271 | + XL_RELEASE(cache); |
272 | + cache = TMP; | |
273 | + XL_RELEASE(loc->__lc_collate); | |
274 | + loc->__lc_collate = cache; | |
275 | + /* no need to retain, since we set __refcount to 2 above */ | |
224c7076 A |
276 | + |
277 | + loc->__collate_substitute_nontrivial = (info.subst_count[0] > 0 || info.subst_count[1] > 0); | |
3d9156a7 A |
278 | + loc->__collate_load_error = 0; |
279 | + if (loc == &__global_locale) | |
280 | + __collate_load_error = 0; | |
281 | ||
282 | return (_LDP_LOADED); | |
283 | } | |
284 | ||
285 | -u_char * | |
286 | -__collate_substitute(s) | |
224c7076 A |
287 | - const u_char *s; |
288 | +static int | |
289 | +__collate_wcsnlen(const wchar_t *s, int len) | |
290 | +{ | |
291 | + int n = 0; | |
292 | + while (*s && n < len) { | |
293 | + s++; | |
294 | + n++; | |
295 | + } | |
296 | + return n; | |
297 | +} | |
298 | + | |
299 | +static struct __collate_st_subst * | |
300 | +substsearch(const wchar_t key, struct __collate_st_subst *tab, int n) | |
301 | +{ | |
302 | + int low = 0; | |
303 | + int high = n - 1; | |
304 | + int next, compar; | |
305 | + struct __collate_st_subst *p; | |
306 | + | |
307 | + while (low <= high) { | |
308 | + next = (low + high) / 2; | |
309 | + p = tab + next; | |
310 | + compar = key - p->val; | |
311 | + if (compar == 0) | |
312 | + return p; | |
313 | + if (compar > 0) | |
314 | + low = next + 1; | |
315 | + else | |
316 | + high = next - 1; | |
317 | + } | |
318 | + return NULL; | |
319 | +} | |
320 | + | |
321 | +__private_extern__ wchar_t * | |
322 | +__collate_substitute(const wchar_t *s, int which, locale_t loc) | |
3d9156a7 A |
323 | { |
324 | int dest_len, len, nlen; | |
325 | - int delta = strlen(s); | |
224c7076 A |
326 | - u_char *dest_str = NULL; |
327 | + int n, delta, nsubst; | |
328 | + wchar_t *dest_str = NULL; | |
329 | + const wchar_t *fp; | |
330 | + struct __collate_st_subst *subst, *match; | |
3d9156a7 A |
331 | |
332 | if (s == NULL || *s == '\0') | |
333 | - return (__collate_strdup("")); | |
224c7076 A |
334 | - delta += delta / 8; |
335 | - dest_str = malloc(dest_len = delta); | |
336 | + return (__collate_wcsdup(L"")); | |
337 | + dest_len = wcslen(s); | |
338 | + nsubst = __collate_info->subst_count[which]; | |
339 | + if (nsubst <= 0) | |
340 | + return __collate_wcsdup(s); | |
341 | + subst = __collate_substitute_table[which]; | |
342 | + delta = dest_len / 4; | |
343 | + if (delta < 2) | |
344 | + delta = 2; | |
345 | + dest_str = (wchar_t *)malloc((dest_len += delta) * sizeof(wchar_t)); | |
3d9156a7 A |
346 | if (dest_str == NULL) |
347 | __collate_err(EX_OSERR, __func__); | |
348 | len = 0; | |
349 | while (*s) { | |
350 | - nlen = len + strlen(__collate_substitute_table[*s]); | |
224c7076 A |
351 | + if ((match = substsearch(*s, subst, nsubst)) != NULL) { |
352 | + fp = match->str; | |
353 | + n = __collate_wcsnlen(fp, STR_LEN); | |
354 | + } else { | |
355 | + fp = s; | |
356 | + n = 1; | |
357 | + } | |
358 | + nlen = len + n; | |
3d9156a7 | 359 | if (dest_len <= nlen) { |
224c7076 A |
360 | - dest_str = reallocf(dest_str, dest_len = nlen + delta); |
361 | + dest_str = reallocf(dest_str, (dest_len = nlen + delta) * sizeof(wchar_t)); | |
3d9156a7 A |
362 | if (dest_str == NULL) |
363 | __collate_err(EX_OSERR, __func__); | |
364 | } | |
365 | - (void)strcpy(dest_str + len, __collate_substitute_table[*s++]); | |
224c7076 A |
366 | - len = nlen; |
367 | + wcsncpy(dest_str + len, fp, n); | |
368 | + len += n; | |
369 | + s++; | |
3d9156a7 | 370 | } |
224c7076 | 371 | + dest_str[len] = 0; |
3d9156a7 A |
372 | return (dest_str); |
373 | } | |
374 | ||
375 | -void | |
376 | -__collate_lookup(t, len, prim, sec) | |
224c7076 A |
377 | - const u_char *t; |
378 | - int *len, *prim, *sec; | |
379 | +static struct __collate_st_chain_pri * | |
380 | +chainsearch(const wchar_t *key, int *len, locale_t loc) | |
3d9156a7 | 381 | +{ |
224c7076 A |
382 | + int low = 0; |
383 | + int high = __collate_info->chain_count - 1; | |
384 | + int next, compar, l; | |
385 | + struct __collate_st_chain_pri *p; | |
386 | + struct __collate_st_chain_pri *tab = __collate_chain_pri_table; | |
387 | + | |
388 | + while (low <= high) { | |
389 | + next = (low + high) / 2; | |
390 | + p = tab + next; | |
391 | + compar = *key - *p->str; | |
392 | + if (compar == 0) { | |
393 | + l = __collate_wcsnlen(p->str, STR_LEN); | |
394 | + compar = wcsncmp(key, p->str, l); | |
395 | + if (compar == 0) { | |
396 | + *len = l; | |
397 | + return p; | |
398 | + } | |
399 | + } | |
400 | + if (compar > 0) | |
401 | + low = next + 1; | |
402 | + else | |
403 | + high = next - 1; | |
404 | + } | |
405 | + return NULL; | |
406 | +} | |
407 | + | |
408 | +static struct __collate_st_large_char_pri * | |
409 | +largesearch(const wchar_t key, locale_t loc) | |
410 | +{ | |
411 | + int low = 0; | |
412 | + int high = __collate_info->large_pri_count - 1; | |
413 | + int next, compar; | |
414 | + struct __collate_st_large_char_pri *p; | |
415 | + struct __collate_st_large_char_pri *tab = __collate_large_char_pri_table; | |
416 | + | |
417 | + while (low <= high) { | |
418 | + next = (low + high) / 2; | |
419 | + p = tab + next; | |
420 | + compar = key - p->val; | |
421 | + if (compar == 0) | |
422 | + return p; | |
423 | + if (compar > 0) | |
424 | + low = next + 1; | |
425 | + else | |
426 | + high = next - 1; | |
427 | + } | |
428 | + return NULL; | |
3d9156a7 A |
429 | +} |
430 | + | |
431 | +__private_extern__ void | |
224c7076 | 432 | +__collate_lookup_l(const wchar_t *t, int *len, int *prim, int *sec, locale_t loc) |
3d9156a7 A |
433 | { |
434 | struct __collate_st_chain_pri *p2; | |
224c7076 | 435 | + int l; |
3d9156a7 A |
436 | |
437 | *len = 1; | |
438 | *prim = *sec = 0; | |
439 | - for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++) { | |
224c7076 | 440 | - if (*t == p2->str[0] && |
3d9156a7 A |
441 | - strncmp(t, p2->str, strlen(p2->str)) == 0) { |
442 | - *len = strlen(p2->str); | |
224c7076 A |
443 | - *prim = p2->prim; |
444 | - *sec = p2->sec; | |
445 | + p2 = chainsearch(t, &l, loc); | |
446 | + /* use the chain if prim >= 0 */ | |
447 | + if (p2 && p2->pri[0] >= 0) { | |
448 | + *len = l; | |
449 | + *prim = p2->pri[0]; | |
450 | + *sec = p2->pri[1]; | |
451 | + return; | |
452 | + } | |
453 | + if (*t <= UCHAR_MAX) { | |
454 | + *prim = __collate_char_pri_table[*t].pri[0]; | |
455 | + *sec = __collate_char_pri_table[*t].pri[1]; | |
456 | + return; | |
457 | + } | |
458 | + if (__collate_info->large_pri_count > 0) { | |
459 | + struct __collate_st_large_char_pri *match; | |
460 | + match = largesearch(*t, loc); | |
461 | + if (match) { | |
462 | + *prim = match->pri.pri[0]; | |
463 | + *sec = match->pri.pri[1]; | |
464 | + return; | |
465 | + } | |
466 | + } | |
467 | + *prim = (l = __collate_info->undef_pri[0]) >= 0 ? l : *t - l; | |
468 | + *sec = (l = __collate_info->undef_pri[1]) >= 0 ? l : *t - l; | |
469 | +} | |
470 | + | |
471 | +/* | |
472 | + * This is only provided for programs (like grep) that are calling this | |
473 | + * private function. This will go away eventually. | |
474 | + */ | |
3d9156a7 | 475 | +void |
224c7076 | 476 | +__collate_lookup(const unsigned char *t, int *len, int *prim, int *sec) |
3d9156a7 | 477 | +{ |
224c7076 A |
478 | + locale_t loc = __current_locale(); |
479 | + wchar_t *w = __collate_mbstowcs((const char *)t, loc); | |
480 | + int sverrno; | |
481 | + | |
482 | + __collate_lookup_l(w, len, prim, sec, loc); | |
483 | + sverrno = errno; | |
484 | + free(w); | |
485 | + errno = sverrno; | |
3d9156a7 | 486 | +} |
224c7076 A |
487 | + |
488 | +__private_extern__ void | |
489 | +__collate_lookup_which(const wchar_t *t, int *len, int *pri, int which, locale_t loc) | |
490 | +{ | |
491 | + struct __collate_st_chain_pri *p2; | |
492 | + int p, l; | |
3d9156a7 | 493 | + |
224c7076 A |
494 | + *len = 1; |
495 | + *pri = 0; | |
496 | + p2 = chainsearch(t, &l, loc); | |
497 | + if (p2) { | |
498 | + p = p2->pri[which]; | |
499 | + /* use the chain if pri >= 0 */ | |
500 | + if (p >= 0) { | |
501 | + *len = l; | |
502 | + *pri = p; | |
503 | + return; | |
504 | + } | |
505 | + } | |
506 | + if (*t <= UCHAR_MAX) { | |
507 | + *pri = __collate_char_pri_table[*t].pri[which]; | |
508 | + return; | |
509 | + } | |
510 | + if (__collate_info->large_pri_count > 0) { | |
511 | + struct __collate_st_large_char_pri *match; | |
512 | + match = largesearch(*t, loc); | |
513 | + if (match) { | |
514 | + *pri = match->pri.pri[which]; | |
515 | return; | |
516 | } | |
517 | } | |
518 | - *prim = __collate_char_pri_table[*t].prim; | |
519 | - *sec = __collate_char_pri_table[*t].sec; | |
520 | + *pri = (l = __collate_info->undef_pri[which]) >= 0 ? l : *t - l; | |
521 | } | |
522 | ||
523 | -u_char * | |
524 | -__collate_strdup(s) | |
525 | - u_char *s; | |
526 | +__private_extern__ wchar_t * | |
527 | +__collate_mbstowcs(const char *s, locale_t loc) | |
3d9156a7 A |
528 | { |
529 | - u_char *t = strdup(s); | |
224c7076 A |
530 | + static const mbstate_t initial; |
531 | + mbstate_t st; | |
532 | + size_t len; | |
533 | + const char *ss; | |
534 | + wchar_t *wcs; | |
535 | + | |
536 | + ss = s; | |
537 | + st = initial; | |
538 | + if ((len = mbsrtowcs_l(NULL, &ss, 0, &st, loc)) == (size_t)-1) | |
539 | + return NULL; | |
540 | + if ((wcs = (wchar_t *)malloc((len + 1) * sizeof(wchar_t))) == NULL) | |
541 | + __collate_err(EX_OSERR, __func__); | |
542 | + st = initial; | |
543 | + mbsrtowcs_l(wcs, &s, len, &st, loc); | |
544 | + wcs[len] = 0; | |
3d9156a7 | 545 | |
224c7076 A |
546 | - if (t == NULL) |
547 | + return (wcs); | |
548 | +} | |
549 | + | |
550 | +__private_extern__ wchar_t * | |
551 | +__collate_wcsdup(const wchar_t *s) | |
552 | +{ | |
553 | + size_t len = wcslen(s) + 1; | |
554 | + wchar_t *wcs; | |
555 | + | |
556 | + if ((wcs = (wchar_t *)malloc(len * sizeof(wchar_t))) == NULL) | |
3d9156a7 | 557 | __collate_err(EX_OSERR, __func__); |
224c7076 A |
558 | - return (t); |
559 | + wcscpy(wcs, s); | |
560 | + return (wcs); | |
561 | } | |
562 | ||
563 | -void | |
564 | +__private_extern__ void | |
565 | +__collate_xfrm(const wchar_t *src, wchar_t **xf, locale_t loc) | |
566 | +{ | |
567 | + int pri, len; | |
568 | + size_t slen; | |
569 | + const wchar_t *t; | |
570 | + wchar_t *tt = NULL, *tr = NULL; | |
571 | + int direc, pass; | |
572 | + wchar_t *xfp; | |
573 | + struct __collate_st_info *info = __collate_info; | |
574 | + int sverrno; | |
575 | + | |
576 | + for(pass = 0; pass < COLL_WEIGHTS_MAX; pass++) | |
577 | + xf[pass] = NULL; | |
578 | + for(pass = 0; pass < info->directive_count; pass++) { | |
579 | + direc = info->directive[pass]; | |
580 | + if (pass == 0 || !(info->flags & COLLATE_SUBST_DUP)) { | |
581 | + sverrno = errno; | |
582 | + free(tt); | |
583 | + errno = sverrno; | |
584 | + tt = __collate_substitute(src, pass, loc); | |
585 | + } | |
586 | + if (direc & DIRECTIVE_BACKWARD) { | |
587 | + wchar_t *bp, *fp, c; | |
588 | + sverrno = errno; | |
589 | + free(tr); | |
590 | + errno = sverrno; | |
591 | + tr = __collate_wcsdup(tt ? tt : src); | |
592 | + bp = tr; | |
593 | + fp = tr + wcslen(tr) - 1; | |
594 | + while(bp < fp) { | |
595 | + c = *bp; | |
596 | + *bp++ = *fp; | |
597 | + *fp-- = c; | |
598 | + } | |
599 | + t = (const wchar_t *)tr; | |
600 | + } else if (tt) | |
601 | + t = (const wchar_t *)tt; | |
602 | + else | |
603 | + t = (const wchar_t *)src; | |
604 | + sverrno = errno; | |
605 | + if ((xf[pass] = (wchar_t *)malloc(sizeof(wchar_t) * (wcslen(t) + 1))) == NULL) { | |
606 | + errno = sverrno; | |
607 | + slen = 0; | |
608 | + goto end; | |
609 | + } | |
610 | + errno = sverrno; | |
611 | + xfp = xf[pass]; | |
612 | + if (direc & DIRECTIVE_POSITION) { | |
613 | + while(*t) { | |
614 | + __collate_lookup_which(t, &len, &pri, pass, loc); | |
615 | + t += len; | |
616 | + if (pri <= 0) { | |
617 | + if (pri < 0) { | |
618 | + errno = EINVAL; | |
619 | + slen = 0; | |
620 | + goto end; | |
621 | + } | |
622 | + pri = COLLATE_MAX_PRIORITY; | |
623 | + } | |
624 | + *xfp++ = pri; | |
625 | + } | |
626 | + } else { | |
627 | + while(*t) { | |
628 | + __collate_lookup_which(t, &len, &pri, pass, loc); | |
629 | + t += len; | |
630 | + if (pri <= 0) { | |
631 | + if (pri < 0) { | |
632 | + errno = EINVAL; | |
633 | + slen = 0; | |
634 | + goto end; | |
635 | + } | |
636 | + continue; | |
637 | + } | |
638 | + *xfp++ = pri; | |
639 | + } | |
640 | + } | |
641 | + *xfp = 0; | |
642 | + } | |
643 | + end: | |
644 | + sverrno = errno; | |
645 | + free(tt); | |
646 | + free(tr); | |
647 | + errno = sverrno; | |
648 | +} | |
649 | + | |
650 | +__private_extern__ void | |
651 | __collate_err(int ex, const char *f) | |
3d9156a7 | 652 | { |
224c7076 A |
653 | const char *s; |
654 | @@ -268,24 +610,345 @@ | |
655 | exit(ex); | |
656 | } | |
657 | ||
658 | +/* | |
659 | + * __collate_collating_symbol takes the multibyte string specified by | |
660 | + * src and slen, and using ps, converts that to a wide character. Then | |
661 | + * it is checked to verify it is a collating symbol, and then copies | |
662 | + * it to the wide character string specified by dst and dlen (the | |
663 | + * results are not null terminated). The length of the wide characters | |
664 | + * copied to dst is returned if successful. Zero is returned if no such | |
665 | + * collating symbol exists. (size_t)-1 is returned if there are wide-character | |
666 | + * conversion errors, if the length of the converted string is greater than | |
667 | + * STR_LEN or if dlen is too small. It is up to the calling routine to | |
668 | + * preserve the mbstate_t structure as needed. | |
669 | + */ | |
670 | +__private_extern__ size_t | |
671 | +__collate_collating_symbol(wchar_t *dst, size_t dlen, const char *src, size_t slen, mbstate_t *ps, locale_t loc) | |
672 | +{ | |
673 | + wchar_t wname[STR_LEN]; | |
674 | + wchar_t w, *wp; | |
675 | + size_t len, l; | |
676 | + | |
677 | + /* POSIX locale */ | |
678 | + if (loc->__collate_load_error) { | |
679 | + if (dlen < 1) | |
680 | + return (size_t)-1; | |
681 | + if (slen != 1 || !isascii(*src)) | |
682 | + return 0; | |
683 | + *dst = *src; | |
684 | + return 1; | |
685 | + } | |
686 | + for(wp = wname, len = 0; slen > 0; len++) { | |
687 | + l = mbrtowc_l(&w, src, slen, ps, loc); | |
688 | + if (l == (size_t)-1 || l == (size_t)-2) | |
689 | + return (size_t)-1; | |
690 | + if (l == 0) | |
691 | + break; | |
692 | + if (len >= STR_LEN) | |
693 | + return -1; | |
694 | + *wp++ = w; | |
695 | + src += l; | |
696 | + slen = (long)slen - (long)l; | |
697 | + } | |
698 | + if (len == 0 || len > dlen) | |
699 | + return (size_t)-1; | |
700 | + if (len == 1) { | |
701 | + if (*wname <= UCHAR_MAX) { | |
702 | + if (__collate_char_pri_table[*wname].pri[0] >= 0) { | |
703 | + if (dlen > 0) | |
704 | + *dst = *wname; | |
705 | + return 1; | |
706 | + } | |
707 | + return 0; | |
708 | + } else if (__collate_info->large_pri_count > 0) { | |
709 | + struct __collate_st_large_char_pri *match; | |
710 | + match = largesearch(*wname, loc); | |
711 | + if (match && match->pri.pri[0] >= 0) { | |
712 | + if (dlen > 0) | |
713 | + *dst = *wname; | |
714 | + return 1; | |
715 | + } | |
716 | + } | |
717 | + return 0; | |
718 | + } | |
719 | + *wp = 0; | |
720 | + if (__collate_info->chain_count > 0) { | |
721 | + struct __collate_st_chain_pri *match; | |
722 | + int ll; | |
723 | + match = chainsearch(wname, &ll, loc); | |
724 | + if (match) { | |
725 | + if (ll < dlen) | |
726 | + dlen = ll; | |
727 | + wcsncpy(dst, wname, dlen); | |
728 | + return ll; | |
729 | + } | |
730 | + } | |
731 | + return 0; | |
732 | +} | |
733 | + | |
734 | +/* | |
735 | + * __collate_equiv_class returns the equivalence class number for the symbol | |
736 | + * specified by src and slen, using ps to convert from multi-byte to wide | |
737 | + * character. Zero is returned if the symbol is not in an equivalence | |
738 | + * class. -1 is returned if there are wide character conversion errors, | |
739 | + * if there are any greater-than-8-bit characters or if a multi-byte symbol | |
740 | + * is greater than or equal to STR_LEN in length. It is up to the calling | |
741 | + * routine to preserve the mbstate_t structure as needed. | |
742 | + */ | |
743 | +__private_extern__ int | |
744 | +__collate_equiv_class(const char *src, size_t slen, mbstate_t *ps, locale_t loc) | |
745 | +{ | |
746 | + wchar_t wname[STR_LEN]; | |
747 | + wchar_t w, *wp; | |
748 | + size_t len, l; | |
749 | + int e; | |
750 | + | |
751 | + /* POSIX locale */ | |
752 | + if (loc->__collate_load_error) | |
753 | + return 0; | |
754 | + for(wp = wname, len = 0; slen > 0; len++) { | |
755 | + l = mbrtowc_l(&w, src, slen, ps, loc); | |
756 | + if (l == (size_t)-1 || l == (size_t)-2) | |
757 | + return -1; | |
758 | + if (l == 0) | |
759 | + break; | |
760 | + if (len >= STR_LEN) | |
761 | + return -1; | |
762 | + *wp++ = w; | |
763 | + src += l; | |
764 | + slen = (long)slen - (long)l; | |
765 | + } | |
766 | + if (len == 0) | |
767 | + return -1; | |
768 | + if (len == 1) { | |
769 | + e = -1; | |
770 | + if (*wname <= UCHAR_MAX) | |
771 | + e = __collate_char_pri_table[*wname].pri[0]; | |
772 | + else if (__collate_info->large_pri_count > 0) { | |
773 | + struct __collate_st_large_char_pri *match; | |
774 | + match = largesearch(*wname, loc); | |
775 | + if (match) | |
776 | + e = match->pri.pri[0]; | |
777 | + } | |
778 | + if (e == 0) | |
779 | + return IGNORE_EQUIV_CLASS; | |
780 | + return e > 0 ? e : 0; | |
781 | + } | |
782 | + *wp = 0; | |
783 | + if (__collate_info->chain_count > 0) { | |
784 | + struct __collate_st_chain_pri *match; | |
785 | + int ll; | |
786 | + match = chainsearch(wname, &ll, loc); | |
787 | + if (match) { | |
788 | + e = match->pri[0]; | |
789 | + if (e == 0) | |
790 | + return IGNORE_EQUIV_CLASS; | |
791 | + return e < 0 ? -e : e; | |
792 | + } | |
793 | + } | |
794 | + return 0; | |
795 | +} | |
796 | + | |
797 | +/* | |
798 | + * __collate_equiv_match tries to match any single or multi-character symbol | |
799 | + * in equivalence class equiv_class in the multi-byte string specified by src | |
800 | + * and slen. If start is non-zero, it is taken to be the first (pre-converted) | |
801 | + * wide character. Subsequent wide characters, if needed, will use ps in | |
802 | + * the conversion. On a successful match, the length of the matched string | |
803 | + * is returned (including the start character). If dst is non-NULL, the | |
804 | + * matched wide-character string is copied to dst, a wide character array of | |
805 | + * length dlen (the results are not zero-terminated). If rlen is non-NULL, | |
806 | + * the number of characters in src actually used is returned. Zero is | |
807 | + * returned by __collate_equiv_match if there is no match. (size_t)-1 is | |
808 | + * returned on error: if there were conversion errors or if dlen is too small | |
809 | + * to accept the results. On no match or error, ps is restored to its incoming | |
810 | + * state. | |
811 | + */ | |
812 | +size_t | |
813 | +__collate_equiv_match(int equiv_class, wchar_t *dst, size_t dlen, wchar_t start, const char *src, size_t slen, mbstate_t *ps, size_t *rlen, locale_t loc) | |
814 | +{ | |
815 | + wchar_t w; | |
816 | + size_t len, l, clen; | |
817 | + int i; | |
818 | + wchar_t buf[STR_LEN], *wp; | |
819 | + mbstate_t save; | |
820 | + const char *s = src; | |
821 | + size_t sl = slen; | |
822 | + struct __collate_st_chain_pri *ch = NULL; | |
823 | + | |
824 | + /* POSIX locale */ | |
825 | + if (loc->__collate_load_error) | |
826 | + return (size_t)-1; | |
827 | + if (equiv_class == IGNORE_EQUIV_CLASS) | |
828 | + equiv_class = 0; | |
829 | + if (ps) | |
830 | + save = *ps; /* snapshot so the no-match and error paths can restore ps */ | |
831 | + wp = buf; | |
832 | + len = clen = 0; | |
833 | + if (start) { | |
834 | + *wp++ = start; | |
835 | + len = 1; | |
836 | + } | |
837 | + /* convert up to the max chain length */ | |
838 | + while(sl > 0 && len < __collate_info->chain_max_len) { | |
839 | + l = mbrtowc_l(&w, s, sl, ps, loc); | |
840 | + if (l == (size_t)-1 || l == (size_t)-2 || l == 0) | |
841 | + break; | |
842 | + *wp++ = w; | |
843 | + s += l; | |
844 | + clen += l; | |
845 | + sl -= l; | |
846 | + len++; | |
847 | + } | |
848 | + *wp = 0; | |
849 | + if (len > 1 && (ch = chainsearch(buf, &i, loc)) != NULL) { | |
850 | + int e = ch->pri[0]; | |
851 | + if (e < 0) | |
852 | + e = -e; | |
853 | + if (e == equiv_class) | |
854 | + goto found; | |
855 | + } | |
856 | + /* try single character */ | |
857 | + i = 1; | |
858 | + if (*buf <= UCHAR_MAX) { | |
859 | + if (equiv_class == __collate_char_pri_table[*buf].pri[0]) | |
860 | + goto found; | |
861 | + } else if (__collate_info->large_pri_count > 0) { | |
862 | + struct __collate_st_large_char_pri *match; | |
863 | + match = largesearch(*buf, loc); | |
864 | + if (match && equiv_class == match->pri.pri[0]) | |
865 | + goto found; | |
866 | + } | |
867 | + /* no match */ | |
868 | + if (ps) | |
869 | + *ps = save; | |
870 | + return 0; | |
871 | +found: | |
872 | + /* if we converted more than we used, restore to initial and reconvert | |
873 | + * up to what did match */ | |
874 | + if (i < len) { | |
875 | + len = i; | |
876 | + if (ps) | |
877 | + *ps = save; | |
878 | + if (start) | |
879 | + i--; /* start was supplied by the caller, not converted from src */ | |
880 | + clen = 0; | |
881 | + while(i-- > 0) { | |
882 | + l = mbrtowc_l(&w, src, slen, ps, loc); | |
883 | + src += l; | |
884 | + clen += l; | |
885 | + slen -= l; | |
886 | + } | |
887 | + } | |
888 | + if (dst) { | |
889 | + if (dlen < len) { | |
890 | + if (ps) | |
891 | + *ps = save; | |
892 | + return (size_t)-1; | |
893 | + } | |
894 | + for(wp = buf, l = len; l > 0; l--) /* count with l: len must survive as the return value */ | |
895 | + *dst++ = *wp++; | |
896 | + } | |
897 | + if (rlen) | |
898 | + *rlen = clen; | |
899 | + return len; | |
900 | +} | |
901 | + | |
902 | +#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN | |
903 | +static void | |
904 | +wntohl(wchar_t *str, int len) | |
905 | +{ /* byte-swap (ntohl) up to len wide chars in place, stopping early at a zero element */ | |
906 | + for(; len > 0 && *str; str++, len--) /* test len first so str[len] is never read */ | |
907 | + *str = ntohl(*str); | |
908 | +} | |
909 | +#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ | |
910 | + | |
911 | #ifdef COLLATE_DEBUG | |
912 | +static char * | |
913 | +show(int c) | |
914 | +{ /* debug helper: format c as 'c' (printable ASCII) or \x{hex}; returns a static buffer reused by the next call */ | |
915 | + static char buf[16]; /* "\x{" + up to 8 hex digits + "}" + NUL needs 13 bytes */ | |
916 | + | |
917 | + if (c >=32 && c <= 126) | |
918 | + snprintf(buf, sizeof(buf), "'%c' ", c); | |
919 | + else | |
920 | + snprintf(buf, sizeof(buf), "\\x{%02x}", (unsigned int)c); | |
921 | + return buf; | |
922 | +} | |
923 | + | |
924 | +static char * | |
925 | +showwcs(const wchar_t *t, int len) | |
926 | +{ /* debug helper: render up to len wide chars of t, escaping non-printables as \x{hex}; returns a static buffer, output is truncated if it would not fit */ | |
927 | + static char buf[128]; | |
928 | + char *cp = buf; | |
929 | + | |
930 | + for(; len > 0 && *t && cp < buf + sizeof(buf) - 13; len--, t++) { /* stop while one 12-byte escape + NUL still fits; len is tested before *t */ | |
931 | + if (*t >=32 && *t <= 126) | |
932 | + *cp++ = *t; | |
933 | + else { | |
934 | + sprintf(cp, "\\x{%02x}", (unsigned int)*t); | |
935 | + cp += strlen(cp); | |
936 | + } | |
937 | + } | |
938 | + *cp = 0; | |
939 | + return buf; | |
940 | +} | |
941 | + | |
942 | void | |
943 | __collate_print_tables() | |
944 | { | |
945 | - int i; | |
946 | - struct __collate_st_chain_pri *p2; | |
947 | + int i, z; /* i: entry counter, z: directive index */ | |
3d9156a7 A |
948 | + locale_t loc = __current_locale(); /* debug dump of the current locale's collation tables via printf; the __collate_* macros expand through loc */ |
949 | ||
224c7076 A |
950 | - printf("Substitute table:\n"); |
951 | - for (i = 0; i < UCHAR_MAX + 1; i++) | |
952 | - if (i != *__collate_substitute_table[i]) | |
953 | - printf("\t'%c' --> \"%s\"\n", i, | |
954 | - __collate_substitute_table[i]); | |
955 | - printf("Chain priority table:\n"); | |
3d9156a7 | 956 | - for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++) |
224c7076 A |
957 | - printf("\t\"%s\" : %d %d\n", p2->str, p2->prim, p2->sec); |
958 | + printf("Info: p=%d s=%d f=0x%02x m=%d dc=%d up=%d us=%d pc=%d sc=%d cc=%d lc=%d\n", | |
959 | + __collate_info->directive[0], __collate_info->directive[1], | |
960 | + __collate_info->flags, __collate_info->chain_max_len, | |
961 | + __collate_info->directive_count, | |
962 | + __collate_info->undef_pri[0], __collate_info->undef_pri[1], | |
963 | + __collate_info->subst_count[0], __collate_info->subst_count[1], | |
964 | + __collate_info->chain_count, __collate_info->large_pri_count); | |
965 | + for(z = 0; z < __collate_info->directive_count; z++) { /* one substitute table per directive; empty tables are skipped */ | |
966 | + if (__collate_info->subst_count[z] > 0) { | |
967 | + struct __collate_st_subst *p2 = __collate_substitute_table[z]; | |
968 | + if (z == 0 && (__collate_info->flags & COLLATE_SUBST_DUP)) | |
969 | + printf("Both substitute tables:\n"); | |
970 | + else | |
971 | + printf("Substitute table %d:\n", z); | |
972 | + for (i = __collate_info->subst_count[z]; i-- > 0; p2++) /* i only counts entries; p2 walks the table */ | |
973 | + printf("\t%s --> \"%s\"\n", | |
974 | + show(p2->val), | |
975 | + showwcs(p2->str, STR_LEN)); | |
976 | + } | |
977 | + } | |
978 | + if (__collate_info->chain_count > 0) { | |
979 | + printf("Chain priority table:\n"); | |
980 | + struct __collate_st_chain_pri *p2 = __collate_chain_pri_table; | |
981 | + for (i = __collate_info->chain_count; i-- > 0; p2++) { | |
982 | + printf("\t\"%s\" :", showwcs(p2->str, STR_LEN)); | |
983 | + for(z = 0; z < __collate_info->directive_count; z++) /* one priority column per directive */ | |
984 | + printf(" %d", p2->pri[z]); | |
985 | + putchar('\n'); | |
986 | + } | |
987 | + } | |
3d9156a7 | 988 | printf("Char priority table:\n"); |
224c7076 A |
989 | - for (i = 0; i < UCHAR_MAX + 1; i++) |
990 | - printf("\t'%c' : %d %d\n", i, __collate_char_pri_table[i].prim, | |
991 | - __collate_char_pri_table[i].sec); | |
992 | + { | |
993 | + struct __collate_st_char_pri *p2 = __collate_char_pri_table; | |
994 | + for (i = 0; i < UCHAR_MAX + 1; i++, p2++) { /* one row for every value 0..UCHAR_MAX */ | |
995 | + printf("\t%s :", show(i)); | |
996 | + for(z = 0; z < __collate_info->directive_count; z++) | |
997 | + printf(" %d", p2->pri[z]); | |
998 | + putchar('\n'); | |
999 | + } | |
1000 | + } | |
1001 | + if (__collate_info->large_pri_count > 0) { | |
1002 | + struct __collate_st_large_char_pri *p2 = __collate_large_char_pri_table; | |
1003 | + printf("Large priority table:\n"); | |
1004 | + for (i = __collate_info->large_pri_count; i-- > 0; p2++) { | |
1005 | + printf("\t%s :", show(p2->val)); | |
1006 | + for(z = 0; z < __collate_info->directive_count; z++) | |
1007 | + printf(" %d", p2->pri.pri[z]); | |
1008 | + putchar('\n'); | |
1009 | + } | |
1010 | + } | |
1011 | } | |
1012 | #endif |