]>
Commit | Line | Data |
---|---|---|
9385eb3d A |
1 | /*- |
2 | * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua> | |
3 | * at Electronni Visti IA, Kiev, Ukraine. | |
4 | * All rights reserved. | |
5 | * | |
6 | * Redistribution and use in source and binary forms, with or without | |
7 | * modification, are permitted provided that the following conditions | |
8 | * are met: | |
9 | * 1. Redistributions of source code must retain the above copyright | |
10 | * notice, this list of conditions and the following disclaimer. | |
11 | * 2. Redistributions in binary form must reproduce the above copyright | |
12 | * notice, this list of conditions and the following disclaimer in the | |
13 | * documentation and/or other materials provided with the distribution. | |
14 | * | |
15 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND | |
16 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE | |
19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
21 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
22 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
23 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
24 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
25 | * SUCH DAMAGE. | |
26 | */ | |
27 | ||
28 | #include <sys/cdefs.h> | |
1f2f436a | 29 | __FBSDID("$FreeBSD: src/lib/libc/locale/collate.c,v 1.35 2005/02/27 20:31:13 ru Exp $"); |
9385eb3d | 30 | |
ad3c9f2a A |
31 | #include "xlocale_private.h" |
32 | /* assumes the locale_t variable is named loc */ | |
33 | #define __collate_chain_equiv_table (loc->__lc_collate->__chain_equiv_table) | |
34 | #define __collate_chain_pri_table (loc->__lc_collate->__chain_pri_table) | |
35 | #define __collate_char_pri_table (loc->__lc_collate->__char_pri_table) | |
36 | #define __collate_info (&loc->__lc_collate->__info) | |
37 | #define __collate_large_char_pri_table (loc->__lc_collate->__large_char_pri_table) | |
38 | #define __collate_substitute_table (loc->__lc_collate->__substitute_table) | |
39 | ||
9385eb3d A |
40 | #include "namespace.h" |
41 | #include <arpa/inet.h> | |
42 | #include <stdio.h> | |
43 | #include <stdlib.h> | |
ad3c9f2a | 44 | #include <stddef.h> |
9385eb3d | 45 | #include <string.h> |
ad3c9f2a | 46 | #include <wchar.h> |
9385eb3d A |
47 | #include <errno.h> |
48 | #include <unistd.h> | |
49 | #include <sysexits.h> | |
ad3c9f2a | 50 | #include <ctype.h> |
9385eb3d A |
51 | #include "un-namespace.h" |
52 | ||
53 | #include "collate.h" | |
54 | #include "setlocale.h" | |
55 | #include "ldpart.h" | |
56 | ||
57 | #include "libc_private.h" | |
58 | ||
ad3c9f2a A |
59 | #if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN |
60 | static void wntohl(wchar_t *, int); | |
61 | #endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ | |
9385eb3d A |
62 | void __collate_err(int ex, const char *f) __dead2; |
63 | ||
ad3c9f2a A |
64 | /* |
65 | * Normally, the __collate_* routines should all be __private_extern__, | |
66 | * but grep is using them (3715846). Until we can provide an alternative, | |
67 | * we leave them public, and provide a read-only __collate_load_error variable | |
68 | */ | |
69 | #undef __collate_load_error | |
70 | int __collate_load_error = 1; | |
71 | ||
72 | __private_extern__ int | |
73 | __collate_load_tables(const char *encoding, locale_t loc) | |
9385eb3d | 74 | { |
b061a43b | 75 | int fd; |
9385eb3d | 76 | FILE *fp; |
ad3c9f2a | 77 | int i, saverr, chains, z; |
9385eb3d | 78 | char strbuf[STR_LEN], buf[PATH_MAX]; |
ad3c9f2a A |
79 | struct __xlocale_st_collate *TMP; |
80 | static struct __xlocale_st_collate *cache = NULL; | |
81 | struct __collate_st_info info; | |
82 | void *vp; | |
9385eb3d A |
83 | |
84 | /* 'encoding' must be already checked. */ | |
85 | if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { | |
ad3c9f2a A |
86 | loc->__collate_load_error = 1; |
87 | if (loc == &__global_locale) | |
88 | __collate_load_error = 1; | |
89 | XL_RELEASE(loc->__lc_collate); | |
90 | loc->__lc_collate = NULL; | |
9385eb3d A |
91 | return (_LDP_CACHE); |
92 | } | |
93 | ||
94 | /* | |
95 | * If the locale name is the same as our cache, use the cache. | |
96 | */ | |
ad3c9f2a A |
97 | if (cache && strcmp(encoding, cache->__encoding) == 0) { |
98 | loc->__collate_load_error = 0; | |
99 | if (loc == &__global_locale) | |
100 | __collate_load_error = 0; | |
101 | XL_RELEASE(loc->__lc_collate); | |
102 | loc->__lc_collate = cache; | |
103 | XL_RETAIN(loc->__lc_collate); | |
9385eb3d A |
104 | return (_LDP_CACHE); |
105 | } | |
106 | ||
107 | /* | |
108 | * Slurp the locale file into the cache. | |
109 | */ | |
110 | ||
111 | /* 'PathLocale' must be already set & checked. */ | |
112 | /* Range checking not needed, encoding has fixed size */ | |
974e3884 | 113 | (void)strcpy(buf, encoding); |
9385eb3d | 114 | (void)strcat(buf, "/LC_COLLATE"); |
b061a43b A |
115 | fd = __open_path_locale(buf); |
116 | if (fd == -1) { | |
117 | return (_LDP_ERROR); | |
118 | } | |
119 | if ((fp = fdopen(fd, "r")) == NULL) { | |
120 | close(fd); | |
9385eb3d | 121 | return (_LDP_ERROR); |
974e3884 | 122 | } |
9385eb3d A |
123 | |
124 | if (fread(strbuf, sizeof(strbuf), 1, fp) != 1) { | |
125 | saverr = errno; | |
126 | (void)fclose(fp); | |
127 | errno = saverr; | |
128 | return (_LDP_ERROR); | |
129 | } | |
130 | chains = -1; | |
ad3c9f2a | 131 | if (strcmp(strbuf, COLLATE_VERSION1_1A) == 0) |
9385eb3d A |
132 | chains = 1; |
133 | if (chains < 0) { | |
134 | (void)fclose(fp); | |
135 | errno = EFTYPE; | |
136 | return (_LDP_ERROR); | |
137 | } | |
138 | if (chains) { | |
ad3c9f2a | 139 | if (fread(&info, sizeof(info), 1, fp) != 1) { |
9385eb3d A |
140 | saverr = errno; |
141 | (void)fclose(fp); | |
142 | errno = saverr; | |
143 | return (_LDP_ERROR); | |
144 | } | |
ad3c9f2a A |
145 | #if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN |
146 | for(z = 0; z < info.directive_count; z++) { | |
147 | info.undef_pri[z] = ntohl(info.undef_pri[z]); | |
148 | info.subst_count[z] = ntohl(info.subst_count[z]); | |
149 | } | |
150 | info.chain_count = ntohl(info.chain_count); | |
151 | info.large_pri_count = ntohl(info.large_pri_count); | |
152 | #endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ | |
153 | if ((chains = info.chain_count) < 0) { | |
9385eb3d A |
154 | (void)fclose(fp); |
155 | errno = EFTYPE; | |
156 | return (_LDP_ERROR); | |
157 | } | |
158 | } else | |
159 | chains = TABLE_SIZE; | |
160 | ||
ad3c9f2a A |
161 | i = sizeof(struct __xlocale_st_collate) |
162 | + sizeof(struct __collate_st_chain_pri) * chains | |
163 | + sizeof(struct __collate_st_large_char_pri) * info.large_pri_count; | |
164 | for(z = 0; z < info.directive_count; z++) | |
165 | i += sizeof(struct __collate_st_subst) * info.subst_count[z]; | |
166 | if ((TMP = (struct __xlocale_st_collate *)malloc(i)) == NULL) { | |
9385eb3d A |
167 | saverr = errno; |
168 | (void)fclose(fp); | |
169 | errno = saverr; | |
170 | return (_LDP_ERROR); | |
171 | } | |
ad3c9f2a A |
172 | TMP->__refcount = 2; /* one for the locale, one for the cache */ |
173 | TMP->__free_extra = NULL; | |
9385eb3d A |
174 | |
175 | #define FREAD(a, b, c, d) \ | |
176 | { \ | |
177 | if (fread(a, b, c, d) != c) { \ | |
178 | saverr = errno; \ | |
ad3c9f2a | 179 | free(TMP); \ |
9385eb3d A |
180 | (void)fclose(d); \ |
181 | errno = saverr; \ | |
182 | return (_LDP_ERROR); \ | |
183 | } \ | |
184 | } | |
185 | ||
ad3c9f2a A |
186 | /* adjust size to read the remaining in one chunk */ |
187 | i -= offsetof(struct __xlocale_st_collate, __char_pri_table); | |
188 | FREAD(TMP->__char_pri_table, i, 1, fp); | |
9385eb3d A |
189 | (void)fclose(fp); |
190 | ||
ad3c9f2a A |
191 | vp = (void *)(TMP + 1); |
192 | ||
193 | /* the COLLATE_SUBST_DUP optimization relies on COLL_WEIGHTS_MAX == 2 */ | |
194 | if (info.subst_count[0] > 0) { | |
195 | TMP->__substitute_table[0] = (struct __collate_st_subst *)vp; | |
196 | vp += info.subst_count[0] * sizeof(struct __collate_st_subst); | |
197 | } else | |
198 | TMP->__substitute_table[0] = NULL; | |
199 | if (info.flags & COLLATE_SUBST_DUP) | |
200 | TMP->__substitute_table[1] = TMP->__substitute_table[0]; | |
201 | else if (info.subst_count[1] > 0) { | |
202 | TMP->__substitute_table[1] = (struct __collate_st_subst *)vp; | |
203 | vp += info.subst_count[1] * sizeof(struct __collate_st_subst); | |
204 | } else | |
205 | TMP->__substitute_table[1] = NULL; | |
206 | ||
207 | if (chains > 0) { | |
208 | TMP->__chain_pri_table = (struct __collate_st_chain_pri *)vp; | |
209 | vp += chains * sizeof(struct __collate_st_chain_pri); | |
210 | } else | |
211 | TMP->__chain_pri_table = NULL; | |
212 | if (info.large_pri_count > 0) | |
213 | TMP->__large_char_pri_table = (struct __collate_st_large_char_pri *)vp; | |
214 | else | |
215 | TMP->__large_char_pri_table = NULL; | |
216 | ||
217 | #if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN | |
218 | { | |
219 | struct __collate_st_char_pri *p = TMP->__char_pri_table; | |
220 | for(i = UCHAR_MAX + 1; i-- > 0; p++) { | |
221 | for(z = 0; z < info.directive_count; z++) | |
222 | p->pri[z] = ntohl(p->pri[z]); | |
223 | } | |
224 | } | |
225 | for(z = 0; z < info.directive_count; z++) | |
226 | if (info.subst_count[z] > 0) { | |
227 | struct __collate_st_subst *p = TMP->__substitute_table[z]; | |
228 | for(i = info.subst_count[z]; i-- > 0; p++) { | |
229 | p->val = ntohl(p->val); | |
230 | wntohl(p->str, STR_LEN); | |
231 | } | |
232 | } | |
233 | { | |
234 | struct __collate_st_chain_pri *p = TMP->__chain_pri_table; | |
235 | for(i = chains; i-- > 0; p++) { | |
236 | wntohl(p->str, STR_LEN); | |
237 | for(z = 0; z < info.directive_count; z++) | |
238 | p->pri[z] = ntohl(p->pri[z]); | |
239 | } | |
240 | } | |
241 | if (info.large_pri_count > 0) { | |
242 | struct __collate_st_large_char_pri *p = TMP->__large_char_pri_table; | |
243 | for(i = info.large_pri_count; i-- > 0; p++) { | |
244 | p->val = ntohl(p->val); | |
245 | for(z = 0; z < info.directive_count; z++) | |
246 | p->pri.pri[z] = ntohl(p->pri.pri[z]); | |
9385eb3d A |
247 | } |
248 | } | |
ad3c9f2a A |
249 | #endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ |
250 | (void)strcpy(TMP->__encoding, encoding); | |
251 | (void)memcpy(&TMP->__info, &info, sizeof(info)); | |
252 | XL_RELEASE(cache); | |
253 | cache = TMP; | |
254 | XL_RELEASE(loc->__lc_collate); | |
255 | loc->__lc_collate = cache; | |
256 | /* no need to retain, since we set __refcount to 2 above */ | |
257 | ||
258 | loc->__collate_substitute_nontrivial = (info.subst_count[0] > 0 || info.subst_count[1] > 0); | |
259 | loc->__collate_load_error = 0; | |
260 | if (loc == &__global_locale) | |
261 | __collate_load_error = 0; | |
9385eb3d A |
262 | |
263 | return (_LDP_LOADED); | |
264 | } | |
265 | ||
ad3c9f2a A |
/*
 * Length of the wide string 's', looking at no more than 'len' elements.
 * The bound is tested before each element is read, so an array of exactly
 * 'len' elements with no terminator is handled without reading past it.
 */
static int
__collate_wcsnlen(const wchar_t *s, int len)
{
	int n = 0;

	while (n < len && s[n] != 0)
		n++;
	return n;
}
276 | ||
277 | static struct __collate_st_subst * | |
278 | substsearch(const wchar_t key, struct __collate_st_subst *tab, int n) | |
279 | { | |
280 | int low = 0; | |
281 | int high = n - 1; | |
282 | int next, compar; | |
283 | struct __collate_st_subst *p; | |
284 | ||
285 | while (low <= high) { | |
286 | next = (low + high) / 2; | |
287 | p = tab + next; | |
288 | compar = key - p->val; | |
289 | if (compar == 0) | |
290 | return p; | |
291 | if (compar > 0) | |
292 | low = next + 1; | |
293 | else | |
294 | high = next - 1; | |
295 | } | |
296 | return NULL; | |
297 | } | |
298 | ||
299 | __private_extern__ wchar_t * | |
300 | __collate_substitute(const wchar_t *s, int which, locale_t loc) | |
9385eb3d A |
301 | { |
302 | int dest_len, len, nlen; | |
ad3c9f2a A |
303 | int n, delta, nsubst; |
304 | wchar_t *dest_str = NULL; | |
305 | const wchar_t *fp; | |
306 | struct __collate_st_subst *subst, *match; | |
9385eb3d A |
307 | |
308 | if (s == NULL || *s == '\0') | |
ad3c9f2a A |
309 | return (__collate_wcsdup(L"")); |
310 | dest_len = wcslen(s); | |
311 | nsubst = __collate_info->subst_count[which]; | |
312 | if (nsubst <= 0) | |
313 | return __collate_wcsdup(s); | |
314 | subst = __collate_substitute_table[which]; | |
315 | delta = dest_len / 4; | |
316 | if (delta < 2) | |
317 | delta = 2; | |
318 | dest_str = (wchar_t *)malloc((dest_len += delta) * sizeof(wchar_t)); | |
9385eb3d | 319 | if (dest_str == NULL) |
3d9156a7 | 320 | __collate_err(EX_OSERR, __func__); |
9385eb3d A |
321 | len = 0; |
322 | while (*s) { | |
ad3c9f2a A |
323 | if ((match = substsearch(*s, subst, nsubst)) != NULL) { |
324 | fp = match->str; | |
325 | n = __collate_wcsnlen(fp, STR_LEN); | |
326 | } else { | |
327 | fp = s; | |
328 | n = 1; | |
329 | } | |
330 | nlen = len + n; | |
9385eb3d | 331 | if (dest_len <= nlen) { |
ad3c9f2a | 332 | dest_str = reallocf(dest_str, (dest_len = nlen + delta) * sizeof(wchar_t)); |
9385eb3d | 333 | if (dest_str == NULL) |
3d9156a7 | 334 | __collate_err(EX_OSERR, __func__); |
9385eb3d | 335 | } |
ad3c9f2a A |
336 | wcsncpy(dest_str + len, fp, n); |
337 | len += n; | |
338 | s++; | |
9385eb3d | 339 | } |
ad3c9f2a | 340 | dest_str[len] = 0; |
9385eb3d A |
341 | return (dest_str); |
342 | } | |
343 | ||
ad3c9f2a A |
344 | static struct __collate_st_chain_pri * |
345 | chainsearch(const wchar_t *key, int *len, locale_t loc) | |
346 | { | |
347 | int low = 0; | |
348 | int high = __collate_info->chain_count - 1; | |
349 | int next, compar, l; | |
350 | struct __collate_st_chain_pri *p; | |
351 | struct __collate_st_chain_pri *tab = __collate_chain_pri_table; | |
352 | ||
353 | while (low <= high) { | |
354 | next = (low + high) / 2; | |
355 | p = tab + next; | |
356 | compar = *key - *p->str; | |
357 | if (compar == 0) { | |
358 | l = __collate_wcsnlen(p->str, STR_LEN); | |
359 | compar = wcsncmp(key, p->str, l); | |
360 | if (compar == 0) { | |
361 | *len = l; | |
362 | return p; | |
363 | } | |
364 | } | |
365 | if (compar > 0) | |
366 | low = next + 1; | |
367 | else | |
368 | high = next - 1; | |
369 | } | |
370 | return NULL; | |
371 | } | |
372 | ||
373 | static struct __collate_st_large_char_pri * | |
374 | largesearch(const wchar_t key, locale_t loc) | |
375 | { | |
376 | int low = 0; | |
377 | int high = __collate_info->large_pri_count - 1; | |
378 | int next, compar; | |
379 | struct __collate_st_large_char_pri *p; | |
380 | struct __collate_st_large_char_pri *tab = __collate_large_char_pri_table; | |
381 | ||
382 | while (low <= high) { | |
383 | next = (low + high) / 2; | |
384 | p = tab + next; | |
385 | compar = key - p->val; | |
386 | if (compar == 0) | |
387 | return p; | |
388 | if (compar > 0) | |
389 | low = next + 1; | |
390 | else | |
391 | high = next - 1; | |
392 | } | |
393 | return NULL; | |
394 | } | |
395 | ||
396 | __private_extern__ void | |
397 | __collate_lookup_l(const wchar_t *t, int *len, int *prim, int *sec, locale_t loc) | |
9385eb3d A |
398 | { |
399 | struct __collate_st_chain_pri *p2; | |
ad3c9f2a | 400 | int l; |
9385eb3d A |
401 | |
402 | *len = 1; | |
403 | *prim = *sec = 0; | |
ad3c9f2a A |
404 | p2 = chainsearch(t, &l, loc); |
405 | /* use the chain if prim >= 0 */ | |
406 | if (p2 && p2->pri[0] >= 0) { | |
407 | *len = l; | |
408 | *prim = p2->pri[0]; | |
409 | *sec = p2->pri[1]; | |
410 | return; | |
411 | } | |
412 | if (*t <= UCHAR_MAX) { | |
413 | *prim = __collate_char_pri_table[*t].pri[0]; | |
414 | *sec = __collate_char_pri_table[*t].pri[1]; | |
415 | return; | |
416 | } | |
417 | if (__collate_info->large_pri_count > 0) { | |
418 | struct __collate_st_large_char_pri *match; | |
419 | match = largesearch(*t, loc); | |
420 | if (match) { | |
421 | *prim = match->pri.pri[0]; | |
422 | *sec = match->pri.pri[1]; | |
423 | return; | |
424 | } | |
425 | } | |
426 | *prim = (l = __collate_info->undef_pri[0]) >= 0 ? l : *t - l; | |
427 | *sec = (l = __collate_info->undef_pri[1]) >= 0 ? l : *t - l; | |
428 | } | |
429 | ||
430 | /* | |
431 | * This is only provided for programs (like grep) that are calling this | |
432 | * private function. This will go away eventually. | |
433 | */ | |
434 | void | |
435 | __collate_lookup(const unsigned char *t, int *len, int *prim, int *sec) | |
436 | { | |
437 | locale_t loc = __current_locale(); | |
438 | wchar_t *w = __collate_mbstowcs((const char *)t, loc); | |
439 | int sverrno; | |
440 | ||
441 | __collate_lookup_l(w, len, prim, sec, loc); | |
442 | sverrno = errno; | |
443 | free(w); | |
444 | errno = sverrno; | |
445 | } | |
446 | ||
447 | __private_extern__ void | |
448 | __collate_lookup_which(const wchar_t *t, int *len, int *pri, int which, locale_t loc) | |
449 | { | |
450 | struct __collate_st_chain_pri *p2; | |
451 | int p, l; | |
452 | ||
453 | *len = 1; | |
454 | *pri = 0; | |
455 | p2 = chainsearch(t, &l, loc); | |
456 | if (p2) { | |
457 | p = p2->pri[which]; | |
458 | /* use the chain if pri >= 0 */ | |
459 | if (p >= 0) { | |
460 | *len = l; | |
461 | *pri = p; | |
462 | return; | |
463 | } | |
464 | } | |
465 | if (*t <= UCHAR_MAX) { | |
466 | *pri = __collate_char_pri_table[*t].pri[which]; | |
467 | return; | |
468 | } | |
469 | if (__collate_info->large_pri_count > 0) { | |
470 | struct __collate_st_large_char_pri *match; | |
471 | match = largesearch(*t, loc); | |
472 | if (match) { | |
473 | *pri = match->pri.pri[which]; | |
9385eb3d A |
474 | return; |
475 | } | |
476 | } | |
ad3c9f2a | 477 | *pri = (l = __collate_info->undef_pri[which]) >= 0 ? l : *t - l; |
9385eb3d A |
478 | } |
479 | ||
ad3c9f2a A |
480 | __private_extern__ wchar_t * |
481 | __collate_mbstowcs(const char *s, locale_t loc) | |
9385eb3d | 482 | { |
ad3c9f2a A |
483 | static const mbstate_t initial; |
484 | mbstate_t st; | |
485 | size_t len; | |
486 | const char *ss; | |
487 | wchar_t *wcs; | |
9385eb3d | 488 | |
ad3c9f2a A |
489 | ss = s; |
490 | st = initial; | |
491 | if ((len = mbsrtowcs_l(NULL, &ss, 0, &st, loc)) == (size_t)-1) | |
492 | return NULL; | |
493 | if ((wcs = (wchar_t *)malloc((len + 1) * sizeof(wchar_t))) == NULL) | |
3d9156a7 | 494 | __collate_err(EX_OSERR, __func__); |
ad3c9f2a A |
495 | st = initial; |
496 | mbsrtowcs_l(wcs, &s, len, &st, loc); | |
497 | wcs[len] = 0; | |
498 | ||
499 | return (wcs); | |
9385eb3d A |
500 | } |
501 | ||
ad3c9f2a A |
502 | __private_extern__ wchar_t * |
503 | __collate_wcsdup(const wchar_t *s) | |
504 | { | |
505 | size_t len = wcslen(s) + 1; | |
506 | wchar_t *wcs; | |
507 | ||
508 | if ((wcs = (wchar_t *)malloc(len * sizeof(wchar_t))) == NULL) | |
509 | __collate_err(EX_OSERR, __func__); | |
510 | wcscpy(wcs, s); | |
511 | return (wcs); | |
512 | } | |
513 | ||
514 | __private_extern__ void | |
515 | __collate_xfrm(const wchar_t *src, wchar_t **xf, locale_t loc) | |
516 | { | |
517 | int pri, len; | |
518 | size_t slen; | |
519 | const wchar_t *t; | |
520 | wchar_t *tt = NULL, *tr = NULL; | |
521 | int direc, pass; | |
522 | wchar_t *xfp; | |
523 | struct __collate_st_info *info = __collate_info; | |
524 | int sverrno; | |
525 | ||
526 | for(pass = 0; pass < COLL_WEIGHTS_MAX; pass++) | |
527 | xf[pass] = NULL; | |
528 | for(pass = 0; pass < info->directive_count; pass++) { | |
529 | direc = info->directive[pass]; | |
530 | if (pass == 0 || !(info->flags & COLLATE_SUBST_DUP)) { | |
531 | sverrno = errno; | |
532 | free(tt); | |
533 | errno = sverrno; | |
534 | tt = __collate_substitute(src, pass, loc); | |
535 | } | |
536 | if (direc & DIRECTIVE_BACKWARD) { | |
537 | wchar_t *bp, *fp, c; | |
538 | sverrno = errno; | |
539 | free(tr); | |
540 | errno = sverrno; | |
541 | tr = __collate_wcsdup(tt ? tt : src); | |
542 | bp = tr; | |
543 | fp = tr + wcslen(tr) - 1; | |
544 | while(bp < fp) { | |
545 | c = *bp; | |
546 | *bp++ = *fp; | |
547 | *fp-- = c; | |
548 | } | |
549 | t = (const wchar_t *)tr; | |
550 | } else if (tt) | |
551 | t = (const wchar_t *)tt; | |
552 | else | |
553 | t = (const wchar_t *)src; | |
554 | sverrno = errno; | |
555 | if ((xf[pass] = (wchar_t *)malloc(sizeof(wchar_t) * (wcslen(t) + 1))) == NULL) { | |
556 | errno = sverrno; | |
557 | slen = 0; | |
558 | goto end; | |
559 | } | |
560 | errno = sverrno; | |
561 | xfp = xf[pass]; | |
562 | if (direc & DIRECTIVE_POSITION) { | |
563 | while(*t) { | |
564 | __collate_lookup_which(t, &len, &pri, pass, loc); | |
565 | t += len; | |
566 | if (pri <= 0) { | |
567 | if (pri < 0) { | |
568 | errno = EINVAL; | |
569 | slen = 0; | |
570 | goto end; | |
571 | } | |
572 | pri = COLLATE_MAX_PRIORITY; | |
573 | } | |
574 | *xfp++ = pri; | |
575 | } | |
576 | } else { | |
577 | while(*t) { | |
578 | __collate_lookup_which(t, &len, &pri, pass, loc); | |
579 | t += len; | |
580 | if (pri <= 0) { | |
581 | if (pri < 0) { | |
582 | errno = EINVAL; | |
583 | slen = 0; | |
584 | goto end; | |
585 | } | |
586 | continue; | |
587 | } | |
588 | *xfp++ = pri; | |
589 | } | |
590 | } | |
591 | *xfp = 0; | |
592 | } | |
593 | end: | |
594 | sverrno = errno; | |
595 | free(tt); | |
596 | free(tr); | |
597 | errno = sverrno; | |
598 | } | |
599 | ||
600 | __private_extern__ void | |
9385eb3d A |
601 | __collate_err(int ex, const char *f) |
602 | { | |
603 | const char *s; | |
604 | int serrno = errno; | |
605 | ||
606 | s = _getprogname(); | |
607 | _write(STDERR_FILENO, s, strlen(s)); | |
608 | _write(STDERR_FILENO, ": ", 2); | |
609 | s = f; | |
610 | _write(STDERR_FILENO, s, strlen(s)); | |
611 | _write(STDERR_FILENO, ": ", 2); | |
612 | s = strerror(serrno); | |
613 | _write(STDERR_FILENO, s, strlen(s)); | |
614 | _write(STDERR_FILENO, "\n", 1); | |
615 | exit(ex); | |
616 | } | |
617 | ||
ad3c9f2a A |
618 | /* |
619 | * __collate_collating_symbol takes the multibyte string specified by | |
620 | * src and slen, and using ps, converts that to a wide character. Then | |
621 | * it is checked to verify it is a collating symbol, and then copies | |
622 | * it to the wide character string specified by dst and dlen (the | |
623 | * results are not null terminated). The length of the wide characters | |
624 | * copied to dst is returned if successful. Zero is returned if no such | |
625 | * collating symbol exists. (size_t)-1 is returned if there are wide-character | |
626 | * conversion errors, if the length of the converted string is greater that | |
627 | * STR_LEN or if dlen is too small. It is up to the calling routine to | |
628 | * preserve the mbstate_t structure as needed. | |
629 | */ | |
630 | __private_extern__ size_t | |
631 | __collate_collating_symbol(wchar_t *dst, size_t dlen, const char *src, size_t slen, mbstate_t *ps, locale_t loc) | |
632 | { | |
633 | wchar_t wname[STR_LEN]; | |
634 | wchar_t w, *wp; | |
635 | size_t len, l; | |
636 | ||
637 | /* POSIX locale */ | |
638 | if (loc->__collate_load_error) { | |
639 | if (dlen < 1) | |
640 | return (size_t)-1; | |
641 | if (slen != 1 || !isascii(*src)) | |
642 | return 0; | |
643 | *dst = *src; | |
644 | return 1; | |
645 | } | |
646 | for(wp = wname, len = 0; slen > 0; len++) { | |
647 | l = mbrtowc_l(&w, src, slen, ps, loc); | |
648 | if (l == (size_t)-1 || l == (size_t)-2) | |
649 | return (size_t)-1; | |
650 | if (l == 0) | |
651 | break; | |
652 | if (len >= STR_LEN) | |
653 | return -1; | |
654 | *wp++ = w; | |
655 | src += l; | |
656 | slen = (long)slen - (long)l; | |
657 | } | |
658 | if (len == 0 || len > dlen) | |
659 | return (size_t)-1; | |
660 | if (len == 1) { | |
661 | if (*wname <= UCHAR_MAX) { | |
662 | if (__collate_char_pri_table[*wname].pri[0] >= 0) { | |
663 | if (dlen > 0) | |
664 | *dst = *wname; | |
665 | return 1; | |
666 | } | |
667 | return 0; | |
668 | } else if (__collate_info->large_pri_count > 0) { | |
669 | struct __collate_st_large_char_pri *match; | |
670 | match = largesearch(*wname, loc); | |
671 | if (match && match->pri.pri[0] >= 0) { | |
672 | if (dlen > 0) | |
673 | *dst = *wname; | |
674 | return 1; | |
675 | } | |
676 | } | |
677 | return 0; | |
678 | } | |
679 | *wp = 0; | |
680 | if (__collate_info->chain_count > 0) { | |
681 | struct __collate_st_chain_pri *match; | |
682 | int ll; | |
683 | match = chainsearch(wname, &ll, loc); | |
684 | if (match) { | |
685 | if (ll < dlen) | |
686 | dlen = ll; | |
687 | wcsncpy(dst, wname, dlen); | |
688 | return ll; | |
689 | } | |
690 | } | |
691 | return 0; | |
692 | } | |
693 | ||
694 | /* | |
695 | * __collate_equiv_class returns the equivalence class number for the symbol | |
696 | * specified by src and slen, using ps to convert from multi-byte to wide | |
697 | * character. Zero is returned if the symbol is not in an equivalence | |
698 | * class. -1 is returned if there are wide character conversion error, | |
699 | * if there are any greater-than-8-bit characters or if a multi-byte symbol | |
700 | * is greater or equal to STR_LEN in length. It is up to the calling | |
701 | * routine to preserve the mbstate_t structure as needed. | |
702 | */ | |
703 | __private_extern__ int | |
704 | __collate_equiv_class(const char *src, size_t slen, mbstate_t *ps, locale_t loc) | |
705 | { | |
706 | wchar_t wname[STR_LEN]; | |
707 | wchar_t w, *wp; | |
708 | size_t len, l; | |
709 | int e; | |
710 | ||
711 | /* POSIX locale */ | |
712 | if (loc->__collate_load_error) | |
713 | return 0; | |
714 | for(wp = wname, len = 0; slen > 0; len++) { | |
715 | l = mbrtowc_l(&w, src, slen, ps, loc); | |
716 | if (l == (size_t)-1 || l == (size_t)-2) | |
717 | return -1; | |
718 | if (l == 0) | |
719 | break; | |
720 | if (len >= STR_LEN) | |
721 | return -1; | |
722 | *wp++ = w; | |
723 | src += l; | |
724 | slen = (long)slen - (long)l; | |
725 | } | |
726 | if (len == 0) | |
727 | return -1; | |
728 | if (len == 1) { | |
729 | e = -1; | |
730 | if (*wname <= UCHAR_MAX) | |
731 | e = __collate_char_pri_table[*wname].pri[0]; | |
732 | else if (__collate_info->large_pri_count > 0) { | |
733 | struct __collate_st_large_char_pri *match; | |
734 | match = largesearch(*wname, loc); | |
735 | if (match) | |
736 | e = match->pri.pri[0]; | |
737 | } | |
738 | if (e == 0) | |
739 | return IGNORE_EQUIV_CLASS; | |
740 | return e > 0 ? e : 0; | |
741 | } | |
742 | *wp = 0; | |
743 | if (__collate_info->chain_count > 0) { | |
744 | struct __collate_st_chain_pri *match; | |
745 | int ll; | |
746 | match = chainsearch(wname, &ll, loc); | |
747 | if (match) { | |
748 | e = match->pri[0]; | |
749 | if (e == 0) | |
750 | return IGNORE_EQUIV_CLASS; | |
751 | return e < 0 ? -e : e; | |
752 | } | |
753 | } | |
754 | return 0; | |
755 | } | |
756 | ||
757 | /* | |
758 | * __collate_equiv_match tries to match any single or multi-character symbol | |
759 | * in equivalence class equiv_class in the multi-byte string specified by src | |
760 | * and slen. If start is non-zero, it is taken to be the first (pre-converted) | |
761 | * wide character. Subsequence wide characters, if needed, will use ps in | |
762 | * the conversion. On a successful match, the length of the matched string | |
763 | * is returned (including the start character). If dst is non-NULL, the | |
764 | * matched wide-character string is copied to dst, a wide character array of | |
765 | * length dlen (the results are not zero-terminated). If rlen is non-NULL, | |
766 | * the number of character in src actually used is returned. Zero is | |
767 | * returned by __collate_equiv_match if there is no match. (size_t)-1 is | |
768 | * returned on error: if there were conversion errors or if dlen is too small | |
769 | * to accept the results. On no match or error, ps is restored to its incoming | |
770 | * state. | |
771 | */ | |
772 | size_t | |
773 | __collate_equiv_match(int equiv_class, wchar_t *dst, size_t dlen, wchar_t start, const char *src, size_t slen, mbstate_t *ps, size_t *rlen, locale_t loc) | |
774 | { | |
775 | wchar_t w; | |
776 | size_t len, l, clen; | |
777 | int i; | |
778 | wchar_t buf[STR_LEN], *wp; | |
779 | mbstate_t save; | |
780 | const char *s = src; | |
781 | size_t sl = slen; | |
782 | struct __collate_st_chain_pri *ch = NULL; | |
783 | ||
784 | /* POSIX locale */ | |
785 | if (loc->__collate_load_error) | |
786 | return (size_t)-1; | |
787 | if (equiv_class == IGNORE_EQUIV_CLASS) | |
788 | equiv_class = 0; | |
789 | if (ps) | |
790 | save = *ps; | |
791 | wp = buf; | |
792 | len = clen = 0; | |
793 | if (start) { | |
794 | *wp++ = start; | |
795 | len = 1; | |
796 | } | |
797 | /* convert up to the max chain length */ | |
798 | while(sl > 0 && len < __collate_info->chain_max_len) { | |
799 | l = mbrtowc_l(&w, s, sl, ps, loc); | |
800 | if (l == (size_t)-1 || l == (size_t)-2 || l == 0) | |
801 | break; | |
802 | *wp++ = w; | |
803 | s += l; | |
804 | clen += l; | |
805 | sl -= l; | |
806 | len++; | |
807 | } | |
808 | *wp = 0; | |
809 | if (len > 1 && (ch = chainsearch(buf, &i, loc)) != NULL) { | |
810 | int e = ch->pri[0]; | |
811 | if (e < 0) | |
812 | e = -e; | |
813 | if (e == equiv_class) | |
814 | goto found; | |
815 | } | |
816 | /* try single character */ | |
817 | i = 1; | |
818 | if (*buf <= UCHAR_MAX) { | |
819 | if (equiv_class == __collate_char_pri_table[*buf].pri[0]) | |
820 | goto found; | |
821 | } else if (__collate_info->large_pri_count > 0) { | |
822 | struct __collate_st_large_char_pri *match; | |
823 | match = largesearch(*buf, loc); | |
824 | if (match && equiv_class == match->pri.pri[0]) | |
825 | goto found; | |
826 | } | |
827 | /* no match */ | |
828 | if (ps) | |
829 | *ps = save; | |
830 | return 0; | |
831 | found: | |
832 | /* if we converted more than we used, restore to initial and reconvert | |
833 | * up to what did match */ | |
834 | if (i < len) { | |
835 | len = i; | |
836 | if (ps) | |
837 | *ps = save; | |
838 | if (start) | |
839 | i--; | |
840 | clen = 0; | |
841 | while(i-- > 0) { | |
842 | l = mbrtowc_l(&w, src, slen, ps, loc); | |
843 | src += l; | |
844 | clen += l; | |
845 | slen -= l; | |
846 | } | |
847 | } | |
848 | if (dst) { | |
849 | if (dlen < len) { | |
850 | if (ps) | |
851 | *ps = save; | |
852 | return (size_t)-1; | |
853 | } | |
854 | for(wp = buf; len > 0; len--) | |
855 | *dst++ = *wp++; | |
856 | } | |
857 | if (rlen) | |
858 | *rlen = clen; | |
859 | return len; | |
860 | } | |
861 | ||
862 | /* | |
863 | * __collate_equiv_value returns the primary collation value for the given | |
864 | * collating symbol specified by str and len. Zero or negative is return | |
865 | * if the collating symbol was not found. (Use by the bracket code in TRE.) | |
866 | */ | |
867 | __private_extern__ int | |
868 | __collate_equiv_value(locale_t loc, const wchar_t *str, size_t len) | |
869 | { | |
870 | int e; | |
871 | ||
872 | if (len < 1 || len >= STR_LEN) | |
873 | return -1; | |
874 | ||
875 | /* POSIX locale */ | |
876 | if (loc->__collate_load_error) | |
877 | return (len == 1 && *str <= UCHAR_MAX) ? *str : -1; | |
878 | ||
879 | if (len == 1) { | |
880 | e = -1; | |
881 | if (*str <= UCHAR_MAX) | |
882 | e = __collate_char_pri_table[*str].pri[0]; | |
883 | else if (__collate_info->large_pri_count > 0) { | |
884 | struct __collate_st_large_char_pri *match; | |
885 | match = largesearch(*str, loc); | |
886 | if (match) | |
887 | e = match->pri.pri[0]; | |
888 | } | |
889 | if (e == 0) | |
890 | return IGNORE_EQUIV_CLASS; | |
891 | return e > 0 ? e : 0; | |
892 | } | |
893 | if (__collate_info->chain_count > 0) { | |
894 | wchar_t name[STR_LEN]; | |
895 | struct __collate_st_chain_pri *match; | |
896 | int ll; | |
897 | ||
898 | wcsncpy(name, str, len); | |
899 | name[len] = 0; | |
900 | match = chainsearch(name, &ll, loc); | |
901 | if (match) { | |
902 | e = match->pri[0]; | |
903 | if (e == 0) | |
904 | return IGNORE_EQUIV_CLASS; | |
905 | return e < 0 ? -e : e; | |
906 | } | |
907 | } | |
908 | return 0; | |
909 | } | |
910 | ||
911 | #if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN | |
/*
 * Convert up to 'len' elements of 'str' from network to host byte order,
 * stopping early at a zero element.  The remaining count is tested before
 * each element is read, so a full array with no terminator is never
 * overrun.
 */
static void
wntohl(wchar_t *str, int len)
{
	for (; len > 0 && *str != 0; str++, len--)
		*str = ntohl(*str);
}
918 | #endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ | |
919 | ||
9385eb3d | 920 | #ifdef COLLATE_DEBUG |
ad3c9f2a A |
/*
 * Format character code 'c' for the debug dump: printable ASCII (32..126)
 * as 'c' followed by a space, anything else as \x{hex}.  The result lives
 * in a static buffer that the next call overwrites.
 */
static char *
show(int c)
{
	/* "\x{" + up to 8 hex digits + "}" + NUL needs 13 bytes */
	static char buf[16];

	if (c >= 32 && c <= 126)
		snprintf(buf, sizeof(buf), "'%c' ", c);
	else
		snprintf(buf, sizeof(buf), "\\x{%02x}", (unsigned int)c);
	return buf;
}
932 | ||
/*
 * Format up to 'len' wide characters of 't' (stopping at a zero element)
 * for the debug dump: printable ASCII is copied as is, anything else is
 * written as \x{hex}.  Output that would not fit the static buffer is
 * truncated at a whole-element boundary.  The result lives in a static
 * buffer that the next call overwrites.
 */
static char *
showwcs(const wchar_t *t, int len)
{
	static char buf[128];
	char *cp = buf;
	char *const end = buf + sizeof(buf) - 1;	/* last slot is for the NUL */

	for (; len > 0 && *t != 0; len--, t++) {
		if (*t >= 32 && *t <= 126) {
			if (cp >= end)
				break;
			*cp++ = (char)*t;
		} else {
			char esc[16];
			int n = snprintf(esc, sizeof(esc), "\\x{%02x}", (unsigned int)*t);

			if (n < 0 || n > end - cp)
				break;
			memcpy(cp, esc, (size_t)n);
			cp += n;
		}
	}
	*cp = 0;
	return buf;
}
950 | ||
9385eb3d A |
951 | void |
952 | __collate_print_tables() | |
953 | { | |
ad3c9f2a A |
954 | int i, z; |
955 | locale_t loc = __current_locale(); | |
9385eb3d | 956 | |
ad3c9f2a A |
957 | printf("Info: p=%d s=%d f=0x%02x m=%d dc=%d up=%d us=%d pc=%d sc=%d cc=%d lc=%d\n", |
958 | __collate_info->directive[0], __collate_info->directive[1], | |
959 | __collate_info->flags, __collate_info->chain_max_len, | |
960 | __collate_info->directive_count, | |
961 | __collate_info->undef_pri[0], __collate_info->undef_pri[1], | |
962 | __collate_info->subst_count[0], __collate_info->subst_count[1], | |
963 | __collate_info->chain_count, __collate_info->large_pri_count); | |
964 | for(z = 0; z < __collate_info->directive_count; z++) { | |
965 | if (__collate_info->subst_count[z] > 0) { | |
966 | struct __collate_st_subst *p2 = __collate_substitute_table[z]; | |
967 | if (z == 0 && (__collate_info->flags & COLLATE_SUBST_DUP)) | |
968 | printf("Both substitute tables:\n"); | |
969 | else | |
970 | printf("Substitute table %d:\n", z); | |
971 | for (i = __collate_info->subst_count[z]; i-- > 0; p2++) | |
972 | printf("\t%s --> \"%s\"\n", | |
973 | show(p2->val), | |
974 | showwcs(p2->str, STR_LEN)); | |
975 | } | |
976 | } | |
977 | if (__collate_info->chain_count > 0) { | |
978 | printf("Chain priority table:\n"); | |
979 | struct __collate_st_chain_pri *p2 = __collate_chain_pri_table; | |
980 | for (i = __collate_info->chain_count; i-- > 0; p2++) { | |
981 | printf("\t\"%s\" :", showwcs(p2->str, STR_LEN)); | |
982 | for(z = 0; z < __collate_info->directive_count; z++) | |
983 | printf(" %d", p2->pri[z]); | |
984 | putchar('\n'); | |
985 | } | |
986 | } | |
9385eb3d | 987 | printf("Char priority table:\n"); |
ad3c9f2a A |
988 | { |
989 | struct __collate_st_char_pri *p2 = __collate_char_pri_table; | |
990 | for (i = 0; i < UCHAR_MAX + 1; i++, p2++) { | |
991 | printf("\t%s :", show(i)); | |
992 | for(z = 0; z < __collate_info->directive_count; z++) | |
993 | printf(" %d", p2->pri[z]); | |
994 | putchar('\n'); | |
995 | } | |
996 | } | |
997 | if (__collate_info->large_pri_count > 0) { | |
998 | struct __collate_st_large_char_pri *p2 = __collate_large_char_pri_table; | |
999 | printf("Large priority table:\n"); | |
1000 | for (i = __collate_info->large_pri_count; i-- > 0; p2++) { | |
1001 | printf("\t%s :", show(p2->val)); | |
1002 | for(z = 0; z < __collate_info->directive_count; z++) | |
1003 | printf(" %d", p2->pri.pri[z]); | |
1004 | putchar('\n'); | |
1005 | } | |
1006 | } | |
9385eb3d A |
1007 | } |
1008 | #endif |