]>
Commit | Line | Data |
---|---|---|
9385eb3d A |
1 | /*- |
2 | * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua> | |
3 | * at Electronni Visti IA, Kiev, Ukraine. | |
4 | * All rights reserved. | |
5 | * | |
6 | * Redistribution and use in source and binary forms, with or without | |
7 | * modification, are permitted provided that the following conditions | |
8 | * are met: | |
9 | * 1. Redistributions of source code must retain the above copyright | |
10 | * notice, this list of conditions and the following disclaimer. | |
11 | * 2. Redistributions in binary form must reproduce the above copyright | |
12 | * notice, this list of conditions and the following disclaimer in the | |
13 | * documentation and/or other materials provided with the distribution. | |
14 | * | |
15 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND | |
16 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE | |
19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
21 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
22 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
23 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
24 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
25 | * SUCH DAMAGE. | |
26 | */ | |
27 | ||
28 | #include <sys/cdefs.h> | |
1f2f436a | 29 | __FBSDID("$FreeBSD: src/lib/libc/locale/collate.c,v 1.35 2005/02/27 20:31:13 ru Exp $"); |
9385eb3d | 30 | |
ad3c9f2a A |
31 | #include "xlocale_private.h" |
/*
 * Shorthand names for the collation data of the locale being worked on.
 * Each macro expands to a member of loc->__lc_collate (the __collate_info
 * one to the address of the __info member), so they can only be used inside
 * functions that have a locale_t variable or parameter called loc.
 */
32 | /* assumes the locale_t variable is named loc */ | |
33 | #define __collate_chain_equiv_table (loc->__lc_collate->__chain_equiv_table) | |
34 | #define __collate_chain_pri_table (loc->__lc_collate->__chain_pri_table) | |
35 | #define __collate_char_pri_table (loc->__lc_collate->__char_pri_table) | |
36 | #define __collate_info (&loc->__lc_collate->__info) | |
37 | #define __collate_large_char_pri_table (loc->__lc_collate->__large_char_pri_table) | |
38 | #define __collate_substitute_table (loc->__lc_collate->__substitute_table) | |
39 | ||
9385eb3d A |
40 | #include "namespace.h" |
41 | #include <arpa/inet.h> | |
42 | #include <stdio.h> | |
43 | #include <stdlib.h> | |
ad3c9f2a | 44 | #include <stddef.h> |
9385eb3d | 45 | #include <string.h> |
ad3c9f2a | 46 | #include <wchar.h> |
9385eb3d A |
47 | #include <errno.h> |
48 | #include <unistd.h> | |
49 | #include <sysexits.h> | |
ad3c9f2a | 50 | #include <ctype.h> |
9385eb3d A |
51 | #include "un-namespace.h" |
52 | ||
53 | #include "collate.h" | |
54 | #include "setlocale.h" | |
55 | #include "ldpart.h" | |
56 | ||
57 | #include "libc_private.h" | |
58 | ||
ad3c9f2a A |
/* wntohl() is only declared (and defined, further down) for little-endian builds. */
59 | #if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN |
60 | static void wntohl(wchar_t *, int); | |
61 | #endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ | |
9385eb3d A |
/* Declared __dead2: the definition below ends by calling exit(). */
62 | void __collate_err(int ex, const char *f) __dead2; |
63 | ||
ad3c9f2a A |
64 | /* |
65 | * Normally, the __collate_* routines should all be __private_extern__, | |
66 | * but grep is using them (3715846). Until we can provide an alternative, | |
67 | * we leave them public, and provide a read-only __collate_load_error variable | |
68 | */ | |
69 | #undef __collate_load_error | |
/* Starts at 1; __collate_load_tables() updates it when loading for the global locale. */
70 | int __collate_load_error = 1; | |
71 | ||
72 | __private_extern__ int | |
73 | __collate_load_tables(const char *encoding, locale_t loc) | |
9385eb3d A |
74 | { |
75 | FILE *fp; | |
ad3c9f2a | 76 | int i, saverr, chains, z; |
9385eb3d | 77 | char strbuf[STR_LEN], buf[PATH_MAX]; |
ad3c9f2a A |
78 | struct __xlocale_st_collate *TMP; |
79 | static struct __xlocale_st_collate *cache = NULL; | |
80 | struct __collate_st_info info; | |
81 | void *vp; | |
9385eb3d A |
82 | |
83 | /* 'encoding' must be already checked. */ | |
84 | if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { | |
ad3c9f2a A |
85 | loc->__collate_load_error = 1; |
86 | if (loc == &__global_locale) | |
87 | __collate_load_error = 1; | |
88 | XL_RELEASE(loc->__lc_collate); | |
89 | loc->__lc_collate = NULL; | |
9385eb3d A |
90 | return (_LDP_CACHE); |
91 | } | |
92 | ||
93 | /* | |
94 | * If the locale name is the same as our cache, use the cache. | |
95 | */ | |
ad3c9f2a A |
96 | if (cache && strcmp(encoding, cache->__encoding) == 0) { |
97 | loc->__collate_load_error = 0; | |
98 | if (loc == &__global_locale) | |
99 | __collate_load_error = 0; | |
100 | XL_RELEASE(loc->__lc_collate); | |
101 | loc->__lc_collate = cache; | |
102 | XL_RETAIN(loc->__lc_collate); | |
9385eb3d A |
103 | return (_LDP_CACHE); |
104 | } | |
105 | ||
106 | /* | |
107 | * Slurp the locale file into the cache. | |
108 | */ | |
109 | ||
110 | /* 'PathLocale' must be already set & checked. */ | |
111 | /* Range checking not needed, encoding has fixed size */ | |
112 | (void)strcpy(buf, _PathLocale); | |
113 | (void)strcat(buf, "/"); | |
114 | (void)strcat(buf, encoding); | |
115 | (void)strcat(buf, "/LC_COLLATE"); | |
116 | if ((fp = fopen(buf, "r")) == NULL) | |
117 | return (_LDP_ERROR); | |
118 | ||
119 | if (fread(strbuf, sizeof(strbuf), 1, fp) != 1) { | |
120 | saverr = errno; | |
121 | (void)fclose(fp); | |
122 | errno = saverr; | |
123 | return (_LDP_ERROR); | |
124 | } | |
125 | chains = -1; | |
ad3c9f2a | 126 | if (strcmp(strbuf, COLLATE_VERSION1_1A) == 0) |
9385eb3d A |
127 | chains = 1; |
128 | if (chains < 0) { | |
129 | (void)fclose(fp); | |
130 | errno = EFTYPE; | |
131 | return (_LDP_ERROR); | |
132 | } | |
133 | if (chains) { | |
ad3c9f2a | 134 | if (fread(&info, sizeof(info), 1, fp) != 1) { |
9385eb3d A |
135 | saverr = errno; |
136 | (void)fclose(fp); | |
137 | errno = saverr; | |
138 | return (_LDP_ERROR); | |
139 | } | |
ad3c9f2a A |
140 | #if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN |
141 | for(z = 0; z < info.directive_count; z++) { | |
142 | info.undef_pri[z] = ntohl(info.undef_pri[z]); | |
143 | info.subst_count[z] = ntohl(info.subst_count[z]); | |
144 | } | |
145 | info.chain_count = ntohl(info.chain_count); | |
146 | info.large_pri_count = ntohl(info.large_pri_count); | |
147 | #endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ | |
148 | if ((chains = info.chain_count) < 0) { | |
9385eb3d A |
149 | (void)fclose(fp); |
150 | errno = EFTYPE; | |
151 | return (_LDP_ERROR); | |
152 | } | |
153 | } else | |
154 | chains = TABLE_SIZE; | |
155 | ||
ad3c9f2a A |
156 | i = sizeof(struct __xlocale_st_collate) |
157 | + sizeof(struct __collate_st_chain_pri) * chains | |
158 | + sizeof(struct __collate_st_large_char_pri) * info.large_pri_count; | |
159 | for(z = 0; z < info.directive_count; z++) | |
160 | i += sizeof(struct __collate_st_subst) * info.subst_count[z]; | |
161 | if ((TMP = (struct __xlocale_st_collate *)malloc(i)) == NULL) { | |
9385eb3d A |
162 | saverr = errno; |
163 | (void)fclose(fp); | |
164 | errno = saverr; | |
165 | return (_LDP_ERROR); | |
166 | } | |
ad3c9f2a A |
167 | TMP->__refcount = 2; /* one for the locale, one for the cache */ |
168 | TMP->__free_extra = NULL; | |
9385eb3d A |
169 | |
170 | #define FREAD(a, b, c, d) \ | |
171 | { \ | |
172 | if (fread(a, b, c, d) != c) { \ | |
173 | saverr = errno; \ | |
ad3c9f2a | 174 | free(TMP); \ |
9385eb3d A |
175 | (void)fclose(d); \ |
176 | errno = saverr; \ | |
177 | return (_LDP_ERROR); \ | |
178 | } \ | |
179 | } | |
180 | ||
ad3c9f2a A |
181 | /* adjust size to read the remaining in one chunk */ |
182 | i -= offsetof(struct __xlocale_st_collate, __char_pri_table); | |
183 | FREAD(TMP->__char_pri_table, i, 1, fp); | |
9385eb3d A |
184 | (void)fclose(fp); |
185 | ||
ad3c9f2a A |
186 | vp = (void *)(TMP + 1); |
187 | ||
188 | /* the COLLATE_SUBST_DUP optimization relies on COLL_WEIGHTS_MAX == 2 */ | |
189 | if (info.subst_count[0] > 0) { | |
190 | TMP->__substitute_table[0] = (struct __collate_st_subst *)vp; | |
191 | vp += info.subst_count[0] * sizeof(struct __collate_st_subst); | |
192 | } else | |
193 | TMP->__substitute_table[0] = NULL; | |
194 | if (info.flags & COLLATE_SUBST_DUP) | |
195 | TMP->__substitute_table[1] = TMP->__substitute_table[0]; | |
196 | else if (info.subst_count[1] > 0) { | |
197 | TMP->__substitute_table[1] = (struct __collate_st_subst *)vp; | |
198 | vp += info.subst_count[1] * sizeof(struct __collate_st_subst); | |
199 | } else | |
200 | TMP->__substitute_table[1] = NULL; | |
201 | ||
202 | if (chains > 0) { | |
203 | TMP->__chain_pri_table = (struct __collate_st_chain_pri *)vp; | |
204 | vp += chains * sizeof(struct __collate_st_chain_pri); | |
205 | } else | |
206 | TMP->__chain_pri_table = NULL; | |
207 | if (info.large_pri_count > 0) | |
208 | TMP->__large_char_pri_table = (struct __collate_st_large_char_pri *)vp; | |
209 | else | |
210 | TMP->__large_char_pri_table = NULL; | |
211 | ||
212 | #if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN | |
213 | { | |
214 | struct __collate_st_char_pri *p = TMP->__char_pri_table; | |
215 | for(i = UCHAR_MAX + 1; i-- > 0; p++) { | |
216 | for(z = 0; z < info.directive_count; z++) | |
217 | p->pri[z] = ntohl(p->pri[z]); | |
218 | } | |
219 | } | |
220 | for(z = 0; z < info.directive_count; z++) | |
221 | if (info.subst_count[z] > 0) { | |
222 | struct __collate_st_subst *p = TMP->__substitute_table[z]; | |
223 | for(i = info.subst_count[z]; i-- > 0; p++) { | |
224 | p->val = ntohl(p->val); | |
225 | wntohl(p->str, STR_LEN); | |
226 | } | |
227 | } | |
228 | { | |
229 | struct __collate_st_chain_pri *p = TMP->__chain_pri_table; | |
230 | for(i = chains; i-- > 0; p++) { | |
231 | wntohl(p->str, STR_LEN); | |
232 | for(z = 0; z < info.directive_count; z++) | |
233 | p->pri[z] = ntohl(p->pri[z]); | |
234 | } | |
235 | } | |
236 | if (info.large_pri_count > 0) { | |
237 | struct __collate_st_large_char_pri *p = TMP->__large_char_pri_table; | |
238 | for(i = info.large_pri_count; i-- > 0; p++) { | |
239 | p->val = ntohl(p->val); | |
240 | for(z = 0; z < info.directive_count; z++) | |
241 | p->pri.pri[z] = ntohl(p->pri.pri[z]); | |
9385eb3d A |
242 | } |
243 | } | |
ad3c9f2a A |
244 | #endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ |
245 | (void)strcpy(TMP->__encoding, encoding); | |
246 | (void)memcpy(&TMP->__info, &info, sizeof(info)); | |
247 | XL_RELEASE(cache); | |
248 | cache = TMP; | |
249 | XL_RELEASE(loc->__lc_collate); | |
250 | loc->__lc_collate = cache; | |
251 | /* no need to retain, since we set __refcount to 2 above */ | |
252 | ||
253 | loc->__collate_substitute_nontrivial = (info.subst_count[0] > 0 || info.subst_count[1] > 0); | |
254 | loc->__collate_load_error = 0; | |
255 | if (loc == &__global_locale) | |
256 | __collate_load_error = 0; | |
9385eb3d A |
257 | |
258 | return (_LDP_LOADED); | |
259 | } | |
260 | ||
ad3c9f2a A |
/*
 * Return the number of wide characters in s that precede the first NUL,
 * examining at most len elements.
 *
 * The bound is tested before each element is read, so an array of exactly
 * len elements with no terminator (for example a completely filled
 * str[STR_LEN] table entry) is never read past its end.
 */
static int
__collate_wcsnlen(const wchar_t *s, int len)
{
	int n = 0;

	while (n < len && s[n] != 0)
		n++;
	return n;
}
271 | ||
272 | static struct __collate_st_subst * | |
273 | substsearch(const wchar_t key, struct __collate_st_subst *tab, int n) | |
274 | { | |
275 | int low = 0; | |
276 | int high = n - 1; | |
277 | int next, compar; | |
278 | struct __collate_st_subst *p; | |
279 | ||
280 | while (low <= high) { | |
281 | next = (low + high) / 2; | |
282 | p = tab + next; | |
283 | compar = key - p->val; | |
284 | if (compar == 0) | |
285 | return p; | |
286 | if (compar > 0) | |
287 | low = next + 1; | |
288 | else | |
289 | high = next - 1; | |
290 | } | |
291 | return NULL; | |
292 | } | |
293 | ||
294 | __private_extern__ wchar_t * | |
295 | __collate_substitute(const wchar_t *s, int which, locale_t loc) | |
9385eb3d A |
296 | { |
297 | int dest_len, len, nlen; | |
ad3c9f2a A |
298 | int n, delta, nsubst; |
299 | wchar_t *dest_str = NULL; | |
300 | const wchar_t *fp; | |
301 | struct __collate_st_subst *subst, *match; | |
9385eb3d A |
302 | |
303 | if (s == NULL || *s == '\0') | |
ad3c9f2a A |
304 | return (__collate_wcsdup(L"")); |
305 | dest_len = wcslen(s); | |
306 | nsubst = __collate_info->subst_count[which]; | |
307 | if (nsubst <= 0) | |
308 | return __collate_wcsdup(s); | |
309 | subst = __collate_substitute_table[which]; | |
310 | delta = dest_len / 4; | |
311 | if (delta < 2) | |
312 | delta = 2; | |
313 | dest_str = (wchar_t *)malloc((dest_len += delta) * sizeof(wchar_t)); | |
9385eb3d | 314 | if (dest_str == NULL) |
3d9156a7 | 315 | __collate_err(EX_OSERR, __func__); |
9385eb3d A |
316 | len = 0; |
317 | while (*s) { | |
ad3c9f2a A |
318 | if ((match = substsearch(*s, subst, nsubst)) != NULL) { |
319 | fp = match->str; | |
320 | n = __collate_wcsnlen(fp, STR_LEN); | |
321 | } else { | |
322 | fp = s; | |
323 | n = 1; | |
324 | } | |
325 | nlen = len + n; | |
9385eb3d | 326 | if (dest_len <= nlen) { |
ad3c9f2a | 327 | dest_str = reallocf(dest_str, (dest_len = nlen + delta) * sizeof(wchar_t)); |
9385eb3d | 328 | if (dest_str == NULL) |
3d9156a7 | 329 | __collate_err(EX_OSERR, __func__); |
9385eb3d | 330 | } |
ad3c9f2a A |
331 | wcsncpy(dest_str + len, fp, n); |
332 | len += n; | |
333 | s++; | |
9385eb3d | 334 | } |
ad3c9f2a | 335 | dest_str[len] = 0; |
9385eb3d A |
336 | return (dest_str); |
337 | } | |
338 | ||
ad3c9f2a A |
339 | static struct __collate_st_chain_pri * |
340 | chainsearch(const wchar_t *key, int *len, locale_t loc) | |
341 | { | |
342 | int low = 0; | |
343 | int high = __collate_info->chain_count - 1; | |
344 | int next, compar, l; | |
345 | struct __collate_st_chain_pri *p; | |
346 | struct __collate_st_chain_pri *tab = __collate_chain_pri_table; | |
347 | ||
348 | while (low <= high) { | |
349 | next = (low + high) / 2; | |
350 | p = tab + next; | |
351 | compar = *key - *p->str; | |
352 | if (compar == 0) { | |
353 | l = __collate_wcsnlen(p->str, STR_LEN); | |
354 | compar = wcsncmp(key, p->str, l); | |
355 | if (compar == 0) { | |
356 | *len = l; | |
357 | return p; | |
358 | } | |
359 | } | |
360 | if (compar > 0) | |
361 | low = next + 1; | |
362 | else | |
363 | high = next - 1; | |
364 | } | |
365 | return NULL; | |
366 | } | |
367 | ||
368 | static struct __collate_st_large_char_pri * | |
369 | largesearch(const wchar_t key, locale_t loc) | |
370 | { | |
371 | int low = 0; | |
372 | int high = __collate_info->large_pri_count - 1; | |
373 | int next, compar; | |
374 | struct __collate_st_large_char_pri *p; | |
375 | struct __collate_st_large_char_pri *tab = __collate_large_char_pri_table; | |
376 | ||
377 | while (low <= high) { | |
378 | next = (low + high) / 2; | |
379 | p = tab + next; | |
380 | compar = key - p->val; | |
381 | if (compar == 0) | |
382 | return p; | |
383 | if (compar > 0) | |
384 | low = next + 1; | |
385 | else | |
386 | high = next - 1; | |
387 | } | |
388 | return NULL; | |
389 | } | |
390 | ||
391 | __private_extern__ void | |
392 | __collate_lookup_l(const wchar_t *t, int *len, int *prim, int *sec, locale_t loc) | |
9385eb3d A |
393 | { |
394 | struct __collate_st_chain_pri *p2; | |
ad3c9f2a | 395 | int l; |
9385eb3d A |
396 | |
397 | *len = 1; | |
398 | *prim = *sec = 0; | |
ad3c9f2a A |
399 | p2 = chainsearch(t, &l, loc); |
400 | /* use the chain if prim >= 0 */ | |
401 | if (p2 && p2->pri[0] >= 0) { | |
402 | *len = l; | |
403 | *prim = p2->pri[0]; | |
404 | *sec = p2->pri[1]; | |
405 | return; | |
406 | } | |
407 | if (*t <= UCHAR_MAX) { | |
408 | *prim = __collate_char_pri_table[*t].pri[0]; | |
409 | *sec = __collate_char_pri_table[*t].pri[1]; | |
410 | return; | |
411 | } | |
412 | if (__collate_info->large_pri_count > 0) { | |
413 | struct __collate_st_large_char_pri *match; | |
414 | match = largesearch(*t, loc); | |
415 | if (match) { | |
416 | *prim = match->pri.pri[0]; | |
417 | *sec = match->pri.pri[1]; | |
418 | return; | |
419 | } | |
420 | } | |
421 | *prim = (l = __collate_info->undef_pri[0]) >= 0 ? l : *t - l; | |
422 | *sec = (l = __collate_info->undef_pri[1]) >= 0 ? l : *t - l; | |
423 | } | |
424 | ||
425 | /* | |
426 | * This is only provided for programs (like grep) that are calling this | |
427 | * private function. This will go away eventually. | |
428 | */ | |
429 | void | |
430 | __collate_lookup(const unsigned char *t, int *len, int *prim, int *sec) | |
431 | { | |
432 | locale_t loc = __current_locale(); | |
433 | wchar_t *w = __collate_mbstowcs((const char *)t, loc); | |
434 | int sverrno; | |
435 | ||
436 | __collate_lookup_l(w, len, prim, sec, loc); | |
437 | sverrno = errno; | |
438 | free(w); | |
439 | errno = sverrno; | |
440 | } | |
441 | ||
442 | __private_extern__ void | |
443 | __collate_lookup_which(const wchar_t *t, int *len, int *pri, int which, locale_t loc) | |
444 | { | |
445 | struct __collate_st_chain_pri *p2; | |
446 | int p, l; | |
447 | ||
448 | *len = 1; | |
449 | *pri = 0; | |
450 | p2 = chainsearch(t, &l, loc); | |
451 | if (p2) { | |
452 | p = p2->pri[which]; | |
453 | /* use the chain if pri >= 0 */ | |
454 | if (p >= 0) { | |
455 | *len = l; | |
456 | *pri = p; | |
457 | return; | |
458 | } | |
459 | } | |
460 | if (*t <= UCHAR_MAX) { | |
461 | *pri = __collate_char_pri_table[*t].pri[which]; | |
462 | return; | |
463 | } | |
464 | if (__collate_info->large_pri_count > 0) { | |
465 | struct __collate_st_large_char_pri *match; | |
466 | match = largesearch(*t, loc); | |
467 | if (match) { | |
468 | *pri = match->pri.pri[which]; | |
9385eb3d A |
469 | return; |
470 | } | |
471 | } | |
ad3c9f2a | 472 | *pri = (l = __collate_info->undef_pri[which]) >= 0 ? l : *t - l; |
9385eb3d A |
473 | } |
474 | ||
ad3c9f2a A |
475 | __private_extern__ wchar_t * |
476 | __collate_mbstowcs(const char *s, locale_t loc) | |
9385eb3d | 477 | { |
ad3c9f2a A |
478 | static const mbstate_t initial; |
479 | mbstate_t st; | |
480 | size_t len; | |
481 | const char *ss; | |
482 | wchar_t *wcs; | |
9385eb3d | 483 | |
ad3c9f2a A |
484 | ss = s; |
485 | st = initial; | |
486 | if ((len = mbsrtowcs_l(NULL, &ss, 0, &st, loc)) == (size_t)-1) | |
487 | return NULL; | |
488 | if ((wcs = (wchar_t *)malloc((len + 1) * sizeof(wchar_t))) == NULL) | |
3d9156a7 | 489 | __collate_err(EX_OSERR, __func__); |
ad3c9f2a A |
490 | st = initial; |
491 | mbsrtowcs_l(wcs, &s, len, &st, loc); | |
492 | wcs[len] = 0; | |
493 | ||
494 | return (wcs); | |
9385eb3d A |
495 | } |
496 | ||
ad3c9f2a A |
497 | __private_extern__ wchar_t * |
498 | __collate_wcsdup(const wchar_t *s) | |
499 | { | |
500 | size_t len = wcslen(s) + 1; | |
501 | wchar_t *wcs; | |
502 | ||
503 | if ((wcs = (wchar_t *)malloc(len * sizeof(wchar_t))) == NULL) | |
504 | __collate_err(EX_OSERR, __func__); | |
505 | wcscpy(wcs, s); | |
506 | return (wcs); | |
507 | } | |
508 | ||
509 | __private_extern__ void | |
510 | __collate_xfrm(const wchar_t *src, wchar_t **xf, locale_t loc) | |
511 | { | |
512 | int pri, len; | |
513 | size_t slen; | |
514 | const wchar_t *t; | |
515 | wchar_t *tt = NULL, *tr = NULL; | |
516 | int direc, pass; | |
517 | wchar_t *xfp; | |
518 | struct __collate_st_info *info = __collate_info; | |
519 | int sverrno; | |
520 | ||
521 | for(pass = 0; pass < COLL_WEIGHTS_MAX; pass++) | |
522 | xf[pass] = NULL; | |
523 | for(pass = 0; pass < info->directive_count; pass++) { | |
524 | direc = info->directive[pass]; | |
525 | if (pass == 0 || !(info->flags & COLLATE_SUBST_DUP)) { | |
526 | sverrno = errno; | |
527 | free(tt); | |
528 | errno = sverrno; | |
529 | tt = __collate_substitute(src, pass, loc); | |
530 | } | |
531 | if (direc & DIRECTIVE_BACKWARD) { | |
532 | wchar_t *bp, *fp, c; | |
533 | sverrno = errno; | |
534 | free(tr); | |
535 | errno = sverrno; | |
536 | tr = __collate_wcsdup(tt ? tt : src); | |
537 | bp = tr; | |
538 | fp = tr + wcslen(tr) - 1; | |
539 | while(bp < fp) { | |
540 | c = *bp; | |
541 | *bp++ = *fp; | |
542 | *fp-- = c; | |
543 | } | |
544 | t = (const wchar_t *)tr; | |
545 | } else if (tt) | |
546 | t = (const wchar_t *)tt; | |
547 | else | |
548 | t = (const wchar_t *)src; | |
549 | sverrno = errno; | |
550 | if ((xf[pass] = (wchar_t *)malloc(sizeof(wchar_t) * (wcslen(t) + 1))) == NULL) { | |
551 | errno = sverrno; | |
552 | slen = 0; | |
553 | goto end; | |
554 | } | |
555 | errno = sverrno; | |
556 | xfp = xf[pass]; | |
557 | if (direc & DIRECTIVE_POSITION) { | |
558 | while(*t) { | |
559 | __collate_lookup_which(t, &len, &pri, pass, loc); | |
560 | t += len; | |
561 | if (pri <= 0) { | |
562 | if (pri < 0) { | |
563 | errno = EINVAL; | |
564 | slen = 0; | |
565 | goto end; | |
566 | } | |
567 | pri = COLLATE_MAX_PRIORITY; | |
568 | } | |
569 | *xfp++ = pri; | |
570 | } | |
571 | } else { | |
572 | while(*t) { | |
573 | __collate_lookup_which(t, &len, &pri, pass, loc); | |
574 | t += len; | |
575 | if (pri <= 0) { | |
576 | if (pri < 0) { | |
577 | errno = EINVAL; | |
578 | slen = 0; | |
579 | goto end; | |
580 | } | |
581 | continue; | |
582 | } | |
583 | *xfp++ = pri; | |
584 | } | |
585 | } | |
586 | *xfp = 0; | |
587 | } | |
588 | end: | |
589 | sverrno = errno; | |
590 | free(tt); | |
591 | free(tr); | |
592 | errno = sverrno; | |
593 | } | |
594 | ||
595 | __private_extern__ void | |
9385eb3d A |
596 | __collate_err(int ex, const char *f) |
597 | { | |
598 | const char *s; | |
599 | int serrno = errno; | |
600 | ||
601 | s = _getprogname(); | |
602 | _write(STDERR_FILENO, s, strlen(s)); | |
603 | _write(STDERR_FILENO, ": ", 2); | |
604 | s = f; | |
605 | _write(STDERR_FILENO, s, strlen(s)); | |
606 | _write(STDERR_FILENO, ": ", 2); | |
607 | s = strerror(serrno); | |
608 | _write(STDERR_FILENO, s, strlen(s)); | |
609 | _write(STDERR_FILENO, "\n", 1); | |
610 | exit(ex); | |
611 | } | |
612 | ||
ad3c9f2a A |
613 | /* |
614 | * __collate_collating_symbol takes the multibyte string specified by | |
615 | * src and slen, and using ps, converts that to a wide character. Then | |
616 | * it is checked to verify it is a collating symbol, and then copies | |
617 | * it to the wide character string specified by dst and dlen (the | |
618 | * results are not null terminated). The length of the wide characters | |
619 | * copied to dst is returned if successful. Zero is returned if no such | |
620 | * collating symbol exists. (size_t)-1 is returned if there are wide-character | |
621 | * conversion errors, if the length of the converted string is greater that | |
622 | * STR_LEN or if dlen is too small. It is up to the calling routine to | |
623 | * preserve the mbstate_t structure as needed. | |
624 | */ | |
625 | __private_extern__ size_t | |
626 | __collate_collating_symbol(wchar_t *dst, size_t dlen, const char *src, size_t slen, mbstate_t *ps, locale_t loc) | |
627 | { | |
628 | wchar_t wname[STR_LEN]; | |
629 | wchar_t w, *wp; | |
630 | size_t len, l; | |
631 | ||
632 | /* POSIX locale */ | |
633 | if (loc->__collate_load_error) { | |
634 | if (dlen < 1) | |
635 | return (size_t)-1; | |
636 | if (slen != 1 || !isascii(*src)) | |
637 | return 0; | |
638 | *dst = *src; | |
639 | return 1; | |
640 | } | |
641 | for(wp = wname, len = 0; slen > 0; len++) { | |
642 | l = mbrtowc_l(&w, src, slen, ps, loc); | |
643 | if (l == (size_t)-1 || l == (size_t)-2) | |
644 | return (size_t)-1; | |
645 | if (l == 0) | |
646 | break; | |
647 | if (len >= STR_LEN) | |
648 | return -1; | |
649 | *wp++ = w; | |
650 | src += l; | |
651 | slen = (long)slen - (long)l; | |
652 | } | |
653 | if (len == 0 || len > dlen) | |
654 | return (size_t)-1; | |
655 | if (len == 1) { | |
656 | if (*wname <= UCHAR_MAX) { | |
657 | if (__collate_char_pri_table[*wname].pri[0] >= 0) { | |
658 | if (dlen > 0) | |
659 | *dst = *wname; | |
660 | return 1; | |
661 | } | |
662 | return 0; | |
663 | } else if (__collate_info->large_pri_count > 0) { | |
664 | struct __collate_st_large_char_pri *match; | |
665 | match = largesearch(*wname, loc); | |
666 | if (match && match->pri.pri[0] >= 0) { | |
667 | if (dlen > 0) | |
668 | *dst = *wname; | |
669 | return 1; | |
670 | } | |
671 | } | |
672 | return 0; | |
673 | } | |
674 | *wp = 0; | |
675 | if (__collate_info->chain_count > 0) { | |
676 | struct __collate_st_chain_pri *match; | |
677 | int ll; | |
678 | match = chainsearch(wname, &ll, loc); | |
679 | if (match) { | |
680 | if (ll < dlen) | |
681 | dlen = ll; | |
682 | wcsncpy(dst, wname, dlen); | |
683 | return ll; | |
684 | } | |
685 | } | |
686 | return 0; | |
687 | } | |
688 | ||
689 | /* | |
690 | * __collate_equiv_class returns the equivalence class number for the symbol | |
691 | * specified by src and slen, using ps to convert from multi-byte to wide | |
692 | * character. Zero is returned if the symbol is not in an equivalence | |
693 | * class. -1 is returned if there are wide character conversion error, | |
694 | * if there are any greater-than-8-bit characters or if a multi-byte symbol | |
695 | * is greater or equal to STR_LEN in length. It is up to the calling | |
696 | * routine to preserve the mbstate_t structure as needed. | |
697 | */ | |
698 | __private_extern__ int | |
699 | __collate_equiv_class(const char *src, size_t slen, mbstate_t *ps, locale_t loc) | |
700 | { | |
701 | wchar_t wname[STR_LEN]; | |
702 | wchar_t w, *wp; | |
703 | size_t len, l; | |
704 | int e; | |
705 | ||
706 | /* POSIX locale */ | |
707 | if (loc->__collate_load_error) | |
708 | return 0; | |
709 | for(wp = wname, len = 0; slen > 0; len++) { | |
710 | l = mbrtowc_l(&w, src, slen, ps, loc); | |
711 | if (l == (size_t)-1 || l == (size_t)-2) | |
712 | return -1; | |
713 | if (l == 0) | |
714 | break; | |
715 | if (len >= STR_LEN) | |
716 | return -1; | |
717 | *wp++ = w; | |
718 | src += l; | |
719 | slen = (long)slen - (long)l; | |
720 | } | |
721 | if (len == 0) | |
722 | return -1; | |
723 | if (len == 1) { | |
724 | e = -1; | |
725 | if (*wname <= UCHAR_MAX) | |
726 | e = __collate_char_pri_table[*wname].pri[0]; | |
727 | else if (__collate_info->large_pri_count > 0) { | |
728 | struct __collate_st_large_char_pri *match; | |
729 | match = largesearch(*wname, loc); | |
730 | if (match) | |
731 | e = match->pri.pri[0]; | |
732 | } | |
733 | if (e == 0) | |
734 | return IGNORE_EQUIV_CLASS; | |
735 | return e > 0 ? e : 0; | |
736 | } | |
737 | *wp = 0; | |
738 | if (__collate_info->chain_count > 0) { | |
739 | struct __collate_st_chain_pri *match; | |
740 | int ll; | |
741 | match = chainsearch(wname, &ll, loc); | |
742 | if (match) { | |
743 | e = match->pri[0]; | |
744 | if (e == 0) | |
745 | return IGNORE_EQUIV_CLASS; | |
746 | return e < 0 ? -e : e; | |
747 | } | |
748 | } | |
749 | return 0; | |
750 | } | |
751 | ||
752 | /* | |
753 | * __collate_equiv_match tries to match any single or multi-character symbol | |
754 | * in equivalence class equiv_class in the multi-byte string specified by src | |
755 | * and slen. If start is non-zero, it is taken to be the first (pre-converted) | |
756 | * wide character. Subsequence wide characters, if needed, will use ps in | |
757 | * the conversion. On a successful match, the length of the matched string | |
758 | * is returned (including the start character). If dst is non-NULL, the | |
759 | * matched wide-character string is copied to dst, a wide character array of | |
760 | * length dlen (the results are not zero-terminated). If rlen is non-NULL, | |
761 | * the number of character in src actually used is returned. Zero is | |
762 | * returned by __collate_equiv_match if there is no match. (size_t)-1 is | |
763 | * returned on error: if there were conversion errors or if dlen is too small | |
764 | * to accept the results. On no match or error, ps is restored to its incoming | |
765 | * state. | |
766 | */ | |
767 | size_t | |
768 | __collate_equiv_match(int equiv_class, wchar_t *dst, size_t dlen, wchar_t start, const char *src, size_t slen, mbstate_t *ps, size_t *rlen, locale_t loc) | |
769 | { | |
770 | wchar_t w; | |
771 | size_t len, l, clen; | |
772 | int i; | |
773 | wchar_t buf[STR_LEN], *wp; | |
774 | mbstate_t save; | |
775 | const char *s = src; | |
776 | size_t sl = slen; | |
777 | struct __collate_st_chain_pri *ch = NULL; | |
778 | ||
779 | /* POSIX locale */ | |
780 | if (loc->__collate_load_error) | |
781 | return (size_t)-1; | |
782 | if (equiv_class == IGNORE_EQUIV_CLASS) | |
783 | equiv_class = 0; | |
784 | if (ps) | |
785 | save = *ps; | |
786 | wp = buf; | |
787 | len = clen = 0; | |
788 | if (start) { | |
789 | *wp++ = start; | |
790 | len = 1; | |
791 | } | |
792 | /* convert up to the max chain length */ | |
793 | while(sl > 0 && len < __collate_info->chain_max_len) { | |
794 | l = mbrtowc_l(&w, s, sl, ps, loc); | |
795 | if (l == (size_t)-1 || l == (size_t)-2 || l == 0) | |
796 | break; | |
797 | *wp++ = w; | |
798 | s += l; | |
799 | clen += l; | |
800 | sl -= l; | |
801 | len++; | |
802 | } | |
803 | *wp = 0; | |
804 | if (len > 1 && (ch = chainsearch(buf, &i, loc)) != NULL) { | |
805 | int e = ch->pri[0]; | |
806 | if (e < 0) | |
807 | e = -e; | |
808 | if (e == equiv_class) | |
809 | goto found; | |
810 | } | |
811 | /* try single character */ | |
812 | i = 1; | |
813 | if (*buf <= UCHAR_MAX) { | |
814 | if (equiv_class == __collate_char_pri_table[*buf].pri[0]) | |
815 | goto found; | |
816 | } else if (__collate_info->large_pri_count > 0) { | |
817 | struct __collate_st_large_char_pri *match; | |
818 | match = largesearch(*buf, loc); | |
819 | if (match && equiv_class == match->pri.pri[0]) | |
820 | goto found; | |
821 | } | |
822 | /* no match */ | |
823 | if (ps) | |
824 | *ps = save; | |
825 | return 0; | |
826 | found: | |
827 | /* if we converted more than we used, restore to initial and reconvert | |
828 | * up to what did match */ | |
829 | if (i < len) { | |
830 | len = i; | |
831 | if (ps) | |
832 | *ps = save; | |
833 | if (start) | |
834 | i--; | |
835 | clen = 0; | |
836 | while(i-- > 0) { | |
837 | l = mbrtowc_l(&w, src, slen, ps, loc); | |
838 | src += l; | |
839 | clen += l; | |
840 | slen -= l; | |
841 | } | |
842 | } | |
843 | if (dst) { | |
844 | if (dlen < len) { | |
845 | if (ps) | |
846 | *ps = save; | |
847 | return (size_t)-1; | |
848 | } | |
849 | for(wp = buf; len > 0; len--) | |
850 | *dst++ = *wp++; | |
851 | } | |
852 | if (rlen) | |
853 | *rlen = clen; | |
854 | return len; | |
855 | } | |
856 | ||
857 | /* | |
858 | * __collate_equiv_value returns the primary collation value for the given | |
859 | * collating symbol specified by str and len. Zero or negative is return | |
860 | * if the collating symbol was not found. (Use by the bracket code in TRE.) | |
861 | */ | |
862 | __private_extern__ int | |
863 | __collate_equiv_value(locale_t loc, const wchar_t *str, size_t len) | |
864 | { | |
865 | int e; | |
866 | ||
867 | if (len < 1 || len >= STR_LEN) | |
868 | return -1; | |
869 | ||
870 | /* POSIX locale */ | |
871 | if (loc->__collate_load_error) | |
872 | return (len == 1 && *str <= UCHAR_MAX) ? *str : -1; | |
873 | ||
874 | if (len == 1) { | |
875 | e = -1; | |
876 | if (*str <= UCHAR_MAX) | |
877 | e = __collate_char_pri_table[*str].pri[0]; | |
878 | else if (__collate_info->large_pri_count > 0) { | |
879 | struct __collate_st_large_char_pri *match; | |
880 | match = largesearch(*str, loc); | |
881 | if (match) | |
882 | e = match->pri.pri[0]; | |
883 | } | |
884 | if (e == 0) | |
885 | return IGNORE_EQUIV_CLASS; | |
886 | return e > 0 ? e : 0; | |
887 | } | |
888 | if (__collate_info->chain_count > 0) { | |
889 | wchar_t name[STR_LEN]; | |
890 | struct __collate_st_chain_pri *match; | |
891 | int ll; | |
892 | ||
893 | wcsncpy(name, str, len); | |
894 | name[len] = 0; | |
895 | match = chainsearch(name, &ll, loc); | |
896 | if (match) { | |
897 | e = match->pri[0]; | |
898 | if (e == 0) | |
899 | return IGNORE_EQUIV_CLASS; | |
900 | return e < 0 ? -e : e; | |
901 | } | |
902 | } | |
903 | return 0; | |
904 | } | |
905 | ||
#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
/*
 * Convert the wide characters of str in place from network to host byte
 * order.  Conversion stops at the first zero element or after len elements,
 * whichever comes first.  The count is tested before an element is read, so
 * a completely filled array of len elements is never read past its end.
 */
static void
wntohl(wchar_t *str, int len)
{
	for (; len > 0 && *str; str++, len--)
		*str = ntohl(*str);
}
#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
914 | ||
9385eb3d | 915 | #ifdef COLLATE_DEBUG |
ad3c9f2a A |
/*
 * Debug helper: format character code c for printing.  Printable ASCII
 * (32..126) is shown as 'c' followed by a space, anything else as
 * \x{HH...} in hexadecimal.
 *
 * The result lives in a static buffer that the next call overwrites.  The
 * buffer is large enough for the longest escape of a 32-bit value
 * ("\x{" + 8 hex digits + "}" + NUL = 13 bytes), and the write is bounded
 * in any case.
 */
static char *
show(int c)
{
	static char buf[16];

	if (c >= 32 && c <= 126)
		snprintf(buf, sizeof(buf), "'%c' ", c);
	else
		snprintf(buf, sizeof(buf), "\\x{%02x}", (unsigned int)c);
	return buf;
}
927 | ||
/*
 * Debug helper: format at most len wide characters of t, stopping at the
 * first NUL.  Printable ASCII (32..126) is copied as is; every other
 * character is written as \x{HH...} in hexadecimal.
 *
 * The result lives in a static buffer that the next call overwrites.  All
 * writes are bounded: when the next character no longer fits, output stops
 * there and the string is terminated, so long or mostly non-ASCII input is
 * truncated rather than overrunning the buffer.
 */
static char *
showwcs(const wchar_t *t, int len)
{
	static char buf[128];
	char *cp = buf;
	char *const last = buf + sizeof(buf) - 1;	/* slot kept for the NUL */

	for (; len > 0 && *t; len--, t++) {
		if (*t >= 32 && *t <= 126) {
			if (cp >= last)
				break;
			*cp++ = (char)*t;
		} else {
			int n = snprintf(cp, (size_t)(last - cp) + 1, "\\x{%02x}",
			    (unsigned int)*t);

			/* Truncated escape: drop it (cp is re-terminated below). */
			if (n < 0 || n > last - cp)
				break;
			cp += n;
		}
	}
	*cp = 0;
	return buf;
}
945 | ||
9385eb3d A |
946 | void |
947 | __collate_print_tables() | |
948 | { | |
ad3c9f2a A |
949 | int i, z; |
950 | locale_t loc = __current_locale(); | |
9385eb3d | 951 | |
ad3c9f2a A |
952 | printf("Info: p=%d s=%d f=0x%02x m=%d dc=%d up=%d us=%d pc=%d sc=%d cc=%d lc=%d\n", |
953 | __collate_info->directive[0], __collate_info->directive[1], | |
954 | __collate_info->flags, __collate_info->chain_max_len, | |
955 | __collate_info->directive_count, | |
956 | __collate_info->undef_pri[0], __collate_info->undef_pri[1], | |
957 | __collate_info->subst_count[0], __collate_info->subst_count[1], | |
958 | __collate_info->chain_count, __collate_info->large_pri_count); | |
959 | for(z = 0; z < __collate_info->directive_count; z++) { | |
960 | if (__collate_info->subst_count[z] > 0) { | |
961 | struct __collate_st_subst *p2 = __collate_substitute_table[z]; | |
962 | if (z == 0 && (__collate_info->flags & COLLATE_SUBST_DUP)) | |
963 | printf("Both substitute tables:\n"); | |
964 | else | |
965 | printf("Substitute table %d:\n", z); | |
966 | for (i = __collate_info->subst_count[z]; i-- > 0; p2++) | |
967 | printf("\t%s --> \"%s\"\n", | |
968 | show(p2->val), | |
969 | showwcs(p2->str, STR_LEN)); | |
970 | } | |
971 | } | |
972 | if (__collate_info->chain_count > 0) { | |
973 | printf("Chain priority table:\n"); | |
974 | struct __collate_st_chain_pri *p2 = __collate_chain_pri_table; | |
975 | for (i = __collate_info->chain_count; i-- > 0; p2++) { | |
976 | printf("\t\"%s\" :", showwcs(p2->str, STR_LEN)); | |
977 | for(z = 0; z < __collate_info->directive_count; z++) | |
978 | printf(" %d", p2->pri[z]); | |
979 | putchar('\n'); | |
980 | } | |
981 | } | |
9385eb3d | 982 | printf("Char priority table:\n"); |
ad3c9f2a A |
983 | { |
984 | struct __collate_st_char_pri *p2 = __collate_char_pri_table; | |
985 | for (i = 0; i < UCHAR_MAX + 1; i++, p2++) { | |
986 | printf("\t%s :", show(i)); | |
987 | for(z = 0; z < __collate_info->directive_count; z++) | |
988 | printf(" %d", p2->pri[z]); | |
989 | putchar('\n'); | |
990 | } | |
991 | } | |
992 | if (__collate_info->large_pri_count > 0) { | |
993 | struct __collate_st_large_char_pri *p2 = __collate_large_char_pri_table; | |
994 | printf("Large priority table:\n"); | |
995 | for (i = __collate_info->large_pri_count; i-- > 0; p2++) { | |
996 | printf("\t%s :", show(p2->val)); | |
997 | for(z = 0; z < __collate_info->directive_count; z++) | |
998 | printf(" %d", p2->pri.pri[z]); | |
999 | putchar('\n'); | |
1000 | } | |
1001 | } | |
9385eb3d A |
1002 | } |
1003 | #endif |