]>
Commit | Line | Data |
---|---|---|
9385eb3d A |
1 | /*- |
2 | * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua> | |
3 | * at Electronni Visti IA, Kiev, Ukraine. | |
4 | * All rights reserved. | |
5 | * | |
6 | * Redistribution and use in source and binary forms, with or without | |
7 | * modification, are permitted provided that the following conditions | |
8 | * are met: | |
9 | * 1. Redistributions of source code must retain the above copyright | |
10 | * notice, this list of conditions and the following disclaimer. | |
11 | * 2. Redistributions in binary form must reproduce the above copyright | |
12 | * notice, this list of conditions and the following disclaimer in the | |
13 | * documentation and/or other materials provided with the distribution. | |
14 | * | |
15 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND | |
16 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE | |
19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
21 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
22 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
23 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
24 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
25 | * SUCH DAMAGE. | |
26 | */ | |
27 | ||
28 | #include <sys/cdefs.h> | |
1f2f436a | 29 | __FBSDID("$FreeBSD: src/lib/libc/locale/collate.c,v 1.35 2005/02/27 20:31:13 ru Exp $"); |
9385eb3d | 30 | |
ad3c9f2a A |
31 | #include "xlocale_private.h" |
32 | /* assumes the locale_t variable is named loc */ | |
33 | #define __collate_chain_equiv_table (loc->__lc_collate->__chain_equiv_table) | |
34 | #define __collate_chain_pri_table (loc->__lc_collate->__chain_pri_table) | |
35 | #define __collate_char_pri_table (loc->__lc_collate->__char_pri_table) | |
36 | #define __collate_info (&loc->__lc_collate->__info) | |
37 | #define __collate_large_char_pri_table (loc->__lc_collate->__large_char_pri_table) | |
38 | #define __collate_substitute_table (loc->__lc_collate->__substitute_table) | |
39 | ||
9385eb3d A |
40 | #include "namespace.h" |
41 | #include <arpa/inet.h> | |
42 | #include <stdio.h> | |
43 | #include <stdlib.h> | |
ad3c9f2a | 44 | #include <stddef.h> |
9385eb3d | 45 | #include <string.h> |
ad3c9f2a | 46 | #include <wchar.h> |
9385eb3d A |
47 | #include <errno.h> |
48 | #include <unistd.h> | |
49 | #include <sysexits.h> | |
ad3c9f2a | 50 | #include <ctype.h> |
9385eb3d A |
51 | #include "un-namespace.h" |
52 | ||
53 | #include "collate.h" | |
54 | #include "setlocale.h" | |
55 | #include "ldpart.h" | |
56 | ||
57 | #include "libc_private.h" | |
58 | ||
ad3c9f2a A |
59 | #if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN |
60 | static void wntohl(wchar_t *, int); | |
61 | #endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ | |
9385eb3d A |
62 | void __collate_err(int ex, const char *f) __dead2; |
63 | ||
ad3c9f2a A |
64 | /* |
65 | * Normally, the __collate_* routines should all be __private_extern__, | |
66 | * but grep is using them (3715846). Until we can provide an alternative, | |
67 | * we leave them public, and provide a read-only __collate_load_error variable | |
68 | */ | |
69 | #undef __collate_load_error | |
70 | int __collate_load_error = 1; | |
71 | ||
72 | __private_extern__ int | |
73 | __collate_load_tables(const char *encoding, locale_t loc) | |
9385eb3d A |
74 | { |
75 | FILE *fp; | |
ad3c9f2a | 76 | int i, saverr, chains, z; |
9385eb3d | 77 | char strbuf[STR_LEN], buf[PATH_MAX]; |
ad3c9f2a A |
78 | struct __xlocale_st_collate *TMP; |
79 | static struct __xlocale_st_collate *cache = NULL; | |
80 | struct __collate_st_info info; | |
81 | void *vp; | |
9385eb3d A |
82 | |
83 | /* 'encoding' must be already checked. */ | |
84 | if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { | |
ad3c9f2a A |
85 | loc->__collate_load_error = 1; |
86 | if (loc == &__global_locale) | |
87 | __collate_load_error = 1; | |
88 | XL_RELEASE(loc->__lc_collate); | |
89 | loc->__lc_collate = NULL; | |
9385eb3d A |
90 | return (_LDP_CACHE); |
91 | } | |
92 | ||
93 | /* | |
94 | * If the locale name is the same as our cache, use the cache. | |
95 | */ | |
ad3c9f2a A |
96 | if (cache && strcmp(encoding, cache->__encoding) == 0) { |
97 | loc->__collate_load_error = 0; | |
98 | if (loc == &__global_locale) | |
99 | __collate_load_error = 0; | |
100 | XL_RELEASE(loc->__lc_collate); | |
101 | loc->__lc_collate = cache; | |
102 | XL_RETAIN(loc->__lc_collate); | |
9385eb3d A |
103 | return (_LDP_CACHE); |
104 | } | |
105 | ||
106 | /* | |
107 | * Slurp the locale file into the cache. | |
108 | */ | |
109 | ||
110 | /* 'PathLocale' must be already set & checked. */ | |
111 | /* Range checking not needed, encoding has fixed size */ | |
974e3884 | 112 | (void)strcpy(buf, encoding); |
9385eb3d | 113 | (void)strcat(buf, "/LC_COLLATE"); |
974e3884 | 114 | if ((fp = fdopen(__open_path_locale(buf), "r")) == NULL) { |
9385eb3d | 115 | return (_LDP_ERROR); |
974e3884 | 116 | } |
9385eb3d A |
117 | |
118 | if (fread(strbuf, sizeof(strbuf), 1, fp) != 1) { | |
119 | saverr = errno; | |
120 | (void)fclose(fp); | |
121 | errno = saverr; | |
122 | return (_LDP_ERROR); | |
123 | } | |
124 | chains = -1; | |
ad3c9f2a | 125 | if (strcmp(strbuf, COLLATE_VERSION1_1A) == 0) |
9385eb3d A |
126 | chains = 1; |
127 | if (chains < 0) { | |
128 | (void)fclose(fp); | |
129 | errno = EFTYPE; | |
130 | return (_LDP_ERROR); | |
131 | } | |
132 | if (chains) { | |
ad3c9f2a | 133 | if (fread(&info, sizeof(info), 1, fp) != 1) { |
9385eb3d A |
134 | saverr = errno; |
135 | (void)fclose(fp); | |
136 | errno = saverr; | |
137 | return (_LDP_ERROR); | |
138 | } | |
ad3c9f2a A |
139 | #if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN |
140 | for(z = 0; z < info.directive_count; z++) { | |
141 | info.undef_pri[z] = ntohl(info.undef_pri[z]); | |
142 | info.subst_count[z] = ntohl(info.subst_count[z]); | |
143 | } | |
144 | info.chain_count = ntohl(info.chain_count); | |
145 | info.large_pri_count = ntohl(info.large_pri_count); | |
146 | #endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ | |
147 | if ((chains = info.chain_count) < 0) { | |
9385eb3d A |
148 | (void)fclose(fp); |
149 | errno = EFTYPE; | |
150 | return (_LDP_ERROR); | |
151 | } | |
152 | } else | |
153 | chains = TABLE_SIZE; | |
154 | ||
ad3c9f2a A |
155 | i = sizeof(struct __xlocale_st_collate) |
156 | + sizeof(struct __collate_st_chain_pri) * chains | |
157 | + sizeof(struct __collate_st_large_char_pri) * info.large_pri_count; | |
158 | for(z = 0; z < info.directive_count; z++) | |
159 | i += sizeof(struct __collate_st_subst) * info.subst_count[z]; | |
160 | if ((TMP = (struct __xlocale_st_collate *)malloc(i)) == NULL) { | |
9385eb3d A |
161 | saverr = errno; |
162 | (void)fclose(fp); | |
163 | errno = saverr; | |
164 | return (_LDP_ERROR); | |
165 | } | |
ad3c9f2a A |
166 | TMP->__refcount = 2; /* one for the locale, one for the cache */ |
167 | TMP->__free_extra = NULL; | |
9385eb3d A |
168 | |
169 | #define FREAD(a, b, c, d) \ | |
170 | { \ | |
171 | if (fread(a, b, c, d) != c) { \ | |
172 | saverr = errno; \ | |
ad3c9f2a | 173 | free(TMP); \ |
9385eb3d A |
174 | (void)fclose(d); \ |
175 | errno = saverr; \ | |
176 | return (_LDP_ERROR); \ | |
177 | } \ | |
178 | } | |
179 | ||
ad3c9f2a A |
180 | /* adjust size to read the remaining in one chunk */ |
181 | i -= offsetof(struct __xlocale_st_collate, __char_pri_table); | |
182 | FREAD(TMP->__char_pri_table, i, 1, fp); | |
9385eb3d A |
183 | (void)fclose(fp); |
184 | ||
ad3c9f2a A |
185 | vp = (void *)(TMP + 1); |
186 | ||
187 | /* the COLLATE_SUBST_DUP optimization relies on COLL_WEIGHTS_MAX == 2 */ | |
188 | if (info.subst_count[0] > 0) { | |
189 | TMP->__substitute_table[0] = (struct __collate_st_subst *)vp; | |
190 | vp += info.subst_count[0] * sizeof(struct __collate_st_subst); | |
191 | } else | |
192 | TMP->__substitute_table[0] = NULL; | |
193 | if (info.flags & COLLATE_SUBST_DUP) | |
194 | TMP->__substitute_table[1] = TMP->__substitute_table[0]; | |
195 | else if (info.subst_count[1] > 0) { | |
196 | TMP->__substitute_table[1] = (struct __collate_st_subst *)vp; | |
197 | vp += info.subst_count[1] * sizeof(struct __collate_st_subst); | |
198 | } else | |
199 | TMP->__substitute_table[1] = NULL; | |
200 | ||
201 | if (chains > 0) { | |
202 | TMP->__chain_pri_table = (struct __collate_st_chain_pri *)vp; | |
203 | vp += chains * sizeof(struct __collate_st_chain_pri); | |
204 | } else | |
205 | TMP->__chain_pri_table = NULL; | |
206 | if (info.large_pri_count > 0) | |
207 | TMP->__large_char_pri_table = (struct __collate_st_large_char_pri *)vp; | |
208 | else | |
209 | TMP->__large_char_pri_table = NULL; | |
210 | ||
211 | #if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN | |
212 | { | |
213 | struct __collate_st_char_pri *p = TMP->__char_pri_table; | |
214 | for(i = UCHAR_MAX + 1; i-- > 0; p++) { | |
215 | for(z = 0; z < info.directive_count; z++) | |
216 | p->pri[z] = ntohl(p->pri[z]); | |
217 | } | |
218 | } | |
219 | for(z = 0; z < info.directive_count; z++) | |
220 | if (info.subst_count[z] > 0) { | |
221 | struct __collate_st_subst *p = TMP->__substitute_table[z]; | |
222 | for(i = info.subst_count[z]; i-- > 0; p++) { | |
223 | p->val = ntohl(p->val); | |
224 | wntohl(p->str, STR_LEN); | |
225 | } | |
226 | } | |
227 | { | |
228 | struct __collate_st_chain_pri *p = TMP->__chain_pri_table; | |
229 | for(i = chains; i-- > 0; p++) { | |
230 | wntohl(p->str, STR_LEN); | |
231 | for(z = 0; z < info.directive_count; z++) | |
232 | p->pri[z] = ntohl(p->pri[z]); | |
233 | } | |
234 | } | |
235 | if (info.large_pri_count > 0) { | |
236 | struct __collate_st_large_char_pri *p = TMP->__large_char_pri_table; | |
237 | for(i = info.large_pri_count; i-- > 0; p++) { | |
238 | p->val = ntohl(p->val); | |
239 | for(z = 0; z < info.directive_count; z++) | |
240 | p->pri.pri[z] = ntohl(p->pri.pri[z]); | |
9385eb3d A |
241 | } |
242 | } | |
ad3c9f2a A |
243 | #endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ |
244 | (void)strcpy(TMP->__encoding, encoding); | |
245 | (void)memcpy(&TMP->__info, &info, sizeof(info)); | |
246 | XL_RELEASE(cache); | |
247 | cache = TMP; | |
248 | XL_RELEASE(loc->__lc_collate); | |
249 | loc->__lc_collate = cache; | |
250 | /* no need to retain, since we set __refcount to 2 above */ | |
251 | ||
252 | loc->__collate_substitute_nontrivial = (info.subst_count[0] > 0 || info.subst_count[1] > 0); | |
253 | loc->__collate_load_error = 0; | |
254 | if (loc == &__global_locale) | |
255 | __collate_load_error = 0; | |
9385eb3d A |
256 | |
257 | return (_LDP_LOADED); | |
258 | } | |
259 | ||
ad3c9f2a A |
/*
 * Length of the wide string s, looking at no more than len elements.
 * Returns the number of elements before the first zero, or len if none of
 * the first len elements is zero.
 */
static int
__collate_wcsnlen(const wchar_t *s, int len)
{
	int n = 0;

	/* test the bound first so s[len] is never read */
	while (n < len && s[n] != 0)
		n++;
	return n;
}
270 | ||
271 | static struct __collate_st_subst * | |
272 | substsearch(const wchar_t key, struct __collate_st_subst *tab, int n) | |
273 | { | |
274 | int low = 0; | |
275 | int high = n - 1; | |
276 | int next, compar; | |
277 | struct __collate_st_subst *p; | |
278 | ||
279 | while (low <= high) { | |
280 | next = (low + high) / 2; | |
281 | p = tab + next; | |
282 | compar = key - p->val; | |
283 | if (compar == 0) | |
284 | return p; | |
285 | if (compar > 0) | |
286 | low = next + 1; | |
287 | else | |
288 | high = next - 1; | |
289 | } | |
290 | return NULL; | |
291 | } | |
292 | ||
293 | __private_extern__ wchar_t * | |
294 | __collate_substitute(const wchar_t *s, int which, locale_t loc) | |
9385eb3d A |
295 | { |
296 | int dest_len, len, nlen; | |
ad3c9f2a A |
297 | int n, delta, nsubst; |
298 | wchar_t *dest_str = NULL; | |
299 | const wchar_t *fp; | |
300 | struct __collate_st_subst *subst, *match; | |
9385eb3d A |
301 | |
302 | if (s == NULL || *s == '\0') | |
ad3c9f2a A |
303 | return (__collate_wcsdup(L"")); |
304 | dest_len = wcslen(s); | |
305 | nsubst = __collate_info->subst_count[which]; | |
306 | if (nsubst <= 0) | |
307 | return __collate_wcsdup(s); | |
308 | subst = __collate_substitute_table[which]; | |
309 | delta = dest_len / 4; | |
310 | if (delta < 2) | |
311 | delta = 2; | |
312 | dest_str = (wchar_t *)malloc((dest_len += delta) * sizeof(wchar_t)); | |
9385eb3d | 313 | if (dest_str == NULL) |
3d9156a7 | 314 | __collate_err(EX_OSERR, __func__); |
9385eb3d A |
315 | len = 0; |
316 | while (*s) { | |
ad3c9f2a A |
317 | if ((match = substsearch(*s, subst, nsubst)) != NULL) { |
318 | fp = match->str; | |
319 | n = __collate_wcsnlen(fp, STR_LEN); | |
320 | } else { | |
321 | fp = s; | |
322 | n = 1; | |
323 | } | |
324 | nlen = len + n; | |
9385eb3d | 325 | if (dest_len <= nlen) { |
ad3c9f2a | 326 | dest_str = reallocf(dest_str, (dest_len = nlen + delta) * sizeof(wchar_t)); |
9385eb3d | 327 | if (dest_str == NULL) |
3d9156a7 | 328 | __collate_err(EX_OSERR, __func__); |
9385eb3d | 329 | } |
ad3c9f2a A |
330 | wcsncpy(dest_str + len, fp, n); |
331 | len += n; | |
332 | s++; | |
9385eb3d | 333 | } |
ad3c9f2a | 334 | dest_str[len] = 0; |
9385eb3d A |
335 | return (dest_str); |
336 | } | |
337 | ||
ad3c9f2a A |
338 | static struct __collate_st_chain_pri * |
339 | chainsearch(const wchar_t *key, int *len, locale_t loc) | |
340 | { | |
341 | int low = 0; | |
342 | int high = __collate_info->chain_count - 1; | |
343 | int next, compar, l; | |
344 | struct __collate_st_chain_pri *p; | |
345 | struct __collate_st_chain_pri *tab = __collate_chain_pri_table; | |
346 | ||
347 | while (low <= high) { | |
348 | next = (low + high) / 2; | |
349 | p = tab + next; | |
350 | compar = *key - *p->str; | |
351 | if (compar == 0) { | |
352 | l = __collate_wcsnlen(p->str, STR_LEN); | |
353 | compar = wcsncmp(key, p->str, l); | |
354 | if (compar == 0) { | |
355 | *len = l; | |
356 | return p; | |
357 | } | |
358 | } | |
359 | if (compar > 0) | |
360 | low = next + 1; | |
361 | else | |
362 | high = next - 1; | |
363 | } | |
364 | return NULL; | |
365 | } | |
366 | ||
367 | static struct __collate_st_large_char_pri * | |
368 | largesearch(const wchar_t key, locale_t loc) | |
369 | { | |
370 | int low = 0; | |
371 | int high = __collate_info->large_pri_count - 1; | |
372 | int next, compar; | |
373 | struct __collate_st_large_char_pri *p; | |
374 | struct __collate_st_large_char_pri *tab = __collate_large_char_pri_table; | |
375 | ||
376 | while (low <= high) { | |
377 | next = (low + high) / 2; | |
378 | p = tab + next; | |
379 | compar = key - p->val; | |
380 | if (compar == 0) | |
381 | return p; | |
382 | if (compar > 0) | |
383 | low = next + 1; | |
384 | else | |
385 | high = next - 1; | |
386 | } | |
387 | return NULL; | |
388 | } | |
389 | ||
390 | __private_extern__ void | |
391 | __collate_lookup_l(const wchar_t *t, int *len, int *prim, int *sec, locale_t loc) | |
9385eb3d A |
392 | { |
393 | struct __collate_st_chain_pri *p2; | |
ad3c9f2a | 394 | int l; |
9385eb3d A |
395 | |
396 | *len = 1; | |
397 | *prim = *sec = 0; | |
ad3c9f2a A |
398 | p2 = chainsearch(t, &l, loc); |
399 | /* use the chain if prim >= 0 */ | |
400 | if (p2 && p2->pri[0] >= 0) { | |
401 | *len = l; | |
402 | *prim = p2->pri[0]; | |
403 | *sec = p2->pri[1]; | |
404 | return; | |
405 | } | |
406 | if (*t <= UCHAR_MAX) { | |
407 | *prim = __collate_char_pri_table[*t].pri[0]; | |
408 | *sec = __collate_char_pri_table[*t].pri[1]; | |
409 | return; | |
410 | } | |
411 | if (__collate_info->large_pri_count > 0) { | |
412 | struct __collate_st_large_char_pri *match; | |
413 | match = largesearch(*t, loc); | |
414 | if (match) { | |
415 | *prim = match->pri.pri[0]; | |
416 | *sec = match->pri.pri[1]; | |
417 | return; | |
418 | } | |
419 | } | |
420 | *prim = (l = __collate_info->undef_pri[0]) >= 0 ? l : *t - l; | |
421 | *sec = (l = __collate_info->undef_pri[1]) >= 0 ? l : *t - l; | |
422 | } | |
423 | ||
424 | /* | |
425 | * This is only provided for programs (like grep) that are calling this | |
426 | * private function. This will go away eventually. | |
427 | */ | |
428 | void | |
429 | __collate_lookup(const unsigned char *t, int *len, int *prim, int *sec) | |
430 | { | |
431 | locale_t loc = __current_locale(); | |
432 | wchar_t *w = __collate_mbstowcs((const char *)t, loc); | |
433 | int sverrno; | |
434 | ||
435 | __collate_lookup_l(w, len, prim, sec, loc); | |
436 | sverrno = errno; | |
437 | free(w); | |
438 | errno = sverrno; | |
439 | } | |
440 | ||
441 | __private_extern__ void | |
442 | __collate_lookup_which(const wchar_t *t, int *len, int *pri, int which, locale_t loc) | |
443 | { | |
444 | struct __collate_st_chain_pri *p2; | |
445 | int p, l; | |
446 | ||
447 | *len = 1; | |
448 | *pri = 0; | |
449 | p2 = chainsearch(t, &l, loc); | |
450 | if (p2) { | |
451 | p = p2->pri[which]; | |
452 | /* use the chain if pri >= 0 */ | |
453 | if (p >= 0) { | |
454 | *len = l; | |
455 | *pri = p; | |
456 | return; | |
457 | } | |
458 | } | |
459 | if (*t <= UCHAR_MAX) { | |
460 | *pri = __collate_char_pri_table[*t].pri[which]; | |
461 | return; | |
462 | } | |
463 | if (__collate_info->large_pri_count > 0) { | |
464 | struct __collate_st_large_char_pri *match; | |
465 | match = largesearch(*t, loc); | |
466 | if (match) { | |
467 | *pri = match->pri.pri[which]; | |
9385eb3d A |
468 | return; |
469 | } | |
470 | } | |
ad3c9f2a | 471 | *pri = (l = __collate_info->undef_pri[which]) >= 0 ? l : *t - l; |
9385eb3d A |
472 | } |
473 | ||
ad3c9f2a A |
474 | __private_extern__ wchar_t * |
475 | __collate_mbstowcs(const char *s, locale_t loc) | |
9385eb3d | 476 | { |
ad3c9f2a A |
477 | static const mbstate_t initial; |
478 | mbstate_t st; | |
479 | size_t len; | |
480 | const char *ss; | |
481 | wchar_t *wcs; | |
9385eb3d | 482 | |
ad3c9f2a A |
483 | ss = s; |
484 | st = initial; | |
485 | if ((len = mbsrtowcs_l(NULL, &ss, 0, &st, loc)) == (size_t)-1) | |
486 | return NULL; | |
487 | if ((wcs = (wchar_t *)malloc((len + 1) * sizeof(wchar_t))) == NULL) | |
3d9156a7 | 488 | __collate_err(EX_OSERR, __func__); |
ad3c9f2a A |
489 | st = initial; |
490 | mbsrtowcs_l(wcs, &s, len, &st, loc); | |
491 | wcs[len] = 0; | |
492 | ||
493 | return (wcs); | |
9385eb3d A |
494 | } |
495 | ||
ad3c9f2a A |
496 | __private_extern__ wchar_t * |
497 | __collate_wcsdup(const wchar_t *s) | |
498 | { | |
499 | size_t len = wcslen(s) + 1; | |
500 | wchar_t *wcs; | |
501 | ||
502 | if ((wcs = (wchar_t *)malloc(len * sizeof(wchar_t))) == NULL) | |
503 | __collate_err(EX_OSERR, __func__); | |
504 | wcscpy(wcs, s); | |
505 | return (wcs); | |
506 | } | |
507 | ||
508 | __private_extern__ void | |
509 | __collate_xfrm(const wchar_t *src, wchar_t **xf, locale_t loc) | |
510 | { | |
511 | int pri, len; | |
512 | size_t slen; | |
513 | const wchar_t *t; | |
514 | wchar_t *tt = NULL, *tr = NULL; | |
515 | int direc, pass; | |
516 | wchar_t *xfp; | |
517 | struct __collate_st_info *info = __collate_info; | |
518 | int sverrno; | |
519 | ||
520 | for(pass = 0; pass < COLL_WEIGHTS_MAX; pass++) | |
521 | xf[pass] = NULL; | |
522 | for(pass = 0; pass < info->directive_count; pass++) { | |
523 | direc = info->directive[pass]; | |
524 | if (pass == 0 || !(info->flags & COLLATE_SUBST_DUP)) { | |
525 | sverrno = errno; | |
526 | free(tt); | |
527 | errno = sverrno; | |
528 | tt = __collate_substitute(src, pass, loc); | |
529 | } | |
530 | if (direc & DIRECTIVE_BACKWARD) { | |
531 | wchar_t *bp, *fp, c; | |
532 | sverrno = errno; | |
533 | free(tr); | |
534 | errno = sverrno; | |
535 | tr = __collate_wcsdup(tt ? tt : src); | |
536 | bp = tr; | |
537 | fp = tr + wcslen(tr) - 1; | |
538 | while(bp < fp) { | |
539 | c = *bp; | |
540 | *bp++ = *fp; | |
541 | *fp-- = c; | |
542 | } | |
543 | t = (const wchar_t *)tr; | |
544 | } else if (tt) | |
545 | t = (const wchar_t *)tt; | |
546 | else | |
547 | t = (const wchar_t *)src; | |
548 | sverrno = errno; | |
549 | if ((xf[pass] = (wchar_t *)malloc(sizeof(wchar_t) * (wcslen(t) + 1))) == NULL) { | |
550 | errno = sverrno; | |
551 | slen = 0; | |
552 | goto end; | |
553 | } | |
554 | errno = sverrno; | |
555 | xfp = xf[pass]; | |
556 | if (direc & DIRECTIVE_POSITION) { | |
557 | while(*t) { | |
558 | __collate_lookup_which(t, &len, &pri, pass, loc); | |
559 | t += len; | |
560 | if (pri <= 0) { | |
561 | if (pri < 0) { | |
562 | errno = EINVAL; | |
563 | slen = 0; | |
564 | goto end; | |
565 | } | |
566 | pri = COLLATE_MAX_PRIORITY; | |
567 | } | |
568 | *xfp++ = pri; | |
569 | } | |
570 | } else { | |
571 | while(*t) { | |
572 | __collate_lookup_which(t, &len, &pri, pass, loc); | |
573 | t += len; | |
574 | if (pri <= 0) { | |
575 | if (pri < 0) { | |
576 | errno = EINVAL; | |
577 | slen = 0; | |
578 | goto end; | |
579 | } | |
580 | continue; | |
581 | } | |
582 | *xfp++ = pri; | |
583 | } | |
584 | } | |
585 | *xfp = 0; | |
586 | } | |
587 | end: | |
588 | sverrno = errno; | |
589 | free(tt); | |
590 | free(tr); | |
591 | errno = sverrno; | |
592 | } | |
593 | ||
594 | __private_extern__ void | |
9385eb3d A |
595 | __collate_err(int ex, const char *f) |
596 | { | |
597 | const char *s; | |
598 | int serrno = errno; | |
599 | ||
600 | s = _getprogname(); | |
601 | _write(STDERR_FILENO, s, strlen(s)); | |
602 | _write(STDERR_FILENO, ": ", 2); | |
603 | s = f; | |
604 | _write(STDERR_FILENO, s, strlen(s)); | |
605 | _write(STDERR_FILENO, ": ", 2); | |
606 | s = strerror(serrno); | |
607 | _write(STDERR_FILENO, s, strlen(s)); | |
608 | _write(STDERR_FILENO, "\n", 1); | |
609 | exit(ex); | |
610 | } | |
611 | ||
ad3c9f2a A |
612 | /* |
613 | * __collate_collating_symbol takes the multibyte string specified by | |
614 | * src and slen, and using ps, converts that to a wide character. Then | |
615 | * it is checked to verify it is a collating symbol, and then copies | |
616 | * it to the wide character string specified by dst and dlen (the | |
617 | * results are not null terminated). The length of the wide characters | |
618 | * copied to dst is returned if successful. Zero is returned if no such | |
619 | * collating symbol exists. (size_t)-1 is returned if there are wide-character | |
620 | * conversion errors, if the length of the converted string is greater that | |
621 | * STR_LEN or if dlen is too small. It is up to the calling routine to | |
622 | * preserve the mbstate_t structure as needed. | |
623 | */ | |
624 | __private_extern__ size_t | |
625 | __collate_collating_symbol(wchar_t *dst, size_t dlen, const char *src, size_t slen, mbstate_t *ps, locale_t loc) | |
626 | { | |
627 | wchar_t wname[STR_LEN]; | |
628 | wchar_t w, *wp; | |
629 | size_t len, l; | |
630 | ||
631 | /* POSIX locale */ | |
632 | if (loc->__collate_load_error) { | |
633 | if (dlen < 1) | |
634 | return (size_t)-1; | |
635 | if (slen != 1 || !isascii(*src)) | |
636 | return 0; | |
637 | *dst = *src; | |
638 | return 1; | |
639 | } | |
640 | for(wp = wname, len = 0; slen > 0; len++) { | |
641 | l = mbrtowc_l(&w, src, slen, ps, loc); | |
642 | if (l == (size_t)-1 || l == (size_t)-2) | |
643 | return (size_t)-1; | |
644 | if (l == 0) | |
645 | break; | |
646 | if (len >= STR_LEN) | |
647 | return -1; | |
648 | *wp++ = w; | |
649 | src += l; | |
650 | slen = (long)slen - (long)l; | |
651 | } | |
652 | if (len == 0 || len > dlen) | |
653 | return (size_t)-1; | |
654 | if (len == 1) { | |
655 | if (*wname <= UCHAR_MAX) { | |
656 | if (__collate_char_pri_table[*wname].pri[0] >= 0) { | |
657 | if (dlen > 0) | |
658 | *dst = *wname; | |
659 | return 1; | |
660 | } | |
661 | return 0; | |
662 | } else if (__collate_info->large_pri_count > 0) { | |
663 | struct __collate_st_large_char_pri *match; | |
664 | match = largesearch(*wname, loc); | |
665 | if (match && match->pri.pri[0] >= 0) { | |
666 | if (dlen > 0) | |
667 | *dst = *wname; | |
668 | return 1; | |
669 | } | |
670 | } | |
671 | return 0; | |
672 | } | |
673 | *wp = 0; | |
674 | if (__collate_info->chain_count > 0) { | |
675 | struct __collate_st_chain_pri *match; | |
676 | int ll; | |
677 | match = chainsearch(wname, &ll, loc); | |
678 | if (match) { | |
679 | if (ll < dlen) | |
680 | dlen = ll; | |
681 | wcsncpy(dst, wname, dlen); | |
682 | return ll; | |
683 | } | |
684 | } | |
685 | return 0; | |
686 | } | |
687 | ||
688 | /* | |
689 | * __collate_equiv_class returns the equivalence class number for the symbol | |
690 | * specified by src and slen, using ps to convert from multi-byte to wide | |
691 | * character. Zero is returned if the symbol is not in an equivalence | |
692 | * class. -1 is returned if there are wide character conversion error, | |
693 | * if there are any greater-than-8-bit characters or if a multi-byte symbol | |
694 | * is greater or equal to STR_LEN in length. It is up to the calling | |
695 | * routine to preserve the mbstate_t structure as needed. | |
696 | */ | |
697 | __private_extern__ int | |
698 | __collate_equiv_class(const char *src, size_t slen, mbstate_t *ps, locale_t loc) | |
699 | { | |
700 | wchar_t wname[STR_LEN]; | |
701 | wchar_t w, *wp; | |
702 | size_t len, l; | |
703 | int e; | |
704 | ||
705 | /* POSIX locale */ | |
706 | if (loc->__collate_load_error) | |
707 | return 0; | |
708 | for(wp = wname, len = 0; slen > 0; len++) { | |
709 | l = mbrtowc_l(&w, src, slen, ps, loc); | |
710 | if (l == (size_t)-1 || l == (size_t)-2) | |
711 | return -1; | |
712 | if (l == 0) | |
713 | break; | |
714 | if (len >= STR_LEN) | |
715 | return -1; | |
716 | *wp++ = w; | |
717 | src += l; | |
718 | slen = (long)slen - (long)l; | |
719 | } | |
720 | if (len == 0) | |
721 | return -1; | |
722 | if (len == 1) { | |
723 | e = -1; | |
724 | if (*wname <= UCHAR_MAX) | |
725 | e = __collate_char_pri_table[*wname].pri[0]; | |
726 | else if (__collate_info->large_pri_count > 0) { | |
727 | struct __collate_st_large_char_pri *match; | |
728 | match = largesearch(*wname, loc); | |
729 | if (match) | |
730 | e = match->pri.pri[0]; | |
731 | } | |
732 | if (e == 0) | |
733 | return IGNORE_EQUIV_CLASS; | |
734 | return e > 0 ? e : 0; | |
735 | } | |
736 | *wp = 0; | |
737 | if (__collate_info->chain_count > 0) { | |
738 | struct __collate_st_chain_pri *match; | |
739 | int ll; | |
740 | match = chainsearch(wname, &ll, loc); | |
741 | if (match) { | |
742 | e = match->pri[0]; | |
743 | if (e == 0) | |
744 | return IGNORE_EQUIV_CLASS; | |
745 | return e < 0 ? -e : e; | |
746 | } | |
747 | } | |
748 | return 0; | |
749 | } | |
750 | ||
751 | /* | |
752 | * __collate_equiv_match tries to match any single or multi-character symbol | |
753 | * in equivalence class equiv_class in the multi-byte string specified by src | |
754 | * and slen. If start is non-zero, it is taken to be the first (pre-converted) | |
755 | * wide character. Subsequence wide characters, if needed, will use ps in | |
756 | * the conversion. On a successful match, the length of the matched string | |
757 | * is returned (including the start character). If dst is non-NULL, the | |
758 | * matched wide-character string is copied to dst, a wide character array of | |
759 | * length dlen (the results are not zero-terminated). If rlen is non-NULL, | |
760 | * the number of character in src actually used is returned. Zero is | |
761 | * returned by __collate_equiv_match if there is no match. (size_t)-1 is | |
762 | * returned on error: if there were conversion errors or if dlen is too small | |
763 | * to accept the results. On no match or error, ps is restored to its incoming | |
764 | * state. | |
765 | */ | |
766 | size_t | |
767 | __collate_equiv_match(int equiv_class, wchar_t *dst, size_t dlen, wchar_t start, const char *src, size_t slen, mbstate_t *ps, size_t *rlen, locale_t loc) | |
768 | { | |
769 | wchar_t w; | |
770 | size_t len, l, clen; | |
771 | int i; | |
772 | wchar_t buf[STR_LEN], *wp; | |
773 | mbstate_t save; | |
774 | const char *s = src; | |
775 | size_t sl = slen; | |
776 | struct __collate_st_chain_pri *ch = NULL; | |
777 | ||
778 | /* POSIX locale */ | |
779 | if (loc->__collate_load_error) | |
780 | return (size_t)-1; | |
781 | if (equiv_class == IGNORE_EQUIV_CLASS) | |
782 | equiv_class = 0; | |
783 | if (ps) | |
784 | save = *ps; | |
785 | wp = buf; | |
786 | len = clen = 0; | |
787 | if (start) { | |
788 | *wp++ = start; | |
789 | len = 1; | |
790 | } | |
791 | /* convert up to the max chain length */ | |
792 | while(sl > 0 && len < __collate_info->chain_max_len) { | |
793 | l = mbrtowc_l(&w, s, sl, ps, loc); | |
794 | if (l == (size_t)-1 || l == (size_t)-2 || l == 0) | |
795 | break; | |
796 | *wp++ = w; | |
797 | s += l; | |
798 | clen += l; | |
799 | sl -= l; | |
800 | len++; | |
801 | } | |
802 | *wp = 0; | |
803 | if (len > 1 && (ch = chainsearch(buf, &i, loc)) != NULL) { | |
804 | int e = ch->pri[0]; | |
805 | if (e < 0) | |
806 | e = -e; | |
807 | if (e == equiv_class) | |
808 | goto found; | |
809 | } | |
810 | /* try single character */ | |
811 | i = 1; | |
812 | if (*buf <= UCHAR_MAX) { | |
813 | if (equiv_class == __collate_char_pri_table[*buf].pri[0]) | |
814 | goto found; | |
815 | } else if (__collate_info->large_pri_count > 0) { | |
816 | struct __collate_st_large_char_pri *match; | |
817 | match = largesearch(*buf, loc); | |
818 | if (match && equiv_class == match->pri.pri[0]) | |
819 | goto found; | |
820 | } | |
821 | /* no match */ | |
822 | if (ps) | |
823 | *ps = save; | |
824 | return 0; | |
825 | found: | |
826 | /* if we converted more than we used, restore to initial and reconvert | |
827 | * up to what did match */ | |
828 | if (i < len) { | |
829 | len = i; | |
830 | if (ps) | |
831 | *ps = save; | |
832 | if (start) | |
833 | i--; | |
834 | clen = 0; | |
835 | while(i-- > 0) { | |
836 | l = mbrtowc_l(&w, src, slen, ps, loc); | |
837 | src += l; | |
838 | clen += l; | |
839 | slen -= l; | |
840 | } | |
841 | } | |
842 | if (dst) { | |
843 | if (dlen < len) { | |
844 | if (ps) | |
845 | *ps = save; | |
846 | return (size_t)-1; | |
847 | } | |
848 | for(wp = buf; len > 0; len--) | |
849 | *dst++ = *wp++; | |
850 | } | |
851 | if (rlen) | |
852 | *rlen = clen; | |
853 | return len; | |
854 | } | |
855 | ||
856 | /* | |
857 | * __collate_equiv_value returns the primary collation value for the given | |
858 | * collating symbol specified by str and len. Zero or negative is return | |
859 | * if the collating symbol was not found. (Use by the bracket code in TRE.) | |
860 | */ | |
861 | __private_extern__ int | |
862 | __collate_equiv_value(locale_t loc, const wchar_t *str, size_t len) | |
863 | { | |
864 | int e; | |
865 | ||
866 | if (len < 1 || len >= STR_LEN) | |
867 | return -1; | |
868 | ||
869 | /* POSIX locale */ | |
870 | if (loc->__collate_load_error) | |
871 | return (len == 1 && *str <= UCHAR_MAX) ? *str : -1; | |
872 | ||
873 | if (len == 1) { | |
874 | e = -1; | |
875 | if (*str <= UCHAR_MAX) | |
876 | e = __collate_char_pri_table[*str].pri[0]; | |
877 | else if (__collate_info->large_pri_count > 0) { | |
878 | struct __collate_st_large_char_pri *match; | |
879 | match = largesearch(*str, loc); | |
880 | if (match) | |
881 | e = match->pri.pri[0]; | |
882 | } | |
883 | if (e == 0) | |
884 | return IGNORE_EQUIV_CLASS; | |
885 | return e > 0 ? e : 0; | |
886 | } | |
887 | if (__collate_info->chain_count > 0) { | |
888 | wchar_t name[STR_LEN]; | |
889 | struct __collate_st_chain_pri *match; | |
890 | int ll; | |
891 | ||
892 | wcsncpy(name, str, len); | |
893 | name[len] = 0; | |
894 | match = chainsearch(name, &ll, loc); | |
895 | if (match) { | |
896 | e = match->pri[0]; | |
897 | if (e == 0) | |
898 | return IGNORE_EQUIV_CLASS; | |
899 | return e < 0 ? -e : e; | |
900 | } | |
901 | } | |
902 | return 0; | |
903 | } | |
904 | ||
#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
/*
 * Convert the wide characters of str from network to host byte order in
 * place, stopping at the first zero element or after len elements,
 * whichever comes first.
 */
static void
wntohl(wchar_t *str, int len)
{
	/* test the bound first so str[len] is never read */
	for (; len > 0 && *str; str++, len--)
		*str = ntohl(*str);
}
#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
913 | ||
9385eb3d | 914 | #ifdef COLLATE_DEBUG |
ad3c9f2a A |
/*
 * Format the character code c for debug output: printable ASCII is shown
 * as 'c' followed by a space, anything else as \x{hex}.  The result lives
 * in a static buffer that is overwritten by the next call.
 */
static char *
show(int c)
{
	/* large enough for "\x{" + 8 hex digits + "}" + terminator */
	static char buf[16];

	if (c >= 32 && c <= 126)
		snprintf(buf, sizeof(buf), "'%c' ", c);
	else
		snprintf(buf, sizeof(buf), "\\x{%02x}", (unsigned int)c);
	return buf;
}
926 | ||
/*
 * Format up to len wide characters of t for debug output, stopping at the
 * first zero element.  Printable ASCII is copied as is, anything else is
 * shown as \x{hex}.  Output that does not fit in the static buffer is cut
 * off at a whole-character boundary.  The buffer is overwritten by the
 * next call.
 */
static char *
showwcs(const wchar_t *t, int len)
{
	static char buf[64];
	char *cp = buf;
	char *const end = buf + sizeof(buf);

	for (; len > 0 && *t; len--, t++) {
		if (*t >= 32 && *t <= 126) {
			/* need room for this character and the terminator */
			if (end - cp < 2)
				break;
			*cp++ = (char)*t;
		} else {
			int n = snprintf(cp, (size_t)(end - cp), "\\x{%02x}",
			    (unsigned int)*t);

			/* drop an escape that would not fit completely */
			if (n < 0 || n >= end - cp)
				break;
			cp += n;
		}
	}
	*cp = 0;
	return buf;
}
944 | ||
9385eb3d A |
945 | void |
946 | __collate_print_tables() | |
947 | { | |
ad3c9f2a A |
948 | int i, z; |
949 | locale_t loc = __current_locale(); | |
9385eb3d | 950 | |
ad3c9f2a A |
951 | printf("Info: p=%d s=%d f=0x%02x m=%d dc=%d up=%d us=%d pc=%d sc=%d cc=%d lc=%d\n", |
952 | __collate_info->directive[0], __collate_info->directive[1], | |
953 | __collate_info->flags, __collate_info->chain_max_len, | |
954 | __collate_info->directive_count, | |
955 | __collate_info->undef_pri[0], __collate_info->undef_pri[1], | |
956 | __collate_info->subst_count[0], __collate_info->subst_count[1], | |
957 | __collate_info->chain_count, __collate_info->large_pri_count); | |
958 | for(z = 0; z < __collate_info->directive_count; z++) { | |
959 | if (__collate_info->subst_count[z] > 0) { | |
960 | struct __collate_st_subst *p2 = __collate_substitute_table[z]; | |
961 | if (z == 0 && (__collate_info->flags & COLLATE_SUBST_DUP)) | |
962 | printf("Both substitute tables:\n"); | |
963 | else | |
964 | printf("Substitute table %d:\n", z); | |
965 | for (i = __collate_info->subst_count[z]; i-- > 0; p2++) | |
966 | printf("\t%s --> \"%s\"\n", | |
967 | show(p2->val), | |
968 | showwcs(p2->str, STR_LEN)); | |
969 | } | |
970 | } | |
971 | if (__collate_info->chain_count > 0) { | |
972 | printf("Chain priority table:\n"); | |
973 | struct __collate_st_chain_pri *p2 = __collate_chain_pri_table; | |
974 | for (i = __collate_info->chain_count; i-- > 0; p2++) { | |
975 | printf("\t\"%s\" :", showwcs(p2->str, STR_LEN)); | |
976 | for(z = 0; z < __collate_info->directive_count; z++) | |
977 | printf(" %d", p2->pri[z]); | |
978 | putchar('\n'); | |
979 | } | |
980 | } | |
9385eb3d | 981 | printf("Char priority table:\n"); |
ad3c9f2a A |
982 | { |
983 | struct __collate_st_char_pri *p2 = __collate_char_pri_table; | |
984 | for (i = 0; i < UCHAR_MAX + 1; i++, p2++) { | |
985 | printf("\t%s :", show(i)); | |
986 | for(z = 0; z < __collate_info->directive_count; z++) | |
987 | printf(" %d", p2->pri[z]); | |
988 | putchar('\n'); | |
989 | } | |
990 | } | |
991 | if (__collate_info->large_pri_count > 0) { | |
992 | struct __collate_st_large_char_pri *p2 = __collate_large_char_pri_table; | |
993 | printf("Large priority table:\n"); | |
994 | for (i = __collate_info->large_pri_count; i-- > 0; p2++) { | |
995 | printf("\t%s :", show(p2->val)); | |
996 | for(z = 0; z < __collate_info->directive_count; z++) | |
997 | printf(" %d", p2->pri.pri[z]); | |
998 | putchar('\n'); | |
999 | } | |
1000 | } | |
9385eb3d A |
1001 | } |
1002 | #endif |