]>
Commit | Line | Data |
---|---|---|
1 | --- wcscoll.c.orig 2004-11-25 11:38:47.000000000 -0800 | |
2 | +++ wcscoll.c 2005-04-11 15:44:35.000000000 -0700 | |
3 | @@ -27,72 +27,222 @@ | |
4 | #include <sys/cdefs.h> | |
5 | __FBSDID("$FreeBSD: src/lib/libc/string/wcscoll.c,v 1.3 2004/04/07 09:47:56 tjr Exp $"); | |
6 | ||
7 | +#include "xlocale_private.h" | |
8 | + | |
9 | #include <errno.h> | |
10 | #include <stdlib.h> | |
11 | #include <string.h> | |
12 | #include <wchar.h> | |
13 | #include "collate.h" | |
14 | ||
15 | -static char *__mbsdup(const wchar_t *); | |
16 | +#define NOTFORWARD (DIRECTIVE_BACKWARD | DIRECTIVE_POSITION) /* directive bits that force wcscoll_l onto its general multi-pass path */ | |
17 | ||
18 | -/* | |
19 | - * Placeholder implementation of wcscoll(). Attempts to use the single-byte | |
20 | - * collation ordering where possible, and falls back on wcscmp() in locales | |
21 | - * with extended character sets. | |
22 | - */ | |
23 | int | |
24 | -wcscoll(const wchar_t *ws1, const wchar_t *ws2) | |
25 | +wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t loc) | |
26 | { | |
27 | - char *mbs1, *mbs2; | |
28 | - int diff, sverrno; | |
29 | + int sverrno; | |
30 | + int len, len2, prim, prim2, sec, sec2, ret, ret2; | |
31 | + const wchar_t *t, *t2; | |
32 | + wchar_t *tt = NULL, *tt2 = NULL; /* substituted copies of ws1/ws2, NULL when none was made */ | |
33 | + wchar_t *tr = NULL, *tr2 = NULL; /* reversed copies used by backward passes */ | |
34 | + wchar_t w, w2; | |
35 | + struct __collate_st_info *info; | |
36 | ||
37 | - if (__collate_load_error || MB_CUR_MAX > 1) | |
38 | + NORMALIZE_LOCALE(loc); | |
39 | + if (loc->__collate_load_error) | |
40 | /* | |
41 | - * Locale has no special collating order, could not be | |
42 | - * loaded, or has an extended character set; do a fast binary | |
43 | - * comparison. | |
44 | + * Locale has no special collating order or could not be | |
45 | + * loaded, do a fast binary comparison. | |
46 | */ | |
47 | return (wcscmp(ws1, ws2)); | |
48 | ||
49 | - if ((mbs1 = __mbsdup(ws1)) == NULL || (mbs2 = __mbsdup(ws2)) == NULL) { | |
50 | - /* | |
51 | - * Out of memory or illegal wide chars; fall back to wcscmp() | |
52 | - * but leave errno indicating the error. Callers that don't | |
53 | - * check for error will get a reasonable but often slightly | |
54 | - * incorrect result. | |
55 | - */ | |
56 | - sverrno = errno; | |
57 | - free(mbs1); | |
58 | - errno = sverrno; | |
59 | - return (wcscmp(ws1, ws2)); | |
60 | + info = &loc->__lc_collate->__info; | |
61 | + len = len2 = 1; | |
62 | + ret = ret2 = 0; | |
63 | + | |
64 | + if ((info->directive[0] & NOTFORWARD) || | |
65 | + (info->directive[1] & NOTFORWARD) || | |
66 | + (!(info->flags & COLLATE_SUBST_DUP) && /* bitwise flag test, same as in the pass loop below */ | |
67 | + (info->subst_count[0] > 0 || info->subst_count[1] > 0))) { /* general multi-pass path */ | |
68 | + int direc, pass; | |
69 | + for(pass = 0; pass < info->directive_count; pass++) { | |
70 | + direc = info->directive[pass]; | |
71 | + if (pass == 0 || !(info->flags & COLLATE_SUBST_DUP)) { | |
72 | + free(tt); | |
73 | + tt = __collate_substitute(ws1, pass, loc); | |
74 | + free(tt2); | |
75 | + tt2 = tt ? __collate_substitute(ws2, pass, loc) : NULL; | |
76 | + } | |
77 | + if (direc & DIRECTIVE_BACKWARD) { | |
78 | + wchar_t *bp, *fp, c; | |
79 | + free(tr); /* release the copy left by an earlier backward pass */ | |
80 | + bp = tr = __collate_wcsdup(tt ? tt : ws1); | |
81 | + fp = tr + wcslen(tr) - 1; | |
82 | + while(bp < fp) { | |
83 | + c = *bp; | |
84 | + *bp++ = *fp; | |
85 | + *fp-- = c; | |
86 | + } | |
87 | + free(tr2); /* release the copy left by an earlier backward pass */ | |
88 | + bp = tr2 = __collate_wcsdup(tt2 ? tt2 : ws2); | |
89 | + fp = tr2 + wcslen(tr2) - 1; | |
90 | + while(bp < fp) { | |
91 | + c = *bp; | |
92 | + *bp++ = *fp; | |
93 | + *fp-- = c; | |
94 | + } | |
95 | + t = (const wchar_t *)tr; | |
96 | + t2 = (const wchar_t *)tr2; | |
97 | + } else if (tt) { | |
98 | + t = (const wchar_t *)tt; | |
99 | + t2 = (const wchar_t *)tt2; | |
100 | + } else { | |
101 | + t = (const wchar_t *)ws1; | |
102 | + t2 = (const wchar_t *)ws2; | |
103 | + } | |
104 | + if(direc & DIRECTIVE_POSITION) { | |
105 | + while(*t && *t2) { | |
106 | + prim = prim2 = 0; | |
107 | + __collate_lookup_which(t, &len, &prim, pass, loc); | |
108 | + if (prim <= 0) { | |
109 | + if (prim < 0) { | |
110 | + errno = EINVAL; | |
111 | + ret = -1; | |
112 | + goto end; | |
113 | + } | |
114 | + prim = COLLATE_MAX_PRIORITY; | |
115 | + } | |
116 | + __collate_lookup_which(t2, &len2, &prim2, pass, loc); | |
117 | + if (prim2 <= 0) { | |
118 | + if (prim2 < 0) { | |
119 | + errno = EINVAL; | |
120 | + ret = -1; | |
121 | + goto end; | |
122 | + } | |
123 | + prim2 = COLLATE_MAX_PRIORITY; | |
124 | + } | |
125 | + if(prim != prim2) { | |
126 | + ret = prim - prim2; | |
127 | + goto end; | |
128 | + } | |
129 | + t += len; | |
130 | + t2 += len2; | |
131 | + } | |
132 | + } else { | |
133 | + while(*t && *t2) { | |
134 | + prim = prim2 = 0; | |
135 | + while(*t) { | |
136 | + __collate_lookup_which(t, &len, &prim, pass, loc); | |
137 | + if(prim > 0) | |
138 | + break; | |
139 | + if (prim < 0) { | |
140 | + errno = EINVAL; | |
141 | + ret = -1; | |
142 | + goto end; | |
143 | + } | |
144 | + t += len; | |
145 | + } | |
146 | + while(*t2) { | |
147 | + __collate_lookup_which(t2, &len2, &prim2, pass, loc); | |
148 | + if(prim2 > 0) | |
149 | + break; | |
150 | + if (prim2 < 0) { | |
151 | + errno = EINVAL; | |
152 | + ret = -1; | |
153 | + goto end; | |
154 | + } | |
155 | + t2 += len2; | |
156 | + } | |
157 | + if(!prim || !prim2) | |
158 | + break; | |
159 | + if(prim != prim2) { | |
160 | + ret = prim - prim2; | |
161 | + goto end; | |
162 | + } | |
163 | + t += len; | |
164 | + t2 += len2; | |
165 | + } | |
166 | + } | |
167 | + if(!*t) { | |
168 | + if(*t2) { | |
169 | + ret = -(int)*t2; | |
170 | + goto end; | |
171 | + } | |
172 | + } else { | |
173 | + ret = *t; | |
174 | + goto end; | |
175 | + } | |
176 | + } | |
177 | + ret = 0; | |
178 | + goto end; | |
179 | } | |
180 | ||
181 | - diff = strcoll(mbs1, mbs2); | |
182 | + /* optimized common case: order_start forward;forward and duplicate | |
183 | + * (or no) substitute tables */ | |
184 | + tt = __collate_substitute(ws1, 0, loc); | |
185 | + if (tt == NULL) { | |
186 | + tt2 = NULL; | |
187 | + t = (const wchar_t *)ws1; | |
188 | + t2 = (const wchar_t *)ws2; | |
189 | + } else { | |
190 | + tt2 = __collate_substitute(ws2, 0, loc); | |
191 | + t = (const wchar_t *)tt; | |
192 | + t2 = (const wchar_t *)tt2; | |
193 | + } | |
194 | + while(*t && *t2) { | |
195 | + prim = prim2 = 0; | |
196 | + while(*t) { | |
197 | + __collate_lookup_l(t, &len, &prim, &sec, loc); | |
198 | + if (prim > 0) | |
199 | + break; | |
200 | + if (prim < 0) { | |
201 | + errno = EINVAL; | |
202 | + ret = -1; | |
203 | + goto end; | |
204 | + } | |
205 | + t += len; | |
206 | + } | |
207 | + while(*t2) { | |
208 | + __collate_lookup_l(t2, &len2, &prim2, &sec2, loc); | |
209 | + if (prim2 > 0) | |
210 | + break; | |
211 | + if (prim2 < 0) { | |
212 | + errno = EINVAL; | |
213 | + ret = -1; | |
214 | + goto end; | |
215 | + } | |
216 | + t2 += len2; | |
217 | + } | |
218 | + if(!prim || !prim2) | |
219 | + break; | |
220 | + if(prim != prim2) { | |
221 | + ret = prim - prim2; | |
222 | + goto end; | |
223 | + } | |
224 | + if(!ret2) /* keep only the first secondary difference; it is the result when both strings end together */ | |
225 | + ret2 = sec - sec2; | |
226 | + t += len; | |
227 | + t2 += len2; | |
228 | + } | |
229 | + if(!*t && *t2) | |
230 | + ret = -(int)*t2; | |
231 | + else if(*t && !*t2) | |
232 | + ret = *t; | |
233 | + else if(!*t && !*t2) | |
234 | + ret = ret2; | |
235 | + end: /* free the temporaries while preserving errno across free() */ | |
236 | sverrno = errno; | |
237 | - free(mbs1); | |
238 | - free(mbs2); | |
239 | + free(tt); | |
240 | + free(tt2); | |
241 | + free(tr); | |
242 | + free(tr2); | |
243 | errno = sverrno; | |
244 | ||
245 | - return (diff); | |
246 | + return ret; | |
247 | } | |
248 | ||
249 | -static char * | |
250 | -__mbsdup(const wchar_t *ws) | |
251 | +int | |
252 | +wcscoll(const wchar_t *ws1, const wchar_t *ws2) | |
253 | { | |
254 | - static const mbstate_t initial; | |
255 | - mbstate_t st; | |
256 | - const wchar_t *wcp; | |
257 | - size_t len; | |
258 | - char *mbs; | |
259 | - | |
260 | - wcp = ws; | |
261 | - st = initial; | |
262 | - if ((len = wcsrtombs(NULL, &wcp, 0, &st)) == (size_t)-1) | |
263 | - return (NULL); | |
264 | - if ((mbs = malloc(len + 1)) == NULL) | |
265 | - return (NULL); | |
266 | - st = initial; | |
267 | - wcsrtombs(mbs, &ws, len + 1, &st); | |
268 | - | |
269 | - return (mbs); | |
270 | + return wcscoll_l(ws1, ws2, __current_locale()); /* thin wrapper: delegates to wcscoll_l with the locale returned by __current_locale() */ | |
271 | } |