]>
Commit | Line | Data |
---|---|---|
73c04bcf A |
1 | /* |
2 | ******************************************************************************* | |
3 | * | |
4 | * Copyright (C) 2005, International Business Machines | |
5 | * Corporation and others. All Rights Reserved. | |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: ucasemap.c | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2005may06 | |
14 | * created by: Markus W. Scherer | |
15 | * | |
16 | * Case mapping service object and functions using it. | |
17 | */ | |
18 | ||
19 | #include "unicode/utypes.h" | |
20 | #include "unicode/uloc.h" | |
21 | #include "unicode/ustring.h" | |
22 | #include "unicode/ucasemap.h" | |
23 | #include "cmemory.h" | |
24 | #include "cstring.h" | |
25 | #include "ucase.h" | |
26 | #include "ustr_imp.h" | |
27 | ||
28 | /* UCaseMap service object -------------------------------------------------- */ | |
29 | ||
30 | struct UCaseMap { | |
31 | const UCaseProps *csp; | |
32 | char locale[32]; | |
33 | int32_t locCache; | |
34 | uint32_t options; | |
35 | }; | |
36 | ||
37 | U_DRAFT UCaseMap * U_EXPORT2 | |
38 | ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) { | |
39 | UCaseMap *csm; | |
40 | ||
41 | if(U_FAILURE(*pErrorCode)) { | |
42 | return NULL; | |
43 | } | |
44 | ||
45 | csm=(UCaseMap *)uprv_malloc(sizeof(UCaseMap)); | |
46 | if(csm==NULL) { | |
47 | return NULL; | |
48 | } | |
49 | uprv_memset(csm, 0, sizeof(UCaseMap)); | |
50 | ||
51 | csm->csp=ucase_getSingleton(pErrorCode); | |
52 | ucasemap_setLocale(csm, locale, pErrorCode); | |
53 | if(U_FAILURE(*pErrorCode)) { | |
54 | uprv_free(csm); | |
55 | return NULL; | |
56 | } | |
57 | ||
58 | csm->options=options; | |
59 | return csm; | |
60 | } | |
61 | ||
62 | U_DRAFT void U_EXPORT2 | |
63 | ucasemap_close(UCaseMap *csm) { | |
64 | if(csm!=NULL) { | |
65 | uprv_free(csm); | |
66 | } | |
67 | } | |
68 | ||
69 | U_DRAFT const char * U_EXPORT2 | |
70 | ucasemap_getLocale(const UCaseMap *csm) { | |
71 | return csm->locale; | |
72 | } | |
73 | ||
74 | U_DRAFT uint32_t U_EXPORT2 | |
75 | ucasemap_getOptions(const UCaseMap *csm) { | |
76 | return csm->options; | |
77 | } | |
78 | ||
79 | U_DRAFT void U_EXPORT2 | |
80 | ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) { | |
81 | int32_t length; | |
82 | ||
83 | if(U_FAILURE(*pErrorCode)) { | |
84 | return; | |
85 | } | |
86 | ||
87 | length=uloc_getName(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode); | |
88 | if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || length==sizeof(csm->locale)) { | |
89 | *pErrorCode=U_ZERO_ERROR; | |
90 | /* we only really need the language code for case mappings */ | |
91 | length=uloc_getLanguage(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode); | |
92 | } | |
93 | if(length==sizeof(csm->locale)) { | |
94 | *pErrorCode=U_BUFFER_OVERFLOW_ERROR; | |
95 | } | |
96 | csm->locCache=0; | |
97 | if(U_SUCCESS(*pErrorCode)) { | |
98 | ucase_getCaseLocale(csm->locale, &csm->locCache); | |
99 | } else { | |
100 | csm->locale[0]=0; | |
101 | } | |
102 | } | |
103 | ||
104 | U_DRAFT void U_EXPORT2 | |
105 | ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode) { | |
106 | csm->options=options; | |
107 | } | |
108 | ||
109 | /* UTF-8 string case mappings ----------------------------------------------- */ | |
110 | ||
111 | /* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */ | |
112 | static U_INLINE int32_t | |
113 | appendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity, | |
114 | int32_t result, const UChar *s) { | |
115 | UChar32 c; | |
116 | int32_t length, destLength; | |
117 | UErrorCode errorCode; | |
118 | ||
119 | /* decode the result */ | |
120 | if(result<0) { | |
121 | /* (not) original code point */ | |
122 | c=~result; | |
123 | length=-1; | |
124 | } else if(result<=UCASE_MAX_STRING_LENGTH) { | |
125 | c=U_SENTINEL; | |
126 | length=result; | |
127 | } else { | |
128 | c=result; | |
129 | length=-1; | |
130 | } | |
131 | ||
132 | if(destIndex<destCapacity) { | |
133 | /* append the result */ | |
134 | if(length<0) { | |
135 | /* code point */ | |
136 | UBool isError=FALSE; | |
137 | U8_APPEND(dest, destIndex, destCapacity, c, isError); | |
138 | if(isError) { | |
139 | /* overflow, nothing written */ | |
140 | destIndex+=U8_LENGTH(c); | |
141 | } | |
142 | } else { | |
143 | /* string */ | |
144 | errorCode=U_ZERO_ERROR; | |
145 | u_strToUTF8( | |
146 | (char *)(dest+destIndex), destCapacity-destIndex, &destLength, | |
147 | s, length, | |
148 | &errorCode); | |
149 | destIndex+=length; | |
150 | /* we might have an overflow, but we know the actual length */ | |
151 | } | |
152 | } else { | |
153 | /* preflight */ | |
154 | if(length<0) { | |
155 | destIndex+=U8_LENGTH(c); | |
156 | } else { | |
157 | errorCode=U_ZERO_ERROR; | |
158 | u_strToUTF8( | |
159 | NULL, 0, &destLength, | |
160 | s, length, | |
161 | &errorCode); | |
162 | destIndex+=length; | |
163 | } | |
164 | } | |
165 | return destIndex; | |
166 | } | |
167 | ||
168 | static UChar32 U_CALLCONV | |
169 | utf8_caseContextIterator(void *context, int8_t dir) { | |
170 | UCaseContext *csc=(UCaseContext *)context; | |
171 | UChar32 c; | |
172 | ||
173 | if(dir<0) { | |
174 | /* reset for backward iteration */ | |
175 | csc->index=csc->cpStart; | |
176 | csc->dir=dir; | |
177 | } else if(dir>0) { | |
178 | /* reset for forward iteration */ | |
179 | csc->index=csc->cpLimit; | |
180 | csc->dir=dir; | |
181 | } else { | |
182 | /* continue current iteration direction */ | |
183 | dir=csc->dir; | |
184 | } | |
185 | ||
186 | if(dir<0) { | |
187 | if(csc->start<csc->index) { | |
188 | U8_PREV((const uint8_t *)csc->p, csc->start, csc->index, c); | |
189 | return c; | |
190 | } | |
191 | } else { | |
192 | if(csc->index<csc->limit) { | |
193 | U8_NEXT((const uint8_t *)csc->p, csc->index, csc->limit, c); | |
194 | return c; | |
195 | } | |
196 | } | |
197 | return U_SENTINEL; | |
198 | } | |
199 | ||
200 | typedef int32_t U_CALLCONV | |
201 | UCaseMapFull(const UCaseProps *csp, UChar32 c, | |
202 | UCaseContextIterator *iter, void *context, | |
203 | const UChar **pString, | |
204 | const char *locale, int32_t *locCache); | |
205 | ||
206 | /* | |
207 | * Case-maps [srcStart..srcLimit[ but takes | |
208 | * context [0..srcLength[ into account. | |
209 | */ | |
210 | static int32_t | |
211 | _caseMap(const UCaseMap *csm, UCaseMapFull *map, | |
212 | uint8_t *dest, int32_t destCapacity, | |
213 | const uint8_t *src, UCaseContext *csc, | |
214 | int32_t srcStart, int32_t srcLimit, | |
215 | UErrorCode *pErrorCode) { | |
216 | const UChar *s; | |
217 | UChar32 c; | |
218 | int32_t srcIndex, destIndex; | |
219 | int32_t locCache; | |
220 | ||
221 | locCache=csm->locCache; | |
222 | ||
223 | /* case mapping loop */ | |
224 | srcIndex=srcStart; | |
225 | destIndex=0; | |
226 | while(srcIndex<srcLimit) { | |
227 | csc->cpStart=srcIndex; | |
228 | U8_NEXT(src, srcIndex, srcLimit, c); | |
229 | csc->cpLimit=srcIndex; | |
230 | c=map(csm->csp, c, utf8_caseContextIterator, csc, &s, csm->locale, &locCache); | |
231 | destIndex=appendResult(dest, destIndex, destCapacity, c, s); | |
232 | } | |
233 | ||
234 | if(destIndex>destCapacity) { | |
235 | *pErrorCode=U_BUFFER_OVERFLOW_ERROR; | |
236 | } | |
237 | return destIndex; | |
238 | } | |
239 | ||
/*
 * Selector for the kind of mapping requested from caseMap(), the common
 * function that implements argument checking and buffer handling for
 * string case mapping.
 * caseMap() handles TO_LOWER explicitly and treats every other value
 * like TO_UPPER.
 */
enum {
    TO_LOWER=0,
    TO_UPPER=1,
    TO_TITLE=2,
    FOLD_CASE=3
};
250 | ||
251 | /* common internal function for public API functions */ | |
252 | ||
253 | static int32_t | |
254 | caseMap(const UCaseMap *csm, | |
255 | uint8_t *dest, int32_t destCapacity, | |
256 | const uint8_t *src, int32_t srcLength, | |
257 | int32_t toWhichCase, | |
258 | UErrorCode *pErrorCode) { | |
259 | UCaseContext csc={ NULL }; | |
260 | int32_t destLength; | |
261 | ||
262 | /* check argument values */ | |
263 | if(U_FAILURE(*pErrorCode)) { | |
264 | return 0; | |
265 | } | |
266 | if( destCapacity<0 || | |
267 | (dest==NULL && destCapacity>0) || | |
268 | src==NULL || | |
269 | srcLength<-1 | |
270 | ) { | |
271 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
272 | return 0; | |
273 | } | |
274 | ||
275 | /* get the string length */ | |
276 | if(srcLength==-1) { | |
277 | srcLength=uprv_strlen((const char *)src); | |
278 | } | |
279 | ||
280 | /* check for overlapping source and destination */ | |
281 | if( dest!=NULL && | |
282 | ((src>=dest && src<(dest+destCapacity)) || | |
283 | (dest>=src && dest<(src+srcLength))) | |
284 | ) { | |
285 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
286 | return 0; | |
287 | } | |
288 | ||
289 | destLength=0; | |
290 | ||
291 | csc.p=(void *)src; | |
292 | csc.limit=srcLength; | |
293 | ||
294 | if(toWhichCase==TO_LOWER) { | |
295 | destLength=_caseMap(csm, ucase_toFullLower, | |
296 | dest, destCapacity, | |
297 | src, &csc, | |
298 | 0, srcLength, | |
299 | pErrorCode); | |
300 | } else /* if(toWhichCase==TO_UPPER) */ { | |
301 | destLength=_caseMap(csm, ucase_toFullUpper, | |
302 | dest, destCapacity, | |
303 | src, &csc, | |
304 | 0, srcLength, | |
305 | pErrorCode); | |
306 | } | |
307 | ||
308 | return u_terminateChars((char *)dest, destCapacity, destLength, pErrorCode); | |
309 | } | |
310 | ||
311 | /* public API functions */ | |
312 | ||
313 | U_DRAFT int32_t U_EXPORT2 | |
314 | ucasemap_utf8ToLower(const UCaseMap *csm, | |
315 | char *dest, int32_t destCapacity, | |
316 | const char *src, int32_t srcLength, | |
317 | UErrorCode *pErrorCode) { | |
318 | return caseMap(csm, | |
319 | (uint8_t *)dest, destCapacity, | |
320 | (const uint8_t *)src, srcLength, | |
321 | TO_LOWER, pErrorCode); | |
322 | } | |
323 | ||
324 | U_DRAFT int32_t U_EXPORT2 | |
325 | ucasemap_utf8ToUpper(const UCaseMap *csm, | |
326 | char *dest, int32_t destCapacity, | |
327 | const char *src, int32_t srcLength, | |
328 | UErrorCode *pErrorCode) { | |
329 | return caseMap(csm, | |
330 | (uint8_t *)dest, destCapacity, | |
331 | (const uint8_t *)src, srcLength, | |
332 | TO_UPPER, pErrorCode); | |
333 | } |