]>
Commit | Line | Data |
---|---|---|
374ca955 A |
1 | /* |
2 | ******************************************************************************* | |
3 | * | |
73c04bcf | 4 | * Copyright (C) 1999-2005, International Business Machines |
374ca955 A |
5 | * Corporation and others. All Rights Reserved. |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: unistr_case.cpp | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:2 | |
12 | * | |
13 | * created on: 2004aug19 | |
14 | * created by: Markus W. Scherer | |
15 | * | |
16 | * Case-mapping functions moved here from unistr.cpp | |
17 | */ | |
18 | ||
19 | #include "unicode/utypes.h" | |
20 | #include "unicode/putil.h" | |
21 | #include "unicode/locid.h" | |
22 | #include "cstring.h" | |
23 | #include "cmemory.h" | |
24 | #include "unicode/ustring.h" | |
25 | #include "unicode/unistr.h" | |
26 | #include "unicode/uchar.h" | |
27 | #include "unicode/ubrk.h" | |
28 | #include "ustr_imp.h" | |
29 | #include "unormimp.h" | |
73c04bcf | 30 | #include "uhash.h" |
374ca955 A |
31 | |
32 | U_NAMESPACE_BEGIN | |
33 | ||
34 | //======================================== | |
35 | // Read-only implementation | |
36 | //======================================== | |
37 | ||
38 | int8_t | |
39 | UnicodeString::doCaseCompare(int32_t start, | |
40 | int32_t length, | |
41 | const UChar *srcChars, | |
42 | int32_t srcStart, | |
43 | int32_t srcLength, | |
44 | uint32_t options) const | |
45 | { | |
46 | // compare illegal string values | |
47 | // treat const UChar *srcChars==NULL as an empty string | |
48 | if(isBogus()) { | |
49 | return -1; | |
50 | } | |
51 | ||
52 | // pin indices to legal values | |
53 | pinIndices(start, length); | |
54 | ||
55 | if(srcChars == NULL) { | |
56 | srcStart = srcLength = 0; | |
57 | } | |
58 | ||
59 | // get the correct pointer | |
60 | const UChar *chars = getArrayStart(); | |
61 | ||
62 | chars += start; | |
63 | srcChars += srcStart; | |
64 | ||
65 | if(chars != srcChars) { | |
66 | UErrorCode errorCode=U_ZERO_ERROR; | |
67 | int32_t result=u_strcmpFold(chars, length, srcChars, srcLength, | |
68 | options|U_COMPARE_IGNORE_CASE, &errorCode); | |
69 | if(result!=0) { | |
70 | return (int8_t)(result >> 24 | 1); | |
71 | } | |
72 | } else { | |
73 | // get the srcLength if necessary | |
74 | if(srcLength < 0) { | |
75 | srcLength = u_strlen(srcChars + srcStart); | |
76 | } | |
77 | if(length != srcLength) { | |
78 | return (int8_t)((length - srcLength) >> 24 | 1); | |
79 | } | |
80 | } | |
81 | return 0; | |
82 | } | |
83 | ||
84 | //======================================== | |
85 | // Write implementation | |
86 | //======================================== | |
87 | ||
88 | /* | |
89 | * Implement argument checking and buffer handling | |
90 | * for string case mapping as a common function. | |
91 | */ | |
// Selectors for the toWhichCase argument of UnicodeString::caseMap().
enum {
  TO_LOWER = 0,   // lowercase mapping
  TO_UPPER = 1,   // uppercase mapping
  TO_TITLE = 2,   // titlecase mapping
  FOLD_CASE = 3   // case folding
};
98 | ||
99 | UnicodeString & | |
100 | UnicodeString::caseMap(BreakIterator *titleIter, | |
101 | const char *locale, | |
102 | uint32_t options, | |
103 | int32_t toWhichCase) { | |
104 | if(fLength <= 0) { | |
105 | // nothing to do | |
106 | return *this; | |
107 | } | |
108 | ||
109 | UErrorCode errorCode; | |
110 | ||
111 | errorCode = U_ZERO_ERROR; | |
73c04bcf | 112 | const UCaseProps *csp=ucase_getSingleton(&errorCode); |
374ca955 A |
113 | if(U_FAILURE(errorCode)) { |
114 | setToBogus(); | |
115 | return *this; | |
116 | } | |
117 | ||
118 | // We need to allocate a new buffer for the internal string case mapping function. | |
119 | // This is very similar to how doReplace() below keeps the old array pointer | |
120 | // and deletes the old array itself after it is done. | |
121 | // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array. | |
122 | UChar *oldArray = fArray; | |
123 | int32_t oldLength = fLength; | |
124 | int32_t *bufferToDelete = 0; | |
125 | ||
126 | // Make sure that if the string is in fStackBuffer we do not overwrite it! | |
127 | int32_t capacity; | |
128 | if(fLength <= US_STACKBUF_SIZE) { | |
129 | if(fArray == fStackBuffer) { | |
130 | capacity = 2 * US_STACKBUF_SIZE; // make sure that cloneArrayIfNeeded() allocates a new buffer | |
131 | } else { | |
132 | capacity = US_STACKBUF_SIZE; | |
133 | } | |
134 | } else { | |
135 | capacity = fLength + 20; | |
136 | } | |
137 | if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) { | |
138 | return *this; | |
139 | } | |
140 | ||
141 | #if !UCONFIG_NO_BREAK_ITERATION | |
142 | // set up the titlecasing break iterator | |
143 | UBreakIterator *cTitleIter = 0; | |
144 | ||
145 | if(toWhichCase == TO_TITLE) { | |
146 | errorCode = U_ZERO_ERROR; | |
147 | if(titleIter != 0) { | |
148 | cTitleIter = (UBreakIterator *)titleIter; | |
149 | ubrk_setText(cTitleIter, oldArray, oldLength, &errorCode); | |
150 | } else { | |
151 | cTitleIter = ubrk_open(UBRK_WORD, locale, | |
152 | oldArray, oldLength, | |
153 | &errorCode); | |
154 | } | |
155 | if(U_FAILURE(errorCode)) { | |
156 | uprv_free(bufferToDelete); | |
157 | setToBogus(); | |
158 | return *this; | |
159 | } | |
160 | } | |
161 | #endif | |
162 | ||
163 | // Case-map, and if the result is too long, then reallocate and repeat. | |
164 | do { | |
165 | errorCode = U_ZERO_ERROR; | |
166 | if(toWhichCase==TO_LOWER) { | |
167 | fLength = ustr_toLower(csp, fArray, fCapacity, | |
168 | oldArray, oldLength, | |
169 | locale, &errorCode); | |
170 | } else if(toWhichCase==TO_UPPER) { | |
171 | fLength = ustr_toUpper(csp, fArray, fCapacity, | |
172 | oldArray, oldLength, | |
173 | locale, &errorCode); | |
174 | } else if(toWhichCase==TO_TITLE) { | |
175 | #if UCONFIG_NO_BREAK_ITERATION | |
176 | errorCode=U_UNSUPPORTED_ERROR; | |
177 | #else | |
178 | fLength = ustr_toTitle(csp, fArray, fCapacity, | |
179 | oldArray, oldLength, | |
180 | cTitleIter, locale, &errorCode); | |
181 | #endif | |
182 | } else { | |
183 | fLength = ustr_foldCase(csp, fArray, fCapacity, | |
184 | oldArray, oldLength, | |
185 | options, | |
186 | &errorCode); | |
187 | } | |
188 | } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(fLength, fLength, FALSE)); | |
189 | ||
190 | #if !UCONFIG_NO_BREAK_ITERATION | |
191 | if(cTitleIter != 0 && titleIter == 0) { | |
192 | ubrk_close(cTitleIter); | |
193 | } | |
194 | #endif | |
195 | ||
196 | if (bufferToDelete) { | |
197 | uprv_free(bufferToDelete); | |
198 | } | |
199 | if(U_FAILURE(errorCode)) { | |
200 | setToBogus(); | |
201 | } | |
202 | return *this; | |
203 | } | |
204 | ||
205 | UnicodeString & | |
206 | UnicodeString::toLower() { | |
207 | return caseMap(0, Locale::getDefault().getName(), 0, TO_LOWER); | |
208 | } | |
209 | ||
210 | UnicodeString & | |
211 | UnicodeString::toLower(const Locale &locale) { | |
212 | return caseMap(0, locale.getName(), 0, TO_LOWER); | |
213 | } | |
214 | ||
215 | UnicodeString & | |
216 | UnicodeString::toUpper() { | |
217 | return caseMap(0, Locale::getDefault().getName(), 0, TO_UPPER); | |
218 | } | |
219 | ||
220 | UnicodeString & | |
221 | UnicodeString::toUpper(const Locale &locale) { | |
222 | return caseMap(0, locale.getName(), 0, TO_UPPER); | |
223 | } | |
224 | ||
225 | #if !UCONFIG_NO_BREAK_ITERATION | |
226 | ||
227 | UnicodeString & | |
228 | UnicodeString::toTitle(BreakIterator *titleIter) { | |
229 | return caseMap(titleIter, Locale::getDefault().getName(), 0, TO_TITLE); | |
230 | } | |
231 | ||
232 | UnicodeString & | |
233 | UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) { | |
234 | return caseMap(titleIter, locale.getName(), 0, TO_TITLE); | |
235 | } | |
236 | ||
237 | #endif | |
238 | ||
239 | UnicodeString & | |
240 | UnicodeString::foldCase(uint32_t options) { | |
241 | /* The Locale parameter isn't used. Use "" instead. */ | |
242 | return caseMap(0, "", options, FOLD_CASE); | |
243 | } | |
244 | ||
245 | U_NAMESPACE_END | |
73c04bcf A |
246 | |
247 | // Defined here to reduce dependencies on break iterator | |
248 | U_CAPI int32_t U_EXPORT2 | |
249 | uhash_hashCaselessUnicodeString(const UHashTok key) { | |
250 | U_NAMESPACE_USE | |
251 | const UnicodeString *str = (const UnicodeString*) key.pointer; | |
252 | if (str == NULL) { | |
253 | return 0; | |
254 | } | |
255 | // Inefficient; a better way would be to have a hash function in | |
256 | // UnicodeString that does case folding on the fly. | |
257 | UnicodeString copy(*str); | |
258 | return copy.foldCase().hashCode(); | |
259 | } | |
260 | ||
261 | // Defined here to reduce dependencies on break iterator | |
262 | U_CAPI UBool U_EXPORT2 | |
263 | uhash_compareCaselessUnicodeString(const UHashTok key1, const UHashTok key2) { | |
264 | U_NAMESPACE_USE | |
265 | const UnicodeString *str1 = (const UnicodeString*) key1.pointer; | |
266 | const UnicodeString *str2 = (const UnicodeString*) key2.pointer; | |
267 | if (str1 == str2) { | |
268 | return TRUE; | |
269 | } | |
270 | if (str1 == NULL || str2 == NULL) { | |
271 | return FALSE; | |
272 | } | |
273 | return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0; | |
274 | } | |
275 |