]>
Commit | Line | Data |
---|---|---|
374ca955 A |
1 | /* |
2 | ******************************************************************************* | |
3 | * | |
46f4442e | 4 | * Copyright (C) 1999-2007, International Business Machines |
374ca955 A |
5 | * Corporation and others. All Rights Reserved. |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: unistr_case.cpp | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:2 | |
12 | * | |
13 | * created on: 2004aug19 | |
14 | * created by: Markus W. Scherer | |
15 | * | |
16 | * Case-mapping functions moved here from unistr.cpp | |
17 | */ | |
18 | ||
19 | #include "unicode/utypes.h" | |
20 | #include "unicode/putil.h" | |
21 | #include "unicode/locid.h" | |
22 | #include "cstring.h" | |
23 | #include "cmemory.h" | |
24 | #include "unicode/ustring.h" | |
25 | #include "unicode/unistr.h" | |
26 | #include "unicode/uchar.h" | |
27 | #include "unicode/ubrk.h" | |
28 | #include "ustr_imp.h" | |
29 | #include "unormimp.h" | |
73c04bcf | 30 | #include "uhash.h" |
374ca955 A |
31 | |
32 | U_NAMESPACE_BEGIN | |
33 | ||
34 | //======================================== | |
35 | // Read-only implementation | |
36 | //======================================== | |
37 | ||
38 | int8_t | |
39 | UnicodeString::doCaseCompare(int32_t start, | |
40 | int32_t length, | |
41 | const UChar *srcChars, | |
42 | int32_t srcStart, | |
43 | int32_t srcLength, | |
44 | uint32_t options) const | |
45 | { | |
46 | // compare illegal string values | |
47 | // treat const UChar *srcChars==NULL as an empty string | |
48 | if(isBogus()) { | |
49 | return -1; | |
50 | } | |
51 | ||
52 | // pin indices to legal values | |
53 | pinIndices(start, length); | |
54 | ||
55 | if(srcChars == NULL) { | |
56 | srcStart = srcLength = 0; | |
57 | } | |
58 | ||
59 | // get the correct pointer | |
60 | const UChar *chars = getArrayStart(); | |
61 | ||
62 | chars += start; | |
63 | srcChars += srcStart; | |
64 | ||
65 | if(chars != srcChars) { | |
66 | UErrorCode errorCode=U_ZERO_ERROR; | |
67 | int32_t result=u_strcmpFold(chars, length, srcChars, srcLength, | |
68 | options|U_COMPARE_IGNORE_CASE, &errorCode); | |
69 | if(result!=0) { | |
70 | return (int8_t)(result >> 24 | 1); | |
71 | } | |
72 | } else { | |
73 | // get the srcLength if necessary | |
74 | if(srcLength < 0) { | |
75 | srcLength = u_strlen(srcChars + srcStart); | |
76 | } | |
77 | if(length != srcLength) { | |
78 | return (int8_t)((length - srcLength) >> 24 | 1); | |
79 | } | |
80 | } | |
81 | return 0; | |
82 | } | |
83 | ||
84 | //======================================== | |
85 | // Write implementation | |
86 | //======================================== | |
87 | ||
88 | /* | |
89 | * Implement argument checking and buffer handling | |
90 | * for string case mapping as a common function. | |
91 | */ | |
374ca955 A |
92 | |
93 | UnicodeString & | |
94 | UnicodeString::caseMap(BreakIterator *titleIter, | |
95 | const char *locale, | |
96 | uint32_t options, | |
97 | int32_t toWhichCase) { | |
46f4442e | 98 | if(isEmpty() || !isWritable()) { |
374ca955 A |
99 | // nothing to do |
100 | return *this; | |
101 | } | |
102 | ||
103 | UErrorCode errorCode; | |
104 | ||
105 | errorCode = U_ZERO_ERROR; | |
73c04bcf | 106 | const UCaseProps *csp=ucase_getSingleton(&errorCode); |
374ca955 A |
107 | if(U_FAILURE(errorCode)) { |
108 | setToBogus(); | |
109 | return *this; | |
110 | } | |
111 | ||
112 | // We need to allocate a new buffer for the internal string case mapping function. | |
46f4442e | 113 | // This is very similar to how doReplace() keeps the old array pointer |
374ca955 A |
114 | // and deletes the old array itself after it is done. |
115 | // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array. | |
46f4442e A |
116 | UChar oldStackBuffer[US_STACKBUF_SIZE]; |
117 | UChar *oldArray; | |
118 | int32_t oldLength; | |
119 | ||
120 | if(fFlags&kUsingStackBuffer) { | |
121 | // copy the stack buffer contents because it will be overwritten | |
122 | u_memcpy(oldStackBuffer, fUnion.fStackBuffer, fShortLength); | |
123 | oldArray = oldStackBuffer; | |
124 | oldLength = fShortLength; | |
125 | } else { | |
126 | oldArray = getArrayStart(); | |
127 | oldLength = length(); | |
128 | } | |
374ca955 | 129 | |
374ca955 | 130 | int32_t capacity; |
46f4442e A |
131 | if(oldLength <= US_STACKBUF_SIZE) { |
132 | capacity = US_STACKBUF_SIZE; | |
374ca955 | 133 | } else { |
46f4442e | 134 | capacity = oldLength + 20; |
374ca955 | 135 | } |
46f4442e | 136 | int32_t *bufferToDelete = 0; |
374ca955 A |
137 | if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) { |
138 | return *this; | |
139 | } | |
140 | ||
374ca955 | 141 | // Case-map, and if the result is too long, then reallocate and repeat. |
46f4442e | 142 | int32_t newLength; |
374ca955 A |
143 | do { |
144 | errorCode = U_ZERO_ERROR; | |
145 | if(toWhichCase==TO_LOWER) { | |
46f4442e A |
146 | newLength = ustr_toLower(csp, getArrayStart(), getCapacity(), |
147 | oldArray, oldLength, | |
148 | locale, &errorCode); | |
374ca955 | 149 | } else if(toWhichCase==TO_UPPER) { |
46f4442e A |
150 | newLength = ustr_toUpper(csp, getArrayStart(), getCapacity(), |
151 | oldArray, oldLength, | |
152 | locale, &errorCode); | |
374ca955 A |
153 | } else if(toWhichCase==TO_TITLE) { |
154 | #if UCONFIG_NO_BREAK_ITERATION | |
155 | errorCode=U_UNSUPPORTED_ERROR; | |
156 | #else | |
46f4442e A |
157 | newLength = ustr_toTitle(csp, getArrayStart(), getCapacity(), |
158 | oldArray, oldLength, | |
159 | (UBreakIterator *)titleIter, locale, options, &errorCode); | |
374ca955 A |
160 | #endif |
161 | } else { | |
46f4442e A |
162 | newLength = ustr_foldCase(csp, getArrayStart(), getCapacity(), |
163 | oldArray, oldLength, | |
164 | options, | |
165 | &errorCode); | |
374ca955 | 166 | } |
46f4442e A |
167 | setLength(newLength); |
168 | } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE)); | |
374ca955 A |
169 | |
170 | if (bufferToDelete) { | |
171 | uprv_free(bufferToDelete); | |
172 | } | |
173 | if(U_FAILURE(errorCode)) { | |
174 | setToBogus(); | |
175 | } | |
176 | return *this; | |
177 | } | |
178 | ||
179 | UnicodeString & | |
180 | UnicodeString::toLower() { | |
181 | return caseMap(0, Locale::getDefault().getName(), 0, TO_LOWER); | |
182 | } | |
183 | ||
184 | UnicodeString & | |
185 | UnicodeString::toLower(const Locale &locale) { | |
186 | return caseMap(0, locale.getName(), 0, TO_LOWER); | |
187 | } | |
188 | ||
189 | UnicodeString & | |
190 | UnicodeString::toUpper() { | |
191 | return caseMap(0, Locale::getDefault().getName(), 0, TO_UPPER); | |
192 | } | |
193 | ||
194 | UnicodeString & | |
195 | UnicodeString::toUpper(const Locale &locale) { | |
196 | return caseMap(0, locale.getName(), 0, TO_UPPER); | |
197 | } | |
198 | ||
199 | #if !UCONFIG_NO_BREAK_ITERATION | |
200 | ||
201 | UnicodeString & | |
202 | UnicodeString::toTitle(BreakIterator *titleIter) { | |
203 | return caseMap(titleIter, Locale::getDefault().getName(), 0, TO_TITLE); | |
204 | } | |
205 | ||
206 | UnicodeString & | |
207 | UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) { | |
208 | return caseMap(titleIter, locale.getName(), 0, TO_TITLE); | |
209 | } | |
210 | ||
46f4442e A |
211 | UnicodeString & |
212 | UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options) { | |
213 | return caseMap(titleIter, locale.getName(), options, TO_TITLE); | |
214 | } | |
215 | ||
374ca955 A |
216 | #endif |
217 | ||
218 | UnicodeString & | |
219 | UnicodeString::foldCase(uint32_t options) { | |
220 | /* The Locale parameter isn't used. Use "" instead. */ | |
221 | return caseMap(0, "", options, FOLD_CASE); | |
222 | } | |
223 | ||
224 | U_NAMESPACE_END | |
73c04bcf A |
225 | |
226 | // Defined here to reduce dependencies on break iterator | |
227 | U_CAPI int32_t U_EXPORT2 | |
228 | uhash_hashCaselessUnicodeString(const UHashTok key) { | |
229 | U_NAMESPACE_USE | |
230 | const UnicodeString *str = (const UnicodeString*) key.pointer; | |
231 | if (str == NULL) { | |
232 | return 0; | |
233 | } | |
234 | // Inefficient; a better way would be to have a hash function in | |
235 | // UnicodeString that does case folding on the fly. | |
236 | UnicodeString copy(*str); | |
237 | return copy.foldCase().hashCode(); | |
238 | } | |
239 | ||
240 | // Defined here to reduce dependencies on break iterator | |
241 | U_CAPI UBool U_EXPORT2 | |
242 | uhash_compareCaselessUnicodeString(const UHashTok key1, const UHashTok key2) { | |
243 | U_NAMESPACE_USE | |
244 | const UnicodeString *str1 = (const UnicodeString*) key1.pointer; | |
245 | const UnicodeString *str2 = (const UnicodeString*) key2.pointer; | |
246 | if (str1 == str2) { | |
247 | return TRUE; | |
248 | } | |
249 | if (str1 == NULL || str2 == NULL) { | |
250 | return FALSE; | |
251 | } | |
252 | return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0; | |
253 | } | |
254 |