]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/unistr_case.cpp
ICU-8.11.4.tar.gz
[apple/icu.git] / icuSources / common / unistr_case.cpp
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1999-2005, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: unistr_case.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:2
12 *
13 * created on: 2004aug19
14 * created by: Markus W. Scherer
15 *
16 * Case-mapping functions moved here from unistr.cpp
17 */
18
19 #include "unicode/utypes.h"
20 #include "unicode/putil.h"
21 #include "unicode/locid.h"
22 #include "cstring.h"
23 #include "cmemory.h"
24 #include "unicode/ustring.h"
25 #include "unicode/unistr.h"
26 #include "unicode/uchar.h"
27 #include "unicode/ubrk.h"
28 #include "ustr_imp.h"
29 #include "unormimp.h"
30 #include "uhash.h"
31
32 U_NAMESPACE_BEGIN
33
34 //========================================
35 // Read-only implementation
36 //========================================
37
38 int8_t
39 UnicodeString::doCaseCompare(int32_t start,
40 int32_t length,
41 const UChar *srcChars,
42 int32_t srcStart,
43 int32_t srcLength,
44 uint32_t options) const
45 {
46 // compare illegal string values
47 // treat const UChar *srcChars==NULL as an empty string
48 if(isBogus()) {
49 return -1;
50 }
51
52 // pin indices to legal values
53 pinIndices(start, length);
54
55 if(srcChars == NULL) {
56 srcStart = srcLength = 0;
57 }
58
59 // get the correct pointer
60 const UChar *chars = getArrayStart();
61
62 chars += start;
63 srcChars += srcStart;
64
65 if(chars != srcChars) {
66 UErrorCode errorCode=U_ZERO_ERROR;
67 int32_t result=u_strcmpFold(chars, length, srcChars, srcLength,
68 options|U_COMPARE_IGNORE_CASE, &errorCode);
69 if(result!=0) {
70 return (int8_t)(result >> 24 | 1);
71 }
72 } else {
73 // get the srcLength if necessary
74 if(srcLength < 0) {
75 srcLength = u_strlen(srcChars + srcStart);
76 }
77 if(length != srcLength) {
78 return (int8_t)((length - srcLength) >> 24 | 1);
79 }
80 }
81 return 0;
82 }
83
84 //========================================
85 // Write implementation
86 //========================================
87
88 /*
89 * Implement argument checking and buffer handling
90 * for string case mapping as a common function.
91 */
// Selector values for the toWhichCase argument of UnicodeString::caseMap().
enum {
  TO_LOWER = 0,   // lowercase mapping
  TO_UPPER = 1,   // uppercase mapping
  TO_TITLE = 2,   // titlecase mapping
  FOLD_CASE = 3   // case folding
};
98
99 UnicodeString &
100 UnicodeString::caseMap(BreakIterator *titleIter,
101 const char *locale,
102 uint32_t options,
103 int32_t toWhichCase) {
104 if(fLength <= 0) {
105 // nothing to do
106 return *this;
107 }
108
109 UErrorCode errorCode;
110
111 errorCode = U_ZERO_ERROR;
112 const UCaseProps *csp=ucase_getSingleton(&errorCode);
113 if(U_FAILURE(errorCode)) {
114 setToBogus();
115 return *this;
116 }
117
118 // We need to allocate a new buffer for the internal string case mapping function.
119 // This is very similar to how doReplace() below keeps the old array pointer
120 // and deletes the old array itself after it is done.
121 // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
122 UChar *oldArray = fArray;
123 int32_t oldLength = fLength;
124 int32_t *bufferToDelete = 0;
125
126 // Make sure that if the string is in fStackBuffer we do not overwrite it!
127 int32_t capacity;
128 if(fLength <= US_STACKBUF_SIZE) {
129 if(fArray == fStackBuffer) {
130 capacity = 2 * US_STACKBUF_SIZE; // make sure that cloneArrayIfNeeded() allocates a new buffer
131 } else {
132 capacity = US_STACKBUF_SIZE;
133 }
134 } else {
135 capacity = fLength + 20;
136 }
137 if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) {
138 return *this;
139 }
140
141 #if !UCONFIG_NO_BREAK_ITERATION
142 // set up the titlecasing break iterator
143 UBreakIterator *cTitleIter = 0;
144
145 if(toWhichCase == TO_TITLE) {
146 errorCode = U_ZERO_ERROR;
147 if(titleIter != 0) {
148 cTitleIter = (UBreakIterator *)titleIter;
149 ubrk_setText(cTitleIter, oldArray, oldLength, &errorCode);
150 } else {
151 cTitleIter = ubrk_open(UBRK_WORD, locale,
152 oldArray, oldLength,
153 &errorCode);
154 }
155 if(U_FAILURE(errorCode)) {
156 uprv_free(bufferToDelete);
157 setToBogus();
158 return *this;
159 }
160 }
161 #endif
162
163 // Case-map, and if the result is too long, then reallocate and repeat.
164 do {
165 errorCode = U_ZERO_ERROR;
166 if(toWhichCase==TO_LOWER) {
167 fLength = ustr_toLower(csp, fArray, fCapacity,
168 oldArray, oldLength,
169 locale, &errorCode);
170 } else if(toWhichCase==TO_UPPER) {
171 fLength = ustr_toUpper(csp, fArray, fCapacity,
172 oldArray, oldLength,
173 locale, &errorCode);
174 } else if(toWhichCase==TO_TITLE) {
175 #if UCONFIG_NO_BREAK_ITERATION
176 errorCode=U_UNSUPPORTED_ERROR;
177 #else
178 fLength = ustr_toTitle(csp, fArray, fCapacity,
179 oldArray, oldLength,
180 cTitleIter, locale, &errorCode);
181 #endif
182 } else {
183 fLength = ustr_foldCase(csp, fArray, fCapacity,
184 oldArray, oldLength,
185 options,
186 &errorCode);
187 }
188 } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(fLength, fLength, FALSE));
189
190 #if !UCONFIG_NO_BREAK_ITERATION
191 if(cTitleIter != 0 && titleIter == 0) {
192 ubrk_close(cTitleIter);
193 }
194 #endif
195
196 if (bufferToDelete) {
197 uprv_free(bufferToDelete);
198 }
199 if(U_FAILURE(errorCode)) {
200 setToBogus();
201 }
202 return *this;
203 }
204
205 UnicodeString &
206 UnicodeString::toLower() {
207 return caseMap(0, Locale::getDefault().getName(), 0, TO_LOWER);
208 }
209
210 UnicodeString &
211 UnicodeString::toLower(const Locale &locale) {
212 return caseMap(0, locale.getName(), 0, TO_LOWER);
213 }
214
215 UnicodeString &
216 UnicodeString::toUpper() {
217 return caseMap(0, Locale::getDefault().getName(), 0, TO_UPPER);
218 }
219
220 UnicodeString &
221 UnicodeString::toUpper(const Locale &locale) {
222 return caseMap(0, locale.getName(), 0, TO_UPPER);
223 }
224
225 #if !UCONFIG_NO_BREAK_ITERATION
226
227 UnicodeString &
228 UnicodeString::toTitle(BreakIterator *titleIter) {
229 return caseMap(titleIter, Locale::getDefault().getName(), 0, TO_TITLE);
230 }
231
232 UnicodeString &
233 UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) {
234 return caseMap(titleIter, locale.getName(), 0, TO_TITLE);
235 }
236
237 #endif
238
239 UnicodeString &
240 UnicodeString::foldCase(uint32_t options) {
241 /* The Locale parameter isn't used. Use "" instead. */
242 return caseMap(0, "", options, FOLD_CASE);
243 }
244
245 U_NAMESPACE_END
246
247 // Defined here to reduce dependencies on break iterator
248 U_CAPI int32_t U_EXPORT2
249 uhash_hashCaselessUnicodeString(const UHashTok key) {
250 U_NAMESPACE_USE
251 const UnicodeString *str = (const UnicodeString*) key.pointer;
252 if (str == NULL) {
253 return 0;
254 }
255 // Inefficient; a better way would be to have a hash function in
256 // UnicodeString that does case folding on the fly.
257 UnicodeString copy(*str);
258 return copy.foldCase().hashCode();
259 }
260
261 // Defined here to reduce dependencies on break iterator
262 U_CAPI UBool U_EXPORT2
263 uhash_compareCaselessUnicodeString(const UHashTok key1, const UHashTok key2) {
264 U_NAMESPACE_USE
265 const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
266 const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
267 if (str1 == str2) {
268 return TRUE;
269 }
270 if (str1 == NULL || str2 == NULL) {
271 return FALSE;
272 }
273 return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
274 }
275