]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/unistr_case.cpp
ICU-8.11.2.tar.gz
[apple/icu.git] / icuSources / common / unistr_case.cpp
CommitLineData
374ca955
A
1/*
2*******************************************************************************
3*
73c04bcf 4* Copyright (C) 1999-2005, International Business Machines
374ca955
A
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8* file name: unistr_case.cpp
9* encoding: US-ASCII
10* tab size: 8 (not used)
11* indentation:2
12*
13* created on: 2004aug19
14* created by: Markus W. Scherer
15*
16* Case-mapping functions moved here from unistr.cpp
17*/
18
19#include "unicode/utypes.h"
20#include "unicode/putil.h"
21#include "unicode/locid.h"
22#include "cstring.h"
23#include "cmemory.h"
24#include "unicode/ustring.h"
25#include "unicode/unistr.h"
26#include "unicode/uchar.h"
27#include "unicode/ubrk.h"
28#include "ustr_imp.h"
29#include "unormimp.h"
73c04bcf 30#include "uhash.h"
374ca955
A
31
32U_NAMESPACE_BEGIN
33
34//========================================
35// Read-only implementation
36//========================================
37
38int8_t
39UnicodeString::doCaseCompare(int32_t start,
40 int32_t length,
41 const UChar *srcChars,
42 int32_t srcStart,
43 int32_t srcLength,
44 uint32_t options) const
45{
46 // compare illegal string values
47 // treat const UChar *srcChars==NULL as an empty string
48 if(isBogus()) {
49 return -1;
50 }
51
52 // pin indices to legal values
53 pinIndices(start, length);
54
55 if(srcChars == NULL) {
56 srcStart = srcLength = 0;
57 }
58
59 // get the correct pointer
60 const UChar *chars = getArrayStart();
61
62 chars += start;
63 srcChars += srcStart;
64
65 if(chars != srcChars) {
66 UErrorCode errorCode=U_ZERO_ERROR;
67 int32_t result=u_strcmpFold(chars, length, srcChars, srcLength,
68 options|U_COMPARE_IGNORE_CASE, &errorCode);
69 if(result!=0) {
70 return (int8_t)(result >> 24 | 1);
71 }
72 } else {
73 // get the srcLength if necessary
74 if(srcLength < 0) {
75 srcLength = u_strlen(srcChars + srcStart);
76 }
77 if(length != srcLength) {
78 return (int8_t)((length - srcLength) >> 24 | 1);
79 }
80 }
81 return 0;
82}
83
84//========================================
85// Write implementation
86//========================================
87
88/*
89 * Implement argument checking and buffer handling
90 * for string case mapping as a common function.
91 */
/* Selector values for the toWhichCase argument of UnicodeString::caseMap(). */
enum {
  TO_LOWER = 0,   /* lowercase the string */
  TO_UPPER = 1,   /* uppercase the string */
  TO_TITLE = 2,   /* titlecase the string */
  FOLD_CASE = 3   /* case-fold the string */
};
98
99UnicodeString &
100UnicodeString::caseMap(BreakIterator *titleIter,
101 const char *locale,
102 uint32_t options,
103 int32_t toWhichCase) {
104 if(fLength <= 0) {
105 // nothing to do
106 return *this;
107 }
108
109 UErrorCode errorCode;
110
111 errorCode = U_ZERO_ERROR;
73c04bcf 112 const UCaseProps *csp=ucase_getSingleton(&errorCode);
374ca955
A
113 if(U_FAILURE(errorCode)) {
114 setToBogus();
115 return *this;
116 }
117
118 // We need to allocate a new buffer for the internal string case mapping function.
119 // This is very similar to how doReplace() below keeps the old array pointer
120 // and deletes the old array itself after it is done.
121 // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
122 UChar *oldArray = fArray;
123 int32_t oldLength = fLength;
124 int32_t *bufferToDelete = 0;
125
126 // Make sure that if the string is in fStackBuffer we do not overwrite it!
127 int32_t capacity;
128 if(fLength <= US_STACKBUF_SIZE) {
129 if(fArray == fStackBuffer) {
130 capacity = 2 * US_STACKBUF_SIZE; // make sure that cloneArrayIfNeeded() allocates a new buffer
131 } else {
132 capacity = US_STACKBUF_SIZE;
133 }
134 } else {
135 capacity = fLength + 20;
136 }
137 if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) {
138 return *this;
139 }
140
141#if !UCONFIG_NO_BREAK_ITERATION
142 // set up the titlecasing break iterator
143 UBreakIterator *cTitleIter = 0;
144
145 if(toWhichCase == TO_TITLE) {
146 errorCode = U_ZERO_ERROR;
147 if(titleIter != 0) {
148 cTitleIter = (UBreakIterator *)titleIter;
149 ubrk_setText(cTitleIter, oldArray, oldLength, &errorCode);
150 } else {
151 cTitleIter = ubrk_open(UBRK_WORD, locale,
152 oldArray, oldLength,
153 &errorCode);
154 }
155 if(U_FAILURE(errorCode)) {
156 uprv_free(bufferToDelete);
157 setToBogus();
158 return *this;
159 }
160 }
161#endif
162
163 // Case-map, and if the result is too long, then reallocate and repeat.
164 do {
165 errorCode = U_ZERO_ERROR;
166 if(toWhichCase==TO_LOWER) {
167 fLength = ustr_toLower(csp, fArray, fCapacity,
168 oldArray, oldLength,
169 locale, &errorCode);
170 } else if(toWhichCase==TO_UPPER) {
171 fLength = ustr_toUpper(csp, fArray, fCapacity,
172 oldArray, oldLength,
173 locale, &errorCode);
174 } else if(toWhichCase==TO_TITLE) {
175#if UCONFIG_NO_BREAK_ITERATION
176 errorCode=U_UNSUPPORTED_ERROR;
177#else
178 fLength = ustr_toTitle(csp, fArray, fCapacity,
179 oldArray, oldLength,
180 cTitleIter, locale, &errorCode);
181#endif
182 } else {
183 fLength = ustr_foldCase(csp, fArray, fCapacity,
184 oldArray, oldLength,
185 options,
186 &errorCode);
187 }
188 } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(fLength, fLength, FALSE));
189
190#if !UCONFIG_NO_BREAK_ITERATION
191 if(cTitleIter != 0 && titleIter == 0) {
192 ubrk_close(cTitleIter);
193 }
194#endif
195
196 if (bufferToDelete) {
197 uprv_free(bufferToDelete);
198 }
199 if(U_FAILURE(errorCode)) {
200 setToBogus();
201 }
202 return *this;
203}
204
205UnicodeString &
206UnicodeString::toLower() {
207 return caseMap(0, Locale::getDefault().getName(), 0, TO_LOWER);
208}
209
210UnicodeString &
211UnicodeString::toLower(const Locale &locale) {
212 return caseMap(0, locale.getName(), 0, TO_LOWER);
213}
214
215UnicodeString &
216UnicodeString::toUpper() {
217 return caseMap(0, Locale::getDefault().getName(), 0, TO_UPPER);
218}
219
220UnicodeString &
221UnicodeString::toUpper(const Locale &locale) {
222 return caseMap(0, locale.getName(), 0, TO_UPPER);
223}
224
225#if !UCONFIG_NO_BREAK_ITERATION
226
227UnicodeString &
228UnicodeString::toTitle(BreakIterator *titleIter) {
229 return caseMap(titleIter, Locale::getDefault().getName(), 0, TO_TITLE);
230}
231
232UnicodeString &
233UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) {
234 return caseMap(titleIter, locale.getName(), 0, TO_TITLE);
235}
236
237#endif
238
239UnicodeString &
240UnicodeString::foldCase(uint32_t options) {
241 /* The Locale parameter isn't used. Use "" instead. */
242 return caseMap(0, "", options, FOLD_CASE);
243}
244
245U_NAMESPACE_END
73c04bcf
A
246
247// Defined here to reduce dependencies on break iterator
248U_CAPI int32_t U_EXPORT2
249uhash_hashCaselessUnicodeString(const UHashTok key) {
250 U_NAMESPACE_USE
251 const UnicodeString *str = (const UnicodeString*) key.pointer;
252 if (str == NULL) {
253 return 0;
254 }
255 // Inefficient; a better way would be to have a hash function in
256 // UnicodeString that does case folding on the fly.
257 UnicodeString copy(*str);
258 return copy.foldCase().hashCode();
259}
260
261// Defined here to reduce dependencies on break iterator
262U_CAPI UBool U_EXPORT2
263uhash_compareCaselessUnicodeString(const UHashTok key1, const UHashTok key2) {
264 U_NAMESPACE_USE
265 const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
266 const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
267 if (str1 == str2) {
268 return TRUE;
269 }
270 if (str1 == NULL || str2 == NULL) {
271 return FALSE;
272 }
273 return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
274}
275