]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/unistr_case.cpp
ICU-400.37.tar.gz
[apple/icu.git] / icuSources / common / unistr_case.cpp
CommitLineData
374ca955
A
1/*
2*******************************************************************************
3*
46f4442e 4* Copyright (C) 1999-2007, International Business Machines
374ca955
A
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8* file name: unistr_case.cpp
9* encoding: US-ASCII
10* tab size: 8 (not used)
11* indentation:2
12*
13* created on: 2004aug19
14* created by: Markus W. Scherer
15*
16* Case-mapping functions moved here from unistr.cpp
17*/
18
19#include "unicode/utypes.h"
20#include "unicode/putil.h"
21#include "unicode/locid.h"
22#include "cstring.h"
23#include "cmemory.h"
24#include "unicode/ustring.h"
25#include "unicode/unistr.h"
26#include "unicode/uchar.h"
27#include "unicode/ubrk.h"
28#include "ustr_imp.h"
29#include "unormimp.h"
73c04bcf 30#include "uhash.h"
374ca955
A
31
32U_NAMESPACE_BEGIN
33
34//========================================
35// Read-only implementation
36//========================================
37
38int8_t
39UnicodeString::doCaseCompare(int32_t start,
40 int32_t length,
41 const UChar *srcChars,
42 int32_t srcStart,
43 int32_t srcLength,
44 uint32_t options) const
45{
46 // compare illegal string values
47 // treat const UChar *srcChars==NULL as an empty string
48 if(isBogus()) {
49 return -1;
50 }
51
52 // pin indices to legal values
53 pinIndices(start, length);
54
55 if(srcChars == NULL) {
56 srcStart = srcLength = 0;
57 }
58
59 // get the correct pointer
60 const UChar *chars = getArrayStart();
61
62 chars += start;
63 srcChars += srcStart;
64
65 if(chars != srcChars) {
66 UErrorCode errorCode=U_ZERO_ERROR;
67 int32_t result=u_strcmpFold(chars, length, srcChars, srcLength,
68 options|U_COMPARE_IGNORE_CASE, &errorCode);
69 if(result!=0) {
70 return (int8_t)(result >> 24 | 1);
71 }
72 } else {
73 // get the srcLength if necessary
74 if(srcLength < 0) {
75 srcLength = u_strlen(srcChars + srcStart);
76 }
77 if(length != srcLength) {
78 return (int8_t)((length - srcLength) >> 24 | 1);
79 }
80 }
81 return 0;
82}
83
84//========================================
85// Write implementation
86//========================================
87
88/*
89 * Implement argument checking and buffer handling
90 * for string case mapping as a common function.
91 */
374ca955
A
92
93UnicodeString &
94UnicodeString::caseMap(BreakIterator *titleIter,
95 const char *locale,
96 uint32_t options,
97 int32_t toWhichCase) {
46f4442e 98 if(isEmpty() || !isWritable()) {
374ca955
A
99 // nothing to do
100 return *this;
101 }
102
103 UErrorCode errorCode;
104
105 errorCode = U_ZERO_ERROR;
73c04bcf 106 const UCaseProps *csp=ucase_getSingleton(&errorCode);
374ca955
A
107 if(U_FAILURE(errorCode)) {
108 setToBogus();
109 return *this;
110 }
111
112 // We need to allocate a new buffer for the internal string case mapping function.
46f4442e 113 // This is very similar to how doReplace() keeps the old array pointer
374ca955
A
114 // and deletes the old array itself after it is done.
115 // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
46f4442e
A
116 UChar oldStackBuffer[US_STACKBUF_SIZE];
117 UChar *oldArray;
118 int32_t oldLength;
119
120 if(fFlags&kUsingStackBuffer) {
121 // copy the stack buffer contents because it will be overwritten
122 u_memcpy(oldStackBuffer, fUnion.fStackBuffer, fShortLength);
123 oldArray = oldStackBuffer;
124 oldLength = fShortLength;
125 } else {
126 oldArray = getArrayStart();
127 oldLength = length();
128 }
374ca955 129
374ca955 130 int32_t capacity;
46f4442e
A
131 if(oldLength <= US_STACKBUF_SIZE) {
132 capacity = US_STACKBUF_SIZE;
374ca955 133 } else {
46f4442e 134 capacity = oldLength + 20;
374ca955 135 }
46f4442e 136 int32_t *bufferToDelete = 0;
374ca955
A
137 if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) {
138 return *this;
139 }
140
374ca955 141 // Case-map, and if the result is too long, then reallocate and repeat.
46f4442e 142 int32_t newLength;
374ca955
A
143 do {
144 errorCode = U_ZERO_ERROR;
145 if(toWhichCase==TO_LOWER) {
46f4442e
A
146 newLength = ustr_toLower(csp, getArrayStart(), getCapacity(),
147 oldArray, oldLength,
148 locale, &errorCode);
374ca955 149 } else if(toWhichCase==TO_UPPER) {
46f4442e
A
150 newLength = ustr_toUpper(csp, getArrayStart(), getCapacity(),
151 oldArray, oldLength,
152 locale, &errorCode);
374ca955
A
153 } else if(toWhichCase==TO_TITLE) {
154#if UCONFIG_NO_BREAK_ITERATION
155 errorCode=U_UNSUPPORTED_ERROR;
156#else
46f4442e
A
157 newLength = ustr_toTitle(csp, getArrayStart(), getCapacity(),
158 oldArray, oldLength,
159 (UBreakIterator *)titleIter, locale, options, &errorCode);
374ca955
A
160#endif
161 } else {
46f4442e
A
162 newLength = ustr_foldCase(csp, getArrayStart(), getCapacity(),
163 oldArray, oldLength,
164 options,
165 &errorCode);
374ca955 166 }
46f4442e
A
167 setLength(newLength);
168 } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE));
374ca955
A
169
170 if (bufferToDelete) {
171 uprv_free(bufferToDelete);
172 }
173 if(U_FAILURE(errorCode)) {
174 setToBogus();
175 }
176 return *this;
177}
178
179UnicodeString &
180UnicodeString::toLower() {
181 return caseMap(0, Locale::getDefault().getName(), 0, TO_LOWER);
182}
183
184UnicodeString &
185UnicodeString::toLower(const Locale &locale) {
186 return caseMap(0, locale.getName(), 0, TO_LOWER);
187}
188
189UnicodeString &
190UnicodeString::toUpper() {
191 return caseMap(0, Locale::getDefault().getName(), 0, TO_UPPER);
192}
193
194UnicodeString &
195UnicodeString::toUpper(const Locale &locale) {
196 return caseMap(0, locale.getName(), 0, TO_UPPER);
197}
198
199#if !UCONFIG_NO_BREAK_ITERATION
200
201UnicodeString &
202UnicodeString::toTitle(BreakIterator *titleIter) {
203 return caseMap(titleIter, Locale::getDefault().getName(), 0, TO_TITLE);
204}
205
206UnicodeString &
207UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) {
208 return caseMap(titleIter, locale.getName(), 0, TO_TITLE);
209}
210
46f4442e
A
211UnicodeString &
212UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options) {
213 return caseMap(titleIter, locale.getName(), options, TO_TITLE);
214}
215
374ca955
A
216#endif
217
218UnicodeString &
219UnicodeString::foldCase(uint32_t options) {
220 /* The Locale parameter isn't used. Use "" instead. */
221 return caseMap(0, "", options, FOLD_CASE);
222}
223
224U_NAMESPACE_END
73c04bcf
A
225
226// Defined here to reduce dependencies on break iterator
227U_CAPI int32_t U_EXPORT2
228uhash_hashCaselessUnicodeString(const UHashTok key) {
229 U_NAMESPACE_USE
230 const UnicodeString *str = (const UnicodeString*) key.pointer;
231 if (str == NULL) {
232 return 0;
233 }
234 // Inefficient; a better way would be to have a hash function in
235 // UnicodeString that does case folding on the fly.
236 UnicodeString copy(*str);
237 return copy.foldCase().hashCode();
238}
239
240// Defined here to reduce dependencies on break iterator
241U_CAPI UBool U_EXPORT2
242uhash_compareCaselessUnicodeString(const UHashTok key1, const UHashTok key2) {
243 U_NAMESPACE_USE
244 const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
245 const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
246 if (str1 == str2) {
247 return TRUE;
248 }
249 if (str1 == NULL || str2 == NULL) {
250 return FALSE;
251 }
252 return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
253}
254