]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/unistr_case.cpp
ICU-59131.0.1.tar.gz
[apple/icu.git] / icuSources / common / unistr_case.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 1999-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: unistr_case.cpp
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:2
14 *
15 * created on: 2004aug19
16 * created by: Markus W. Scherer
17 *
18 * Case-mapping functions moved here from unistr.cpp
19 */
20
21 #include "unicode/utypes.h"
22 #include "unicode/casemap.h"
23 #include "unicode/edits.h"
24 #include "unicode/putil.h"
25 #include "cstring.h"
26 #include "cmemory.h"
27 #include "unicode/ustring.h"
28 #include "unicode/unistr.h"
29 #include "unicode/uchar.h"
30 #include "uassert.h"
31 #include "ucasemap_imp.h"
32 #include "uelement.h"
33
34 U_NAMESPACE_BEGIN
35
36 //========================================
37 // Read-only implementation
38 //========================================
39
40 int8_t
41 UnicodeString::doCaseCompare(int32_t start,
42 int32_t length,
43 const UChar *srcChars,
44 int32_t srcStart,
45 int32_t srcLength,
46 uint32_t options) const
47 {
48 // compare illegal string values
49 // treat const UChar *srcChars==NULL as an empty string
50 if(isBogus()) {
51 return -1;
52 }
53
54 // pin indices to legal values
55 pinIndices(start, length);
56
57 if(srcChars == NULL) {
58 srcStart = srcLength = 0;
59 }
60
61 // get the correct pointer
62 const UChar *chars = getArrayStart();
63
64 chars += start;
65 if(srcStart!=0) {
66 srcChars += srcStart;
67 }
68
69 if(chars != srcChars) {
70 UErrorCode errorCode=U_ZERO_ERROR;
71 int32_t result=u_strcmpFold(chars, length, srcChars, srcLength,
72 options|U_COMPARE_IGNORE_CASE, &errorCode);
73 if(result!=0) {
74 return (int8_t)(result >> 24 | 1);
75 }
76 } else {
77 // get the srcLength if necessary
78 if(srcLength < 0) {
79 srcLength = u_strlen(srcChars + srcStart);
80 }
81 if(length != srcLength) {
82 return (int8_t)((length - srcLength) >> 24 | 1);
83 }
84 }
85 return 0;
86 }
87
88 //========================================
89 // Write implementation
90 //========================================
91
92 UnicodeString &
93 UnicodeString::caseMap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
94 UStringCaseMapper *stringCaseMapper) {
95 if(isEmpty() || !isWritable()) {
96 // nothing to do
97 return *this;
98 }
99
100 UChar oldBuffer[2 * US_STACKBUF_SIZE];
101 UChar *oldArray;
102 int32_t oldLength = length();
103 int32_t newLength;
104 UBool writable = isBufferWritable();
105 UErrorCode errorCode = U_ZERO_ERROR;
106
107 // Try to avoid heap-allocating a new character array for this string.
108 if (writable ? oldLength <= UPRV_LENGTHOF(oldBuffer) : oldLength < US_STACKBUF_SIZE) {
109 // Short string: Copy the contents into a temporary buffer and
110 // case-map back into the current array, or into the stack buffer.
111 UChar *buffer = getArrayStart();
112 int32_t capacity;
113 oldArray = oldBuffer;
114 u_memcpy(oldBuffer, buffer, oldLength);
115 if (writable) {
116 capacity = getCapacity();
117 } else {
118 // Switch from the read-only alias or shared heap buffer to the stack buffer.
119 if (!cloneArrayIfNeeded(US_STACKBUF_SIZE, US_STACKBUF_SIZE, /* doCopyArray= */ FALSE)) {
120 return *this;
121 }
122 U_ASSERT(fUnion.fFields.fLengthAndFlags & kUsingStackBuffer);
123 buffer = fUnion.fStackFields.fBuffer;
124 capacity = US_STACKBUF_SIZE;
125 }
126 newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
127 buffer, capacity,
128 oldArray, oldLength, NULL, errorCode);
129 if (U_SUCCESS(errorCode)) {
130 setLength(newLength);
131 return *this;
132 } else if (errorCode == U_BUFFER_OVERFLOW_ERROR) {
133 // common overflow handling below
134 } else {
135 setToBogus();
136 return *this;
137 }
138 } else {
139 // Longer string or read-only buffer:
140 // Collect only changes and then apply them to this string.
141 // Case mapping often changes only small parts of a string,
142 // and often does not change its length.
143 oldArray = getArrayStart();
144 Edits edits;
145 UChar replacementChars[200];
146 stringCaseMapper(caseLocale, options | UCASEMAP_OMIT_UNCHANGED_TEXT, UCASEMAP_BREAK_ITERATOR
147 replacementChars, UPRV_LENGTHOF(replacementChars),
148 oldArray, oldLength, &edits, errorCode);
149 if (U_SUCCESS(errorCode)) {
150 // Grow the buffer at most once, not for multiple doReplace() calls.
151 newLength = oldLength + edits.lengthDelta();
152 if (newLength > oldLength && !cloneArrayIfNeeded(newLength, newLength)) {
153 return *this;
154 }
155 for (Edits::Iterator ei = edits.getCoarseChangesIterator(); ei.next(errorCode);) {
156 doReplace(ei.destinationIndex(), ei.oldLength(),
157 replacementChars, ei.replacementIndex(), ei.newLength());
158 }
159 if (U_FAILURE(errorCode)) {
160 setToBogus();
161 }
162 return *this;
163 } else if (errorCode == U_BUFFER_OVERFLOW_ERROR) {
164 // common overflow handling below
165 newLength = oldLength + edits.lengthDelta();
166 } else {
167 setToBogus();
168 return *this;
169 }
170 }
171
172 // Handle buffer overflow, newLength is known.
173 // We need to allocate a new buffer for the internal string case mapping function.
174 // This is very similar to how doReplace() keeps the old array pointer
175 // and deletes the old array itself after it is done.
176 // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
177 int32_t *bufferToDelete = 0;
178 if (!cloneArrayIfNeeded(newLength, newLength, FALSE, &bufferToDelete, TRUE)) {
179 return *this;
180 }
181 errorCode = U_ZERO_ERROR;
182 newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
183 getArrayStart(), getCapacity(),
184 oldArray, oldLength, NULL, errorCode);
185 if (bufferToDelete) {
186 uprv_free(bufferToDelete);
187 }
188 if (U_SUCCESS(errorCode)) {
189 setLength(newLength);
190 } else {
191 setToBogus();
192 }
193 return *this;
194 }
195
196 UnicodeString &
197 UnicodeString::foldCase(uint32_t options) {
198 return caseMap(UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalFold);
199 }
200
201 U_NAMESPACE_END
202
203 // Defined here to reduce dependencies on break iterator
204 U_CAPI int32_t U_EXPORT2
205 uhash_hashCaselessUnicodeString(const UElement key) {
206 U_NAMESPACE_USE
207 const UnicodeString *str = (const UnicodeString*) key.pointer;
208 if (str == NULL) {
209 return 0;
210 }
211 // Inefficient; a better way would be to have a hash function in
212 // UnicodeString that does case folding on the fly.
213 UnicodeString copy(*str);
214 return copy.foldCase().hashCode();
215 }
216
217 // Defined here to reduce dependencies on break iterator
218 U_CAPI UBool U_EXPORT2
219 uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2) {
220 U_NAMESPACE_USE
221 const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
222 const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
223 if (str1 == str2) {
224 return TRUE;
225 }
226 if (str1 == NULL || str2 == NULL) {
227 return FALSE;
228 }
229 return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
230 }