]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
374ca955 A |
3 | /* |
4 | ******************************************************************************* | |
5 | * | |
b331163b | 6 | * Copyright (C) 1999-2014, International Business Machines |
374ca955 A |
7 | * Corporation and others. All Rights Reserved. |
8 | * | |
9 | ******************************************************************************* | |
10 | * file name: unistr_case.cpp | |
f3c0d7a5 | 11 | * encoding: UTF-8 |
374ca955 A |
12 | * tab size: 8 (not used) |
13 | * indentation:2 | |
14 | * | |
15 | * created on: 2004aug19 | |
16 | * created by: Markus W. Scherer | |
17 | * | |
18 | * Case-mapping functions moved here from unistr.cpp | |
19 | */ | |
20 | ||
21 | #include "unicode/utypes.h" | |
f3c0d7a5 A |
22 | #include "unicode/casemap.h" |
23 | #include "unicode/edits.h" | |
374ca955 | 24 | #include "unicode/putil.h" |
374ca955 A |
25 | #include "cstring.h" |
26 | #include "cmemory.h" | |
27 | #include "unicode/ustring.h" | |
28 | #include "unicode/unistr.h" | |
29 | #include "unicode/uchar.h" | |
f3c0d7a5 A |
30 | #include "uassert.h" |
31 | #include "ucasemap_imp.h" | |
4388f060 | 32 | #include "uelement.h" |
374ca955 A |
33 | |
34 | U_NAMESPACE_BEGIN | |
35 | ||
36 | //======================================== | |
37 | // Read-only implementation | |
38 | //======================================== | |
39 | ||
40 | int8_t | |
41 | UnicodeString::doCaseCompare(int32_t start, | |
42 | int32_t length, | |
43 | const UChar *srcChars, | |
44 | int32_t srcStart, | |
45 | int32_t srcLength, | |
46 | uint32_t options) const | |
47 | { | |
48 | // compare illegal string values | |
49 | // treat const UChar *srcChars==NULL as an empty string | |
50 | if(isBogus()) { | |
51 | return -1; | |
52 | } | |
53 | ||
54 | // pin indices to legal values | |
55 | pinIndices(start, length); | |
56 | ||
57 | if(srcChars == NULL) { | |
58 | srcStart = srcLength = 0; | |
59 | } | |
60 | ||
61 | // get the correct pointer | |
62 | const UChar *chars = getArrayStart(); | |
63 | ||
64 | chars += start; | |
4388f060 A |
65 | if(srcStart!=0) { |
66 | srcChars += srcStart; | |
67 | } | |
374ca955 A |
68 | |
69 | if(chars != srcChars) { | |
70 | UErrorCode errorCode=U_ZERO_ERROR; | |
71 | int32_t result=u_strcmpFold(chars, length, srcChars, srcLength, | |
72 | options|U_COMPARE_IGNORE_CASE, &errorCode); | |
73 | if(result!=0) { | |
74 | return (int8_t)(result >> 24 | 1); | |
75 | } | |
76 | } else { | |
77 | // get the srcLength if necessary | |
78 | if(srcLength < 0) { | |
79 | srcLength = u_strlen(srcChars + srcStart); | |
80 | } | |
81 | if(length != srcLength) { | |
82 | return (int8_t)((length - srcLength) >> 24 | 1); | |
83 | } | |
84 | } | |
85 | return 0; | |
86 | } | |
87 | ||
88 | //======================================== | |
89 | // Write implementation | |
90 | //======================================== | |
91 | ||
374ca955 | 92 | UnicodeString & |
f3c0d7a5 | 93 | UnicodeString::caseMap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM |
4388f060 | 94 | UStringCaseMapper *stringCaseMapper) { |
46f4442e | 95 | if(isEmpty() || !isWritable()) { |
374ca955 A |
96 | // nothing to do |
97 | return *this; | |
98 | } | |
99 | ||
f3c0d7a5 | 100 | UChar oldBuffer[2 * US_STACKBUF_SIZE]; |
46f4442e | 101 | UChar *oldArray; |
f3c0d7a5 A |
102 | int32_t oldLength = length(); |
103 | int32_t newLength; | |
104 | UBool writable = isBufferWritable(); | |
105 | UErrorCode errorCode = U_ZERO_ERROR; | |
106 | ||
107 | // Try to avoid heap-allocating a new character array for this string. | |
108 | if (writable ? oldLength <= UPRV_LENGTHOF(oldBuffer) : oldLength < US_STACKBUF_SIZE) { | |
109 | // Short string: Copy the contents into a temporary buffer and | |
110 | // case-map back into the current array, or into the stack buffer. | |
111 | UChar *buffer = getArrayStart(); | |
112 | int32_t capacity; | |
113 | oldArray = oldBuffer; | |
114 | u_memcpy(oldBuffer, buffer, oldLength); | |
115 | if (writable) { | |
116 | capacity = getCapacity(); | |
117 | } else { | |
118 | // Switch from the read-only alias or shared heap buffer to the stack buffer. | |
119 | if (!cloneArrayIfNeeded(US_STACKBUF_SIZE, US_STACKBUF_SIZE, /* doCopyArray= */ FALSE)) { | |
120 | return *this; | |
121 | } | |
122 | U_ASSERT(fUnion.fFields.fLengthAndFlags & kUsingStackBuffer); | |
123 | buffer = fUnion.fStackFields.fBuffer; | |
124 | capacity = US_STACKBUF_SIZE; | |
125 | } | |
126 | newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR | |
127 | buffer, capacity, | |
128 | oldArray, oldLength, NULL, errorCode); | |
129 | if (U_SUCCESS(errorCode)) { | |
130 | setLength(newLength); | |
131 | return *this; | |
132 | } else if (errorCode == U_BUFFER_OVERFLOW_ERROR) { | |
133 | // common overflow handling below | |
134 | } else { | |
135 | setToBogus(); | |
136 | return *this; | |
137 | } | |
46f4442e | 138 | } else { |
f3c0d7a5 A |
139 | // Longer string or read-only buffer: |
140 | // Collect only changes and then apply them to this string. | |
141 | // Case mapping often changes only small parts of a string, | |
142 | // and often does not change its length. | |
46f4442e | 143 | oldArray = getArrayStart(); |
f3c0d7a5 A |
144 | Edits edits; |
145 | UChar replacementChars[200]; | |
146 | stringCaseMapper(caseLocale, options | UCASEMAP_OMIT_UNCHANGED_TEXT, UCASEMAP_BREAK_ITERATOR | |
147 | replacementChars, UPRV_LENGTHOF(replacementChars), | |
148 | oldArray, oldLength, &edits, errorCode); | |
149 | if (U_SUCCESS(errorCode)) { | |
150 | // Grow the buffer at most once, not for multiple doReplace() calls. | |
151 | newLength = oldLength + edits.lengthDelta(); | |
152 | if (newLength > oldLength && !cloneArrayIfNeeded(newLength, newLength)) { | |
153 | return *this; | |
154 | } | |
155 | for (Edits::Iterator ei = edits.getCoarseChangesIterator(); ei.next(errorCode);) { | |
156 | doReplace(ei.destinationIndex(), ei.oldLength(), | |
157 | replacementChars, ei.replacementIndex(), ei.newLength()); | |
158 | } | |
159 | if (U_FAILURE(errorCode)) { | |
160 | setToBogus(); | |
161 | } | |
162 | return *this; | |
163 | } else if (errorCode == U_BUFFER_OVERFLOW_ERROR) { | |
164 | // common overflow handling below | |
165 | newLength = oldLength + edits.lengthDelta(); | |
166 | } else { | |
167 | setToBogus(); | |
168 | return *this; | |
169 | } | |
46f4442e | 170 | } |
374ca955 | 171 | |
f3c0d7a5 A |
172 | // Handle buffer overflow, newLength is known. |
173 | // We need to allocate a new buffer for the internal string case mapping function. | |
174 | // This is very similar to how doReplace() keeps the old array pointer | |
175 | // and deletes the old array itself after it is done. | |
176 | // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array. | |
46f4442e | 177 | int32_t *bufferToDelete = 0; |
f3c0d7a5 | 178 | if (!cloneArrayIfNeeded(newLength, newLength, FALSE, &bufferToDelete, TRUE)) { |
374ca955 A |
179 | return *this; |
180 | } | |
f3c0d7a5 A |
181 | errorCode = U_ZERO_ERROR; |
182 | newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR | |
183 | getArrayStart(), getCapacity(), | |
184 | oldArray, oldLength, NULL, errorCode); | |
374ca955 A |
185 | if (bufferToDelete) { |
186 | uprv_free(bufferToDelete); | |
187 | } | |
f3c0d7a5 A |
188 | if (U_SUCCESS(errorCode)) { |
189 | setLength(newLength); | |
190 | } else { | |
374ca955 A |
191 | setToBogus(); |
192 | } | |
193 | return *this; | |
194 | } | |
195 | ||
374ca955 A |
196 | UnicodeString & |
197 | UnicodeString::foldCase(uint32_t options) { | |
f3c0d7a5 | 198 | return caseMap(UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalFold); |
374ca955 A |
199 | } |
200 | ||
201 | U_NAMESPACE_END | |
73c04bcf A |
202 | |
203 | // Defined here to reduce dependencies on break iterator | |
204 | U_CAPI int32_t U_EXPORT2 | |
4388f060 | 205 | uhash_hashCaselessUnicodeString(const UElement key) { |
73c04bcf A |
206 | U_NAMESPACE_USE |
207 | const UnicodeString *str = (const UnicodeString*) key.pointer; | |
208 | if (str == NULL) { | |
209 | return 0; | |
210 | } | |
211 | // Inefficient; a better way would be to have a hash function in | |
212 | // UnicodeString that does case folding on the fly. | |
213 | UnicodeString copy(*str); | |
214 | return copy.foldCase().hashCode(); | |
215 | } | |
216 | ||
217 | // Defined here to reduce dependencies on break iterator | |
218 | U_CAPI UBool U_EXPORT2 | |
4388f060 | 219 | uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2) { |
73c04bcf A |
220 | U_NAMESPACE_USE |
221 | const UnicodeString *str1 = (const UnicodeString*) key1.pointer; | |
222 | const UnicodeString *str2 = (const UnicodeString*) key2.pointer; | |
223 | if (str1 == str2) { | |
224 | return TRUE; | |
225 | } | |
226 | if (str1 == NULL || str2 == NULL) { | |
227 | return FALSE; | |
228 | } | |
229 | return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0; | |
230 | } |