]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/unistr_case.cpp
ICU-59173.0.1.tar.gz
[apple/icu.git] / icuSources / common / unistr_case.cpp
CommitLineData
f3c0d7a5
A
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 1999-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  unistr_case.cpp
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:2
*
*   created on: 2004aug19
*   created by: Markus W. Scherer
*
*   Case-mapping functions moved here from unistr.cpp
*/
20
21#include "unicode/utypes.h"
f3c0d7a5
A
22#include "unicode/casemap.h"
23#include "unicode/edits.h"
374ca955 24#include "unicode/putil.h"
374ca955
A
25#include "cstring.h"
26#include "cmemory.h"
27#include "unicode/ustring.h"
28#include "unicode/unistr.h"
29#include "unicode/uchar.h"
f3c0d7a5
A
30#include "uassert.h"
31#include "ucasemap_imp.h"
4388f060 32#include "uelement.h"
374ca955
A
33
34U_NAMESPACE_BEGIN
35
36//========================================
37// Read-only implementation
38//========================================
39
40int8_t
41UnicodeString::doCaseCompare(int32_t start,
42 int32_t length,
43 const UChar *srcChars,
44 int32_t srcStart,
45 int32_t srcLength,
46 uint32_t options) const
47{
48 // compare illegal string values
49 // treat const UChar *srcChars==NULL as an empty string
50 if(isBogus()) {
51 return -1;
52 }
53
54 // pin indices to legal values
55 pinIndices(start, length);
56
57 if(srcChars == NULL) {
58 srcStart = srcLength = 0;
59 }
60
61 // get the correct pointer
62 const UChar *chars = getArrayStart();
63
64 chars += start;
4388f060
A
65 if(srcStart!=0) {
66 srcChars += srcStart;
67 }
374ca955
A
68
69 if(chars != srcChars) {
70 UErrorCode errorCode=U_ZERO_ERROR;
71 int32_t result=u_strcmpFold(chars, length, srcChars, srcLength,
72 options|U_COMPARE_IGNORE_CASE, &errorCode);
73 if(result!=0) {
74 return (int8_t)(result >> 24 | 1);
75 }
76 } else {
77 // get the srcLength if necessary
78 if(srcLength < 0) {
79 srcLength = u_strlen(srcChars + srcStart);
80 }
81 if(length != srcLength) {
82 return (int8_t)((length - srcLength) >> 24 | 1);
83 }
84 }
85 return 0;
86}
87
88//========================================
89// Write implementation
90//========================================
91
374ca955 92UnicodeString &
f3c0d7a5 93UnicodeString::caseMap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
4388f060 94 UStringCaseMapper *stringCaseMapper) {
46f4442e 95 if(isEmpty() || !isWritable()) {
374ca955
A
96 // nothing to do
97 return *this;
98 }
99
f3c0d7a5 100 UChar oldBuffer[2 * US_STACKBUF_SIZE];
46f4442e 101 UChar *oldArray;
f3c0d7a5
A
102 int32_t oldLength = length();
103 int32_t newLength;
104 UBool writable = isBufferWritable();
105 UErrorCode errorCode = U_ZERO_ERROR;
106
107 // Try to avoid heap-allocating a new character array for this string.
108 if (writable ? oldLength <= UPRV_LENGTHOF(oldBuffer) : oldLength < US_STACKBUF_SIZE) {
109 // Short string: Copy the contents into a temporary buffer and
110 // case-map back into the current array, or into the stack buffer.
111 UChar *buffer = getArrayStart();
112 int32_t capacity;
113 oldArray = oldBuffer;
114 u_memcpy(oldBuffer, buffer, oldLength);
115 if (writable) {
116 capacity = getCapacity();
117 } else {
118 // Switch from the read-only alias or shared heap buffer to the stack buffer.
119 if (!cloneArrayIfNeeded(US_STACKBUF_SIZE, US_STACKBUF_SIZE, /* doCopyArray= */ FALSE)) {
120 return *this;
121 }
122 U_ASSERT(fUnion.fFields.fLengthAndFlags & kUsingStackBuffer);
123 buffer = fUnion.fStackFields.fBuffer;
124 capacity = US_STACKBUF_SIZE;
125 }
126 newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
127 buffer, capacity,
128 oldArray, oldLength, NULL, errorCode);
129 if (U_SUCCESS(errorCode)) {
130 setLength(newLength);
131 return *this;
132 } else if (errorCode == U_BUFFER_OVERFLOW_ERROR) {
133 // common overflow handling below
134 } else {
135 setToBogus();
136 return *this;
137 }
46f4442e 138 } else {
f3c0d7a5
A
139 // Longer string or read-only buffer:
140 // Collect only changes and then apply them to this string.
141 // Case mapping often changes only small parts of a string,
142 // and often does not change its length.
46f4442e 143 oldArray = getArrayStart();
f3c0d7a5
A
144 Edits edits;
145 UChar replacementChars[200];
146 stringCaseMapper(caseLocale, options | UCASEMAP_OMIT_UNCHANGED_TEXT, UCASEMAP_BREAK_ITERATOR
147 replacementChars, UPRV_LENGTHOF(replacementChars),
148 oldArray, oldLength, &edits, errorCode);
149 if (U_SUCCESS(errorCode)) {
150 // Grow the buffer at most once, not for multiple doReplace() calls.
151 newLength = oldLength + edits.lengthDelta();
152 if (newLength > oldLength && !cloneArrayIfNeeded(newLength, newLength)) {
153 return *this;
154 }
155 for (Edits::Iterator ei = edits.getCoarseChangesIterator(); ei.next(errorCode);) {
156 doReplace(ei.destinationIndex(), ei.oldLength(),
157 replacementChars, ei.replacementIndex(), ei.newLength());
158 }
159 if (U_FAILURE(errorCode)) {
160 setToBogus();
161 }
162 return *this;
163 } else if (errorCode == U_BUFFER_OVERFLOW_ERROR) {
164 // common overflow handling below
165 newLength = oldLength + edits.lengthDelta();
166 } else {
167 setToBogus();
168 return *this;
169 }
46f4442e 170 }
374ca955 171
f3c0d7a5
A
172 // Handle buffer overflow, newLength is known.
173 // We need to allocate a new buffer for the internal string case mapping function.
174 // This is very similar to how doReplace() keeps the old array pointer
175 // and deletes the old array itself after it is done.
176 // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
46f4442e 177 int32_t *bufferToDelete = 0;
f3c0d7a5 178 if (!cloneArrayIfNeeded(newLength, newLength, FALSE, &bufferToDelete, TRUE)) {
374ca955
A
179 return *this;
180 }
f3c0d7a5
A
181 errorCode = U_ZERO_ERROR;
182 newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
183 getArrayStart(), getCapacity(),
184 oldArray, oldLength, NULL, errorCode);
374ca955
A
185 if (bufferToDelete) {
186 uprv_free(bufferToDelete);
187 }
f3c0d7a5
A
188 if (U_SUCCESS(errorCode)) {
189 setLength(newLength);
190 } else {
374ca955
A
191 setToBogus();
192 }
193 return *this;
194}
195
374ca955
A
196UnicodeString &
197UnicodeString::foldCase(uint32_t options) {
f3c0d7a5 198 return caseMap(UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalFold);
374ca955
A
199}
200
201U_NAMESPACE_END
73c04bcf
A
202
203// Defined here to reduce dependencies on break iterator
204U_CAPI int32_t U_EXPORT2
4388f060 205uhash_hashCaselessUnicodeString(const UElement key) {
73c04bcf
A
206 U_NAMESPACE_USE
207 const UnicodeString *str = (const UnicodeString*) key.pointer;
208 if (str == NULL) {
209 return 0;
210 }
211 // Inefficient; a better way would be to have a hash function in
212 // UnicodeString that does case folding on the fly.
213 UnicodeString copy(*str);
214 return copy.foldCase().hashCode();
215}
216
217// Defined here to reduce dependencies on break iterator
218U_CAPI UBool U_EXPORT2
4388f060 219uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2) {
73c04bcf
A
220 U_NAMESPACE_USE
221 const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
222 const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
223 if (str1 == str2) {
224 return TRUE;
225 }
226 if (str1 == NULL || str2 == NULL) {
227 return FALSE;
228 }
229 return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
230}