1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 1999-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 *******************************************************************************
10 * file name: unistr_case.cpp
12 * tab size: 8 (not used)
15 * created on: 2004aug19
16 * created by: Markus W. Scherer
18 * Case-mapping functions moved here from unistr.cpp
21 #include "unicode/utypes.h"
22 #include "unicode/casemap.h"
23 #include "unicode/edits.h"
24 #include "unicode/putil.h"
27 #include "unicode/ustring.h"
28 #include "unicode/unistr.h"
29 #include "unicode/uchar.h"
31 #include "ucasemap_imp.h"
36 //========================================
37 // Read-only implementation
38 //========================================
// Case-insensitive comparison of a range of this string (start, length)
// against srcChars. When the two character pointers differ, the work is
// delegated to u_strcmpFold() with U_COMPARE_IGNORE_CASE OR-ed into `options`.
// The visible return statements produce a nonzero int8_t whose sign reflects
// the ordering (or the length difference when both pointers are identical).
// NOTE(review): the embedded line numbers skip (41 -> 43 -> 46 ...), so part
// of the parameter list (length, srcStart and srcLength are used below but not
// declared in this extract) and several statements are missing here.
41 UnicodeString::doCaseCompare(int32_t start
,
43 const UChar
*srcChars
,
46 uint32_t options
) const
48 // compare illegal string values
49 // treat const UChar *srcChars==NULL as an empty string
54 // pin indices to legal values
55 pinIndices(start
, length
);
// A NULL source gets both its offset and its length forced to 0.
57 if(srcChars
== NULL
) {
58 srcStart
= srcLength
= 0;
61 // get the correct pointer
62 const UChar
*chars
= getArrayStart();
// The two pointers differ: compare the contents with case folding.
69 if(chars
!= srcChars
) {
70 UErrorCode errorCode
=U_ZERO_ERROR
;
71 int32_t result
=u_strcmpFold(chars
, length
, srcChars
, srcLength
,
72 options
|U_COMPARE_IGNORE_CASE
, &errorCode
);
// result >> 24 | 1 squeezes the 32-bit result into an int8_t: the shift keeps
// the sign (assuming an arithmetic right shift) and | 1 keeps it nonzero.
74 return (int8_t)(result
>> 24 | 1);
// NOTE(review): presumably the branch for chars == srcChars; the branch
// keyword itself is not visible in this extract.
77 // get the srcLength if necessary
79 srcLength
= u_strlen(srcChars
+ srcStart
);
81 if(length
!= srcLength
) {
// Same squeeze as above, applied to the length difference.
82 return (int8_t)((length
- srcLength
) >> 24 | 1);
88 //========================================
89 // Write implementation
90 //========================================
// Case-maps this string through the supplied stringCaseMapper callback, using
// caseLocale and options. UCASEMAP_BREAK_ITERATOR_PARAM is a macro in the
// parameter list (presumably an optional break-iterator parameter -- confirm
// in ucasemap_imp.h).
// Two strategies are visible:
//  - short strings: copy the old text into a local buffer (oldBuffer) and map
//    it back into this string's array or into its internal stack buffer;
//  - otherwise: map with UCASEMAP_OMIT_UNCHANGED_TEXT into replacementChars,
//    record the changes in `edits`, and apply them with doReplace().
// If either attempt reports U_BUFFER_OVERFLOW_ERROR, a new array of newLength
// is forced through cloneArrayIfNeeded() and the mapping is run once more.
// NOTE(review): the embedded line numbers skip, so the return type, several
// local declarations (oldArray, newLength, capacity, edits) and the return
// statements are not visible in this extract.
93 UnicodeString::caseMap(int32_t caseLocale
, uint32_t options
, UCASEMAP_BREAK_ITERATOR_PARAM
94 UStringCaseMapper
*stringCaseMapper
) {
// Guard for empty or non-writable strings; the body of this branch is not
// visible here (presumably an early return).
95 if(isEmpty() || !isWritable()) {
// Scratch copy of the old text for the short-string path, sized at twice
// US_STACKBUF_SIZE.
100 UChar oldBuffer
[2 * US_STACKBUF_SIZE
];
102 int32_t oldLength
= length();
104 UBool writable
= isBufferWritable();
105 UErrorCode errorCode
= U_ZERO_ERROR
;
107 // Try to avoid heap-allocating a new character array for this string.
// With a writable buffer the old text only has to fit into oldBuffer;
// otherwise it must be shorter than US_STACKBUF_SIZE.
108 if (writable
? oldLength
<= UPRV_LENGTHOF(oldBuffer
) : oldLength
< US_STACKBUF_SIZE
) {
109 // Short string: Copy the contents into a temporary buffer and
110 // case-map back into the current array, or into the stack buffer.
111 UChar
*buffer
= getArrayStart();
113 oldArray
= oldBuffer
;
114 u_memcpy(oldBuffer
, buffer
, oldLength
);
116 capacity
= getCapacity();
118 // Switch from the read-only alias or shared heap buffer to the stack buffer.
119 if (!cloneArrayIfNeeded(US_STACKBUF_SIZE
, US_STACKBUF_SIZE
, /* doCopyArray= */ FALSE
)) {
122 U_ASSERT(fUnion
.fFields
.fLengthAndFlags
& kUsingStackBuffer
);
123 buffer
= fUnion
.fStackFields
.fBuffer
;
124 capacity
= US_STACKBUF_SIZE
;
// Map from the saved copy (oldArray); no Edits object is passed (NULL).
126 newLength
= stringCaseMapper(caseLocale
, options
, UCASEMAP_BREAK_ITERATOR
128 oldArray
, oldLength
, NULL
, errorCode
);
129 if (U_SUCCESS(errorCode
)) {
130 setLength(newLength
);
132 } else if (errorCode
== U_BUFFER_OVERFLOW_ERROR
) {
133 // common overflow handling below
139 // Longer string or read-only buffer:
140 // Collect only changes and then apply them to this string.
141 // Case mapping often changes only small parts of a string,
142 // and often does not change its length.
143 oldArray
= getArrayStart();
145 UChar replacementChars
[200];
// The mapper is asked to omit unchanged text (UCASEMAP_OMIT_UNCHANGED_TEXT),
// writing into replacementChars and recording the changes in edits.
146 stringCaseMapper(caseLocale
, options
| UCASEMAP_OMIT_UNCHANGED_TEXT
, UCASEMAP_BREAK_ITERATOR
147 replacementChars
, UPRV_LENGTHOF(replacementChars
),
148 oldArray
, oldLength
, &edits
, errorCode
);
149 if (U_SUCCESS(errorCode
)) {
150 // Grow the buffer at most once, not for multiple doReplace() calls.
151 newLength
= oldLength
+ edits
.lengthDelta();
152 if (newLength
> oldLength
&& !cloneArrayIfNeeded(newLength
, newLength
)) {
// Apply each coarse change from edits to this string via doReplace().
155 for (Edits::Iterator ei
= edits
.getCoarseChangesIterator(); ei
.next(errorCode
);) {
156 doReplace(ei
.destinationIndex(), ei
.oldLength(),
157 replacementChars
, ei
.replacementIndex(), ei
.newLength());
159 if (U_FAILURE(errorCode
)) {
163 } else if (errorCode
== U_BUFFER_OVERFLOW_ERROR
) {
164 // common overflow handling below
165 newLength
= oldLength
+ edits
.lengthDelta();
172 // Handle buffer overflow, newLength is known.
173 // We need to allocate a new buffer for the internal string case mapping function.
174 // This is very similar to how doReplace() keeps the old array pointer
175 // and deletes the old array itself after it is done.
176 // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
177 int32_t *bufferToDelete
= 0;
178 if (!cloneArrayIfNeeded(newLength
, newLength
, FALSE
, &bufferToDelete
, TRUE
)) {
// Reset the error code before retrying into the array obtained above.
181 errorCode
= U_ZERO_ERROR
;
182 newLength
= stringCaseMapper(caseLocale
, options
, UCASEMAP_BREAK_ITERATOR
183 getArrayStart(), getCapacity(),
184 oldArray
, oldLength
, NULL
, errorCode
);
// Release the buffer received through bufferToDelete only after the mapper
// call above has finished reading oldArray.
185 if (bufferToDelete
) {
186 uprv_free(bufferToDelete
);
188 if (U_SUCCESS(errorCode
)) {
189 setLength(newLength
);
// Delegates to caseMap() with UCASE_LOC_ROOT as the case locale, the caller's
// options, and ustrcase_internalFold as the mapper; the
// UCASEMAP_BREAK_ITERATOR_NULL macro fills the break-iterator slot.
// Returns whatever caseMap() returns.
// NOTE(review): the return type line and closing brace are not visible in
// this extract.
197 UnicodeString::foldCase(uint32_t options
) {
198 return caseMap(UCASE_LOC_ROOT
, options
, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalFold
);
203 // Defined here to reduce dependencies on break iterator
// Hash callback for keys whose `pointer` member refers to a UnicodeString:
// the string is copied, the copy is case-folded with foldCase(), and the
// folded copy's hashCode() is returned.
// NOTE(review): the embedded line numbers skip between the cast and the copy
// (207 -> 211), so any handling of a NULL key is not visible in this extract.
204 U_CAPI
int32_t U_EXPORT2
205 uhash_hashCaselessUnicodeString(const UElement key
) {
207 const UnicodeString
*str
= (const UnicodeString
*) key
.pointer
;
211 // Inefficient; a better way would be to have a hash function in
212 // UnicodeString that does case folding on the fly.
// The copy is needed because foldCase() is called on it while *str is const.
213 UnicodeString
copy(*str
);
214 return copy
.foldCase().hashCode();
217 // Defined here to reduce dependencies on break iterator
// Equality callback for keys whose `pointer` members refer to UnicodeString
// objects: the final statement reports equality when
// caseCompare(..., U_FOLD_CASE_DEFAULT) yields 0.
// NOTE(review): the embedded line numbers skip (222 -> 226 -> 229), so the
// result returned when either pointer is NULL, and any earlier shortcut, are
// not visible in this extract.
218 U_CAPI UBool U_EXPORT2
219 uhash_compareCaselessUnicodeString(const UElement key1
, const UElement key2
) {
221 const UnicodeString
*str1
= (const UnicodeString
*) key1
.pointer
;
222 const UnicodeString
*str2
= (const UnicodeString
*) key2
.pointer
;
// NULL keys are handled separately before str1 is dereferenced below.
226 if (str1
== NULL
|| str2
== NULL
) {
229 return str1
->caseCompare(*str2
, U_FOLD_CASE_DEFAULT
) == 0;