// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 1999-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  unistr_case.cpp
*   tab size:   8 (not used)
*
*   created on: 2004aug19
*   created by: Markus W. Scherer
*
*   Case-mapping functions moved here from unistr.cpp
*/
21 #include "unicode/utypes.h"
22 #include "unicode/brkiter.h"
23 #include "unicode/casemap.h"
24 #include "unicode/edits.h"
25 #include "unicode/putil.h"
28 #include "unicode/ustring.h"
29 #include "unicode/unistr.h"
30 #include "unicode/uchar.h"
32 #include "ucasemap_imp.h"
37 //========================================
38 // Read-only implementation
39 //========================================
42 UnicodeString::doCaseCompare(int32_t start
,
44 const UChar
*srcChars
,
47 uint32_t options
) const
49 // compare illegal string values
50 // treat const UChar *srcChars==NULL as an empty string
55 // pin indices to legal values
56 pinIndices(start
, length
);
58 if(srcChars
== NULL
) {
59 srcStart
= srcLength
= 0;
62 // get the correct pointer
63 const UChar
*chars
= getArrayStart();
70 if(chars
!= srcChars
) {
71 UErrorCode errorCode
=U_ZERO_ERROR
;
72 int32_t result
=u_strcmpFold(chars
, length
, srcChars
, srcLength
,
73 options
|U_COMPARE_IGNORE_CASE
, &errorCode
);
75 return (int8_t)(result
>> 24 | 1);
78 // get the srcLength if necessary
80 srcLength
= u_strlen(srcChars
+ srcStart
);
82 if(length
!= srcLength
) {
83 return (int8_t)((length
- srcLength
) >> 24 | 1);
89 //========================================
90 // Write implementation
91 //========================================
94 UnicodeString::caseMap(int32_t caseLocale
, uint32_t options
, UCASEMAP_BREAK_ITERATOR_PARAM
95 UStringCaseMapper
*stringCaseMapper
) {
96 if(isEmpty() || !isWritable()) {
101 UChar oldBuffer
[2 * US_STACKBUF_SIZE
];
103 int32_t oldLength
= length();
105 UBool writable
= isBufferWritable();
106 UErrorCode errorCode
= U_ZERO_ERROR
;
108 #if !UCONFIG_NO_BREAK_ITERATION
109 // Read-only alias to the original string contents for the titlecasing BreakIterator.
110 // We cannot set the iterator simply to *this because *this is being modified.
111 UnicodeString oldString
;
114 // Try to avoid heap-allocating a new character array for this string.
115 if (writable
? oldLength
<= UPRV_LENGTHOF(oldBuffer
) : oldLength
< US_STACKBUF_SIZE
) {
116 // Short string: Copy the contents into a temporary buffer and
117 // case-map back into the current array, or into the stack buffer.
118 UChar
*buffer
= getArrayStart();
120 oldArray
= oldBuffer
;
121 u_memcpy(oldBuffer
, buffer
, oldLength
);
123 capacity
= getCapacity();
125 // Switch from the read-only alias or shared heap buffer to the stack buffer.
126 if (!cloneArrayIfNeeded(US_STACKBUF_SIZE
, US_STACKBUF_SIZE
, /* doCopyArray= */ FALSE
)) {
129 U_ASSERT(fUnion
.fFields
.fLengthAndFlags
& kUsingStackBuffer
);
130 buffer
= fUnion
.fStackFields
.fBuffer
;
131 capacity
= US_STACKBUF_SIZE
;
133 #if !UCONFIG_NO_BREAK_ITERATION
134 if (iter
!= nullptr) {
135 oldString
.setTo(FALSE
, oldArray
, oldLength
);
136 iter
->setText(oldString
);
139 newLength
= stringCaseMapper(caseLocale
, options
, UCASEMAP_BREAK_ITERATOR
141 oldArray
, oldLength
, NULL
, errorCode
);
142 if (U_SUCCESS(errorCode
)) {
143 setLength(newLength
);
145 } else if (errorCode
== U_BUFFER_OVERFLOW_ERROR
) {
146 // common overflow handling below
152 // Longer string or read-only buffer:
153 // Collect only changes and then apply them to this string.
154 // Case mapping often changes only small parts of a string,
155 // and often does not change its length.
156 oldArray
= getArrayStart();
158 UChar replacementChars
[200];
159 #if !UCONFIG_NO_BREAK_ITERATION
160 if (iter
!= nullptr) {
161 oldString
.setTo(FALSE
, oldArray
, oldLength
);
162 iter
->setText(oldString
);
165 stringCaseMapper(caseLocale
, options
| U_OMIT_UNCHANGED_TEXT
, UCASEMAP_BREAK_ITERATOR
166 replacementChars
, UPRV_LENGTHOF(replacementChars
),
167 oldArray
, oldLength
, &edits
, errorCode
);
168 if (U_SUCCESS(errorCode
)) {
169 // Grow the buffer at most once, not for multiple doReplace() calls.
170 newLength
= oldLength
+ edits
.lengthDelta();
171 if (newLength
> oldLength
&& !cloneArrayIfNeeded(newLength
, newLength
)) {
174 for (Edits::Iterator ei
= edits
.getCoarseChangesIterator(); ei
.next(errorCode
);) {
175 doReplace(ei
.destinationIndex(), ei
.oldLength(),
176 replacementChars
, ei
.replacementIndex(), ei
.newLength());
178 if (U_FAILURE(errorCode
)) {
182 } else if (errorCode
== U_BUFFER_OVERFLOW_ERROR
) {
183 // common overflow handling below
184 newLength
= oldLength
+ edits
.lengthDelta();
191 // Handle buffer overflow, newLength is known.
192 // We need to allocate a new buffer for the internal string case mapping function.
193 // This is very similar to how doReplace() keeps the old array pointer
194 // and deletes the old array itself after it is done.
195 // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
196 int32_t *bufferToDelete
= 0;
197 if (!cloneArrayIfNeeded(newLength
, newLength
, FALSE
, &bufferToDelete
, TRUE
)) {
200 errorCode
= U_ZERO_ERROR
;
201 // No need to iter->setText() again: The case mapper restarts via iter->first().
202 newLength
= stringCaseMapper(caseLocale
, options
, UCASEMAP_BREAK_ITERATOR
203 getArrayStart(), getCapacity(),
204 oldArray
, oldLength
, NULL
, errorCode
);
205 if (bufferToDelete
) {
206 uprv_free(bufferToDelete
);
208 if (U_SUCCESS(errorCode
)) {
209 setLength(newLength
);
217 UnicodeString::foldCase(uint32_t options
) {
218 return caseMap(UCASE_LOC_ROOT
, options
, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalFold
);
223 // Defined here to reduce dependencies on break iterator
224 U_CAPI
int32_t U_EXPORT2
225 uhash_hashCaselessUnicodeString(const UElement key
) {
227 const UnicodeString
*str
= (const UnicodeString
*) key
.pointer
;
231 // Inefficient; a better way would be to have a hash function in
232 // UnicodeString that does case folding on the fly.
233 UnicodeString
copy(*str
);
234 return copy
.foldCase().hashCode();
237 // Defined here to reduce dependencies on break iterator
238 U_CAPI UBool U_EXPORT2
239 uhash_compareCaselessUnicodeString(const UElement key1
, const UElement key2
) {
241 const UnicodeString
*str1
= (const UnicodeString
*) key1
.pointer
;
242 const UnicodeString
*str2
= (const UnicodeString
*) key2
.pointer
;
246 if (str1
== NULL
|| str2
== NULL
) {
249 return str1
->caseCompare(*str2
, U_FOLD_CASE_DEFAULT
) == 0;