]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/unistr.cpp
ICU-59173.0.1.tar.gz
[apple/icu.git] / icuSources / common / unistr.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/*
4******************************************************************************
2ca993e8 5* Copyright (C) 1999-2016, International Business Machines Corporation and
4388f060 6* others. All Rights Reserved.
b75a7d8f
A
7******************************************************************************
8*
9* File unistr.cpp
10*
11* Modification History:
12*
13* Date Name Description
14* 09/25/98 stephen Creation.
15* 04/20/99 stephen Overhauled per 4/16 code review.
16* 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX
17* 11/18/99 aliu Added handleReplaceBetween() to make inherit from
18* Replaceable.
19* 06/25/01 grhoten Removed the dependency on iostream
20******************************************************************************
21*/
22
23#include "unicode/utypes.h"
4388f060 24#include "unicode/appendable.h"
b75a7d8f 25#include "unicode/putil.h"
b75a7d8f
A
26#include "cstring.h"
27#include "cmemory.h"
28#include "unicode/ustring.h"
29#include "unicode/unistr.h"
4388f060
A
30#include "unicode/utf.h"
31#include "unicode/utf16.h"
32#include "uelement.h"
b75a7d8f 33#include "ustr_imp.h"
b75a7d8f 34#include "umutex.h"
4388f060 35#include "uassert.h"
b75a7d8f
A
36
37#if 0
38
b75a7d8f
A
39#include <iostream>
40using namespace std;
b75a7d8f
A
41
42//DEBUGGING
43void
44print(const UnicodeString& s,
45 const char *name)
46{
47 UChar c;
48 cout << name << ":|";
49 for(int i = 0; i < s.length(); ++i) {
50 c = s[i];
51 if(c>= 0x007E || c < 0x0020)
52 cout << "[0x" << hex << s[i] << "]";
53 else
54 cout << (char) s[i];
55 }
56 cout << '|' << endl;
57}
58
59void
60print(const UChar *s,
61 int32_t len,
62 const char *name)
63{
64 UChar c;
65 cout << name << ":|";
66 for(int i = 0; i < len; ++i) {
67 c = s[i];
68 if(c>= 0x007E || c < 0x0020)
69 cout << "[0x" << hex << s[i] << "]";
70 else
71 cout << (char) s[i];
72 }
73 cout << '|' << endl;
74}
75// END DEBUGGING
76#endif
77
78// Local function definitions for now
79
80// need to copy areas that may overlap
81static
82inline void
83us_arrayCopy(const UChar *src, int32_t srcStart,
84 UChar *dst, int32_t dstStart, int32_t count)
85{
86 if(count>0) {
a62d09fc 87 uprv_memmove(dst+dstStart, src+srcStart, (size_t)count*sizeof(*src));
b75a7d8f
A
88 }
89}
90
91// u_unescapeAt() callback to get a UChar from a UnicodeString
92U_CDECL_BEGIN
93static UChar U_CALLCONV
94UnicodeString_charAt(int32_t offset, void *context) {
4388f060 95 return ((icu::UnicodeString*) context)->charAt(offset);
b75a7d8f
A
96}
97U_CDECL_END
98
99U_NAMESPACE_BEGIN
100
374ca955
A
101/* The Replaceable virtual destructor can't be defined in the header
102 due to how AIX works with multiple definitions of virtual functions.
103*/
104Replaceable::~Replaceable() {}
51004dcb 105
374ca955
A
106UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
107
108UnicodeString U_EXPORT2
109operator+ (const UnicodeString &s1, const UnicodeString &s2) {
110 return
111 UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
112 append(s1).
113 append(s2);
114}
b75a7d8f
A
115
116//========================================
117// Reference Counting functions, put at top of file so that optimizing compilers
118// have a chance to automatically inline.
119//========================================
120
121void
57a6839d
A
122UnicodeString::addRef() {
123 umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
124}
b75a7d8f
A
125
126int32_t
57a6839d
A
127UnicodeString::removeRef() {
128 return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
129}
b75a7d8f
A
130
131int32_t
57a6839d
A
132UnicodeString::refCount() const {
133 return umtx_loadAcquire(*((u_atomic_int32_t *)fUnion.fFields.fArray - 1));
134}
b75a7d8f
A
135
136void
137UnicodeString::releaseArray() {
b331163b 138 if((fUnion.fFields.fLengthAndFlags & kRefCounted) && removeRef() == 0) {
46f4442e 139 uprv_free((int32_t *)fUnion.fFields.fArray - 1);
b75a7d8f
A
140 }
141}
142
143
144
145//========================================
146// Constructors
147//========================================
51004dcb
A
148
149// The default constructor is inline in unistr.h.
b75a7d8f 150
b331163b
A
151UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) {
152 fUnion.fFields.fLengthAndFlags = 0;
b75a7d8f
A
153 if(count <= 0 || (uint32_t)c > 0x10ffff) {
154 // just allocate and do not do anything else
155 allocate(capacity);
a62d09fc
A
156 } else if(c <= 0xffff) {
157 int32_t length = count;
b75a7d8f
A
158 if(capacity < length) {
159 capacity = length;
160 }
161 if(allocate(capacity)) {
46f4442e 162 UChar *array = getArrayStart();
a62d09fc
A
163 UChar unit = (UChar)c;
164 for(int32_t i = 0; i < length; ++i) {
165 array[i] = unit;
166 }
167 setLength(length);
168 }
169 } else { // supplementary code point, write surrogate pairs
170 if(count > (INT32_MAX / 2)) {
171 // We would get more than 2G UChars.
172 allocate(capacity);
173 return;
174 }
175 int32_t length = count * 2;
176 if(capacity < length) {
177 capacity = length;
178 }
179 if(allocate(capacity)) {
180 UChar *array = getArrayStart();
181 UChar lead = U16_LEAD(c);
182 UChar trail = U16_TRAIL(c);
183 for(int32_t i = 0; i < length; i += 2) {
184 array[i] = lead;
185 array[i + 1] = trail;
b75a7d8f 186 }
a62d09fc 187 setLength(length);
b75a7d8f 188 }
b75a7d8f
A
189 }
190}
191
b331163b
A
192UnicodeString::UnicodeString(UChar ch) {
193 fUnion.fFields.fLengthAndFlags = kLength1 | kShortString;
194 fUnion.fStackFields.fBuffer[0] = ch;
b75a7d8f
A
195}
196
b331163b
A
197UnicodeString::UnicodeString(UChar32 ch) {
198 fUnion.fFields.fLengthAndFlags = kShortString;
b75a7d8f
A
199 int32_t i = 0;
200 UBool isError = FALSE;
b331163b 201 U16_APPEND(fUnion.fStackFields.fBuffer, i, US_STACKBUF_SIZE, ch, isError);
4388f060
A
202 // We test isError so that the compiler does not complain that we don't.
203 // If isError then i==0 which is what we want anyway.
204 if(!isError) {
b331163b 205 setShortLength(i);
4388f060 206 }
b75a7d8f
A
207}
208
b331163b
A
209UnicodeString::UnicodeString(const UChar *text) {
210 fUnion.fFields.fLengthAndFlags = kShortString;
2ca993e8 211 doAppend(text, 0, -1);
b75a7d8f
A
212}
213
214UnicodeString::UnicodeString(const UChar *text,
b331163b
A
215 int32_t textLength) {
216 fUnion.fFields.fLengthAndFlags = kShortString;
2ca993e8 217 doAppend(text, 0, textLength);
b75a7d8f
A
218}
219
220UnicodeString::UnicodeString(UBool isTerminated,
f3c0d7a5 221 ConstChar16Ptr textPtr,
b331163b
A
222 int32_t textLength) {
223 fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
f3c0d7a5 224 const UChar *text = textPtr;
b75a7d8f
A
225 if(text == NULL) {
226 // treat as an empty string, do not alias
46f4442e 227 setToEmpty();
b75a7d8f
A
228 } else if(textLength < -1 ||
229 (textLength == -1 && !isTerminated) ||
230 (textLength >= 0 && isTerminated && text[textLength] != 0)
231 ) {
232 setToBogus();
46f4442e
A
233 } else {
234 if(textLength == -1) {
235 // text is terminated, or else it would have failed the above test
236 textLength = u_strlen(text);
237 }
f3c0d7a5
A
238 setArray(const_cast<UChar *>(text), textLength,
239 isTerminated ? textLength + 1 : textLength);
b75a7d8f
A
240 }
241}
242
243UnicodeString::UnicodeString(UChar *buff,
244 int32_t buffLength,
b331163b
A
245 int32_t buffCapacity) {
246 fUnion.fFields.fLengthAndFlags = kWritableAlias;
b75a7d8f
A
247 if(buff == NULL) {
248 // treat as an empty string, do not alias
46f4442e 249 setToEmpty();
374ca955 250 } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
b75a7d8f 251 setToBogus();
46f4442e
A
252 } else {
253 if(buffLength == -1) {
254 // fLength = u_strlen(buff); but do not look beyond buffCapacity
255 const UChar *p = buff, *limit = buff + buffCapacity;
256 while(p != limit && *p != 0) {
257 ++p;
258 }
259 buffLength = (int32_t)(p - buff);
b75a7d8f 260 }
46f4442e 261 setArray(buff, buffLength, buffCapacity);
b75a7d8f
A
262 }
263}
264
b331163b
A
265UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) {
266 fUnion.fFields.fLengthAndFlags = kShortString;
374ca955
A
267 if(src==NULL) {
268 // treat as an empty string
269 } else {
270 if(length<0) {
73c04bcf 271 length=(int32_t)uprv_strlen(src);
b75a7d8f 272 }
374ca955
A
273 if(cloneArrayIfNeeded(length, length, FALSE)) {
274 u_charsToUChars(src, getArrayStart(), length);
46f4442e 275 setLength(length);
374ca955 276 } else {
b75a7d8f
A
277 setToBogus();
278 }
279 }
280}
281
729e4ab9
A
282#if U_CHARSET_IS_UTF8
283
b331163b
A
284UnicodeString::UnicodeString(const char *codepageData) {
285 fUnion.fFields.fLengthAndFlags = kShortString;
729e4ab9
A
286 if(codepageData != 0) {
287 setToUTF8(codepageData);
288 }
289}
290
b331163b
A
291UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) {
292 fUnion.fFields.fLengthAndFlags = kShortString;
729e4ab9
A
293 // if there's nothing to convert, do nothing
294 if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
295 return;
296 }
297 if(dataLength == -1) {
298 dataLength = (int32_t)uprv_strlen(codepageData);
299 }
300 setToUTF8(StringPiece(codepageData, dataLength));
301}
302
303// else see unistr_cnv.cpp
304#endif
305
b331163b
A
306UnicodeString::UnicodeString(const UnicodeString& that) {
307 fUnion.fFields.fLengthAndFlags = kShortString;
b75a7d8f
A
308 copyFrom(that);
309}
310
2ca993e8
A
311#if U_HAVE_RVALUE_REFERENCES
312UnicodeString::UnicodeString(UnicodeString &&src) U_NOEXCEPT {
313 fUnion.fFields.fLengthAndFlags = kShortString;
314 moveFrom(src);
315}
316#endif
317
b75a7d8f 318UnicodeString::UnicodeString(const UnicodeString& that,
b331163b
A
319 int32_t srcStart) {
320 fUnion.fFields.fLengthAndFlags = kShortString;
b75a7d8f
A
321 setTo(that, srcStart);
322}
323
324UnicodeString::UnicodeString(const UnicodeString& that,
325 int32_t srcStart,
b331163b
A
326 int32_t srcLength) {
327 fUnion.fFields.fLengthAndFlags = kShortString;
b75a7d8f
A
328 setTo(that, srcStart, srcLength);
329}
330
331// Replaceable base class clone() default implementation, does not clone
332Replaceable *
333Replaceable::clone() const {
334 return NULL;
335}
336
337// UnicodeString overrides clone() with a real implementation
338Replaceable *
339UnicodeString::clone() const {
340 return new UnicodeString(*this);
341}
342
343//========================================
344// array allocation
345//========================================
346
a62d09fc
A
namespace {

// Fixed part of the growth increment used by getGrowCapacity().
const int32_t kGrowSize = 128;

// Upper bound for a heap buffer's capacity, in UChars.
// The byte count for one int32_t reference counter plus the UChars (plus one
// more UChar for a NUL terminator, so getTerminatedBuffer() need not
// reallocate), rounded up to a multiple of 16 bytes, has to fit into a
// 32-bit size_t (at least on a 32-bit platform).
// Hence capacity <= (0xfffffff0 - 4) / 2 - 1 = 0x7ffffff5.
// (Going up to 0x7ffffffd without rounding would need more complicated
// checks, which does not seem worth it.)
const int32_t kMaxCapacity = 0x7ffffff5;

// Returns newLength enlarged by a quarter of itself plus kGrowSize,
// or kMaxCapacity if that sum would exceed kMaxCapacity.
int32_t getGrowCapacity(int32_t newLength) {
  const int32_t increment = (newLength >> 2) + kGrowSize;
  const int32_t headroom = kMaxCapacity - newLength;
  return (increment <= headroom) ? newLength + increment : kMaxCapacity;
}

}  // namespace
370
b75a7d8f
A
371UBool
372UnicodeString::allocate(int32_t capacity) {
373 if(capacity <= US_STACKBUF_SIZE) {
b331163b 374 fUnion.fFields.fLengthAndFlags = kShortString;
a62d09fc
A
375 return TRUE;
376 }
377 if(capacity <= kMaxCapacity) {
378 ++capacity; // for the NUL
379 // Switch to size_t which is unsigned so that we can allocate up to 4GB.
380 // Reference counter + UChars.
381 size_t numBytes = sizeof(int32_t) + (size_t)capacity * U_SIZEOF_UCHAR;
382 // Round up to a multiple of 16.
383 numBytes = (numBytes + 15) & ~15;
384 int32_t *array = (int32_t *) uprv_malloc(numBytes);
385 if(array != NULL) {
b75a7d8f
A
386 // set initial refCount and point behind the refCount
387 *array++ = 1;
a62d09fc 388 numBytes -= sizeof(int32_t);
b75a7d8f
A
389
390 // have fArray point to the first UChar
46f4442e 391 fUnion.fFields.fArray = (UChar *)array;
a62d09fc 392 fUnion.fFields.fCapacity = (int32_t)(numBytes / U_SIZEOF_UCHAR);
b331163b 393 fUnion.fFields.fLengthAndFlags = kLongString;
a62d09fc 394 return TRUE;
b75a7d8f
A
395 }
396 }
a62d09fc
A
397 fUnion.fFields.fLengthAndFlags = kIsBogus;
398 fUnion.fFields.fArray = 0;
399 fUnion.fFields.fCapacity = 0;
400 return FALSE;
b75a7d8f
A
401}
402
403//========================================
404// Destructor
405//========================================
2ca993e8
A
406
407#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
408static u_atomic_int32_t finalLengthCounts[0x400]; // UnicodeString::kMaxShortLength+1
409static u_atomic_int32_t beyondCount(0);
410
411U_CAPI void unistr_printLengths() {
412 int32_t i;
413 for(i = 0; i <= 59; ++i) {
414 printf("%2d, %9d\n", i, (int32_t)finalLengthCounts[i]);
415 }
416 int32_t beyond = beyondCount;
417 for(; i < UPRV_LENGTHOF(finalLengthCounts); ++i) {
418 beyond += finalLengthCounts[i];
419 }
420 printf(">59, %9d\n", beyond);
421}
422#endif
423
b75a7d8f
A
424UnicodeString::~UnicodeString()
425{
2ca993e8
A
426#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
427 // Count lengths of strings at the end of their lifetime.
428 // Useful for discussion of a desirable stack buffer size.
429 // Count the contents length, not the optional NUL terminator nor further capacity.
430 // Ignore open-buffer strings and strings which alias external storage.
431 if((fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kReadonlyAlias|kWritableAlias)) == 0) {
432 if(hasShortLength()) {
433 umtx_atomic_inc(finalLengthCounts + getShortLength());
434 } else {
435 umtx_atomic_inc(&beyondCount);
436 }
437 }
438#endif
439
b75a7d8f
A
440 releaseArray();
441}
442
729e4ab9
A
443//========================================
444// Factory methods
445//========================================
446
f3c0d7a5 447UnicodeString UnicodeString::fromUTF8(StringPiece utf8) {
729e4ab9
A
448 UnicodeString result;
449 result.setToUTF8(utf8);
450 return result;
451}
452
453UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
454 UnicodeString result;
455 int32_t capacity;
456 // Most UTF-32 strings will be BMP-only and result in a same-length
457 // UTF-16 string. We overestimate the capacity just slightly,
458 // just in case there are a few supplementary characters.
459 if(length <= US_STACKBUF_SIZE) {
460 capacity = US_STACKBUF_SIZE;
461 } else {
462 capacity = length + (length >> 4) + 4;
463 }
464 do {
465 UChar *utf16 = result.getBuffer(capacity);
466 int32_t length16;
467 UErrorCode errorCode = U_ZERO_ERROR;
468 u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
469 utf32, length,
470 0xfffd, // Substitution character.
471 NULL, // Don't care about number of substitutions.
472 &errorCode);
473 result.releaseBuffer(length16);
474 if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
475 capacity = length16 + 1; // +1 for the terminating NUL.
476 continue;
477 } else if(U_FAILURE(errorCode)) {
478 result.setToBogus();
479 }
480 break;
481 } while(TRUE);
482 return result;
483}
b75a7d8f
A
484
485//========================================
486// Assignment
487//========================================
488
489UnicodeString &
490UnicodeString::operator=(const UnicodeString &src) {
491 return copyFrom(src);
492}
493
494UnicodeString &
495UnicodeString::fastCopyFrom(const UnicodeString &src) {
496 return copyFrom(src, TRUE);
497}
498
499UnicodeString &
500UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
501 // if assigning to ourselves, do nothing
b331163b 502 if(this == &src) {
b75a7d8f
A
503 return *this;
504 }
505
506 // is the right side bogus?
b331163b 507 if(src.isBogus()) {
b75a7d8f
A
508 setToBogus();
509 return *this;
510 }
511
512 // delete the current contents
513 releaseArray();
514
46f4442e 515 if(src.isEmpty()) {
b75a7d8f 516 // empty string - use the stack buffer
46f4442e 517 setToEmpty();
b75a7d8f
A
518 return *this;
519 }
520
521 // fLength>0 and not an "open" src.getBuffer(minCapacity)
b331163b
A
522 fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
523 switch(src.fUnion.fFields.fLengthAndFlags & kAllStorageFlags) {
b75a7d8f
A
524 case kShortString:
525 // short string using the stack buffer, do the same
b331163b
A
526 uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
527 getShortLength() * U_SIZEOF_UCHAR);
b75a7d8f
A
528 break;
529 case kLongString:
530 // src uses a refCounted string buffer, use that buffer with refCount
b331163b 531 // src is const, use a cast - we don't actually change it
b75a7d8f
A
532 ((UnicodeString &)src).addRef();
533 // copy all fields, share the reference-counted buffer
46f4442e
A
534 fUnion.fFields.fArray = src.fUnion.fFields.fArray;
535 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
b331163b
A
536 if(!hasShortLength()) {
537 fUnion.fFields.fLength = src.fUnion.fFields.fLength;
538 }
b75a7d8f
A
539 break;
540 case kReadonlyAlias:
541 if(fastCopy) {
542 // src is a readonly alias, do the same
543 // -> maintain the readonly alias as such
46f4442e
A
544 fUnion.fFields.fArray = src.fUnion.fFields.fArray;
545 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
b331163b
A
546 if(!hasShortLength()) {
547 fUnion.fFields.fLength = src.fUnion.fFields.fLength;
548 }
b75a7d8f
A
549 break;
550 }
551 // else if(!fastCopy) fall through to case kWritableAlias
552 // -> allocate a new buffer and copy the contents
2ca993e8 553 U_FALLTHROUGH;
b331163b 554 case kWritableAlias: {
b75a7d8f 555 // src is a writable alias; we make a copy of that instead
b331163b 556 int32_t srcLength = src.length();
46f4442e 557 if(allocate(srcLength)) {
a62d09fc 558 u_memcpy(getArrayStart(), src.getArrayStart(), srcLength);
b331163b 559 setLength(srcLength);
b75a7d8f
A
560 break;
561 }
562 // if there is not enough memory, then fall through to setting to bogus
2ca993e8 563 U_FALLTHROUGH;
b331163b 564 }
b75a7d8f
A
565 default:
566 // if src is bogus, set ourselves to bogus
b331163b
A
567 // do not call setToBogus() here because fArray and flags are not consistent here
568 fUnion.fFields.fLengthAndFlags = kIsBogus;
46f4442e
A
569 fUnion.fFields.fArray = 0;
570 fUnion.fFields.fCapacity = 0;
b75a7d8f
A
571 break;
572 }
573
574 return *this;
575}
576
2ca993e8
A
577UnicodeString &UnicodeString::moveFrom(UnicodeString &src) U_NOEXCEPT {
578 // No explicit check for self move assignment, consistent with standard library.
579 // Self move assignment causes no crash nor leak but might make the object bogus.
580 releaseArray();
581 copyFieldsFrom(src, TRUE);
582 return *this;
583}
584
585// Same as moveFrom() except without memory management.
586void UnicodeString::copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT {
587 int16_t lengthAndFlags = fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
588 if(lengthAndFlags & kUsingStackBuffer) {
589 // Short string using the stack buffer, copy the contents.
590 // Check for self assignment to prevent "overlap in memcpy" warnings,
591 // although it should be harmless to copy a buffer to itself exactly.
592 if(this != &src) {
593 uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
594 getShortLength() * U_SIZEOF_UCHAR);
595 }
596 } else {
597 // In all other cases, copy all fields.
598 fUnion.fFields.fArray = src.fUnion.fFields.fArray;
599 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
600 if(!hasShortLength()) {
601 fUnion.fFields.fLength = src.fUnion.fFields.fLength;
602 }
603 if(setSrcToBogus) {
604 // Set src to bogus without releasing any memory.
605 src.fUnion.fFields.fLengthAndFlags = kIsBogus;
606 src.fUnion.fFields.fArray = NULL;
607 src.fUnion.fFields.fCapacity = 0;
608 }
609 }
610}
611
612void UnicodeString::swap(UnicodeString &other) U_NOEXCEPT {
613 UnicodeString temp; // Empty short string: Known not to need releaseArray().
614 // Copy fields without resetting source values in between.
615 temp.copyFieldsFrom(*this, FALSE);
616 this->copyFieldsFrom(other, FALSE);
617 other.copyFieldsFrom(temp, FALSE);
618 // Set temp to an empty string so that other's memory is not released twice.
619 temp.fUnion.fFields.fLengthAndFlags = kShortString;
620}
621
b75a7d8f
A
622//========================================
623// Miscellaneous operations
624//========================================
625
626UnicodeString UnicodeString::unescape() const {
46f4442e 627 UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
2ca993e8
A
628 if (result.isBogus()) {
629 return result;
630 }
46f4442e
A
631 const UChar *array = getBuffer();
632 int32_t len = length();
633 int32_t prev = 0;
634 for (int32_t i=0;;) {
635 if (i == len) {
636 result.append(array, prev, len - prev);
637 break;
638 }
639 if (array[i++] == 0x5C /*'\\'*/) {
640 result.append(array, prev, (i - 1) - prev);
641 UChar32 c = unescapeAt(i); // advances i
642 if (c < 0) {
b75a7d8f
A
643 result.remove(); // return empty string
644 break; // invalid escape sequence
645 }
46f4442e
A
646 result.append(c);
647 prev = i;
b75a7d8f 648 }
b75a7d8f
A
649 }
650 return result;
651}
652
653UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
654 return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
655}
656
657//========================================
658// Read-only implementation
659//========================================
51004dcb
A
660UBool
661UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {
662 // Requires: this & text not bogus and have same lengths.
663 // Byte-wise comparison works for equality regardless of endianness.
664 return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0;
665}
666
b75a7d8f
A
667int8_t
668UnicodeString::doCompare( int32_t start,
669 int32_t length,
670 const UChar *srcChars,
671 int32_t srcStart,
672 int32_t srcLength) const
673{
674 // compare illegal string values
b75a7d8f
A
675 if(isBogus()) {
676 return -1;
677 }
678
679 // pin indices to legal values
680 pinIndices(start, length);
681
682 if(srcChars == NULL) {
4388f060
A
683 // treat const UChar *srcChars==NULL as an empty string
684 return length == 0 ? 0 : 1;
b75a7d8f
A
685 }
686
687 // get the correct pointer
688 const UChar *chars = getArrayStart();
689
690 chars += start;
691 srcChars += srcStart;
692
693 int32_t minLength;
694 int8_t lengthResult;
695
696 // get the srcLength if necessary
697 if(srcLength < 0) {
698 srcLength = u_strlen(srcChars + srcStart);
699 }
700
701 // are we comparing different lengths?
702 if(length != srcLength) {
703 if(length < srcLength) {
704 minLength = length;
705 lengthResult = -1;
706 } else {
707 minLength = srcLength;
708 lengthResult = 1;
709 }
710 } else {
711 minLength = length;
712 lengthResult = 0;
713 }
714
715 /*
716 * note that uprv_memcmp() returns an int but we return an int8_t;
717 * we need to take care not to truncate the result -
718 * one way to do this is to right-shift the value to
719 * move the sign bit into the lower 8 bits and making sure that this
720 * does not become 0 itself
721 */
722
723 if(minLength > 0 && chars != srcChars) {
724 int32_t result;
725
726# if U_IS_BIG_ENDIAN
727 // big-endian: byte comparison works
728 result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
729 if(result != 0) {
730 return (int8_t)(result >> 15 | 1);
731 }
732# else
733 // little-endian: compare UChar units
734 do {
735 result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
736 if(result != 0) {
737 return (int8_t)(result >> 15 | 1);
738 }
739 } while(--minLength > 0);
740# endif
741 }
742 return lengthResult;
743}
744
745/* String compare in code point order - doCompare() compares in code unit order. */
746int8_t
747UnicodeString::doCompareCodePointOrder(int32_t start,
748 int32_t length,
749 const UChar *srcChars,
750 int32_t srcStart,
751 int32_t srcLength) const
752{
753 // compare illegal string values
754 // treat const UChar *srcChars==NULL as an empty string
755 if(isBogus()) {
756 return -1;
757 }
758
759 // pin indices to legal values
760 pinIndices(start, length);
761
762 if(srcChars == NULL) {
763 srcStart = srcLength = 0;
764 }
765
4388f060 766 int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=NULL)?(srcChars + srcStart):NULL, srcLength, FALSE, TRUE);
b75a7d8f
A
767 /* translate the 32-bit result into an 8-bit one */
768 if(diff!=0) {
769 return (int8_t)(diff >> 15 | 1);
770 } else {
771 return 0;
772 }
773}
774
b75a7d8f
A
775int32_t
776UnicodeString::getLength() const {
777 return length();
778}
779
780UChar
781UnicodeString::getCharAt(int32_t offset) const {
782 return charAt(offset);
783}
784
785UChar32
786UnicodeString::getChar32At(int32_t offset) const {
787 return char32At(offset);
788}
789
4388f060
A
790UChar32
791UnicodeString::char32At(int32_t offset) const
792{
793 int32_t len = length();
794 if((uint32_t)offset < (uint32_t)len) {
795 const UChar *array = getArrayStart();
796 UChar32 c;
797 U16_GET(array, 0, offset, len, c);
798 return c;
799 } else {
800 return kInvalidUChar;
801 }
802}
803
804int32_t
805UnicodeString::getChar32Start(int32_t offset) const {
806 if((uint32_t)offset < (uint32_t)length()) {
807 const UChar *array = getArrayStart();
808 U16_SET_CP_START(array, 0, offset);
809 return offset;
810 } else {
811 return 0;
812 }
813}
814
815int32_t
816UnicodeString::getChar32Limit(int32_t offset) const {
817 int32_t len = length();
818 if((uint32_t)offset < (uint32_t)len) {
819 const UChar *array = getArrayStart();
820 U16_SET_CP_LIMIT(array, 0, offset, len);
821 return offset;
822 } else {
823 return len;
824 }
825}
826
b75a7d8f
A
827int32_t
828UnicodeString::countChar32(int32_t start, int32_t length) const {
829 pinIndices(start, length);
830 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
46f4442e 831 return u_countChar32(getArrayStart()+start, length);
b75a7d8f
A
832}
833
834UBool
835UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
836 pinIndices(start, length);
837 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
46f4442e 838 return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
b75a7d8f
A
839}
840
841int32_t
842UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
843 // pin index
46f4442e 844 int32_t len = length();
b75a7d8f
A
845 if(index<0) {
846 index=0;
46f4442e
A
847 } else if(index>len) {
848 index=len;
b75a7d8f
A
849 }
850
46f4442e 851 const UChar *array = getArrayStart();
b75a7d8f 852 if(delta>0) {
4388f060 853 U16_FWD_N(array, index, len, delta);
b75a7d8f 854 } else {
4388f060 855 U16_BACK_N(array, 0, index, -delta);
b75a7d8f
A
856 }
857
858 return index;
859}
860
861void
862UnicodeString::doExtract(int32_t start,
863 int32_t length,
864 UChar *dst,
865 int32_t dstStart) const
866{
867 // pin indices to legal values
868 pinIndices(start, length);
869
870 // do not copy anything if we alias dst itself
46f4442e
A
871 const UChar *array = getArrayStart();
872 if(array + start != dst + dstStart) {
873 us_arrayCopy(array, start, dst, dstStart, length);
b75a7d8f
A
874 }
875}
876
877int32_t
f3c0d7a5 878UnicodeString::extract(Char16Ptr dest, int32_t destCapacity,
b75a7d8f 879 UErrorCode &errorCode) const {
46f4442e 880 int32_t len = length();
b75a7d8f
A
881 if(U_SUCCESS(errorCode)) {
882 if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
883 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
884 } else {
46f4442e
A
885 const UChar *array = getArrayStart();
886 if(len>0 && len<=destCapacity && array!=dest) {
a62d09fc 887 u_memcpy(dest, array, len);
b75a7d8f 888 }
46f4442e 889 return u_terminateUChars(dest, destCapacity, len, &errorCode);
b75a7d8f
A
890 }
891 }
892
46f4442e 893 return len;
b75a7d8f
A
894}
895
374ca955
A
896int32_t
897UnicodeString::extract(int32_t start,
898 int32_t length,
899 char *target,
900 int32_t targetCapacity,
901 enum EInvariant) const
902{
903 // if the arguments are illegal, then do nothing
904 if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {
905 return 0;
906 }
907
908 // pin the indices to legal values
909 pinIndices(start, length);
910
911 if(length <= targetCapacity) {
912 u_UCharsToChars(getArrayStart() + start, target, length);
913 }
914 UErrorCode status = U_ZERO_ERROR;
915 return u_terminateChars(target, targetCapacity, length, &status);
916}
917
729e4ab9
A
918UnicodeString
919UnicodeString::tempSubString(int32_t start, int32_t len) const {
920 pinIndices(start, len);
921 const UChar *array = getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer
922 if(array==NULL) {
b331163b 923 array=fUnion.fStackFields.fBuffer; // anything not NULL because that would make an empty string
729e4ab9
A
924 len=-2; // bogus result string
925 }
926 return UnicodeString(FALSE, array + start, len);
927}
928
929int32_t
930UnicodeString::toUTF8(int32_t start, int32_t len,
931 char *target, int32_t capacity) const {
932 pinIndices(start, len);
933 int32_t length8;
934 UErrorCode errorCode = U_ZERO_ERROR;
935 u_strToUTF8WithSub(target, capacity, &length8,
936 getBuffer() + start, len,
937 0xFFFD, // Standard substitution character.
938 NULL, // Don't care about number of substitutions.
939 &errorCode);
940 return length8;
941}
942
943#if U_CHARSET_IS_UTF8
944
945int32_t
946UnicodeString::extract(int32_t start, int32_t len,
947 char *target, uint32_t dstSize) const {
948 // if the arguments are illegal, then do nothing
949 if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
950 return 0;
951 }
952 return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
953}
954
955// else see unistr_cnv.cpp
956#endif
957
374ca955
A
958void
959UnicodeString::extractBetween(int32_t start,
960 int32_t limit,
961 UnicodeString& target) const {
962 pinIndex(start);
963 pinIndex(limit);
964 doExtract(start, limit - start, target);
965}
966
729e4ab9
A
967// When converting from UTF-16 to UTF-8, the result will have at most 3 times
968// as many bytes as the source has UChars.
969// The "worst cases" are writing systems like Indic, Thai and CJK with
970// 3:1 bytes:UChars.
971void
972UnicodeString::toUTF8(ByteSink &sink) const {
973 int32_t length16 = length();
974 if(length16 != 0) {
975 char stackBuffer[1024];
976 int32_t capacity = (int32_t)sizeof(stackBuffer);
977 UBool utf8IsOwned = FALSE;
978 char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
979 3*length16,
980 stackBuffer, capacity,
981 &capacity);
982 int32_t length8 = 0;
983 UErrorCode errorCode = U_ZERO_ERROR;
984 u_strToUTF8WithSub(utf8, capacity, &length8,
985 getBuffer(), length16,
986 0xFFFD, // Standard substitution character.
987 NULL, // Don't care about number of substitutions.
988 &errorCode);
989 if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
990 utf8 = (char *)uprv_malloc(length8);
991 if(utf8 != NULL) {
992 utf8IsOwned = TRUE;
993 errorCode = U_ZERO_ERROR;
994 u_strToUTF8WithSub(utf8, length8, &length8,
995 getBuffer(), length16,
996 0xFFFD, // Standard substitution character.
997 NULL, // Don't care about number of substitutions.
998 &errorCode);
999 } else {
1000 errorCode = U_MEMORY_ALLOCATION_ERROR;
1001 }
1002 }
1003 if(U_SUCCESS(errorCode)) {
1004 sink.Append(utf8, length8);
1005 sink.Flush();
1006 }
1007 if(utf8IsOwned) {
1008 uprv_free(utf8);
1009 }
1010 }
1011}
1012
1013int32_t
1014UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
1015 int32_t length32=0;
1016 if(U_SUCCESS(errorCode)) {
1017 // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
1018 u_strToUTF32WithSub(utf32, capacity, &length32,
1019 getBuffer(), length(),
1020 0xfffd, // Substitution character.
1021 NULL, // Don't care about number of substitutions.
1022 &errorCode);
1023 }
1024 return length32;
1025}
1026
b75a7d8f
A
1027int32_t
1028UnicodeString::indexOf(const UChar *srcChars,
1029 int32_t srcStart,
1030 int32_t srcLength,
1031 int32_t start,
1032 int32_t length) const
1033{
1034 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
1035 return -1;
1036 }
1037
1038 // UnicodeString does not find empty substrings
1039 if(srcLength < 0 && srcChars[srcStart] == 0) {
1040 return -1;
1041 }
1042
1043 // get the indices within bounds
1044 pinIndices(start, length);
1045
1046 // find the first occurrence of the substring
46f4442e
A
1047 const UChar *array = getArrayStart();
1048 const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
b75a7d8f
A
1049 if(match == NULL) {
1050 return -1;
1051 } else {
46f4442e 1052 return (int32_t)(match - array);
b75a7d8f
A
1053 }
1054}
1055
1056int32_t
1057UnicodeString::doIndexOf(UChar c,
1058 int32_t start,
1059 int32_t length) const
1060{
1061 // pin indices
1062 pinIndices(start, length);
1063
1064 // find the first occurrence of c
46f4442e
A
1065 const UChar *array = getArrayStart();
1066 const UChar *match = u_memchr(array + start, c, length);
b75a7d8f
A
1067 if(match == NULL) {
1068 return -1;
1069 } else {
46f4442e 1070 return (int32_t)(match - array);
b75a7d8f
A
1071 }
1072}
1073
1074int32_t
1075UnicodeString::doIndexOf(UChar32 c,
1076 int32_t start,
1077 int32_t length) const {
1078 // pin indices
1079 pinIndices(start, length);
1080
1081 // find the first occurrence of c
46f4442e
A
1082 const UChar *array = getArrayStart();
1083 const UChar *match = u_memchr32(array + start, c, length);
b75a7d8f
A
1084 if(match == NULL) {
1085 return -1;
1086 } else {
46f4442e 1087 return (int32_t)(match - array);
b75a7d8f
A
1088 }
1089}
1090
1091int32_t
1092UnicodeString::lastIndexOf(const UChar *srcChars,
1093 int32_t srcStart,
1094 int32_t srcLength,
1095 int32_t start,
1096 int32_t length) const
1097{
1098 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
1099 return -1;
1100 }
1101
1102 // UnicodeString does not find empty substrings
1103 if(srcLength < 0 && srcChars[srcStart] == 0) {
1104 return -1;
1105 }
1106
1107 // get the indices within bounds
1108 pinIndices(start, length);
1109
1110 // find the last occurrence of the substring
46f4442e
A
1111 const UChar *array = getArrayStart();
1112 const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
b75a7d8f
A
1113 if(match == NULL) {
1114 return -1;
1115 } else {
46f4442e 1116 return (int32_t)(match - array);
b75a7d8f
A
1117 }
1118}
1119
1120int32_t
1121UnicodeString::doLastIndexOf(UChar c,
1122 int32_t start,
1123 int32_t length) const
1124{
1125 if(isBogus()) {
1126 return -1;
1127 }
1128
1129 // pin indices
1130 pinIndices(start, length);
1131
1132 // find the last occurrence of c
46f4442e
A
1133 const UChar *array = getArrayStart();
1134 const UChar *match = u_memrchr(array + start, c, length);
b75a7d8f
A
1135 if(match == NULL) {
1136 return -1;
1137 } else {
46f4442e 1138 return (int32_t)(match - array);
b75a7d8f
A
1139 }
1140}
1141
1142int32_t
1143UnicodeString::doLastIndexOf(UChar32 c,
1144 int32_t start,
1145 int32_t length) const {
1146 // pin indices
1147 pinIndices(start, length);
1148
1149 // find the last occurrence of c
46f4442e
A
1150 const UChar *array = getArrayStart();
1151 const UChar *match = u_memrchr32(array + start, c, length);
b75a7d8f
A
1152 if(match == NULL) {
1153 return -1;
1154 } else {
46f4442e 1155 return (int32_t)(match - array);
b75a7d8f
A
1156 }
1157}
1158
1159//========================================
1160// Write implementation
1161//========================================
1162
1163UnicodeString&
1164UnicodeString::findAndReplace(int32_t start,
1165 int32_t length,
1166 const UnicodeString& oldText,
1167 int32_t oldStart,
1168 int32_t oldLength,
1169 const UnicodeString& newText,
1170 int32_t newStart,
1171 int32_t newLength)
1172{
1173 if(isBogus() || oldText.isBogus() || newText.isBogus()) {
1174 return *this;
1175 }
1176
1177 pinIndices(start, length);
1178 oldText.pinIndices(oldStart, oldLength);
1179 newText.pinIndices(newStart, newLength);
1180
1181 if(oldLength == 0) {
1182 return *this;
1183 }
1184
1185 while(length > 0 && length >= oldLength) {
1186 int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
1187 if(pos < 0) {
1188 // no more oldText's here: done
1189 break;
1190 } else {
1191 // we found oldText, replace it by newText and go beyond it
1192 replace(pos, oldLength, newText, newStart, newLength);
1193 length -= pos + oldLength - start;
1194 start = pos + newLength;
1195 }
1196 }
1197
1198 return *this;
1199}
1200
1201
1202void
1203UnicodeString::setToBogus()
1204{
1205 releaseArray();
1206
b331163b 1207 fUnion.fFields.fLengthAndFlags = kIsBogus;
46f4442e
A
1208 fUnion.fFields.fArray = 0;
1209 fUnion.fFields.fCapacity = 0;
b75a7d8f
A
1210}
1211
1212// turn a bogus string into an empty one
1213void
1214UnicodeString::unBogus() {
b331163b 1215 if(fUnion.fFields.fLengthAndFlags & kIsBogus) {
46f4442e 1216 setToEmpty();
b75a7d8f
A
1217 }
1218}
1219
f3c0d7a5 1220const char16_t *
57a6839d
A
1221UnicodeString::getTerminatedBuffer() {
1222 if(!isWritable()) {
f3c0d7a5 1223 return nullptr;
57a6839d
A
1224 }
1225 UChar *array = getArrayStart();
1226 int32_t len = length();
1227 if(len < getCapacity()) {
b331163b 1228 if(fUnion.fFields.fLengthAndFlags & kBufferIsReadonly) {
57a6839d
A
1229 // If len<capacity on a read-only alias, then array[len] is
1230 // either the original NUL (if constructed with (TRUE, s, length))
1231 // or one of the original string contents characters (if later truncated),
1232 // therefore we can assume that array[len] is initialized memory.
1233 if(array[len] == 0) {
1234 return array;
1235 }
b331163b 1236 } else if(((fUnion.fFields.fLengthAndFlags & kRefCounted) == 0 || refCount() == 1)) {
57a6839d
A
1237 // kRefCounted: Do not write the NUL if the buffer is shared.
1238 // That is mostly safe, except when the length of one copy was modified
1239 // without copy-on-write, e.g., via truncate(newLength) or remove(void).
1240 // Then the NUL would be written into the middle of another copy's string.
1241
1242 // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL.
1243 // Do not test if there is a NUL already because it might be uninitialized memory.
1244 // (That would be safe, but tools like valgrind & Purify would complain.)
1245 array[len] = 0;
1246 return array;
1247 }
1248 }
a62d09fc 1249 if(len<INT32_MAX && cloneArrayIfNeeded(len+1)) {
57a6839d
A
1250 array = getArrayStart();
1251 array[len] = 0;
1252 return array;
1253 } else {
f3c0d7a5 1254 return nullptr;
57a6839d
A
1255 }
1256}
1257
b75a7d8f
A
1258// setTo() analogous to the readonly-aliasing constructor with the same signature
1259UnicodeString &
1260UnicodeString::setTo(UBool isTerminated,
f3c0d7a5 1261 ConstChar16Ptr textPtr,
b75a7d8f
A
1262 int32_t textLength)
1263{
b331163b 1264 if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
b75a7d8f
A
1265 // do not modify a string that has an "open" getBuffer(minCapacity)
1266 return *this;
1267 }
1268
f3c0d7a5 1269 const UChar *text = textPtr;
b75a7d8f
A
1270 if(text == NULL) {
1271 // treat as an empty string, do not alias
1272 releaseArray();
46f4442e 1273 setToEmpty();
b75a7d8f
A
1274 return *this;
1275 }
1276
1277 if( textLength < -1 ||
1278 (textLength == -1 && !isTerminated) ||
1279 (textLength >= 0 && isTerminated && text[textLength] != 0)
1280 ) {
1281 setToBogus();
1282 return *this;
1283 }
1284
1285 releaseArray();
1286
46f4442e 1287 if(textLength == -1) {
b75a7d8f 1288 // text is terminated, or else it would have failed the above test
46f4442e 1289 textLength = u_strlen(text);
b75a7d8f 1290 }
b331163b 1291 fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
46f4442e 1292 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
b75a7d8f
A
1293 return *this;
1294}
1295
1296// setTo() analogous to the writable-aliasing constructor with the same signature
1297UnicodeString &
1298UnicodeString::setTo(UChar *buffer,
1299 int32_t buffLength,
1300 int32_t buffCapacity) {
b331163b 1301 if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
b75a7d8f
A
1302 // do not modify a string that has an "open" getBuffer(minCapacity)
1303 return *this;
1304 }
1305
1306 if(buffer == NULL) {
1307 // treat as an empty string, do not alias
1308 releaseArray();
46f4442e 1309 setToEmpty();
b75a7d8f
A
1310 return *this;
1311 }
1312
374ca955 1313 if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
b75a7d8f
A
1314 setToBogus();
1315 return *this;
374ca955
A
1316 } else if(buffLength == -1) {
1317 // buffLength = u_strlen(buff); but do not look beyond buffCapacity
1318 const UChar *p = buffer, *limit = buffer + buffCapacity;
1319 while(p != limit && *p != 0) {
1320 ++p;
1321 }
1322 buffLength = (int32_t)(p - buffer);
b75a7d8f
A
1323 }
1324
1325 releaseArray();
1326
b331163b 1327 fUnion.fFields.fLengthAndFlags = kWritableAlias;
46f4442e 1328 setArray(buffer, buffLength, buffCapacity);
b75a7d8f
A
1329 return *this;
1330}
1331
f3c0d7a5 1332UnicodeString &UnicodeString::setToUTF8(StringPiece utf8) {
729e4ab9
A
1333 unBogus();
1334 int32_t length = utf8.length();
1335 int32_t capacity;
1336 // The UTF-16 string will be at most as long as the UTF-8 string.
1337 if(length <= US_STACKBUF_SIZE) {
1338 capacity = US_STACKBUF_SIZE;
1339 } else {
1340 capacity = length + 1; // +1 for the terminating NUL.
1341 }
1342 UChar *utf16 = getBuffer(capacity);
1343 int32_t length16;
1344 UErrorCode errorCode = U_ZERO_ERROR;
1345 u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
1346 utf8.data(), length,
1347 0xfffd, // Substitution character.
1348 NULL, // Don't care about number of substitutions.
1349 &errorCode);
1350 releaseBuffer(length16);
1351 if(U_FAILURE(errorCode)) {
1352 setToBogus();
1353 }
1354 return *this;
1355}
1356
b75a7d8f
A
1357UnicodeString&
1358UnicodeString::setCharAt(int32_t offset,
1359 UChar c)
1360{
46f4442e
A
1361 int32_t len = length();
1362 if(cloneArrayIfNeeded() && len > 0) {
b75a7d8f
A
1363 if(offset < 0) {
1364 offset = 0;
46f4442e
A
1365 } else if(offset >= len) {
1366 offset = len - 1;
b75a7d8f
A
1367 }
1368
46f4442e 1369 getArrayStart()[offset] = c;
b75a7d8f
A
1370 }
1371 return *this;
1372}
1373
4388f060
A
1374UnicodeString&
1375UnicodeString::replace(int32_t start,
1376 int32_t _length,
1377 UChar32 srcChar) {
1378 UChar buffer[U16_MAX_LENGTH];
1379 int32_t count = 0;
1380 UBool isError = FALSE;
1381 U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
1382 // We test isError so that the compiler does not complain that we don't.
51004dcb
A
1383 // If isError (srcChar is not a valid code point) then count==0 which means
1384 // we remove the source segment rather than replacing it with srcChar.
1385 return doReplace(start, _length, buffer, 0, isError ? 0 : count);
4388f060
A
1386}
1387
1388UnicodeString&
1389UnicodeString::append(UChar32 srcChar) {
1390 UChar buffer[U16_MAX_LENGTH];
1391 int32_t _length = 0;
1392 UBool isError = FALSE;
1393 U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
1394 // We test isError so that the compiler does not complain that we don't.
2ca993e8
A
1395 // If isError then _length==0 which turns the doAppend() into a no-op anyway.
1396 return isError ? *this : doAppend(buffer, 0, _length);
4388f060
A
1397}
1398
b75a7d8f
A
1399UnicodeString&
1400UnicodeString::doReplace( int32_t start,
1401 int32_t length,
1402 const UnicodeString& src,
1403 int32_t srcStart,
1404 int32_t srcLength)
1405{
2ca993e8
A
1406 // pin the indices to legal values
1407 src.pinIndices(srcStart, srcLength);
b75a7d8f 1408
2ca993e8
A
1409 // get the characters from src
1410 // and replace the range in ourselves with them
1411 return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
b75a7d8f
A
1412}
1413
1414UnicodeString&
1415UnicodeString::doReplace(int32_t start,
1416 int32_t length,
1417 const UChar *srcChars,
1418 int32_t srcStart,
1419 int32_t srcLength)
1420{
46f4442e 1421 if(!isWritable()) {
b75a7d8f
A
1422 return *this;
1423 }
1424
729e4ab9
A
1425 int32_t oldLength = this->length();
1426
1427 // optimize (read-only alias).remove(0, start) and .remove(start, end)
b331163b 1428 if((fUnion.fFields.fLengthAndFlags&kBufferIsReadonly) && srcLength == 0) {
729e4ab9
A
1429 if(start == 0) {
1430 // remove prefix by adjusting the array pointer
1431 pinIndex(length);
1432 fUnion.fFields.fArray += length;
1433 fUnion.fFields.fCapacity -= length;
1434 setLength(oldLength - length);
1435 return *this;
1436 } else {
1437 pinIndex(start);
1438 if(length >= (oldLength - start)) {
1439 // remove suffix by reducing the length (like truncate())
1440 setLength(start);
1441 fUnion.fFields.fCapacity = start; // not NUL-terminated any more
1442 return *this;
1443 }
1444 }
1445 }
1446
2ca993e8
A
1447 if(start == oldLength) {
1448 return doAppend(srcChars, srcStart, srcLength);
1449 }
1450
b75a7d8f
A
1451 if(srcChars == 0) {
1452 srcStart = srcLength = 0;
1453 } else if(srcLength < 0) {
1454 // get the srcLength if necessary
1455 srcLength = u_strlen(srcChars + srcStart);
1456 }
1457
2ca993e8
A
1458 // pin the indices to legal values
1459 pinIndices(start, length);
b75a7d8f 1460
a62d09fc
A
1461 // Calculate the size of the string after the replace.
1462 // Avoid int32_t overflow.
1463 int32_t newLength = oldLength - length;
1464 if(srcLength > (INT32_MAX - newLength)) {
1465 setToBogus();
1466 return *this;
1467 }
1468 newLength += srcLength;
b75a7d8f 1469
2ca993e8 1470 // cloneArrayIfNeeded(doCopyArray=FALSE) may change fArray but will not copy the current contents;
46f4442e
A
1471 // therefore we need to keep the current fArray
1472 UChar oldStackBuffer[US_STACKBUF_SIZE];
1473 UChar *oldArray;
b331163b 1474 if((fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
46f4442e
A
1475 // copy the stack buffer contents because it will be overwritten with
1476 // fUnion.fFields values
b331163b 1477 u_memcpy(oldStackBuffer, fUnion.fStackFields.fBuffer, oldLength);
46f4442e
A
1478 oldArray = oldStackBuffer;
1479 } else {
1480 oldArray = getArrayStart();
1481 }
b75a7d8f
A
1482
1483 // clone our array and allocate a bigger array if needed
46f4442e 1484 int32_t *bufferToDelete = 0;
a62d09fc 1485 if(!cloneArrayIfNeeded(newLength, getGrowCapacity(newLength),
b75a7d8f
A
1486 FALSE, &bufferToDelete)
1487 ) {
1488 return *this;
1489 }
1490
1491 // now do the replace
1492
46f4442e
A
1493 UChar *newArray = getArrayStart();
1494 if(newArray != oldArray) {
b75a7d8f 1495 // if fArray changed, then we need to copy everything except what will change
46f4442e 1496 us_arrayCopy(oldArray, 0, newArray, 0, start);
b75a7d8f 1497 us_arrayCopy(oldArray, start + length,
46f4442e 1498 newArray, start + srcLength,
b75a7d8f
A
1499 oldLength - (start + length));
1500 } else if(length != srcLength) {
1501 // fArray did not change; copy only the portion that isn't changing, leaving a hole
1502 us_arrayCopy(oldArray, start + length,
46f4442e 1503 newArray, start + srcLength,
b75a7d8f
A
1504 oldLength - (start + length));
1505 }
1506
1507 // now fill in the hole with the new string
46f4442e 1508 us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);
b75a7d8f 1509
4388f060 1510 setLength(newLength);
b75a7d8f
A
1511
1512 // delayed delete in case srcChars == fArray when we started, and
1513 // to keep oldArray alive for the above operations
1514 if (bufferToDelete) {
1515 uprv_free(bufferToDelete);
1516 }
1517
1518 return *this;
1519}
1520
2ca993e8
A
1521// Versions of doReplace() only for append() variants.
1522// doReplace() and doAppend() optimize for different cases.
1523
1524UnicodeString&
1525UnicodeString::doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength) {
1526 if(srcLength == 0) {
1527 return *this;
1528 }
1529
1530 // pin the indices to legal values
1531 src.pinIndices(srcStart, srcLength);
1532 return doAppend(src.getArrayStart(), srcStart, srcLength);
1533}
1534
1535UnicodeString&
1536UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLength) {
1537 if(!isWritable() || srcLength == 0 || srcChars == NULL) {
1538 return *this;
1539 }
1540
1541 if(srcLength < 0) {
1542 // get the srcLength if necessary
1543 if((srcLength = u_strlen(srcChars + srcStart)) == 0) {
1544 return *this;
1545 }
1546 }
1547
1548 int32_t oldLength = length();
1549 int32_t newLength = oldLength + srcLength;
1550 // optimize append() onto a large-enough, owned string
1551 if((newLength <= getCapacity() && isBufferWritable()) ||
a62d09fc 1552 cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) {
2ca993e8
A
1553 UChar *newArray = getArrayStart();
1554 // Do not copy characters when
1555 // UChar *buffer=str.getAppendBuffer(...);
1556 // is followed by
1557 // str.append(buffer, length);
1558 // or
1559 // str.appendString(buffer, length)
1560 // or similar.
1561 if(srcChars + srcStart != newArray + oldLength) {
1562 us_arrayCopy(srcChars, srcStart, newArray, oldLength, srcLength);
1563 }
1564 setLength(newLength);
1565 }
1566 return *this;
1567}
1568
b75a7d8f
A
1569/**
1570 * Replaceable API
1571 */
1572void
1573UnicodeString::handleReplaceBetween(int32_t start,
1574 int32_t limit,
1575 const UnicodeString& text) {
1576 replaceBetween(start, limit, text);
1577}
1578
1579/**
1580 * Replaceable API
1581 */
1582void
1583UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
1584 if (limit <= start) {
1585 return; // Nothing to do; avoid bogus malloc call
1586 }
1587 UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
46f4442e
A
1588 // Check to make sure text is not null.
1589 if (text != NULL) {
1590 extractBetween(start, limit, text, 0);
1591 insert(dest, text, 0, limit - start);
1592 uprv_free(text);
1593 }
b75a7d8f
A
1594}
1595
1596/**
1597 * Replaceable API
1598 *
1599 * NOTE: This is for the Replaceable class. There is no rep.cpp,
1600 * so we implement this function here.
1601 */
1602UBool Replaceable::hasMetaData() const {
1603 return TRUE;
1604}
1605
1606/**
1607 * Replaceable API
1608 */
1609UBool UnicodeString::hasMetaData() const {
1610 return FALSE;
1611}
1612
1613UnicodeString&
729e4ab9
A
1614UnicodeString::doReverse(int32_t start, int32_t length) {
1615 if(length <= 1 || !cloneArrayIfNeeded()) {
b75a7d8f
A
1616 return *this;
1617 }
1618
1619 // pin the indices to legal values
1620 pinIndices(start, length);
729e4ab9
A
1621 if(length <= 1) { // pinIndices() might have shrunk the length
1622 return *this;
1623 }
b75a7d8f
A
1624
1625 UChar *left = getArrayStart() + start;
729e4ab9 1626 UChar *right = left + length - 1; // -1 for inclusive boundary (length>=2)
b75a7d8f
A
1627 UChar swap;
1628 UBool hasSupplementary = FALSE;
1629
729e4ab9
A
1630 // Before the loop we know left<right because length>=2.
1631 do {
1632 hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
1633 hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
1634 *right-- = swap;
1635 } while(left < right);
1636 // Make sure to test the middle code unit of an odd-length string.
1637 // Redundant if the length is even.
1638 hasSupplementary |= (UBool)U16_IS_LEAD(*left);
b75a7d8f
A
1639
1640 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1641 if(hasSupplementary) {
1642 UChar swap2;
1643
1644 left = getArrayStart() + start;
46f4442e 1645 right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
b75a7d8f 1646 while(left < right) {
729e4ab9 1647 if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {
b75a7d8f
A
1648 *left++ = swap2;
1649 *left++ = swap;
1650 } else {
1651 ++left;
1652 }
1653 }
1654 }
1655
1656 return *this;
1657}
1658
1659UBool
1660UnicodeString::padLeading(int32_t targetLength,
1661 UChar padChar)
1662{
46f4442e
A
1663 int32_t oldLength = length();
1664 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
b75a7d8f
A
1665 return FALSE;
1666 } else {
1667 // move contents up by padding width
46f4442e
A
1668 UChar *array = getArrayStart();
1669 int32_t start = targetLength - oldLength;
1670 us_arrayCopy(array, 0, array, start, oldLength);
b75a7d8f
A
1671
1672 // fill in padding character
1673 while(--start >= 0) {
46f4442e 1674 array[start] = padChar;
b75a7d8f 1675 }
46f4442e 1676 setLength(targetLength);
b75a7d8f
A
1677 return TRUE;
1678 }
1679}
1680
1681UBool
1682UnicodeString::padTrailing(int32_t targetLength,
1683 UChar padChar)
1684{
46f4442e
A
1685 int32_t oldLength = length();
1686 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
b75a7d8f
A
1687 return FALSE;
1688 } else {
1689 // fill in padding character
46f4442e 1690 UChar *array = getArrayStart();
b75a7d8f 1691 int32_t length = targetLength;
46f4442e
A
1692 while(--length >= oldLength) {
1693 array[length] = padChar;
b75a7d8f 1694 }
46f4442e 1695 setLength(targetLength);
b75a7d8f
A
1696 return TRUE;
1697 }
1698}
1699
b75a7d8f
A
1700//========================================
1701// Hashing
1702//========================================
1703int32_t
1704UnicodeString::doHashCode() const
1705{
1706 /* Delegate hash computation to uhash. This makes UnicodeString
1707 * hashing consistent with UChar* hashing. */
4388f060 1708 int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length());
b75a7d8f
A
1709 if (hashCode == kInvalidHashCode) {
1710 hashCode = kEmptyHashCode;
1711 }
1712 return hashCode;
1713}
1714
b75a7d8f
A
1715//========================================
1716// External Buffer
1717//========================================
1718
f3c0d7a5 1719char16_t *
b75a7d8f
A
1720UnicodeString::getBuffer(int32_t minCapacity) {
1721 if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
b331163b
A
1722 fUnion.fFields.fLengthAndFlags|=kOpenGetBuffer;
1723 setZeroLength();
46f4442e 1724 return getArrayStart();
b75a7d8f 1725 } else {
f3c0d7a5 1726 return nullptr;
b75a7d8f
A
1727 }
1728}
1729
1730void
1731UnicodeString::releaseBuffer(int32_t newLength) {
b331163b 1732 if(fUnion.fFields.fLengthAndFlags&kOpenGetBuffer && newLength>=-1) {
b75a7d8f 1733 // set the new fLength
46f4442e 1734 int32_t capacity=getCapacity();
b75a7d8f
A
1735 if(newLength==-1) {
1736 // the new length is the string length, capped by fCapacity
46f4442e 1737 const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
b75a7d8f
A
1738 while(p<limit && *p!=0) {
1739 ++p;
1740 }
46f4442e
A
1741 newLength=(int32_t)(p-array);
1742 } else if(newLength>capacity) {
1743 newLength=capacity;
b75a7d8f 1744 }
46f4442e 1745 setLength(newLength);
b331163b 1746 fUnion.fFields.fLengthAndFlags&=~kOpenGetBuffer;
b75a7d8f
A
1747 }
1748}
1749
1750//========================================
1751// Miscellaneous
1752//========================================
1753UBool
1754UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
1755 int32_t growCapacity,
1756 UBool doCopyArray,
1757 int32_t **pBufferToDelete,
1758 UBool forceClone) {
1759 // default parameters need to be static, therefore
1760 // the defaults are -1 to have convenience defaults
1761 if(newCapacity == -1) {
46f4442e 1762 newCapacity = getCapacity();
b75a7d8f
A
1763 }
1764
1765 // while a getBuffer(minCapacity) is "open",
1766 // prevent any modifications of the string by returning FALSE here
1767 // if the string is bogus, then only an assignment or similar can revive it
46f4442e 1768 if(!isWritable()) {
b75a7d8f
A
1769 return FALSE;
1770 }
1771
1772 /*
1773 * We need to make a copy of the array if
1774 * the buffer is read-only, or
1775 * the buffer is refCounted (shared), and refCount>1, or
1776 * the buffer is too small.
1777 * Return FALSE if memory could not be allocated.
1778 */
1779 if(forceClone ||
b331163b
A
1780 fUnion.fFields.fLengthAndFlags & kBufferIsReadonly ||
1781 (fUnion.fFields.fLengthAndFlags & kRefCounted && refCount() > 1) ||
46f4442e 1782 newCapacity > getCapacity()
b75a7d8f 1783 ) {
b75a7d8f 1784 // check growCapacity for default value and use of the stack buffer
4388f060 1785 if(growCapacity < 0) {
b75a7d8f
A
1786 growCapacity = newCapacity;
1787 } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
1788 growCapacity = US_STACKBUF_SIZE;
1789 }
1790
46f4442e
A
1791 // save old values
1792 UChar oldStackBuffer[US_STACKBUF_SIZE];
1793 UChar *oldArray;
b331163b
A
1794 int32_t oldLength = length();
1795 int16_t flags = fUnion.fFields.fLengthAndFlags;
46f4442e
A
1796
1797 if(flags&kUsingStackBuffer) {
4388f060 1798 U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */
46f4442e
A
1799 if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
1800 // copy the stack buffer contents because it will be overwritten with
1801 // fUnion.fFields values
b331163b 1802 us_arrayCopy(fUnion.fStackFields.fBuffer, 0, oldStackBuffer, 0, oldLength);
46f4442e
A
1803 oldArray = oldStackBuffer;
1804 } else {
b331163b 1805 oldArray = NULL; // no need to copy from the stack buffer to itself
46f4442e
A
1806 }
1807 } else {
1808 oldArray = fUnion.fFields.fArray;
4388f060 1809 U_ASSERT(oldArray!=NULL); /* when stack buffer is not used, oldArray must have a non-NULL reference */
46f4442e
A
1810 }
1811
b75a7d8f
A
1812 // allocate a new array
1813 if(allocate(growCapacity) ||
729e4ab9 1814 (newCapacity < growCapacity && allocate(newCapacity))
b75a7d8f 1815 ) {
b331163b 1816 if(doCopyArray) {
b75a7d8f
A
1817 // copy the contents
1818 // do not copy more than what fits - it may be smaller than before
b331163b 1819 int32_t minLength = oldLength;
46f4442e
A
1820 newCapacity = getCapacity();
1821 if(newCapacity < minLength) {
1822 minLength = newCapacity;
b75a7d8f 1823 }
b331163b
A
1824 if(oldArray != NULL) {
1825 us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
1826 }
1827 setLength(minLength);
b75a7d8f 1828 } else {
b331163b 1829 setZeroLength();
b75a7d8f
A
1830 }
1831
1832 // release the old array
1833 if(flags & kRefCounted) {
1834 // the array is refCounted; decrement and release if 0
57a6839d 1835 u_atomic_int32_t *pRefCount = ((u_atomic_int32_t *)oldArray - 1);
b75a7d8f
A
1836 if(umtx_atomic_dec(pRefCount) == 0) {
1837 if(pBufferToDelete == 0) {
57a6839d
A
1838 // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t
1839 // is defined as volatile. (Volatile has useful non-standard behavior
1840 // with this compiler.)
1841 uprv_free((void *)pRefCount);
b75a7d8f
A
1842 } else {
1843 // the caller requested to delete it himself
57a6839d 1844 *pBufferToDelete = (int32_t *)pRefCount;
b75a7d8f
A
1845 }
1846 }
1847 }
1848 } else {
1849 // not enough memory for growCapacity and not even for the smaller newCapacity
1850 // reset the old values for setToBogus() to release the array
46f4442e
A
1851 if(!(flags&kUsingStackBuffer)) {
1852 fUnion.fFields.fArray = oldArray;
1853 }
b331163b 1854 fUnion.fFields.fLengthAndFlags = flags;
b75a7d8f
A
1855 setToBogus();
1856 return FALSE;
1857 }
1858 }
1859 return TRUE;
1860}
4388f060
A
1861
1862// UnicodeStringAppendable ------------------------------------------------- ***
1863
1864UnicodeStringAppendable::~UnicodeStringAppendable() {}
1865
1866UBool
1867UnicodeStringAppendable::appendCodeUnit(UChar c) {
2ca993e8 1868 return str.doAppend(&c, 0, 1).isWritable();
4388f060
A
1869}
1870
1871UBool
1872UnicodeStringAppendable::appendCodePoint(UChar32 c) {
1873 UChar buffer[U16_MAX_LENGTH];
1874 int32_t cLength = 0;
1875 UBool isError = FALSE;
1876 U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);
2ca993e8 1877 return !isError && str.doAppend(buffer, 0, cLength).isWritable();
4388f060
A
1878}
1879
1880UBool
1881UnicodeStringAppendable::appendString(const UChar *s, int32_t length) {
2ca993e8 1882 return str.doAppend(s, 0, length).isWritable();
4388f060
A
1883}
1884
1885UBool
1886UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
1887 return str.cloneArrayIfNeeded(str.length() + appendCapacity);
1888}
1889
1890UChar *
1891UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
1892 int32_t desiredCapacityHint,
1893 UChar *scratch, int32_t scratchCapacity,
1894 int32_t *resultCapacity) {
1895 if(minCapacity < 1 || scratchCapacity < minCapacity) {
1896 *resultCapacity = 0;
1897 return NULL;
1898 }
1899 int32_t oldLength = str.length();
a62d09fc
A
1900 if(minCapacity <= (kMaxCapacity - oldLength) &&
1901 desiredCapacityHint <= (kMaxCapacity - oldLength) &&
1902 str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
4388f060
A
1903 *resultCapacity = str.getCapacity() - oldLength;
1904 return str.getArrayStart() + oldLength;
1905 }
1906 *resultCapacity = scratchCapacity;
1907 return scratch;
1908}
1909
b75a7d8f 1910U_NAMESPACE_END
73c04bcf 1911
4388f060
A
1912U_NAMESPACE_USE
1913
1914U_CAPI int32_t U_EXPORT2
1915uhash_hashUnicodeString(const UElement key) {
1916 const UnicodeString *str = (const UnicodeString*) key.pointer;
1917 return (str == NULL) ? 0 : str->hashCode();
1918}
1919
1920// Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
1921// does not depend on hashtable code.
1922U_CAPI UBool U_EXPORT2
1923uhash_compareUnicodeString(const UElement key1, const UElement key2) {
1924 const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
1925 const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
1926 if (str1 == str2) {
1927 return TRUE;
1928 }
1929 if (str1 == NULL || str2 == NULL) {
1930 return FALSE;
1931 }
1932 return *str1 == *str2;
1933}
1934
73c04bcf
A
1935#ifdef U_STATIC_IMPLEMENTATION
1936/*
1937This should never be called. It is defined here to make sure that the
1938virtual vector deleting destructor is defined within unistr.cpp.
1939The vector deleting destructor is already a part of UObject,
1940but defining it here makes sure that it is included with this object file.
1941This makes sure that static library dependencies are kept to a minimum.
1942*/
1943static void uprv_UnicodeStringDummy(void) {
73c04bcf
A
1944 delete [] (new UnicodeString[2]);
1945}
1946#endif