1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 ******************************************************************************
5 * Copyright (C) 1999-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ******************************************************************************
11 * Modification History:
13 * Date Name Description
14 * 09/25/98 stephen Creation.
15 * 04/20/99 stephen Overhauled per 4/16 code review.
16 * 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX
17 * 11/18/99 aliu Added handleReplaceBetween() to make inherit from
19 * 06/25/01 grhoten Removed the dependency on iostream
20 ******************************************************************************
23 #include "unicode/utypes.h"
24 #include "unicode/appendable.h"
25 #include "unicode/putil.h"
28 #include "unicode/ustring.h"
29 #include "unicode/unistr.h"
30 #include "unicode/utf.h"
31 #include "unicode/utf16.h"
44 print(const UnicodeString
& s
,
49 for(int i
= 0; i
< s
.length(); ++i
) {
51 if(c
>= 0x007E || c
< 0x0020)
52 cout
<< "[0x" << hex
<< s
[i
] << "]";
66 for(int i
= 0; i
< len
; ++i
) {
68 if(c
>= 0x007E || c
< 0x0020)
69 cout
<< "[0x" << hex
<< s
[i
] << "]";
78 // Local function definitions for now
80 // need to copy areas that may overlap
83 us_arrayCopy(const UChar
*src
, int32_t srcStart
,
84 UChar
*dst
, int32_t dstStart
, int32_t count
)
87 uprv_memmove(dst
+dstStart
, src
+srcStart
, (size_t)count
*sizeof(*src
));
91 // u_unescapeAt() callback to get a UChar from a UnicodeString
93 static UChar U_CALLCONV
94 UnicodeString_charAt(int32_t offset
, void *context
) {
95 return ((icu::UnicodeString
*) context
)->charAt(offset
);
101 /* The Replaceable virtual destructor can't be defined in the header
102 due to how AIX works with multiple definitions of virtual functions.
104 Replaceable::~Replaceable() {}
106 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString
)
108 UnicodeString U_EXPORT2
109 operator+ (const UnicodeString
&s1
, const UnicodeString
&s2
) {
111 UnicodeString(s1
.length()+s2
.length()+1, (UChar32
)0, 0).
116 //========================================
117 // Reference Counting functions, put at top of file so that optimizing compilers
118 // have a chance to automatically inline.
119 //========================================
// Increments, via umtx_atomic_inc(), the 32-bit counter stored in the slot
// immediately in front of fArray (fArray is viewed as u_atomic_int32_t* and
// stepped back by one element).
122 UnicodeString::addRef() {
123 umtx_atomic_inc((u_atomic_int32_t
*)fUnion
.fFields
.fArray
- 1);
// Decrements, via umtx_atomic_dec(), the 32-bit counter stored in the slot
// immediately in front of fArray and returns whatever umtx_atomic_dec() returns.
// NOTE(review): presumably that is the counter value after the decrement — confirm.
127 UnicodeString::removeRef() {
128 return umtx_atomic_dec((u_atomic_int32_t
*)fUnion
.fFields
.fArray
- 1);
// Reads, via umtx_loadAcquire(), the 32-bit counter stored in the slot
// immediately in front of fArray and returns it. Does not modify the string.
132 UnicodeString::refCount() const {
133 return umtx_loadAcquire(*((u_atomic_int32_t
*)fUnion
.fFields
.fArray
- 1));
// Drops this string's reference to its buffer: only when the kRefCounted flag
// is set is removeRef() called, and only when that call yields 0 is the
// memory handed to uprv_free().
137 UnicodeString::releaseArray() {
138 if((fUnion
.fFields
.fLengthAndFlags
& kRefCounted
) && removeRef() == 0) {
// The pointer freed is one int32_t before fArray, i.e. the counter slot.
139 uprv_free((int32_t *)fUnion
.fFields
.fArray
- 1);
145 //========================================
147 //========================================
149 // The default constructor is inline in unistr.h.
151 UnicodeString::UnicodeString(int32_t capacity
, UChar32 c
, int32_t count
) {
152 fUnion
.fFields
.fLengthAndFlags
= 0;
153 if(count
<= 0 || (uint32_t)c
> 0x10ffff) {
154 // just allocate and do not do anything else
156 } else if(c
<= 0xffff) {
157 int32_t length
= count
;
158 if(capacity
< length
) {
161 if(allocate(capacity
)) {
162 UChar
*array
= getArrayStart();
163 UChar unit
= (UChar
)c
;
164 for(int32_t i
= 0; i
< length
; ++i
) {
169 } else { // supplementary code point, write surrogate pairs
170 if(count
> (INT32_MAX
/ 2)) {
171 // We would get more than 2G UChars.
175 int32_t length
= count
* 2;
176 if(capacity
< length
) {
179 if(allocate(capacity
)) {
180 UChar
*array
= getArrayStart();
181 UChar lead
= U16_LEAD(c
);
182 UChar trail
= U16_TRAIL(c
);
183 for(int32_t i
= 0; i
< length
; i
+= 2) {
185 array
[i
+ 1] = trail
;
192 UnicodeString::UnicodeString(UChar ch
) {
193 fUnion
.fFields
.fLengthAndFlags
= kLength1
| kShortString
;
194 fUnion
.fStackFields
.fBuffer
[0] = ch
;
197 UnicodeString::UnicodeString(UChar32 ch
) {
198 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
200 UBool isError
= FALSE
;
201 U16_APPEND(fUnion
.fStackFields
.fBuffer
, i
, US_STACKBUF_SIZE
, ch
, isError
);
202 // We test isError so that the compiler does not complain that we don't.
203 // If isError then i==0 which is what we want anyway.
209 UnicodeString::UnicodeString(const UChar
*text
) {
210 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
211 doAppend(text
, 0, -1);
214 UnicodeString::UnicodeString(const UChar
*text
,
215 int32_t textLength
) {
216 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
217 doAppend(text
, 0, textLength
);
220 UnicodeString::UnicodeString(UBool isTerminated
,
221 ConstChar16Ptr textPtr
,
222 int32_t textLength
) {
223 fUnion
.fFields
.fLengthAndFlags
= kReadonlyAlias
;
224 const UChar
*text
= textPtr
;
226 // treat as an empty string, do not alias
228 } else if(textLength
< -1 ||
229 (textLength
== -1 && !isTerminated
) ||
230 (textLength
>= 0 && isTerminated
&& text
[textLength
] != 0)
234 if(textLength
== -1) {
235 // text is terminated, or else it would have failed the above test
236 textLength
= u_strlen(text
);
238 setArray(const_cast<UChar
*>(text
), textLength
,
239 isTerminated
? textLength
+ 1 : textLength
);
243 UnicodeString::UnicodeString(UChar
*buff
,
245 int32_t buffCapacity
) {
246 fUnion
.fFields
.fLengthAndFlags
= kWritableAlias
;
248 // treat as an empty string, do not alias
250 } else if(buffLength
< -1 || buffCapacity
< 0 || buffLength
> buffCapacity
) {
253 if(buffLength
== -1) {
254 // fLength = u_strlen(buff); but do not look beyond buffCapacity
255 const UChar
*p
= buff
, *limit
= buff
+ buffCapacity
;
256 while(p
!= limit
&& *p
!= 0) {
259 buffLength
= (int32_t)(p
- buff
);
261 setArray(buff
, buffLength
, buffCapacity
);
265 UnicodeString::UnicodeString(const char *src
, int32_t length
, EInvariant
) {
266 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
268 // treat as an empty string
271 length
=(int32_t)uprv_strlen(src
);
273 if(cloneArrayIfNeeded(length
, length
, FALSE
)) {
274 u_charsToUChars(src
, getArrayStart(), length
);
282 #if U_CHARSET_IS_UTF8
284 UnicodeString::UnicodeString(const char *codepageData
) {
285 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
286 if(codepageData
!= 0) {
287 setToUTF8(codepageData
);
291 UnicodeString::UnicodeString(const char *codepageData
, int32_t dataLength
) {
292 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
293 // if there's nothing to convert, do nothing
294 if(codepageData
== 0 || dataLength
== 0 || dataLength
< -1) {
297 if(dataLength
== -1) {
298 dataLength
= (int32_t)uprv_strlen(codepageData
);
300 setToUTF8(StringPiece(codepageData
, dataLength
));
303 // else see unistr_cnv.cpp
306 UnicodeString::UnicodeString(const UnicodeString
& that
) {
307 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
311 #if U_HAVE_RVALUE_REFERENCES
312 UnicodeString::UnicodeString(UnicodeString
&&src
) U_NOEXCEPT
{
313 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
318 UnicodeString::UnicodeString(const UnicodeString
& that
,
320 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
321 setTo(that
, srcStart
);
324 UnicodeString::UnicodeString(const UnicodeString
& that
,
327 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
328 setTo(that
, srcStart
, srcLength
);
331 // Replaceable base class clone() default implementation, does not clone
333 Replaceable::clone() const {
337 // UnicodeString overrides clone() with a real implementation
// Returns a pointer to a new UnicodeString created with `new` from *this
// (copy construction).
// NOTE(review): the caller presumably owns and must delete the result — confirm.
339 UnicodeString::clone() const {
340 return new UnicodeString(*this);
343 //========================================
345 //========================================
349 const int32_t kGrowSize
= 128;
351 // The number of bytes for one int32_t reference counter and capacity UChars
352 // must fit into a 32-bit size_t (at least when on a 32-bit platform).
353 // We also add one for the NUL terminator, to avoid reallocation in getTerminatedBuffer(),
354 // and round up to a multiple of 16 bytes.
355 // This means that capacity must be at most (0xfffffff0 - 4) / 2 - 1 = 0x7ffffff5.
356 // (With more complicated checks we could go up to 0x7ffffffd without rounding up,
357 // but that does not seem worth it.)
358 const int32_t kMaxCapacity
= 0x7ffffff5;
// Proposes a capacity for a string that needs room for newLength code units.
// The head-room is a quarter of newLength plus kGrowSize.
360 int32_t getGrowCapacity(int32_t newLength
) {
361 int32_t growSize
= (newLength
>> 2) + kGrowSize
;
// Compare against the remaining room below kMaxCapacity instead of adding first.
// NOTE(review): presumably this avoids overflowing newLength + growSize — confirm.
362 if(growSize
<= (kMaxCapacity
- newLength
)) {
363 return newLength
+ growSize
;
372 UnicodeString::allocate(int32_t capacity
) {
373 if(capacity
<= US_STACKBUF_SIZE
) {
374 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
377 if(capacity
<= kMaxCapacity
) {
378 ++capacity
; // for the NUL
379 // Switch to size_t which is unsigned so that we can allocate up to 4GB.
380 // Reference counter + UChars.
381 size_t numBytes
= sizeof(int32_t) + (size_t)capacity
* U_SIZEOF_UCHAR
;
382 // Round up to a multiple of 16.
383 numBytes
= (numBytes
+ 15) & ~15;
384 int32_t *array
= (int32_t *) uprv_malloc(numBytes
);
386 // set initial refCount and point behind the refCount
388 numBytes
-= sizeof(int32_t);
390 // have fArray point to the first UChar
391 fUnion
.fFields
.fArray
= (UChar
*)array
;
392 fUnion
.fFields
.fCapacity
= (int32_t)(numBytes
/ U_SIZEOF_UCHAR
);
393 fUnion
.fFields
.fLengthAndFlags
= kLongString
;
397 fUnion
.fFields
.fLengthAndFlags
= kIsBogus
;
398 fUnion
.fFields
.fArray
= 0;
399 fUnion
.fFields
.fCapacity
= 0;
403 //========================================
405 //========================================
407 #ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
408 static u_atomic_int32_t finalLengthCounts
[0x400]; // UnicodeString::kMaxShortLength+1
409 static u_atomic_int32_t
beyondCount(0);
411 U_CAPI
void unistr_printLengths() {
413 for(i
= 0; i
<= 59; ++i
) {
414 printf("%2d, %9d\n", i
, (int32_t)finalLengthCounts
[i
]);
416 int32_t beyond
= beyondCount
;
417 for(; i
< UPRV_LENGTHOF(finalLengthCounts
); ++i
) {
418 beyond
+= finalLengthCounts
[i
];
420 printf(">59, %9d\n", beyond
);
424 UnicodeString::~UnicodeString()
426 #ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
427 // Count lengths of strings at the end of their lifetime.
428 // Useful for discussion of a desirable stack buffer size.
429 // Count the contents length, not the optional NUL terminator nor further capacity.
430 // Ignore open-buffer strings and strings which alias external storage.
431 if((fUnion
.fFields
.fLengthAndFlags
&(kOpenGetBuffer
|kReadonlyAlias
|kWritableAlias
)) == 0) {
432 if(hasShortLength()) {
433 umtx_atomic_inc(finalLengthCounts
+ getShortLength());
435 umtx_atomic_inc(&beyondCount
);
443 //========================================
445 //========================================
447 UnicodeString
UnicodeString::fromUTF8(StringPiece utf8
) {
448 UnicodeString result
;
449 result
.setToUTF8(utf8
);
453 UnicodeString
UnicodeString::fromUTF32(const UChar32
*utf32
, int32_t length
) {
454 UnicodeString result
;
456 // Most UTF-32 strings will be BMP-only and result in a same-length
457 // UTF-16 string. We overestimate the capacity just slightly,
458 // just in case there are a few supplementary characters.
459 if(length
<= US_STACKBUF_SIZE
) {
460 capacity
= US_STACKBUF_SIZE
;
462 capacity
= length
+ (length
>> 4) + 4;
465 UChar
*utf16
= result
.getBuffer(capacity
);
467 UErrorCode errorCode
= U_ZERO_ERROR
;
468 u_strFromUTF32WithSub(utf16
, result
.getCapacity(), &length16
,
470 0xfffd, // Substitution character.
471 NULL
, // Don't care about number of substitutions.
473 result
.releaseBuffer(length16
);
474 if(errorCode
== U_BUFFER_OVERFLOW_ERROR
) {
475 capacity
= length16
+ 1; // +1 for the terminating NUL.
477 } else if(U_FAILURE(errorCode
)) {
485 //========================================
487 //========================================
// Copy assignment: forwards to copyFrom(src) with its default second argument
// and returns that call's result.
490 UnicodeString::operator=(const UnicodeString
&src
) {
491 return copyFrom(src
);
// Same as assignment, except that copyFrom() is called with TRUE for its
// fastCopy parameter; returns that call's result.
495 UnicodeString::fastCopyFrom(const UnicodeString
&src
) {
496 return copyFrom(src
, TRUE
);
500 UnicodeString::copyFrom(const UnicodeString
&src
, UBool fastCopy
) {
501 // if assigning to ourselves, do nothing
506 // is the right side bogus?
512 // delete the current contents
516 // empty string - use the stack buffer
521 // fLength>0 and not an "open" src.getBuffer(minCapacity)
522 fUnion
.fFields
.fLengthAndFlags
= src
.fUnion
.fFields
.fLengthAndFlags
;
523 switch(src
.fUnion
.fFields
.fLengthAndFlags
& kAllStorageFlags
) {
525 // short string using the stack buffer, do the same
526 uprv_memcpy(fUnion
.fStackFields
.fBuffer
, src
.fUnion
.fStackFields
.fBuffer
,
527 getShortLength() * U_SIZEOF_UCHAR
);
530 // src uses a refCounted string buffer, use that buffer with refCount
531 // src is const, use a cast - we don't actually change it
532 ((UnicodeString
&)src
).addRef();
533 // copy all fields, share the reference-counted buffer
534 fUnion
.fFields
.fArray
= src
.fUnion
.fFields
.fArray
;
535 fUnion
.fFields
.fCapacity
= src
.fUnion
.fFields
.fCapacity
;
536 if(!hasShortLength()) {
537 fUnion
.fFields
.fLength
= src
.fUnion
.fFields
.fLength
;
542 // src is a readonly alias, do the same
543 // -> maintain the readonly alias as such
544 fUnion
.fFields
.fArray
= src
.fUnion
.fFields
.fArray
;
545 fUnion
.fFields
.fCapacity
= src
.fUnion
.fFields
.fCapacity
;
546 if(!hasShortLength()) {
547 fUnion
.fFields
.fLength
= src
.fUnion
.fFields
.fLength
;
551 // else if(!fastCopy) fall through to case kWritableAlias
552 // -> allocate a new buffer and copy the contents
554 case kWritableAlias
: {
555 // src is a writable alias; we make a copy of that instead
556 int32_t srcLength
= src
.length();
557 if(allocate(srcLength
)) {
558 u_memcpy(getArrayStart(), src
.getArrayStart(), srcLength
);
559 setLength(srcLength
);
562 // if there is not enough memory, then fall through to setting to bogus
566 // if src is bogus, set ourselves to bogus
567 // do not call setToBogus() here because fArray and flags are not consistent here
568 fUnion
.fFields
.fLengthAndFlags
= kIsBogus
;
569 fUnion
.fFields
.fArray
= 0;
570 fUnion
.fFields
.fCapacity
= 0;
// Move-assigns from src. The visible work is the copyFieldsFrom() call, which
// receives TRUE for its setSrcToBogus parameter.
577 UnicodeString
&UnicodeString::moveFrom(UnicodeString
&src
) U_NOEXCEPT
{
578 // No explicit check for self move assignment, consistent with standard library.
579 // Self move assignment causes no crash nor leak but might make the object bogus.
581 copyFieldsFrom(src
, TRUE
);
585 // Same as moveFrom() except without memory management.
586 void UnicodeString::copyFieldsFrom(UnicodeString
&src
, UBool setSrcToBogus
) U_NOEXCEPT
{
587 int16_t lengthAndFlags
= fUnion
.fFields
.fLengthAndFlags
= src
.fUnion
.fFields
.fLengthAndFlags
;
588 if(lengthAndFlags
& kUsingStackBuffer
) {
589 // Short string using the stack buffer, copy the contents.
590 // Check for self assignment to prevent "overlap in memcpy" warnings,
591 // although it should be harmless to copy a buffer to itself exactly.
593 uprv_memcpy(fUnion
.fStackFields
.fBuffer
, src
.fUnion
.fStackFields
.fBuffer
,
594 getShortLength() * U_SIZEOF_UCHAR
);
597 // In all other cases, copy all fields.
598 fUnion
.fFields
.fArray
= src
.fUnion
.fFields
.fArray
;
599 fUnion
.fFields
.fCapacity
= src
.fUnion
.fFields
.fCapacity
;
600 if(!hasShortLength()) {
601 fUnion
.fFields
.fLength
= src
.fUnion
.fFields
.fLength
;
604 // Set src to bogus without releasing any memory.
605 src
.fUnion
.fFields
.fLengthAndFlags
= kIsBogus
;
606 src
.fUnion
.fFields
.fArray
= NULL
;
607 src
.fUnion
.fFields
.fCapacity
= 0;
612 void UnicodeString::swap(UnicodeString
&other
) U_NOEXCEPT
{
613 UnicodeString temp
; // Empty short string: Known not to need releaseArray().
614 // Copy fields without resetting source values in between.
615 temp
.copyFieldsFrom(*this, FALSE
);
616 this->copyFieldsFrom(other
, FALSE
);
617 other
.copyFieldsFrom(temp
, FALSE
);
618 // Set temp to an empty string so that other's memory is not released twice.
619 temp
.fUnion
.fFields
.fLengthAndFlags
= kShortString
;
622 //========================================
623 // Miscellaneous operations
624 //========================================
626 UnicodeString
UnicodeString::unescape() const {
627 UnicodeString
result(length(), (UChar32
)0, (int32_t)0); // construct with capacity
628 if (result
.isBogus()) {
631 const UChar
*array
= getBuffer();
632 int32_t len
= length();
634 for (int32_t i
=0;;) {
636 result
.append(array
, prev
, len
- prev
);
639 if (array
[i
++] == 0x5C /*'\\'*/) {
640 result
.append(array
, prev
, (i
- 1) - prev
);
641 UChar32 c
= unescapeAt(i
); // advances i
643 result
.remove(); // return empty string
644 break; // invalid escape sequence
653 UChar32
UnicodeString::unescapeAt(int32_t &offset
) const {
654 return u_unescapeAt(UnicodeString_charAt
, &offset
, length(), (void*)this);
657 //========================================
658 // Read-only implementation
659 //========================================
// Returns whether the first len code units of this string and of text are
// identical, decided by a uprv_memcmp() over len * U_SIZEOF_UCHAR bytes of the
// two getArrayStart() buffers.
661 UnicodeString::doEquals(const UnicodeString
&text
, int32_t len
) const {
662 // Requires: this & text not bogus and have same lengths.
663 // Byte-wise comparison works for equality regardless of endianness.
664 return uprv_memcmp(getArrayStart(), text
.getArrayStart(), len
* U_SIZEOF_UCHAR
) == 0;
668 UnicodeString::doCompare( int32_t start
,
670 const UChar
*srcChars
,
672 int32_t srcLength
) const
674 // compare illegal string values
679 // pin indices to legal values
680 pinIndices(start
, length
);
682 if(srcChars
== NULL
) {
683 // treat const UChar *srcChars==NULL as an empty string
684 return length
== 0 ? 0 : 1;
687 // get the correct pointer
688 const UChar
*chars
= getArrayStart();
691 srcChars
+= srcStart
;
696 // get the srcLength if necessary
698 srcLength
= u_strlen(srcChars
+ srcStart
);
701 // are we comparing different lengths?
702 if(length
!= srcLength
) {
703 if(length
< srcLength
) {
707 minLength
= srcLength
;
716 * note that uprv_memcmp() returns an int but we return an int8_t;
717 * we need to take care not to truncate the result -
718 * one way to do this is to right-shift the value to
719 * move the sign bit into the lower 8 bits and making sure that this
720 * does not become 0 itself
723 if(minLength
> 0 && chars
!= srcChars
) {
727 // big-endian: byte comparison works
728 result
= uprv_memcmp(chars
, srcChars
, minLength
* sizeof(UChar
));
730 return (int8_t)(result
>> 15 | 1);
733 // little-endian: compare UChar units
735 result
= ((int32_t)*(chars
++) - (int32_t)*(srcChars
++));
737 return (int8_t)(result
>> 15 | 1);
739 } while(--minLength
> 0);
745 /* String compare in code point order - doCompare() compares in code unit order. */
747 UnicodeString::doCompareCodePointOrder(int32_t start
,
749 const UChar
*srcChars
,
751 int32_t srcLength
) const
753 // compare illegal string values
754 // treat const UChar *srcChars==NULL as an empty string
759 // pin indices to legal values
760 pinIndices(start
, length
);
762 if(srcChars
== NULL
) {
763 srcStart
= srcLength
= 0;
766 int32_t diff
= uprv_strCompare(getArrayStart() + start
, length
, (srcChars
!=NULL
)?(srcChars
+ srcStart
):NULL
, srcLength
, FALSE
, TRUE
);
767 /* translate the 32-bit result into an 8-bit one */
769 return (int8_t)(diff
>> 15 | 1);
776 UnicodeString::getLength() const {
// Thin forwarder: returns charAt(offset).
781 UnicodeString::getCharAt(int32_t offset
) const {
782 return charAt(offset
);
// Thin forwarder: returns char32At(offset).
786 UnicodeString::getChar32At(int32_t offset
) const {
787 return char32At(offset
);
791 UnicodeString::char32At(int32_t offset
) const
793 int32_t len
= length();
794 if((uint32_t)offset
< (uint32_t)len
) {
795 const UChar
*array
= getArrayStart();
797 U16_GET(array
, 0, offset
, len
, c
);
800 return kInvalidUChar
;
805 UnicodeString::getChar32Start(int32_t offset
) const {
806 if((uint32_t)offset
< (uint32_t)length()) {
807 const UChar
*array
= getArrayStart();
808 U16_SET_CP_START(array
, 0, offset
);
816 UnicodeString::getChar32Limit(int32_t offset
) const {
817 int32_t len
= length();
818 if((uint32_t)offset
< (uint32_t)len
) {
819 const UChar
*array
= getArrayStart();
820 U16_SET_CP_LIMIT(array
, 0, offset
, len
);
// Returns the result of u_countChar32() for the range given by start and
// length, after both have been adjusted by pinIndices().
828 UnicodeString::countChar32(int32_t start
, int32_t length
) const {
829 pinIndices(start
, length
);
830 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
831 return u_countChar32(getArrayStart()+start
, length
);
// Returns the result of u_strHasMoreChar32Than() for the range given by start
// and length (both first adjusted by pinIndices()) and the threshold number.
835 UnicodeString::hasMoreChar32Than(int32_t start
, int32_t length
, int32_t number
) const {
836 pinIndices(start
, length
);
837 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
838 return u_strHasMoreChar32Than(getArrayStart()+start
, length
, number
);
842 UnicodeString::moveIndex32(int32_t index
, int32_t delta
) const {
844 int32_t len
= length();
847 } else if(index
>len
) {
851 const UChar
*array
= getArrayStart();
853 U16_FWD_N(array
, index
, len
, delta
);
855 U16_BACK_N(array
, 0, index
, -delta
);
862 UnicodeString::doExtract(int32_t start
,
865 int32_t dstStart
) const
867 // pin indices to legal values
868 pinIndices(start
, length
);
870 // do not copy anything if we alias dst itself
871 const UChar
*array
= getArrayStart();
872 if(array
+ start
!= dst
+ dstStart
) {
873 us_arrayCopy(array
, start
, dst
, dstStart
, length
);
878 UnicodeString::extract(Char16Ptr dest
, int32_t destCapacity
,
879 UErrorCode
&errorCode
) const {
880 int32_t len
= length();
881 if(U_SUCCESS(errorCode
)) {
882 if(isBogus() || destCapacity
<0 || (destCapacity
>0 && dest
==0)) {
883 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
885 const UChar
*array
= getArrayStart();
886 if(len
>0 && len
<=destCapacity
&& array
!=dest
) {
887 u_memcpy(dest
, array
, len
);
889 return u_terminateUChars(dest
, destCapacity
, len
, &errorCode
);
897 UnicodeString::extract(int32_t start
,
900 int32_t targetCapacity
,
901 enum EInvariant
) const
903 // if the arguments are illegal, then do nothing
904 if(targetCapacity
< 0 || (targetCapacity
> 0 && target
== NULL
)) {
908 // pin the indices to legal values
909 pinIndices(start
, length
);
911 if(length
<= targetCapacity
) {
912 u_UCharsToChars(getArrayStart() + start
, target
, length
);
914 UErrorCode status
= U_ZERO_ERROR
;
915 return u_terminateChars(target
, targetCapacity
, length
, &status
);
919 UnicodeString::tempSubString(int32_t start
, int32_t len
) const {
920 pinIndices(start
, len
);
921 const UChar
*array
= getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer
923 array
=fUnion
.fStackFields
.fBuffer
; // anything not NULL because that would make an empty string
924 len
=-2; // bogus result string
926 return UnicodeString(FALSE
, array
+ start
, len
);
930 UnicodeString::toUTF8(int32_t start
, int32_t len
,
931 char *target
, int32_t capacity
) const {
932 pinIndices(start
, len
);
934 UErrorCode errorCode
= U_ZERO_ERROR
;
935 u_strToUTF8WithSub(target
, capacity
, &length8
,
936 getBuffer() + start
, len
,
937 0xFFFD, // Standard substitution character.
938 NULL
, // Don't care about number of substitutions.
943 #if U_CHARSET_IS_UTF8
946 UnicodeString::extract(int32_t start
, int32_t len
,
947 char *target
, uint32_t dstSize
) const {
948 // if the arguments are illegal, then do nothing
949 if(/*dstSize < 0 || */(dstSize
> 0 && target
== 0)) {
952 return toUTF8(start
, len
, target
, dstSize
<= 0x7fffffff ? (int32_t)dstSize
: 0x7fffffff);
955 // else see unistr_cnv.cpp
959 UnicodeString::extractBetween(int32_t start
,
961 UnicodeString
& target
) const {
964 doExtract(start
, limit
- start
, target
);
967 // When converting from UTF-16 to UTF-8, the result will have at most 3 times
968 // as many bytes as the source has UChars.
969 // The "worst cases" are writing systems like Indic, Thai and CJK with
972 UnicodeString::toUTF8(ByteSink
&sink
) const {
973 int32_t length16
= length();
975 char stackBuffer
[1024];
976 int32_t capacity
= (int32_t)sizeof(stackBuffer
);
977 UBool utf8IsOwned
= FALSE
;
978 char *utf8
= sink
.GetAppendBuffer(length16
< capacity
? length16
: capacity
,
980 stackBuffer
, capacity
,
983 UErrorCode errorCode
= U_ZERO_ERROR
;
984 u_strToUTF8WithSub(utf8
, capacity
, &length8
,
985 getBuffer(), length16
,
986 0xFFFD, // Standard substitution character.
987 NULL
, // Don't care about number of substitutions.
989 if(errorCode
== U_BUFFER_OVERFLOW_ERROR
) {
990 utf8
= (char *)uprv_malloc(length8
);
993 errorCode
= U_ZERO_ERROR
;
994 u_strToUTF8WithSub(utf8
, length8
, &length8
,
995 getBuffer(), length16
,
996 0xFFFD, // Standard substitution character.
997 NULL
, // Don't care about number of substitutions.
1000 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
1003 if(U_SUCCESS(errorCode
)) {
1004 sink
.Append(utf8
, length8
);
1014 UnicodeString::toUTF32(UChar32
*utf32
, int32_t capacity
, UErrorCode
&errorCode
) const {
1016 if(U_SUCCESS(errorCode
)) {
1017 // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
1018 u_strToUTF32WithSub(utf32
, capacity
, &length32
,
1019 getBuffer(), length(),
1020 0xfffd, // Substitution character.
1021 NULL
, // Don't care about number of substitutions.
1028 UnicodeString::indexOf(const UChar
*srcChars
,
1032 int32_t length
) const
1034 if(isBogus() || srcChars
== 0 || srcStart
< 0 || srcLength
== 0) {
1038 // UnicodeString does not find empty substrings
1039 if(srcLength
< 0 && srcChars
[srcStart
] == 0) {
1043 // get the indices within bounds
1044 pinIndices(start
, length
);
1046 // find the first occurrence of the substring
1047 const UChar
*array
= getArrayStart();
1048 const UChar
*match
= u_strFindFirst(array
+ start
, length
, srcChars
+ srcStart
, srcLength
);
1052 return (int32_t)(match
- array
);
1057 UnicodeString::doIndexOf(UChar c
,
1059 int32_t length
) const
1062 pinIndices(start
, length
);
1064 // find the first occurrence of c
1065 const UChar
*array
= getArrayStart();
1066 const UChar
*match
= u_memchr(array
+ start
, c
, length
);
1070 return (int32_t)(match
- array
);
1075 UnicodeString::doIndexOf(UChar32 c
,
1077 int32_t length
) const {
1079 pinIndices(start
, length
);
1081 // find the first occurrence of c
1082 const UChar
*array
= getArrayStart();
1083 const UChar
*match
= u_memchr32(array
+ start
, c
, length
);
1087 return (int32_t)(match
- array
);
1092 UnicodeString::lastIndexOf(const UChar
*srcChars
,
1096 int32_t length
) const
1098 if(isBogus() || srcChars
== 0 || srcStart
< 0 || srcLength
== 0) {
1102 // UnicodeString does not find empty substrings
1103 if(srcLength
< 0 && srcChars
[srcStart
] == 0) {
1107 // get the indices within bounds
1108 pinIndices(start
, length
);
1110 // find the last occurrence of the substring
1111 const UChar
*array
= getArrayStart();
1112 const UChar
*match
= u_strFindLast(array
+ start
, length
, srcChars
+ srcStart
, srcLength
);
1116 return (int32_t)(match
- array
);
1121 UnicodeString::doLastIndexOf(UChar c
,
1123 int32_t length
) const
1130 pinIndices(start
, length
);
1132 // find the last occurrence of c
1133 const UChar
*array
= getArrayStart();
1134 const UChar
*match
= u_memrchr(array
+ start
, c
, length
);
1138 return (int32_t)(match
- array
);
1143 UnicodeString::doLastIndexOf(UChar32 c
,
1145 int32_t length
) const {
1147 pinIndices(start
, length
);
1149 // find the last occurrence of c
1150 const UChar
*array
= getArrayStart();
1151 const UChar
*match
= u_memrchr32(array
+ start
, c
, length
);
1155 return (int32_t)(match
- array
);
1159 //========================================
1160 // Write implementation
1161 //========================================
1164 UnicodeString::findAndReplace(int32_t start
,
1166 const UnicodeString
& oldText
,
1169 const UnicodeString
& newText
,
1173 if(isBogus() || oldText
.isBogus() || newText
.isBogus()) {
1177 pinIndices(start
, length
);
1178 oldText
.pinIndices(oldStart
, oldLength
);
1179 newText
.pinIndices(newStart
, newLength
);
1181 if(oldLength
== 0) {
1185 while(length
> 0 && length
>= oldLength
) {
1186 int32_t pos
= indexOf(oldText
, oldStart
, oldLength
, start
, length
);
1188 // no more oldText's here: done
1191 // we found oldText, replace it by newText and go beyond it
1192 replace(pos
, oldLength
, newText
, newStart
, newLength
);
1193 length
-= pos
+ oldLength
- start
;
1194 start
= pos
+ newLength
;
1203 UnicodeString::setToBogus()
1207 fUnion
.fFields
.fLengthAndFlags
= kIsBogus
;
1208 fUnion
.fFields
.fArray
= 0;
1209 fUnion
.fFields
.fCapacity
= 0;
1212 // turn a bogus string into an empty one
1214 UnicodeString::unBogus() {
1215 if(fUnion
.fFields
.fLengthAndFlags
& kIsBogus
) {
1221 UnicodeString::getTerminatedBuffer() {
1225 UChar
*array
= getArrayStart();
1226 int32_t len
= length();
1227 if(len
< getCapacity()) {
1228 if(fUnion
.fFields
.fLengthAndFlags
& kBufferIsReadonly
) {
1229 // If len<capacity on a read-only alias, then array[len] is
1230 // either the original NUL (if constructed with (TRUE, s, length))
1231 // or one of the original string contents characters (if later truncated),
1232 // therefore we can assume that array[len] is initialized memory.
1233 if(array
[len
] == 0) {
1236 } else if(((fUnion
.fFields
.fLengthAndFlags
& kRefCounted
) == 0 || refCount() == 1)) {
1237 // kRefCounted: Do not write the NUL if the buffer is shared.
1238 // That is mostly safe, except when the length of one copy was modified
1239 // without copy-on-write, e.g., via truncate(newLength) or remove(void).
1240 // Then the NUL would be written into the middle of another copy's string.
1242 // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL.
1243 // Do not test if there is a NUL already because it might be uninitialized memory.
1244 // (That would be safe, but tools like valgrind & Purify would complain.)
1249 if(len
<INT32_MAX
&& cloneArrayIfNeeded(len
+1)) {
1250 array
= getArrayStart();
1258 // setTo() analogous to the readonly-aliasing constructor with the same signature
1260 UnicodeString::setTo(UBool isTerminated
,
1261 ConstChar16Ptr textPtr
,
1264 if(fUnion
.fFields
.fLengthAndFlags
& kOpenGetBuffer
) {
1265 // do not modify a string that has an "open" getBuffer(minCapacity)
1269 const UChar
*text
= textPtr
;
1271 // treat as an empty string, do not alias
1277 if( textLength
< -1 ||
1278 (textLength
== -1 && !isTerminated
) ||
1279 (textLength
>= 0 && isTerminated
&& text
[textLength
] != 0)
1287 if(textLength
== -1) {
1288 // text is terminated, or else it would have failed the above test
1289 textLength
= u_strlen(text
);
1291 fUnion
.fFields
.fLengthAndFlags
= kReadonlyAlias
;
1292 setArray((UChar
*)text
, textLength
, isTerminated
? textLength
+ 1 : textLength
);
1296 // setTo() analogous to the writable-aliasing constructor with the same signature
1298 UnicodeString::setTo(UChar
*buffer
,
1300 int32_t buffCapacity
) {
1301 if(fUnion
.fFields
.fLengthAndFlags
& kOpenGetBuffer
) {
1302 // do not modify a string that has an "open" getBuffer(minCapacity)
1306 if(buffer
== NULL
) {
1307 // treat as an empty string, do not alias
1313 if(buffLength
< -1 || buffCapacity
< 0 || buffLength
> buffCapacity
) {
1316 } else if(buffLength
== -1) {
1317 // buffLength = u_strlen(buff); but do not look beyond buffCapacity
1318 const UChar
*p
= buffer
, *limit
= buffer
+ buffCapacity
;
1319 while(p
!= limit
&& *p
!= 0) {
1322 buffLength
= (int32_t)(p
- buffer
);
1327 fUnion
.fFields
.fLengthAndFlags
= kWritableAlias
;
1328 setArray(buffer
, buffLength
, buffCapacity
);
1332 UnicodeString
&UnicodeString::setToUTF8(StringPiece utf8
) {
1334 int32_t length
= utf8
.length();
1336 // The UTF-16 string will be at most as long as the UTF-8 string.
1337 if(length
<= US_STACKBUF_SIZE
) {
1338 capacity
= US_STACKBUF_SIZE
;
1340 capacity
= length
+ 1; // +1 for the terminating NUL.
1342 UChar
*utf16
= getBuffer(capacity
);
1344 UErrorCode errorCode
= U_ZERO_ERROR
;
1345 u_strFromUTF8WithSub(utf16
, getCapacity(), &length16
,
1346 utf8
.data(), length
,
1347 0xfffd, // Substitution character.
1348 NULL
, // Don't care about number of substitutions.
1350 releaseBuffer(length16
);
1351 if(U_FAILURE(errorCode
)) {
1358 UnicodeString::setCharAt(int32_t offset
,
1361 int32_t len
= length();
1362 if(cloneArrayIfNeeded() && len
> 0) {
1365 } else if(offset
>= len
) {
1369 getArrayStart()[offset
] = c
;
1375 UnicodeString::replace(int32_t start
,
1378 UChar buffer
[U16_MAX_LENGTH
];
1380 UBool isError
= FALSE
;
1381 U16_APPEND(buffer
, count
, U16_MAX_LENGTH
, srcChar
, isError
);
1382 // We test isError so that the compiler does not complain that we don't.
1383 // If isError (srcChar is not a valid code point) then count==0 which means
1384 // we remove the source segment rather than replacing it with srcChar.
1385 return doReplace(start
, _length
, buffer
, 0, isError
? 0 : count
);
1389 UnicodeString::append(UChar32 srcChar
) {
1390 UChar buffer
[U16_MAX_LENGTH
];
1391 int32_t _length
= 0;
1392 UBool isError
= FALSE
;
1393 U16_APPEND(buffer
, _length
, U16_MAX_LENGTH
, srcChar
, isError
);
1394 // We test isError so that the compiler does not complain that we don't.
1395 // If isError then _length==0 which turns the doAppend() into a no-op anyway.
1396 return isError
? *this : doAppend(buffer
, 0, _length
);
1400 UnicodeString::doReplace( int32_t start
,
1402 const UnicodeString
& src
,
1406 // pin the indices to legal values
1407 src
.pinIndices(srcStart
, srcLength
);
1409 // get the characters from src
1410 // and replace the range in ourselves with them
1411 return doReplace(start
, length
, src
.getArrayStart(), srcStart
, srcLength
);
1415 UnicodeString::doReplace(int32_t start
,
1417 const UChar
*srcChars
,
1425 int32_t oldLength
= this->length();
1427 // optimize (read-only alias).remove(0, start) and .remove(start, end)
1428 if((fUnion
.fFields
.fLengthAndFlags
&kBufferIsReadonly
) && srcLength
== 0) {
1430 // remove prefix by adjusting the array pointer
1432 fUnion
.fFields
.fArray
+= length
;
1433 fUnion
.fFields
.fCapacity
-= length
;
1434 setLength(oldLength
- length
);
1438 if(length
>= (oldLength
- start
)) {
1439 // remove suffix by reducing the length (like truncate())
1441 fUnion
.fFields
.fCapacity
= start
; // not NUL-terminated any more
1447 if(start
== oldLength
) {
1448 return doAppend(srcChars
, srcStart
, srcLength
);
1452 srcStart
= srcLength
= 0;
1453 } else if(srcLength
< 0) {
1454 // get the srcLength if necessary
1455 srcLength
= u_strlen(srcChars
+ srcStart
);
1458 // pin the indices to legal values
1459 pinIndices(start
, length
);
1461 // Calculate the size of the string after the replace.
1462 // Avoid int32_t overflow.
1463 int32_t newLength
= oldLength
- length
;
1464 if(srcLength
> (INT32_MAX
- newLength
)) {
1468 newLength
+= srcLength
;
1470 // cloneArrayIfNeeded(doCopyArray=FALSE) may change fArray but will not copy the current contents;
1471 // therefore we need to keep the current fArray
1472 UChar oldStackBuffer
[US_STACKBUF_SIZE
];
1474 if((fUnion
.fFields
.fLengthAndFlags
&kUsingStackBuffer
) && (newLength
> US_STACKBUF_SIZE
)) {
1475 // copy the stack buffer contents because it will be overwritten with
1476 // fUnion.fFields values
1477 u_memcpy(oldStackBuffer
, fUnion
.fStackFields
.fBuffer
, oldLength
);
1478 oldArray
= oldStackBuffer
;
1480 oldArray
= getArrayStart();
1483 // clone our array and allocate a bigger array if needed
1484 int32_t *bufferToDelete
= 0;
1485 if(!cloneArrayIfNeeded(newLength
, getGrowCapacity(newLength
),
1486 FALSE
, &bufferToDelete
)
1491 // now do the replace
1493 UChar
*newArray
= getArrayStart();
1494 if(newArray
!= oldArray
) {
1495 // if fArray changed, then we need to copy everything except what will change
1496 us_arrayCopy(oldArray
, 0, newArray
, 0, start
);
1497 us_arrayCopy(oldArray
, start
+ length
,
1498 newArray
, start
+ srcLength
,
1499 oldLength
- (start
+ length
));
1500 } else if(length
!= srcLength
) {
1501 // fArray did not change; copy only the portion that isn't changing, leaving a hole
1502 us_arrayCopy(oldArray
, start
+ length
,
1503 newArray
, start
+ srcLength
,
1504 oldLength
- (start
+ length
));
1507 // now fill in the hole with the new string
1508 us_arrayCopy(srcChars
, srcStart
, newArray
, start
, srcLength
);
1510 setLength(newLength
);
1512 // delayed delete in case srcChars == fArray when we started, and
1513 // to keep oldArray alive for the above operations
1514 if (bufferToDelete
) {
1515 uprv_free(bufferToDelete
);
1521 // Versions of doReplace() only for append() variants.
1522 // doReplace() and doAppend() optimize for different cases.
1525 UnicodeString::doAppend(const UnicodeString
& src
, int32_t srcStart
, int32_t srcLength
) {
1526 if(srcLength
== 0) {
1530 // pin the indices to legal values
1531 src
.pinIndices(srcStart
, srcLength
);
1532 return doAppend(src
.getArrayStart(), srcStart
, srcLength
);
1536 UnicodeString::doAppend(const UChar
*srcChars
, int32_t srcStart
, int32_t srcLength
) {
1537 if(!isWritable() || srcLength
== 0 || srcChars
== NULL
) {
1542 // get the srcLength if necessary
1543 if((srcLength
= u_strlen(srcChars
+ srcStart
)) == 0) {
1548 int32_t oldLength
= length();
1549 int32_t newLength
= oldLength
+ srcLength
;
1550 // optimize append() onto a large-enough, owned string
1551 if((newLength
<= getCapacity() && isBufferWritable()) ||
1552 cloneArrayIfNeeded(newLength
, getGrowCapacity(newLength
))) {
1553 UChar
*newArray
= getArrayStart();
1554 // Do not copy characters when
1555 // UChar *buffer=str.getAppendBuffer(...);
1557 // str.append(buffer, length);
1559 // str.appendString(buffer, length)
1561 if(srcChars
+ srcStart
!= newArray
+ oldLength
) {
1562 us_arrayCopy(srcChars
, srcStart
, newArray
, oldLength
, srcLength
);
1564 setLength(newLength
);
1573 UnicodeString::handleReplaceBetween(int32_t start
,
1575 const UnicodeString
& text
) {
1576 replaceBetween(start
, limit
, text
);
1583 UnicodeString::copy(int32_t start
, int32_t limit
, int32_t dest
) {
1584 if (limit
<= start
) {
1585 return; // Nothing to do; avoid bogus malloc call
1587 UChar
* text
= (UChar
*) uprv_malloc( sizeof(UChar
) * (limit
- start
) );
1588 // Check to make sure text is not null.
1590 extractBetween(start
, limit
, text
, 0);
1591 insert(dest
, text
, 0, limit
- start
);
1599 * NOTE: This is for the Replaceable class. There is no rep.cpp,
1600 * so we implement this function here.
1602 UBool
Replaceable::hasMetaData() const {
1609 UBool
UnicodeString::hasMetaData() const {
1614 UnicodeString::doReverse(int32_t start
, int32_t length
) {
1615 if(length
<= 1 || !cloneArrayIfNeeded()) {
1619 // pin the indices to legal values
1620 pinIndices(start
, length
);
1621 if(length
<= 1) { // pinIndices() might have shrunk the length
1625 UChar
*left
= getArrayStart() + start
;
1626 UChar
*right
= left
+ length
- 1; // -1 for inclusive boundary (length>=2)
1628 UBool hasSupplementary
= FALSE
;
1630 // Before the loop we know left<right because length>=2.
1632 hasSupplementary
|= (UBool
)U16_IS_LEAD(swap
= *left
);
1633 hasSupplementary
|= (UBool
)U16_IS_LEAD(*left
++ = *right
);
1635 } while(left
< right
);
1636 // Make sure to test the middle code unit of an odd-length string.
1637 // Redundant if the length is even.
1638 hasSupplementary
|= (UBool
)U16_IS_LEAD(*left
);
1640 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1641 if(hasSupplementary
) {
1644 left
= getArrayStart() + start
;
1645 right
= left
+ length
- 1; // -1 so that we can look at *(left+1) if left<right
1646 while(left
< right
) {
1647 if(U16_IS_TRAIL(swap
= *left
) && U16_IS_LEAD(swap2
= *(left
+ 1))) {
1660 UnicodeString::padLeading(int32_t targetLength
,
1663 int32_t oldLength
= length();
1664 if(oldLength
>= targetLength
|| !cloneArrayIfNeeded(targetLength
)) {
1667 // move contents up by padding width
1668 UChar
*array
= getArrayStart();
1669 int32_t start
= targetLength
- oldLength
;
1670 us_arrayCopy(array
, 0, array
, start
, oldLength
);
1672 // fill in padding character
1673 while(--start
>= 0) {
1674 array
[start
] = padChar
;
1676 setLength(targetLength
);
1682 UnicodeString::padTrailing(int32_t targetLength
,
1685 int32_t oldLength
= length();
1686 if(oldLength
>= targetLength
|| !cloneArrayIfNeeded(targetLength
)) {
1689 // fill in padding character
1690 UChar
*array
= getArrayStart();
1691 int32_t length
= targetLength
;
1692 while(--length
>= oldLength
) {
1693 array
[length
] = padChar
;
1695 setLength(targetLength
);
1700 //========================================
1702 //========================================
1704 UnicodeString::doHashCode() const
1706 /* Delegate hash computation to uhash. This makes UnicodeString
1707 * hashing consistent with UChar* hashing. */
1708 int32_t hashCode
= ustr_hashUCharsN(getArrayStart(), length());
1709 if (hashCode
== kInvalidHashCode
) {
1710 hashCode
= kEmptyHashCode
;
1715 //========================================
1717 //========================================
1720 UnicodeString::getBuffer(int32_t minCapacity
) {
1721 if(minCapacity
>=-1 && cloneArrayIfNeeded(minCapacity
)) {
1722 fUnion
.fFields
.fLengthAndFlags
|=kOpenGetBuffer
;
1724 return getArrayStart();
1731 UnicodeString::releaseBuffer(int32_t newLength
) {
1732 if(fUnion
.fFields
.fLengthAndFlags
&kOpenGetBuffer
&& newLength
>=-1) {
1733 // set the new fLength
1734 int32_t capacity
=getCapacity();
1736 // the new length is the string length, capped by fCapacity
1737 const UChar
*array
=getArrayStart(), *p
=array
, *limit
=array
+capacity
;
1738 while(p
<limit
&& *p
!=0) {
1741 newLength
=(int32_t)(p
-array
);
1742 } else if(newLength
>capacity
) {
1745 setLength(newLength
);
1746 fUnion
.fFields
.fLengthAndFlags
&=~kOpenGetBuffer
;
1750 //========================================
1752 //========================================
1754 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity
,
1755 int32_t growCapacity
,
1757 int32_t **pBufferToDelete
,
1759 // default parameters need to be static, therefore
1760 // the defaults are -1 to have convenience defaults
1761 if(newCapacity
== -1) {
1762 newCapacity
= getCapacity();
1765 // while a getBuffer(minCapacity) is "open",
1766 // prevent any modifications of the string by returning FALSE here
1767 // if the string is bogus, then only an assignment or similar can revive it
1773 * We need to make a copy of the array if
1774 * the buffer is read-only, or
1775 * the buffer is refCounted (shared), and refCount>1, or
1776 * the buffer is too small.
1777 * Return FALSE if memory could not be allocated.
1780 fUnion
.fFields
.fLengthAndFlags
& kBufferIsReadonly
||
1781 (fUnion
.fFields
.fLengthAndFlags
& kRefCounted
&& refCount() > 1) ||
1782 newCapacity
> getCapacity()
1784 // check growCapacity for default value and use of the stack buffer
1785 if(growCapacity
< 0) {
1786 growCapacity
= newCapacity
;
1787 } else if(newCapacity
<= US_STACKBUF_SIZE
&& growCapacity
> US_STACKBUF_SIZE
) {
1788 growCapacity
= US_STACKBUF_SIZE
;
1792 UChar oldStackBuffer
[US_STACKBUF_SIZE
];
1794 int32_t oldLength
= length();
1795 int16_t flags
= fUnion
.fFields
.fLengthAndFlags
;
1797 if(flags
&kUsingStackBuffer
) {
1798 U_ASSERT(!(flags
&kRefCounted
)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */
1799 if(doCopyArray
&& growCapacity
> US_STACKBUF_SIZE
) {
1800 // copy the stack buffer contents because it will be overwritten with
1801 // fUnion.fFields values
1802 us_arrayCopy(fUnion
.fStackFields
.fBuffer
, 0, oldStackBuffer
, 0, oldLength
);
1803 oldArray
= oldStackBuffer
;
1805 oldArray
= NULL
; // no need to copy from the stack buffer to itself
1808 oldArray
= fUnion
.fFields
.fArray
;
1809 U_ASSERT(oldArray
!=NULL
); /* when stack buffer is not used, oldArray must have a non-NULL reference */
1812 // allocate a new array
1813 if(allocate(growCapacity
) ||
1814 (newCapacity
< growCapacity
&& allocate(newCapacity
))
1817 // copy the contents
1818 // do not copy more than what fits - it may be smaller than before
1819 int32_t minLength
= oldLength
;
1820 newCapacity
= getCapacity();
1821 if(newCapacity
< minLength
) {
1822 minLength
= newCapacity
;
1824 if(oldArray
!= NULL
) {
1825 us_arrayCopy(oldArray
, 0, getArrayStart(), 0, minLength
);
1827 setLength(minLength
);
1832 // release the old array
1833 if(flags
& kRefCounted
) {
1834 // the array is refCounted; decrement and release if 0
1835 u_atomic_int32_t
*pRefCount
= ((u_atomic_int32_t
*)oldArray
- 1);
1836 if(umtx_atomic_dec(pRefCount
) == 0) {
1837 if(pBufferToDelete
== 0) {
1838 // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t
1839 // is defined as volatile. (Volatile has useful non-standard behavior
1840 // with this compiler.)
1841 uprv_free((void *)pRefCount
);
1843 // the caller requested to delete it himself
1844 *pBufferToDelete
= (int32_t *)pRefCount
;
1849 // not enough memory for growCapacity and not even for the smaller newCapacity
1850 // reset the old values for setToBogus() to release the array
1851 if(!(flags
&kUsingStackBuffer
)) {
1852 fUnion
.fFields
.fArray
= oldArray
;
1854 fUnion
.fFields
.fLengthAndFlags
= flags
;
1862 // UnicodeStringAppendable ------------------------------------------------- ***
1864 UnicodeStringAppendable::~UnicodeStringAppendable() {}
1867 UnicodeStringAppendable::appendCodeUnit(UChar c
) {
1868 return str
.doAppend(&c
, 0, 1).isWritable();
1872 UnicodeStringAppendable::appendCodePoint(UChar32 c
) {
1873 UChar buffer
[U16_MAX_LENGTH
];
1874 int32_t cLength
= 0;
1875 UBool isError
= FALSE
;
1876 U16_APPEND(buffer
, cLength
, U16_MAX_LENGTH
, c
, isError
);
1877 return !isError
&& str
.doAppend(buffer
, 0, cLength
).isWritable();
1881 UnicodeStringAppendable::appendString(const UChar
*s
, int32_t length
) {
1882 return str
.doAppend(s
, 0, length
).isWritable();
1886 UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity
) {
1887 return str
.cloneArrayIfNeeded(str
.length() + appendCapacity
);
1891 UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity
,
1892 int32_t desiredCapacityHint
,
1893 UChar
*scratch
, int32_t scratchCapacity
,
1894 int32_t *resultCapacity
) {
1895 if(minCapacity
< 1 || scratchCapacity
< minCapacity
) {
1896 *resultCapacity
= 0;
1899 int32_t oldLength
= str
.length();
1900 if(minCapacity
<= (kMaxCapacity
- oldLength
) &&
1901 desiredCapacityHint
<= (kMaxCapacity
- oldLength
) &&
1902 str
.cloneArrayIfNeeded(oldLength
+ minCapacity
, oldLength
+ desiredCapacityHint
)) {
1903 *resultCapacity
= str
.getCapacity() - oldLength
;
1904 return str
.getArrayStart() + oldLength
;
1906 *resultCapacity
= scratchCapacity
;
1914 U_CAPI
int32_t U_EXPORT2
1915 uhash_hashUnicodeString(const UElement key
) {
1916 const UnicodeString
*str
= (const UnicodeString
*) key
.pointer
;
1917 return (str
== NULL
) ? 0 : str
->hashCode();
1920 // Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
1921 // does not depend on hashtable code.
1922 U_CAPI UBool U_EXPORT2
1923 uhash_compareUnicodeString(const UElement key1
, const UElement key2
) {
1924 const UnicodeString
*str1
= (const UnicodeString
*) key1
.pointer
;
1925 const UnicodeString
*str2
= (const UnicodeString
*) key2
.pointer
;
1929 if (str1
== NULL
|| str2
== NULL
) {
1932 return *str1
== *str2
;
1935 #ifdef U_STATIC_IMPLEMENTATION
1937 This should never be called. It is defined here to make sure that the
1938 virtual vector deleting destructor is defined within unistr.cpp.
1939 The vector deleting destructor is already a part of UObject,
1940 but defining it here makes sure that it is included with this object file.
1941 This makes sure that static library dependencies are kept to a minimum.
1943 static void uprv_UnicodeStringDummy(void) {
1944 delete [] (new UnicodeString
[2]);