1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
* Copyright (C) 1999-2016, International Business Machines Corporation and
* others. All Rights Reserved.
******************************************************************************
*
* Modification History:
*
*   Date        Name        Description
*   09/25/98    stephen     Creation.
*   04/20/99    stephen     Overhauled per 4/16 code review.
*   07/09/99    stephen     Renamed {hi,lo},{byte,word} to icu_X for HP/UX
*   11/18/99    aliu        Added handleReplaceBetween() to make inherit from
*                           Replaceable.
*   06/25/01    grhoten     Removed the dependency on iostream
******************************************************************************
*/
23 #include "unicode/utypes.h"
24 #include "unicode/appendable.h"
25 #include "unicode/putil.h"
28 #include "unicode/ustring.h"
29 #include "unicode/unistr.h"
30 #include "unicode/utf.h"
31 #include "unicode/utf16.h"
44 print(const UnicodeString
& s
,
49 for(int i
= 0; i
< s
.length(); ++i
) {
51 if(c
>= 0x007E || c
< 0x0020)
52 cout
<< "[0x" << hex
<< s
[i
] << "]";
66 for(int i
= 0; i
< len
; ++i
) {
68 if(c
>= 0x007E || c
< 0x0020)
69 cout
<< "[0x" << hex
<< s
[i
] << "]";
78 // Local function definitions for now
80 // need to copy areas that may overlap
83 us_arrayCopy(const UChar
*src
, int32_t srcStart
,
84 UChar
*dst
, int32_t dstStart
, int32_t count
)
87 uprv_memmove(dst
+dstStart
, src
+srcStart
, (size_t)count
*sizeof(*src
));
91 // u_unescapeAt() callback to get a UChar from a UnicodeString
93 static UChar U_CALLCONV
94 UnicodeString_charAt(int32_t offset
, void *context
) {
95 return ((icu::UnicodeString
*) context
)->charAt(offset
);
101 /* The Replaceable virtual destructor can't be defined in the header
102 due to how AIX works with multiple definitions of virtual functions.
104 Replaceable::~Replaceable() {}
106 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString
)
108 UnicodeString U_EXPORT2
109 operator+ (const UnicodeString
&s1
, const UnicodeString
&s2
) {
111 UnicodeString(s1
.length()+s2
.length()+1, (UChar32
)0, 0).
116 //========================================
117 // Reference Counting functions, put at top of file so that optimizing compilers
118 // have a chance to automatically inline.
119 //========================================
122 UnicodeString::addRef() {
123 umtx_atomic_inc((u_atomic_int32_t
*)fUnion
.fFields
.fArray
- 1);
127 UnicodeString::removeRef() {
128 return umtx_atomic_dec((u_atomic_int32_t
*)fUnion
.fFields
.fArray
- 1);
132 UnicodeString::refCount() const {
133 return umtx_loadAcquire(*((u_atomic_int32_t
*)fUnion
.fFields
.fArray
- 1));
137 UnicodeString::releaseArray() {
138 if((fUnion
.fFields
.fLengthAndFlags
& kRefCounted
) && removeRef() == 0) {
139 uprv_free((int32_t *)fUnion
.fFields
.fArray
- 1);
145 //========================================
147 //========================================
149 // The default constructor is inline in unistr.h.
151 UnicodeString::UnicodeString(int32_t capacity
, UChar32 c
, int32_t count
) {
152 fUnion
.fFields
.fLengthAndFlags
= 0;
153 if(count
<= 0 || (uint32_t)c
> 0x10ffff) {
154 // just allocate and do not do anything else
156 } else if(c
<= 0xffff) {
157 int32_t length
= count
;
158 if(capacity
< length
) {
161 if(allocate(capacity
)) {
162 UChar
*array
= getArrayStart();
163 UChar unit
= (UChar
)c
;
164 for(int32_t i
= 0; i
< length
; ++i
) {
169 } else { // supplementary code point, write surrogate pairs
170 if(count
> (INT32_MAX
/ 2)) {
171 // We would get more than 2G UChars.
175 int32_t length
= count
* 2;
176 if(capacity
< length
) {
179 if(allocate(capacity
)) {
180 UChar
*array
= getArrayStart();
181 UChar lead
= U16_LEAD(c
);
182 UChar trail
= U16_TRAIL(c
);
183 for(int32_t i
= 0; i
< length
; i
+= 2) {
185 array
[i
+ 1] = trail
;
192 UnicodeString::UnicodeString(UChar ch
) {
193 fUnion
.fFields
.fLengthAndFlags
= kLength1
| kShortString
;
194 fUnion
.fStackFields
.fBuffer
[0] = ch
;
197 UnicodeString::UnicodeString(UChar32 ch
) {
198 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
200 UBool isError
= FALSE
;
201 U16_APPEND(fUnion
.fStackFields
.fBuffer
, i
, US_STACKBUF_SIZE
, ch
, isError
);
202 // We test isError so that the compiler does not complain that we don't.
203 // If isError then i==0 which is what we want anyway.
209 UnicodeString::UnicodeString(const UChar
*text
) {
210 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
211 doAppend(text
, 0, -1);
214 UnicodeString::UnicodeString(const UChar
*text
,
215 int32_t textLength
) {
216 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
217 doAppend(text
, 0, textLength
);
220 UnicodeString::UnicodeString(UBool isTerminated
,
221 ConstChar16Ptr textPtr
,
222 int32_t textLength
) {
223 fUnion
.fFields
.fLengthAndFlags
= kReadonlyAlias
;
224 const UChar
*text
= textPtr
;
226 // treat as an empty string, do not alias
228 } else if(textLength
< -1 ||
229 (textLength
== -1 && !isTerminated
) ||
230 (textLength
>= 0 && isTerminated
&& text
[textLength
] != 0)
234 if(textLength
== -1) {
235 // text is terminated, or else it would have failed the above test
236 textLength
= u_strlen(text
);
238 setArray(const_cast<UChar
*>(text
), textLength
,
239 isTerminated
? textLength
+ 1 : textLength
);
243 UnicodeString::UnicodeString(UChar
*buff
,
245 int32_t buffCapacity
) {
246 fUnion
.fFields
.fLengthAndFlags
= kWritableAlias
;
248 // treat as an empty string, do not alias
250 } else if(buffLength
< -1 || buffCapacity
< 0 || buffLength
> buffCapacity
) {
253 if(buffLength
== -1) {
254 // fLength = u_strlen(buff); but do not look beyond buffCapacity
255 const UChar
*p
= buff
, *limit
= buff
+ buffCapacity
;
256 while(p
!= limit
&& *p
!= 0) {
259 buffLength
= (int32_t)(p
- buff
);
261 setArray(buff
, buffLength
, buffCapacity
);
265 UnicodeString::UnicodeString(const char *src
, int32_t length
, EInvariant
) {
266 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
268 // treat as an empty string
271 length
=(int32_t)uprv_strlen(src
);
273 if(cloneArrayIfNeeded(length
, length
, FALSE
)) {
274 u_charsToUChars(src
, getArrayStart(), length
);
282 #if U_CHARSET_IS_UTF8
284 UnicodeString::UnicodeString(const char *codepageData
) {
285 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
286 if(codepageData
!= 0) {
287 setToUTF8(codepageData
);
291 UnicodeString::UnicodeString(const char *codepageData
, int32_t dataLength
) {
292 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
293 // if there's nothing to convert, do nothing
294 if(codepageData
== 0 || dataLength
== 0 || dataLength
< -1) {
297 if(dataLength
== -1) {
298 dataLength
= (int32_t)uprv_strlen(codepageData
);
300 setToUTF8(StringPiece(codepageData
, dataLength
));
303 // else see unistr_cnv.cpp
306 UnicodeString::UnicodeString(const UnicodeString
& that
) {
307 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
311 UnicodeString::UnicodeString(UnicodeString
&&src
) U_NOEXCEPT
{
312 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
316 UnicodeString::UnicodeString(const UnicodeString
& that
,
318 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
319 setTo(that
, srcStart
);
322 UnicodeString::UnicodeString(const UnicodeString
& that
,
325 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
326 setTo(that
, srcStart
, srcLength
);
329 // Replaceable base class clone() default implementation, does not clone
331 Replaceable::clone() const {
335 // UnicodeString overrides clone() with a real implementation
337 UnicodeString::clone() const {
338 return new UnicodeString(*this);
341 //========================================
343 //========================================
namespace {

const int32_t kGrowSize = 128;

// The number of bytes for one int32_t reference counter and capacity UChars
// must fit into a 32-bit size_t (at least when on a 32-bit platform).
// We also add one for the NUL terminator, to avoid reallocation in getTerminatedBuffer(),
// and round up to a multiple of 16 bytes.
// This means that capacity must be at most (0xfffffff0 - 4) / 2 - 1 = 0x7ffffff5.
// (With more complicated checks we could go up to 0x7ffffffd without rounding up,
// but that does not seem worth it.)
const int32_t kMaxCapacity = 0x7ffffff5;

// Returns the capacity to request when a string must grow to newLength:
// newLength plus a quarter of it plus kGrowSize, capped at kMaxCapacity.
// The comparison is written as a subtraction so that the sum cannot overflow.
int32_t getGrowCapacity(int32_t newLength) {
  int32_t growSize = (newLength >> 2) + kGrowSize;
  if(growSize <= (kMaxCapacity - newLength)) {
    return newLength + growSize;
  } else {
    return kMaxCapacity;
  }
}

}  // namespace
370 UnicodeString::allocate(int32_t capacity
) {
371 if(capacity
<= US_STACKBUF_SIZE
) {
372 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
375 if(capacity
<= kMaxCapacity
) {
376 ++capacity
; // for the NUL
377 // Switch to size_t which is unsigned so that we can allocate up to 4GB.
378 // Reference counter + UChars.
379 size_t numBytes
= sizeof(int32_t) + (size_t)capacity
* U_SIZEOF_UCHAR
;
380 // Round up to a multiple of 16.
381 numBytes
= (numBytes
+ 15) & ~15;
382 int32_t *array
= (int32_t *) uprv_malloc(numBytes
);
384 // set initial refCount and point behind the refCount
386 numBytes
-= sizeof(int32_t);
388 // have fArray point to the first UChar
389 fUnion
.fFields
.fArray
= (UChar
*)array
;
390 fUnion
.fFields
.fCapacity
= (int32_t)(numBytes
/ U_SIZEOF_UCHAR
);
391 fUnion
.fFields
.fLengthAndFlags
= kLongString
;
395 fUnion
.fFields
.fLengthAndFlags
= kIsBogus
;
396 fUnion
.fFields
.fArray
= 0;
397 fUnion
.fFields
.fCapacity
= 0;
401 //========================================
403 //========================================
405 #ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
406 static u_atomic_int32_t finalLengthCounts
[0x400]; // UnicodeString::kMaxShortLength+1
407 static u_atomic_int32_t
beyondCount(0);
409 U_CAPI
void unistr_printLengths() {
411 for(i
= 0; i
<= 59; ++i
) {
412 printf("%2d, %9d\n", i
, (int32_t)finalLengthCounts
[i
]);
414 int32_t beyond
= beyondCount
;
415 for(; i
< UPRV_LENGTHOF(finalLengthCounts
); ++i
) {
416 beyond
+= finalLengthCounts
[i
];
418 printf(">59, %9d\n", beyond
);
422 UnicodeString::~UnicodeString()
424 #ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
425 // Count lengths of strings at the end of their lifetime.
426 // Useful for discussion of a desirable stack buffer size.
427 // Count the contents length, not the optional NUL terminator nor further capacity.
428 // Ignore open-buffer strings and strings which alias external storage.
429 if((fUnion
.fFields
.fLengthAndFlags
&(kOpenGetBuffer
|kReadonlyAlias
|kWritableAlias
)) == 0) {
430 if(hasShortLength()) {
431 umtx_atomic_inc(finalLengthCounts
+ getShortLength());
433 umtx_atomic_inc(&beyondCount
);
441 //========================================
443 //========================================
445 UnicodeString
UnicodeString::fromUTF8(StringPiece utf8
) {
446 UnicodeString result
;
447 result
.setToUTF8(utf8
);
451 UnicodeString
UnicodeString::fromUTF32(const UChar32
*utf32
, int32_t length
) {
452 UnicodeString result
;
454 // Most UTF-32 strings will be BMP-only and result in a same-length
455 // UTF-16 string. We overestimate the capacity just slightly,
456 // just in case there are a few supplementary characters.
457 if(length
<= US_STACKBUF_SIZE
) {
458 capacity
= US_STACKBUF_SIZE
;
460 capacity
= length
+ (length
>> 4) + 4;
463 UChar
*utf16
= result
.getBuffer(capacity
);
465 UErrorCode errorCode
= U_ZERO_ERROR
;
466 u_strFromUTF32WithSub(utf16
, result
.getCapacity(), &length16
,
468 0xfffd, // Substitution character.
469 NULL
, // Don't care about number of substitutions.
471 result
.releaseBuffer(length16
);
472 if(errorCode
== U_BUFFER_OVERFLOW_ERROR
) {
473 capacity
= length16
+ 1; // +1 for the terminating NUL.
475 } else if(U_FAILURE(errorCode
)) {
483 //========================================
485 //========================================
488 UnicodeString::operator=(const UnicodeString
&src
) {
489 return copyFrom(src
);
493 UnicodeString::fastCopyFrom(const UnicodeString
&src
) {
494 return copyFrom(src
, TRUE
);
498 UnicodeString::copyFrom(const UnicodeString
&src
, UBool fastCopy
) {
499 // if assigning to ourselves, do nothing
504 // is the right side bogus?
510 // delete the current contents
514 // empty string - use the stack buffer
519 // fLength>0 and not an "open" src.getBuffer(minCapacity)
520 fUnion
.fFields
.fLengthAndFlags
= src
.fUnion
.fFields
.fLengthAndFlags
;
521 switch(src
.fUnion
.fFields
.fLengthAndFlags
& kAllStorageFlags
) {
523 // short string using the stack buffer, do the same
524 uprv_memcpy(fUnion
.fStackFields
.fBuffer
, src
.fUnion
.fStackFields
.fBuffer
,
525 getShortLength() * U_SIZEOF_UCHAR
);
528 // src uses a refCounted string buffer, use that buffer with refCount
529 // src is const, use a cast - we don't actually change it
530 ((UnicodeString
&)src
).addRef();
531 // copy all fields, share the reference-counted buffer
532 fUnion
.fFields
.fArray
= src
.fUnion
.fFields
.fArray
;
533 fUnion
.fFields
.fCapacity
= src
.fUnion
.fFields
.fCapacity
;
534 if(!hasShortLength()) {
535 fUnion
.fFields
.fLength
= src
.fUnion
.fFields
.fLength
;
540 // src is a readonly alias, do the same
541 // -> maintain the readonly alias as such
542 fUnion
.fFields
.fArray
= src
.fUnion
.fFields
.fArray
;
543 fUnion
.fFields
.fCapacity
= src
.fUnion
.fFields
.fCapacity
;
544 if(!hasShortLength()) {
545 fUnion
.fFields
.fLength
= src
.fUnion
.fFields
.fLength
;
549 // else if(!fastCopy) fall through to case kWritableAlias
550 // -> allocate a new buffer and copy the contents
552 case kWritableAlias
: {
553 // src is a writable alias; we make a copy of that instead
554 int32_t srcLength
= src
.length();
555 if(allocate(srcLength
)) {
556 u_memcpy(getArrayStart(), src
.getArrayStart(), srcLength
);
557 setLength(srcLength
);
560 // if there is not enough memory, then fall through to setting to bogus
564 // if src is bogus, set ourselves to bogus
565 // do not call setToBogus() here because fArray and flags are not consistent here
566 fUnion
.fFields
.fLengthAndFlags
= kIsBogus
;
567 fUnion
.fFields
.fArray
= 0;
568 fUnion
.fFields
.fCapacity
= 0;
575 UnicodeString
&UnicodeString::moveFrom(UnicodeString
&src
) U_NOEXCEPT
{
576 // No explicit check for self move assignment, consistent with standard library.
577 // Self move assignment causes no crash nor leak but might make the object bogus.
579 copyFieldsFrom(src
, TRUE
);
583 // Same as moveFrom() except without memory management.
584 void UnicodeString::copyFieldsFrom(UnicodeString
&src
, UBool setSrcToBogus
) U_NOEXCEPT
{
585 int16_t lengthAndFlags
= fUnion
.fFields
.fLengthAndFlags
= src
.fUnion
.fFields
.fLengthAndFlags
;
586 if(lengthAndFlags
& kUsingStackBuffer
) {
587 // Short string using the stack buffer, copy the contents.
588 // Check for self assignment to prevent "overlap in memcpy" warnings,
589 // although it should be harmless to copy a buffer to itself exactly.
591 uprv_memcpy(fUnion
.fStackFields
.fBuffer
, src
.fUnion
.fStackFields
.fBuffer
,
592 getShortLength() * U_SIZEOF_UCHAR
);
595 // In all other cases, copy all fields.
596 fUnion
.fFields
.fArray
= src
.fUnion
.fFields
.fArray
;
597 fUnion
.fFields
.fCapacity
= src
.fUnion
.fFields
.fCapacity
;
598 if(!hasShortLength()) {
599 fUnion
.fFields
.fLength
= src
.fUnion
.fFields
.fLength
;
602 // Set src to bogus without releasing any memory.
603 src
.fUnion
.fFields
.fLengthAndFlags
= kIsBogus
;
604 src
.fUnion
.fFields
.fArray
= NULL
;
605 src
.fUnion
.fFields
.fCapacity
= 0;
610 void UnicodeString::swap(UnicodeString
&other
) U_NOEXCEPT
{
611 UnicodeString temp
; // Empty short string: Known not to need releaseArray().
612 // Copy fields without resetting source values in between.
613 temp
.copyFieldsFrom(*this, FALSE
);
614 this->copyFieldsFrom(other
, FALSE
);
615 other
.copyFieldsFrom(temp
, FALSE
);
616 // Set temp to an empty string so that other's memory is not released twice.
617 temp
.fUnion
.fFields
.fLengthAndFlags
= kShortString
;
620 //========================================
621 // Miscellaneous operations
622 //========================================
624 UnicodeString
UnicodeString::unescape() const {
625 UnicodeString
result(length(), (UChar32
)0, (int32_t)0); // construct with capacity
626 if (result
.isBogus()) {
629 const UChar
*array
= getBuffer();
630 int32_t len
= length();
632 for (int32_t i
=0;;) {
634 result
.append(array
, prev
, len
- prev
);
637 if (array
[i
++] == 0x5C /*'\\'*/) {
638 result
.append(array
, prev
, (i
- 1) - prev
);
639 UChar32 c
= unescapeAt(i
); // advances i
641 result
.remove(); // return empty string
642 break; // invalid escape sequence
651 UChar32
UnicodeString::unescapeAt(int32_t &offset
) const {
652 return u_unescapeAt(UnicodeString_charAt
, &offset
, length(), (void*)this);
655 //========================================
656 // Read-only implementation
657 //========================================
659 UnicodeString::doEquals(const UnicodeString
&text
, int32_t len
) const {
660 // Requires: this & text not bogus and have same lengths.
661 // Byte-wise comparison works for equality regardless of endianness.
662 return uprv_memcmp(getArrayStart(), text
.getArrayStart(), len
* U_SIZEOF_UCHAR
) == 0;
666 UnicodeString::doCompare( int32_t start
,
668 const UChar
*srcChars
,
670 int32_t srcLength
) const
672 // compare illegal string values
677 // pin indices to legal values
678 pinIndices(start
, length
);
680 if(srcChars
== NULL
) {
681 // treat const UChar *srcChars==NULL as an empty string
682 return length
== 0 ? 0 : 1;
685 // get the correct pointer
686 const UChar
*chars
= getArrayStart();
689 srcChars
+= srcStart
;
694 // get the srcLength if necessary
696 srcLength
= u_strlen(srcChars
+ srcStart
);
699 // are we comparing different lengths?
700 if(length
!= srcLength
) {
701 if(length
< srcLength
) {
705 minLength
= srcLength
;
714 * note that uprv_memcmp() returns an int but we return an int8_t;
715 * we need to take care not to truncate the result -
716 * one way to do this is to right-shift the value to
717 * move the sign bit into the lower 8 bits and making sure that this
718 * does not become 0 itself
721 if(minLength
> 0 && chars
!= srcChars
) {
725 // big-endian: byte comparison works
726 result
= uprv_memcmp(chars
, srcChars
, minLength
* sizeof(UChar
));
728 return (int8_t)(result
>> 15 | 1);
731 // little-endian: compare UChar units
733 result
= ((int32_t)*(chars
++) - (int32_t)*(srcChars
++));
735 return (int8_t)(result
>> 15 | 1);
737 } while(--minLength
> 0);
743 /* String compare in code point order - doCompare() compares in code unit order. */
745 UnicodeString::doCompareCodePointOrder(int32_t start
,
747 const UChar
*srcChars
,
749 int32_t srcLength
) const
751 // compare illegal string values
752 // treat const UChar *srcChars==NULL as an empty string
757 // pin indices to legal values
758 pinIndices(start
, length
);
760 if(srcChars
== NULL
) {
761 srcStart
= srcLength
= 0;
764 int32_t diff
= uprv_strCompare(getArrayStart() + start
, length
, (srcChars
!=NULL
)?(srcChars
+ srcStart
):NULL
, srcLength
, FALSE
, TRUE
);
765 /* translate the 32-bit result into an 8-bit one */
767 return (int8_t)(diff
>> 15 | 1);
774 UnicodeString::getLength() const {
779 UnicodeString::getCharAt(int32_t offset
) const {
780 return charAt(offset
);
784 UnicodeString::getChar32At(int32_t offset
) const {
785 return char32At(offset
);
789 UnicodeString::char32At(int32_t offset
) const
791 int32_t len
= length();
792 if((uint32_t)offset
< (uint32_t)len
) {
793 const UChar
*array
= getArrayStart();
795 U16_GET(array
, 0, offset
, len
, c
);
798 return kInvalidUChar
;
803 UnicodeString::getChar32Start(int32_t offset
) const {
804 if((uint32_t)offset
< (uint32_t)length()) {
805 const UChar
*array
= getArrayStart();
806 U16_SET_CP_START(array
, 0, offset
);
814 UnicodeString::getChar32Limit(int32_t offset
) const {
815 int32_t len
= length();
816 if((uint32_t)offset
< (uint32_t)len
) {
817 const UChar
*array
= getArrayStart();
818 U16_SET_CP_LIMIT(array
, 0, offset
, len
);
826 UnicodeString::countChar32(int32_t start
, int32_t length
) const {
827 pinIndices(start
, length
);
828 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
829 return u_countChar32(getArrayStart()+start
, length
);
833 UnicodeString::hasMoreChar32Than(int32_t start
, int32_t length
, int32_t number
) const {
834 pinIndices(start
, length
);
835 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
836 return u_strHasMoreChar32Than(getArrayStart()+start
, length
, number
);
840 UnicodeString::moveIndex32(int32_t index
, int32_t delta
) const {
842 int32_t len
= length();
845 } else if(index
>len
) {
849 const UChar
*array
= getArrayStart();
851 U16_FWD_N(array
, index
, len
, delta
);
853 U16_BACK_N(array
, 0, index
, -delta
);
860 UnicodeString::doExtract(int32_t start
,
863 int32_t dstStart
) const
865 // pin indices to legal values
866 pinIndices(start
, length
);
868 // do not copy anything if we alias dst itself
869 const UChar
*array
= getArrayStart();
870 if(array
+ start
!= dst
+ dstStart
) {
871 us_arrayCopy(array
, start
, dst
, dstStart
, length
);
876 UnicodeString::extract(Char16Ptr dest
, int32_t destCapacity
,
877 UErrorCode
&errorCode
) const {
878 int32_t len
= length();
879 if(U_SUCCESS(errorCode
)) {
880 if(isBogus() || destCapacity
<0 || (destCapacity
>0 && dest
==0)) {
881 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
883 const UChar
*array
= getArrayStart();
884 if(len
>0 && len
<=destCapacity
&& array
!=dest
) {
885 u_memcpy(dest
, array
, len
);
887 return u_terminateUChars(dest
, destCapacity
, len
, &errorCode
);
895 UnicodeString::extract(int32_t start
,
898 int32_t targetCapacity
,
899 enum EInvariant
) const
901 // if the arguments are illegal, then do nothing
902 if(targetCapacity
< 0 || (targetCapacity
> 0 && target
== NULL
)) {
906 // pin the indices to legal values
907 pinIndices(start
, length
);
909 if(length
<= targetCapacity
) {
910 u_UCharsToChars(getArrayStart() + start
, target
, length
);
912 UErrorCode status
= U_ZERO_ERROR
;
913 return u_terminateChars(target
, targetCapacity
, length
, &status
);
917 UnicodeString::tempSubString(int32_t start
, int32_t len
) const {
918 pinIndices(start
, len
);
919 const UChar
*array
= getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer
921 array
=fUnion
.fStackFields
.fBuffer
; // anything not NULL because that would make an empty string
922 len
=-2; // bogus result string
924 return UnicodeString(FALSE
, array
+ start
, len
);
928 UnicodeString::toUTF8(int32_t start
, int32_t len
,
929 char *target
, int32_t capacity
) const {
930 pinIndices(start
, len
);
932 UErrorCode errorCode
= U_ZERO_ERROR
;
933 u_strToUTF8WithSub(target
, capacity
, &length8
,
934 getBuffer() + start
, len
,
935 0xFFFD, // Standard substitution character.
936 NULL
, // Don't care about number of substitutions.
941 #if U_CHARSET_IS_UTF8
944 UnicodeString::extract(int32_t start
, int32_t len
,
945 char *target
, uint32_t dstSize
) const {
946 // if the arguments are illegal, then do nothing
947 if(/*dstSize < 0 || */(dstSize
> 0 && target
== 0)) {
950 return toUTF8(start
, len
, target
, dstSize
<= 0x7fffffff ? (int32_t)dstSize
: 0x7fffffff);
953 // else see unistr_cnv.cpp
957 UnicodeString::extractBetween(int32_t start
,
959 UnicodeString
& target
) const {
962 doExtract(start
, limit
- start
, target
);
965 // When converting from UTF-16 to UTF-8, the result will have at most 3 times
966 // as many bytes as the source has UChars.
967 // The "worst cases" are writing systems like Indic, Thai and CJK with
970 UnicodeString::toUTF8(ByteSink
&sink
) const {
971 int32_t length16
= length();
973 char stackBuffer
[1024];
974 int32_t capacity
= (int32_t)sizeof(stackBuffer
);
975 UBool utf8IsOwned
= FALSE
;
976 char *utf8
= sink
.GetAppendBuffer(length16
< capacity
? length16
: capacity
,
978 stackBuffer
, capacity
,
981 UErrorCode errorCode
= U_ZERO_ERROR
;
982 u_strToUTF8WithSub(utf8
, capacity
, &length8
,
983 getBuffer(), length16
,
984 0xFFFD, // Standard substitution character.
985 NULL
, // Don't care about number of substitutions.
987 if(errorCode
== U_BUFFER_OVERFLOW_ERROR
) {
988 utf8
= (char *)uprv_malloc(length8
);
991 errorCode
= U_ZERO_ERROR
;
992 u_strToUTF8WithSub(utf8
, length8
, &length8
,
993 getBuffer(), length16
,
994 0xFFFD, // Standard substitution character.
995 NULL
, // Don't care about number of substitutions.
998 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
1001 if(U_SUCCESS(errorCode
)) {
1002 sink
.Append(utf8
, length8
);
1012 UnicodeString::toUTF32(UChar32
*utf32
, int32_t capacity
, UErrorCode
&errorCode
) const {
1014 if(U_SUCCESS(errorCode
)) {
1015 // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
1016 u_strToUTF32WithSub(utf32
, capacity
, &length32
,
1017 getBuffer(), length(),
1018 0xfffd, // Substitution character.
1019 NULL
, // Don't care about number of substitutions.
1026 UnicodeString::indexOf(const UChar
*srcChars
,
1030 int32_t length
) const
1032 if(isBogus() || srcChars
== 0 || srcStart
< 0 || srcLength
== 0) {
1036 // UnicodeString does not find empty substrings
1037 if(srcLength
< 0 && srcChars
[srcStart
] == 0) {
1041 // get the indices within bounds
1042 pinIndices(start
, length
);
1044 // find the first occurrence of the substring
1045 const UChar
*array
= getArrayStart();
1046 const UChar
*match
= u_strFindFirst(array
+ start
, length
, srcChars
+ srcStart
, srcLength
);
1050 return (int32_t)(match
- array
);
1055 UnicodeString::doIndexOf(UChar c
,
1057 int32_t length
) const
1060 pinIndices(start
, length
);
1062 // find the first occurrence of c
1063 const UChar
*array
= getArrayStart();
1064 const UChar
*match
= u_memchr(array
+ start
, c
, length
);
1068 return (int32_t)(match
- array
);
1073 UnicodeString::doIndexOf(UChar32 c
,
1075 int32_t length
) const {
1077 pinIndices(start
, length
);
1079 // find the first occurrence of c
1080 const UChar
*array
= getArrayStart();
1081 const UChar
*match
= u_memchr32(array
+ start
, c
, length
);
1085 return (int32_t)(match
- array
);
1090 UnicodeString::lastIndexOf(const UChar
*srcChars
,
1094 int32_t length
) const
1096 if(isBogus() || srcChars
== 0 || srcStart
< 0 || srcLength
== 0) {
1100 // UnicodeString does not find empty substrings
1101 if(srcLength
< 0 && srcChars
[srcStart
] == 0) {
1105 // get the indices within bounds
1106 pinIndices(start
, length
);
1108 // find the last occurrence of the substring
1109 const UChar
*array
= getArrayStart();
1110 const UChar
*match
= u_strFindLast(array
+ start
, length
, srcChars
+ srcStart
, srcLength
);
1114 return (int32_t)(match
- array
);
1119 UnicodeString::doLastIndexOf(UChar c
,
1121 int32_t length
) const
1128 pinIndices(start
, length
);
1130 // find the last occurrence of c
1131 const UChar
*array
= getArrayStart();
1132 const UChar
*match
= u_memrchr(array
+ start
, c
, length
);
1136 return (int32_t)(match
- array
);
1141 UnicodeString::doLastIndexOf(UChar32 c
,
1143 int32_t length
) const {
1145 pinIndices(start
, length
);
1147 // find the last occurrence of c
1148 const UChar
*array
= getArrayStart();
1149 const UChar
*match
= u_memrchr32(array
+ start
, c
, length
);
1153 return (int32_t)(match
- array
);
1157 //========================================
1158 // Write implementation
1159 //========================================
1162 UnicodeString::findAndReplace(int32_t start
,
1164 const UnicodeString
& oldText
,
1167 const UnicodeString
& newText
,
1171 if(isBogus() || oldText
.isBogus() || newText
.isBogus()) {
1175 pinIndices(start
, length
);
1176 oldText
.pinIndices(oldStart
, oldLength
);
1177 newText
.pinIndices(newStart
, newLength
);
1179 if(oldLength
== 0) {
1183 while(length
> 0 && length
>= oldLength
) {
1184 int32_t pos
= indexOf(oldText
, oldStart
, oldLength
, start
, length
);
1186 // no more oldText's here: done
1189 // we found oldText, replace it by newText and go beyond it
1190 replace(pos
, oldLength
, newText
, newStart
, newLength
);
1191 length
-= pos
+ oldLength
- start
;
1192 start
= pos
+ newLength
;
1201 UnicodeString::setToBogus()
1205 fUnion
.fFields
.fLengthAndFlags
= kIsBogus
;
1206 fUnion
.fFields
.fArray
= 0;
1207 fUnion
.fFields
.fCapacity
= 0;
1210 // turn a bogus string into an empty one
1212 UnicodeString::unBogus() {
1213 if(fUnion
.fFields
.fLengthAndFlags
& kIsBogus
) {
1219 UnicodeString::getTerminatedBuffer() {
1223 UChar
*array
= getArrayStart();
1224 int32_t len
= length();
1225 if(len
< getCapacity()) {
1226 if(fUnion
.fFields
.fLengthAndFlags
& kBufferIsReadonly
) {
1227 // If len<capacity on a read-only alias, then array[len] is
1228 // either the original NUL (if constructed with (TRUE, s, length))
1229 // or one of the original string contents characters (if later truncated),
1230 // therefore we can assume that array[len] is initialized memory.
1231 if(array
[len
] == 0) {
1234 } else if(((fUnion
.fFields
.fLengthAndFlags
& kRefCounted
) == 0 || refCount() == 1)) {
1235 // kRefCounted: Do not write the NUL if the buffer is shared.
1236 // That is mostly safe, except when the length of one copy was modified
1237 // without copy-on-write, e.g., via truncate(newLength) or remove(void).
1238 // Then the NUL would be written into the middle of another copy's string.
1240 // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL.
1241 // Do not test if there is a NUL already because it might be uninitialized memory.
1242 // (That would be safe, but tools like valgrind & Purify would complain.)
1247 if(len
<INT32_MAX
&& cloneArrayIfNeeded(len
+1)) {
1248 array
= getArrayStart();
1256 // setTo() analogous to the readonly-aliasing constructor with the same signature
1258 UnicodeString::setTo(UBool isTerminated
,
1259 ConstChar16Ptr textPtr
,
1262 if(fUnion
.fFields
.fLengthAndFlags
& kOpenGetBuffer
) {
1263 // do not modify a string that has an "open" getBuffer(minCapacity)
1267 const UChar
*text
= textPtr
;
1269 // treat as an empty string, do not alias
1275 if( textLength
< -1 ||
1276 (textLength
== -1 && !isTerminated
) ||
1277 (textLength
>= 0 && isTerminated
&& text
[textLength
] != 0)
1285 if(textLength
== -1) {
1286 // text is terminated, or else it would have failed the above test
1287 textLength
= u_strlen(text
);
1289 fUnion
.fFields
.fLengthAndFlags
= kReadonlyAlias
;
1290 setArray((UChar
*)text
, textLength
, isTerminated
? textLength
+ 1 : textLength
);
1294 // setTo() analogous to the writable-aliasing constructor with the same signature
1296 UnicodeString::setTo(UChar
*buffer
,
1298 int32_t buffCapacity
) {
1299 if(fUnion
.fFields
.fLengthAndFlags
& kOpenGetBuffer
) {
1300 // do not modify a string that has an "open" getBuffer(minCapacity)
1304 if(buffer
== NULL
) {
1305 // treat as an empty string, do not alias
1311 if(buffLength
< -1 || buffCapacity
< 0 || buffLength
> buffCapacity
) {
1314 } else if(buffLength
== -1) {
1315 // buffLength = u_strlen(buff); but do not look beyond buffCapacity
1316 const UChar
*p
= buffer
, *limit
= buffer
+ buffCapacity
;
1317 while(p
!= limit
&& *p
!= 0) {
1320 buffLength
= (int32_t)(p
- buffer
);
1325 fUnion
.fFields
.fLengthAndFlags
= kWritableAlias
;
1326 setArray(buffer
, buffLength
, buffCapacity
);
1330 UnicodeString
&UnicodeString::setToUTF8(StringPiece utf8
) {
1332 int32_t length
= utf8
.length();
1334 // The UTF-16 string will be at most as long as the UTF-8 string.
1335 if(length
<= US_STACKBUF_SIZE
) {
1336 capacity
= US_STACKBUF_SIZE
;
1338 capacity
= length
+ 1; // +1 for the terminating NUL.
1340 UChar
*utf16
= getBuffer(capacity
);
1342 UErrorCode errorCode
= U_ZERO_ERROR
;
1343 u_strFromUTF8WithSub(utf16
, getCapacity(), &length16
,
1344 utf8
.data(), length
,
1345 0xfffd, // Substitution character.
1346 NULL
, // Don't care about number of substitutions.
1348 releaseBuffer(length16
);
1349 if(U_FAILURE(errorCode
)) {
1356 UnicodeString::setCharAt(int32_t offset
,
1359 int32_t len
= length();
1360 if(cloneArrayIfNeeded() && len
> 0) {
1363 } else if(offset
>= len
) {
1367 getArrayStart()[offset
] = c
;
1373 UnicodeString::replace(int32_t start
,
1376 UChar buffer
[U16_MAX_LENGTH
];
1378 UBool isError
= FALSE
;
1379 U16_APPEND(buffer
, count
, U16_MAX_LENGTH
, srcChar
, isError
);
1380 // We test isError so that the compiler does not complain that we don't.
1381 // If isError (srcChar is not a valid code point) then count==0 which means
1382 // we remove the source segment rather than replacing it with srcChar.
1383 return doReplace(start
, _length
, buffer
, 0, isError
? 0 : count
);
1387 UnicodeString::append(UChar32 srcChar
) {
1388 UChar buffer
[U16_MAX_LENGTH
];
1389 int32_t _length
= 0;
1390 UBool isError
= FALSE
;
1391 U16_APPEND(buffer
, _length
, U16_MAX_LENGTH
, srcChar
, isError
);
1392 // We test isError so that the compiler does not complain that we don't.
1393 // If isError then _length==0 which turns the doAppend() into a no-op anyway.
1394 return isError
? *this : doAppend(buffer
, 0, _length
);
1398 UnicodeString::doReplace( int32_t start
,
1400 const UnicodeString
& src
,
1404 // pin the indices to legal values
1405 src
.pinIndices(srcStart
, srcLength
);
1407 // get the characters from src
1408 // and replace the range in ourselves with them
1409 return doReplace(start
, length
, src
.getArrayStart(), srcStart
, srcLength
);
1413 UnicodeString::doReplace(int32_t start
,
1415 const UChar
*srcChars
,
1423 int32_t oldLength
= this->length();
1425 // optimize (read-only alias).remove(0, start) and .remove(start, end)
1426 if((fUnion
.fFields
.fLengthAndFlags
&kBufferIsReadonly
) && srcLength
== 0) {
1428 // remove prefix by adjusting the array pointer
1430 fUnion
.fFields
.fArray
+= length
;
1431 fUnion
.fFields
.fCapacity
-= length
;
1432 setLength(oldLength
- length
);
1436 if(length
>= (oldLength
- start
)) {
1437 // remove suffix by reducing the length (like truncate())
1439 fUnion
.fFields
.fCapacity
= start
; // not NUL-terminated any more
1445 if(start
== oldLength
) {
1446 return doAppend(srcChars
, srcStart
, srcLength
);
1450 srcStart
= srcLength
= 0;
1451 } else if(srcLength
< 0) {
1452 // get the srcLength if necessary
1453 srcLength
= u_strlen(srcChars
+ srcStart
);
1456 // pin the indices to legal values
1457 pinIndices(start
, length
);
1459 // Calculate the size of the string after the replace.
1460 // Avoid int32_t overflow.
1461 int32_t newLength
= oldLength
- length
;
1462 if(srcLength
> (INT32_MAX
- newLength
)) {
1466 newLength
+= srcLength
;
1468 // cloneArrayIfNeeded(doCopyArray=FALSE) may change fArray but will not copy the current contents;
1469 // therefore we need to keep the current fArray
1470 UChar oldStackBuffer
[US_STACKBUF_SIZE
];
1472 if((fUnion
.fFields
.fLengthAndFlags
&kUsingStackBuffer
) && (newLength
> US_STACKBUF_SIZE
)) {
1473 // copy the stack buffer contents because it will be overwritten with
1474 // fUnion.fFields values
1475 u_memcpy(oldStackBuffer
, fUnion
.fStackFields
.fBuffer
, oldLength
);
1476 oldArray
= oldStackBuffer
;
1478 oldArray
= getArrayStart();
1481 // clone our array and allocate a bigger array if needed
1482 int32_t *bufferToDelete
= 0;
1483 if(!cloneArrayIfNeeded(newLength
, getGrowCapacity(newLength
),
1484 FALSE
, &bufferToDelete
)
1489 // now do the replace
1491 UChar
*newArray
= getArrayStart();
1492 if(newArray
!= oldArray
) {
1493 // if fArray changed, then we need to copy everything except what will change
1494 us_arrayCopy(oldArray
, 0, newArray
, 0, start
);
1495 us_arrayCopy(oldArray
, start
+ length
,
1496 newArray
, start
+ srcLength
,
1497 oldLength
- (start
+ length
));
1498 } else if(length
!= srcLength
) {
1499 // fArray did not change; copy only the portion that isn't changing, leaving a hole
1500 us_arrayCopy(oldArray
, start
+ length
,
1501 newArray
, start
+ srcLength
,
1502 oldLength
- (start
+ length
));
1505 // now fill in the hole with the new string
1506 us_arrayCopy(srcChars
, srcStart
, newArray
, start
, srcLength
);
1508 setLength(newLength
);
1510 // delayed delete in case srcChars == fArray when we started, and
1511 // to keep oldArray alive for the above operations
1512 if (bufferToDelete
) {
1513 uprv_free(bufferToDelete
);
1519 // Versions of doReplace() only for append() variants.
1520 // doReplace() and doAppend() optimize for different cases.
1523 UnicodeString::doAppend(const UnicodeString
& src
, int32_t srcStart
, int32_t srcLength
) {
1524 if(srcLength
== 0) {
1528 // pin the indices to legal values
1529 src
.pinIndices(srcStart
, srcLength
);
1530 return doAppend(src
.getArrayStart(), srcStart
, srcLength
);
1534 UnicodeString::doAppend(const UChar
*srcChars
, int32_t srcStart
, int32_t srcLength
) {
1535 if(!isWritable() || srcLength
== 0 || srcChars
== NULL
) {
1540 // get the srcLength if necessary
1541 if((srcLength
= u_strlen(srcChars
+ srcStart
)) == 0) {
1546 int32_t oldLength
= length();
1547 int32_t newLength
= oldLength
+ srcLength
;
1548 // optimize append() onto a large-enough, owned string
1549 if((newLength
<= getCapacity() && isBufferWritable()) ||
1550 cloneArrayIfNeeded(newLength
, getGrowCapacity(newLength
))) {
1551 UChar
*newArray
= getArrayStart();
1552 // Do not copy characters when
1553 // UChar *buffer=str.getAppendBuffer(...);
1555 // str.append(buffer, length);
1557 // str.appendString(buffer, length)
1559 if(srcChars
+ srcStart
!= newArray
+ oldLength
) {
1560 us_arrayCopy(srcChars
, srcStart
, newArray
, oldLength
, srcLength
);
1562 setLength(newLength
);
1571 UnicodeString::handleReplaceBetween(int32_t start
,
1573 const UnicodeString
& text
) {
1574 replaceBetween(start
, limit
, text
);
1581 UnicodeString::copy(int32_t start
, int32_t limit
, int32_t dest
) {
1582 if (limit
<= start
) {
1583 return; // Nothing to do; avoid bogus malloc call
1585 UChar
* text
= (UChar
*) uprv_malloc( sizeof(UChar
) * (limit
- start
) );
1586 // Check to make sure text is not null.
1588 extractBetween(start
, limit
, text
, 0);
1589 insert(dest
, text
, 0, limit
- start
);
1597 * NOTE: This is for the Replaceable class. There is no rep.cpp,
1598 * so we implement this function here.
1600 UBool
Replaceable::hasMetaData() const {
1607 UBool
UnicodeString::hasMetaData() const {
1612 UnicodeString::doReverse(int32_t start
, int32_t length
) {
1613 if(length
<= 1 || !cloneArrayIfNeeded()) {
1617 // pin the indices to legal values
1618 pinIndices(start
, length
);
1619 if(length
<= 1) { // pinIndices() might have shrunk the length
1623 UChar
*left
= getArrayStart() + start
;
1624 UChar
*right
= left
+ length
- 1; // -1 for inclusive boundary (length>=2)
1626 UBool hasSupplementary
= FALSE
;
1628 // Before the loop we know left<right because length>=2.
1630 hasSupplementary
|= (UBool
)U16_IS_LEAD(swap
= *left
);
1631 hasSupplementary
|= (UBool
)U16_IS_LEAD(*left
++ = *right
);
1633 } while(left
< right
);
1634 // Make sure to test the middle code unit of an odd-length string.
1635 // Redundant if the length is even.
1636 hasSupplementary
|= (UBool
)U16_IS_LEAD(*left
);
1638 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1639 if(hasSupplementary
) {
1642 left
= getArrayStart() + start
;
1643 right
= left
+ length
- 1; // -1 so that we can look at *(left+1) if left<right
1644 while(left
< right
) {
1645 if(U16_IS_TRAIL(swap
= *left
) && U16_IS_LEAD(swap2
= *(left
+ 1))) {
1658 UnicodeString::padLeading(int32_t targetLength
,
1661 int32_t oldLength
= length();
1662 if(oldLength
>= targetLength
|| !cloneArrayIfNeeded(targetLength
)) {
1665 // move contents up by padding width
1666 UChar
*array
= getArrayStart();
1667 int32_t start
= targetLength
- oldLength
;
1668 us_arrayCopy(array
, 0, array
, start
, oldLength
);
1670 // fill in padding character
1671 while(--start
>= 0) {
1672 array
[start
] = padChar
;
1674 setLength(targetLength
);
1680 UnicodeString::padTrailing(int32_t targetLength
,
1683 int32_t oldLength
= length();
1684 if(oldLength
>= targetLength
|| !cloneArrayIfNeeded(targetLength
)) {
1687 // fill in padding character
1688 UChar
*array
= getArrayStart();
1689 int32_t length
= targetLength
;
1690 while(--length
>= oldLength
) {
1691 array
[length
] = padChar
;
1693 setLength(targetLength
);
1698 //========================================
1700 //========================================
1702 UnicodeString::doHashCode() const
1704 /* Delegate hash computation to uhash. This makes UnicodeString
1705 * hashing consistent with UChar* hashing. */
1706 int32_t hashCode
= ustr_hashUCharsN(getArrayStart(), length());
1707 if (hashCode
== kInvalidHashCode
) {
1708 hashCode
= kEmptyHashCode
;
1713 //========================================
1715 //========================================
1718 UnicodeString::getBuffer(int32_t minCapacity
) {
1719 if(minCapacity
>=-1 && cloneArrayIfNeeded(minCapacity
)) {
1720 fUnion
.fFields
.fLengthAndFlags
|=kOpenGetBuffer
;
1722 return getArrayStart();
1729 UnicodeString::releaseBuffer(int32_t newLength
) {
1730 if(fUnion
.fFields
.fLengthAndFlags
&kOpenGetBuffer
&& newLength
>=-1) {
1731 // set the new fLength
1732 int32_t capacity
=getCapacity();
1734 // the new length is the string length, capped by fCapacity
1735 const UChar
*array
=getArrayStart(), *p
=array
, *limit
=array
+capacity
;
1736 while(p
<limit
&& *p
!=0) {
1739 newLength
=(int32_t)(p
-array
);
1740 } else if(newLength
>capacity
) {
1743 setLength(newLength
);
1744 fUnion
.fFields
.fLengthAndFlags
&=~kOpenGetBuffer
;
1748 //========================================
1750 //========================================
1752 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity
,
1753 int32_t growCapacity
,
1755 int32_t **pBufferToDelete
,
1757 // default parameters need to be static, therefore
1758 // the defaults are -1 to have convenience defaults
1759 if(newCapacity
== -1) {
1760 newCapacity
= getCapacity();
1763 // while a getBuffer(minCapacity) is "open",
1764 // prevent any modifications of the string by returning FALSE here
1765 // if the string is bogus, then only an assignment or similar can revive it
1771 * We need to make a copy of the array if
1772 * the buffer is read-only, or
1773 * the buffer is refCounted (shared), and refCount>1, or
1774 * the buffer is too small.
1775 * Return FALSE if memory could not be allocated.
1778 fUnion
.fFields
.fLengthAndFlags
& kBufferIsReadonly
||
1779 (fUnion
.fFields
.fLengthAndFlags
& kRefCounted
&& refCount() > 1) ||
1780 newCapacity
> getCapacity()
1782 // check growCapacity for default value and use of the stack buffer
1783 if(growCapacity
< 0) {
1784 growCapacity
= newCapacity
;
1785 } else if(newCapacity
<= US_STACKBUF_SIZE
&& growCapacity
> US_STACKBUF_SIZE
) {
1786 growCapacity
= US_STACKBUF_SIZE
;
1790 UChar oldStackBuffer
[US_STACKBUF_SIZE
];
1792 int32_t oldLength
= length();
1793 int16_t flags
= fUnion
.fFields
.fLengthAndFlags
;
1795 if(flags
&kUsingStackBuffer
) {
1796 U_ASSERT(!(flags
&kRefCounted
)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */
1797 if(doCopyArray
&& growCapacity
> US_STACKBUF_SIZE
) {
1798 // copy the stack buffer contents because it will be overwritten with
1799 // fUnion.fFields values
1800 us_arrayCopy(fUnion
.fStackFields
.fBuffer
, 0, oldStackBuffer
, 0, oldLength
);
1801 oldArray
= oldStackBuffer
;
1803 oldArray
= NULL
; // no need to copy from the stack buffer to itself
1806 oldArray
= fUnion
.fFields
.fArray
;
1807 U_ASSERT(oldArray
!=NULL
); /* when stack buffer is not used, oldArray must have a non-NULL reference */
1810 // allocate a new array
1811 if(allocate(growCapacity
) ||
1812 (newCapacity
< growCapacity
&& allocate(newCapacity
))
1815 // copy the contents
1816 // do not copy more than what fits - it may be smaller than before
1817 int32_t minLength
= oldLength
;
1818 newCapacity
= getCapacity();
1819 if(newCapacity
< minLength
) {
1820 minLength
= newCapacity
;
1822 if(oldArray
!= NULL
) {
1823 us_arrayCopy(oldArray
, 0, getArrayStart(), 0, minLength
);
1825 setLength(minLength
);
1830 // release the old array
1831 if(flags
& kRefCounted
) {
1832 // the array is refCounted; decrement and release if 0
1833 u_atomic_int32_t
*pRefCount
= ((u_atomic_int32_t
*)oldArray
- 1);
1834 if(umtx_atomic_dec(pRefCount
) == 0) {
1835 if(pBufferToDelete
== 0) {
1836 // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t
1837 // is defined as volatile. (Volatile has useful non-standard behavior
1838 // with this compiler.)
1839 uprv_free((void *)pRefCount
);
1841 // the caller requested to delete it himself
1842 *pBufferToDelete
= (int32_t *)pRefCount
;
1847 // not enough memory for growCapacity and not even for the smaller newCapacity
1848 // reset the old values for setToBogus() to release the array
1849 if(!(flags
&kUsingStackBuffer
)) {
1850 fUnion
.fFields
.fArray
= oldArray
;
1852 fUnion
.fFields
.fLengthAndFlags
= flags
;
1860 // UnicodeStringAppendable ------------------------------------------------- ***
1862 UnicodeStringAppendable::~UnicodeStringAppendable() {}
1865 UnicodeStringAppendable::appendCodeUnit(UChar c
) {
1866 return str
.doAppend(&c
, 0, 1).isWritable();
1870 UnicodeStringAppendable::appendCodePoint(UChar32 c
) {
1871 UChar buffer
[U16_MAX_LENGTH
];
1872 int32_t cLength
= 0;
1873 UBool isError
= FALSE
;
1874 U16_APPEND(buffer
, cLength
, U16_MAX_LENGTH
, c
, isError
);
1875 return !isError
&& str
.doAppend(buffer
, 0, cLength
).isWritable();
1879 UnicodeStringAppendable::appendString(const UChar
*s
, int32_t length
) {
1880 return str
.doAppend(s
, 0, length
).isWritable();
1884 UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity
) {
1885 return str
.cloneArrayIfNeeded(str
.length() + appendCapacity
);
1889 UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity
,
1890 int32_t desiredCapacityHint
,
1891 UChar
*scratch
, int32_t scratchCapacity
,
1892 int32_t *resultCapacity
) {
1893 if(minCapacity
< 1 || scratchCapacity
< minCapacity
) {
1894 *resultCapacity
= 0;
1897 int32_t oldLength
= str
.length();
1898 if(minCapacity
<= (kMaxCapacity
- oldLength
) &&
1899 desiredCapacityHint
<= (kMaxCapacity
- oldLength
) &&
1900 str
.cloneArrayIfNeeded(oldLength
+ minCapacity
, oldLength
+ desiredCapacityHint
)) {
1901 *resultCapacity
= str
.getCapacity() - oldLength
;
1902 return str
.getArrayStart() + oldLength
;
1904 *resultCapacity
= scratchCapacity
;
1912 U_CAPI
int32_t U_EXPORT2
1913 uhash_hashUnicodeString(const UElement key
) {
1914 const UnicodeString
*str
= (const UnicodeString
*) key
.pointer
;
1915 return (str
== NULL
) ? 0 : str
->hashCode();
1918 // Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
1919 // does not depend on hashtable code.
1920 U_CAPI UBool U_EXPORT2
1921 uhash_compareUnicodeString(const UElement key1
, const UElement key2
) {
1922 const UnicodeString
*str1
= (const UnicodeString
*) key1
.pointer
;
1923 const UnicodeString
*str2
= (const UnicodeString
*) key2
.pointer
;
1927 if (str1
== NULL
|| str2
== NULL
) {
1930 return *str1
== *str2
;
1933 #ifdef U_STATIC_IMPLEMENTATION
1935 This should never be called. It is defined here to make sure that the
1936 virtual vector deleting destructor is defined within unistr.cpp.
1937 The vector deleting destructor is already a part of UObject,
1938 but defining it here makes sure that it is included with this object file.
1939 This makes sure that static library dependencies are kept to a minimum.
1941 static void uprv_UnicodeStringDummy(void) {
1942 delete [] (new UnicodeString
[2]);