2 ******************************************************************************
3 * Copyright (C) 1999-2016, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ******************************************************************************
9 * Modification History:
11 * Date Name Description
12 * 09/25/98 stephen Creation.
13 * 04/20/99 stephen Overhauled per 4/16 code review.
14 * 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX
15 * 11/18/99 aliu Added handleReplaceBetween() to make inherit from
17 * 06/25/01 grhoten Removed the dependency on iostream
18 ******************************************************************************
21 #include "unicode/utypes.h"
22 #include "unicode/appendable.h"
23 #include "unicode/putil.h"
26 #include "unicode/ustring.h"
27 #include "unicode/unistr.h"
28 #include "unicode/utf.h"
29 #include "unicode/utf16.h"
// Debug helper that walks every index of s and writes to cout.
// When c is >= 0x007E or < 0x0020, the code unit s[i] is written in hex
// between the markers [0x and ].
// NOTE(review): c is presumably s[i]; its assignment is not visible here - confirm.
42 print(const UnicodeString
& s
,
47 for(int i
= 0; i
< s
.length(); ++i
) {
49 if(c
>= 0x007E || c
< 0x0020)
50 cout
<< "[0x" << hex
<< s
[i
] << "]";
64 for(int i
= 0; i
< len
; ++i
) {
66 if(c
>= 0x007E || c
< 0x0020)
67 cout
<< "[0x" << hex
<< s
[i
] << "]";
76 // Local function definitions for now
78 // need to copy areas that may overlap
// Copies count code units from src+srcStart to dst+dstStart.
// uprv_memmove() is used (not a plain copy) because the two ranges may overlap;
// the byte count passed to it is count*sizeof(*src).
81 us_arrayCopy(const UChar
*src
, int32_t srcStart
,
82 UChar
*dst
, int32_t dstStart
, int32_t count
)
85 uprv_memmove(dst
+dstStart
, src
+srcStart
, (size_t)(count
*sizeof(*src
)));
89 // u_unescapeAt() callback to get a UChar from a UnicodeString
// Adapter with the callback shape used by u_unescapeAt(): context is cast to
// icu::UnicodeString* and the code unit at offset is fetched through charAt().
91 static UChar U_CALLCONV
92 UnicodeString_charAt(int32_t offset
, void *context
) {
93 return ((icu::UnicodeString
*) context
)->charAt(offset
);
99 /* The Replaceable virtual destructor can't be defined in the header
100 due to how AIX works with multiple definitions of virtual functions.
102 Replaceable::~Replaceable() {}
104 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString
)
// Binary operator+ for two UnicodeStrings.
// The expression starts from a temporary constructed with capacity
// s1.length()+s2.length()+1, fill code point 0 and fill count 0 (capacity only,
// no fill), and continues with member calls chained after the trailing dot.
// NOTE(review): the chained calls presumably append s1 and s2 - confirm.
106 UnicodeString U_EXPORT2
107 operator+ (const UnicodeString
&s1
, const UnicodeString
&s2
) {
109 UnicodeString(s1
.length()+s2
.length()+1, (UChar32
)0, 0).
114 //========================================
115 // Reference Counting functions, put at top of file so that optimizing compilers
116 // have a chance to automatically inline.
117 //========================================
// Atomically increments the reference counter with umtx_atomic_inc().
// The counter is addressed as the u_atomic_int32_t located one element
// before fArray.
120 UnicodeString::addRef() {
121 umtx_atomic_inc((u_atomic_int32_t
*)fUnion
.fFields
.fArray
- 1);
// Atomically decrements the reference counter stored one u_atomic_int32_t
// before fArray and returns the value reported by umtx_atomic_dec().
125 UnicodeString::removeRef() {
126 return umtx_atomic_dec((u_atomic_int32_t
*)fUnion
.fFields
.fArray
- 1);
// Returns the current reference counter, read with umtx_loadAcquire() from
// the u_atomic_int32_t located one element before fArray.
130 UnicodeString::refCount() const {
131 return umtx_loadAcquire(*((u_atomic_int32_t
*)fUnion
.fFields
.fArray
- 1));
// Drops this string's reference to a reference-counted buffer.
// Only when the kRefCounted flag is set is removeRef() called, and only when
// it returns 0 is the memory freed. The pointer given to uprv_free() is fArray
// minus one int32_t, i.e. the address of the counter in front of the text.
135 UnicodeString::releaseArray() {
136 if((fUnion
.fFields
.fLengthAndFlags
& kRefCounted
) && removeRef() == 0) {
137 uprv_free((int32_t *)fUnion
.fFields
.fArray
- 1);
143 //========================================
145 //========================================
147 // The default constructor is inline in unistr.h.
// Constructs a string with a requested capacity, optionally filled with
// count copies of the code point c.
//   capacity - requested buffer size in code units
//   c        - fill code point; values above 0x10ffff select the allocate-only path
//   count    - number of copies of c; values <= 0 select the allocate-only path
// For a real fill, U16_LENGTH(c) code units are needed per copy, so the
// required length is count * unitCount and is compared against capacity
// before allocate(capacity) is called.
// NOTE(review): count * unitCount is an int32_t multiplication with no
// overflow guard next to it - confirm very large counts are handled.
149 UnicodeString::UnicodeString(int32_t capacity
, UChar32 c
, int32_t count
) {
150 fUnion
.fFields
.fLengthAndFlags
= 0;
151 if(count
<= 0 || (uint32_t)c
> 0x10ffff) {
152 // just allocate and do not do anything else
155 // count > 0, allocate and fill the new string with count c's
156 int32_t unitCount
= U16_LENGTH(c
), length
= count
* unitCount
;
157 if(capacity
< length
) {
160 if(allocate(capacity
)) {
161 UChar
*array
= getArrayStart();
164 // fill the new string with c
166 // fill with length UChars
168 array
[i
++] = (UChar
)c
;
171 // get the code units for c
172 UChar units
[U16_MAX_LENGTH
];
173 U16_APPEND_UNSAFE(units
, i
, c
);
175 // now it must be i==unitCount
178 // for Unicode, unitCount can only be 1, 2, 3, or 4
179 // 1 is handled above
182 while(unitIdx
< unitCount
) {
183 array
[i
++]=units
[unitIdx
++];
// Constructs a one-code-unit string: the flags are set to kLength1 combined
// with kShortString and ch is stored in element 0 of the in-object buffer.
192 UnicodeString::UnicodeString(UChar ch
) {
193 fUnion
.fFields
.fLengthAndFlags
= kLength1
| kShortString
;
194 fUnion
.fStackFields
.fBuffer
[0] = ch
;
// Constructs a string holding the single code point ch.
// The string starts as an empty short string; U16_APPEND then encodes ch
// into the in-object buffer (limit US_STACKBUF_SIZE), advancing i and
// reporting failure through isError.
197 UnicodeString::UnicodeString(UChar32 ch
) {
198 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
200 UBool isError
= FALSE
;
201 U16_APPEND(fUnion
.fStackFields
.fBuffer
, i
, US_STACKBUF_SIZE
, ch
, isError
);
202 // We test isError so that the compiler does not complain that we don't.
203 // If isError then i==0 which is what we want anyway.
// Constructs a copy of the text: starts as an empty short string and then
// appends text through doAppend() with start 0 and length -1.
// NOTE(review): a length of -1 presumably means NUL-terminated - confirm in doAppend().
209 UnicodeString::UnicodeString(const UChar
*text
) {
210 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
211 doAppend(text
, 0, -1);
// Constructs a copy of textLength code units of text: starts as an empty
// short string and appends through doAppend(text, 0, textLength).
214 UnicodeString::UnicodeString(const UChar
*text
,
215 int32_t textLength
) {
216 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
217 doAppend(text
, 0, textLength
);
// Read-only aliasing constructor: the flags start as kReadonlyAlias and the
// caller-supplied text is wrapped by setArray() rather than copied.
// The argument check rejects
//   - textLength < -1,
//   - textLength == -1 when isTerminated is false (length cannot be computed),
//   - isTerminated with text[textLength] not being NUL.
// A textLength of -1 is resolved with u_strlen(). The capacity handed to
// setArray() is textLength+1 when isTerminated (the NUL is counted),
// otherwise textLength.
// NOTE(review): the rejected case presumably makes the string bogus - confirm.
220 UnicodeString::UnicodeString(UBool isTerminated
,
222 int32_t textLength
) {
223 fUnion
.fFields
.fLengthAndFlags
= kReadonlyAlias
;
225 // treat as an empty string, do not alias
227 } else if(textLength
< -1 ||
228 (textLength
== -1 && !isTerminated
) ||
229 (textLength
>= 0 && isTerminated
&& text
[textLength
] != 0)
233 if(textLength
== -1) {
234 // text is terminated, or else it would have failed the above test
235 textLength
= u_strlen(text
);
237 setArray((UChar
*)text
, textLength
, isTerminated
? textLength
+ 1 : textLength
);
// Writable aliasing constructor: the flags start as kWritableAlias and the
// caller-supplied buffer buff is adopted through setArray().
// The argument check rejects buffLength < -1, buffCapacity < 0 and
// buffLength > buffCapacity. With buffLength == -1 the length is found by
// scanning for a NUL, but never past buff + buffCapacity.
// NOTE(review): the rejected case presumably makes the string bogus - confirm.
241 UnicodeString::UnicodeString(UChar
*buff
,
243 int32_t buffCapacity
) {
244 fUnion
.fFields
.fLengthAndFlags
= kWritableAlias
;
246 // treat as an empty string, do not alias
248 } else if(buffLength
< -1 || buffCapacity
< 0 || buffLength
> buffCapacity
) {
251 if(buffLength
== -1) {
252 // fLength = u_strlen(buff); but do not look beyond buffCapacity
253 const UChar
*p
= buff
, *limit
= buff
+ buffCapacity
;
254 while(p
!= limit
&& *p
!= 0) {
257 buffLength
= (int32_t)(p
- buff
);
259 setArray(buff
, buffLength
, buffCapacity
);
// Constructs a string from char data using u_charsToUChars() (the unnamed
// EInvariant parameter only selects this overload).
// The string starts as an empty short string. When needed, length is taken
// from uprv_strlen(src). cloneArrayIfNeeded(length, length, FALSE) provides
// the buffer, and the conversion writes length code units to getArrayStart().
// NOTE(review): the final length is presumably set after the conversion - confirm.
263 UnicodeString::UnicodeString(const char *src
, int32_t length
, EInvariant
) {
264 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
266 // treat as an empty string
269 length
=(int32_t)uprv_strlen(src
);
271 if(cloneArrayIfNeeded(length
, length
, FALSE
)) {
272 u_charsToUChars(src
, getArrayStart(), length
);
280 #if U_CHARSET_IS_UTF8
// Variant compiled when U_CHARSET_IS_UTF8 is set: starts as an empty short
// string and, for a non-null codepageData, fills it through setToUTF8().
// A null pointer leaves the string empty.
282 UnicodeString::UnicodeString(const char *codepageData
) {
283 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
284 if(codepageData
!= 0) {
285 setToUTF8(codepageData
);
// Variant compiled when U_CHARSET_IS_UTF8 is set, with an explicit length.
// A null pointer, a zero length or a length below -1 takes the do-nothing
// branch. A dataLength of -1 is replaced by uprv_strlen(codepageData), and
// the bytes are then converted through setToUTF8(StringPiece(...)).
289 UnicodeString::UnicodeString(const char *codepageData
, int32_t dataLength
) {
290 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
291 // if there's nothing to convert, do nothing
292 if(codepageData
== 0 || dataLength
== 0 || dataLength
< -1) {
295 if(dataLength
== -1) {
296 dataLength
= (int32_t)uprv_strlen(codepageData
);
298 setToUTF8(StringPiece(codepageData
, dataLength
));
301 // else see unistr_cnv.cpp
// Copy constructor: the flags word is first set to kShortString.
// NOTE(review): the contents of that are presumably copied by a following
// call - confirm.
304 UnicodeString::UnicodeString(const UnicodeString
& that
) {
305 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
309 #if U_HAVE_RVALUE_REFERENCES
// Move constructor (compiled when U_HAVE_RVALUE_REFERENCES is set), declared
// U_NOEXCEPT. The flags word is first set to kShortString.
// NOTE(review): the fields of src are presumably taken over by a following
// call - confirm.
310 UnicodeString::UnicodeString(UnicodeString
&&src
) U_NOEXCEPT
{
311 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
// Constructs a string from the part of that beginning at srcStart: starts as
// an empty short string and delegates to setTo(that, srcStart).
316 UnicodeString::UnicodeString(const UnicodeString
& that
,
318 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
319 setTo(that
, srcStart
);
// Constructs a string from the srcStart/srcLength range of that: starts as
// an empty short string and delegates to setTo(that, srcStart, srcLength).
322 UnicodeString::UnicodeString(const UnicodeString
& that
,
325 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
326 setTo(that
, srcStart
, srcLength
);
329 // Replaceable base class clone() default implementation, does not clone
331 Replaceable::clone() const {
335 // UnicodeString overrides clone() with a real implementation
// Returns a new heap object created with the copy constructor from *this.
// NOTE(review): the caller presumably owns (and must delete) the result - confirm.
337 UnicodeString::clone() const {
338 return new UnicodeString(*this);
341 //========================================
343 //========================================
// Selects storage for at least capacity code units.
// Requests up to US_STACKBUF_SIZE use the in-object buffer (kShortString).
// Larger requests call uprv_malloc() for a block measured in int32_t words:
// room for the leading int32_t reference counter plus capacity+1 code units,
// rounded up to a multiple of 16 bytes. On that path fArray, fCapacity
// (derived from words-1, i.e. excluding the counter) and kLongString are set.
// The last three assignments put the object into the bogus state with a null
// array and zero capacity.
// NOTE(review): those are presumably the uprv_malloc() failure path - confirm.
346 UnicodeString::allocate(int32_t capacity
) {
347 if(capacity
<= US_STACKBUF_SIZE
) {
348 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
350 // count bytes for the refCounter and the string capacity, and
351 // round up to a multiple of 16; then divide by 4 and allocate int32_t's
352 // to be safely aligned for the refCount
353 // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer()
354 int32_t words
= (int32_t)(((sizeof(int32_t) + (capacity
+ 1) * U_SIZEOF_UCHAR
+ 15) & ~15) >> 2);
355 int32_t *array
= (int32_t*) uprv_malloc( sizeof(int32_t) * words
);
357 // set initial refCount and point behind the refCount
360 // have fArray point to the first UChar
361 fUnion
.fFields
.fArray
= (UChar
*)array
;
362 fUnion
.fFields
.fCapacity
= (int32_t)((words
- 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR
));
363 fUnion
.fFields
.fLengthAndFlags
= kLongString
;
365 fUnion
.fFields
.fLengthAndFlags
= kIsBogus
;
366 fUnion
.fFields
.fArray
= 0;
367 fUnion
.fFields
.fCapacity
= 0;
374 //========================================
376 //========================================
378 #ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
// Diagnostics compiled only with UNISTR_COUNT_FINAL_STRING_LENGTHS.
// finalLengthCounts holds one atomic counter per length value (0x400 slots);
// beyondCount is a separate atomic counter.
// unistr_printLengths() prints one line per length 0..59, then sums
// beyondCount and every remaining finalLengthCounts slot into one total that
// is printed on the final line labelled >59.
379 static u_atomic_int32_t finalLengthCounts
[0x400]; // UnicodeString::kMaxShortLength+1
380 static u_atomic_int32_t
beyondCount(0);
382 U_CAPI
void unistr_printLengths() {
384 for(i
= 0; i
<= 59; ++i
) {
385 printf("%2d, %9d\n", i
, (int32_t)finalLengthCounts
[i
]);
387 int32_t beyond
= beyondCount
;
388 for(; i
< UPRV_LENGTHOF(finalLengthCounts
); ++i
) {
389 beyond
+= finalLengthCounts
[i
];
391 printf(">59, %9d\n", beyond
);
// Destructor. With UNISTR_COUNT_FINAL_STRING_LENGTHS defined it records
// statistics first: strings without the kOpenGetBuffer, kReadonlyAlias or
// kWritableAlias flags bump the finalLengthCounts slot for their short
// length, or beyondCount when hasShortLength() is false.
// NOTE(review): the buffer is presumably released after this section - confirm.
395 UnicodeString::~UnicodeString()
397 #ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
398 // Count lengths of strings at the end of their lifetime.
399 // Useful for discussion of a desirable stack buffer size.
400 // Count the contents length, not the optional NUL terminator nor further capacity.
401 // Ignore open-buffer strings and strings which alias external storage.
402 if((fUnion
.fFields
.fLengthAndFlags
&(kOpenGetBuffer
|kReadonlyAlias
|kWritableAlias
)) == 0) {
403 if(hasShortLength()) {
404 umtx_atomic_inc(finalLengthCounts
+ getShortLength());
406 umtx_atomic_inc(&beyondCount
);
414 //========================================
416 //========================================
// Factory: creates a default-constructed string and fills it from the UTF-8
// bytes in utf8 through setToUTF8().
// NOTE(review): result is presumably what is returned - confirm.
418 UnicodeString
UnicodeString::fromUTF8(const StringPiece
&utf8
) {
419 UnicodeString result
;
420 result
.setToUTF8(utf8
);
// Factory: converts length code points of UTF-32 text into a new string.
// The first capacity guess is US_STACKBUF_SIZE for short input, otherwise
// length + length/16 + 4. The conversion writes into a buffer obtained with
// result.getBuffer(capacity) and substitutes 0xfffd for bad input.
// On U_BUFFER_OVERFLOW_ERROR the capacity is raised to length16 + 1.
// NOTE(review): the conversion is presumably retried in a loop after that,
// and any other failure presumably makes result bogus - confirm.
424 UnicodeString
UnicodeString::fromUTF32(const UChar32
*utf32
, int32_t length
) {
425 UnicodeString result
;
427 // Most UTF-32 strings will be BMP-only and result in a same-length
428 // UTF-16 string. We overestimate the capacity just slightly,
429 // just in case there are a few supplementary characters.
430 if(length
<= US_STACKBUF_SIZE
) {
431 capacity
= US_STACKBUF_SIZE
;
433 capacity
= length
+ (length
>> 4) + 4;
436 UChar
*utf16
= result
.getBuffer(capacity
);
438 UErrorCode errorCode
= U_ZERO_ERROR
;
439 u_strFromUTF32WithSub(utf16
, result
.getCapacity(), &length16
,
441 0xfffd, // Substitution character.
442 NULL
, // Don't care about number of substitutions.
444 result
.releaseBuffer(length16
);
445 if(errorCode
== U_BUFFER_OVERFLOW_ERROR
) {
446 capacity
= length16
+ 1; // +1 for the terminating NUL.
448 } else if(U_FAILURE(errorCode
)) {
456 //========================================
458 //========================================
// Copy assignment: forwards to copyFrom(src) without the fastCopy argument.
461 UnicodeString::operator=(const UnicodeString
&src
) {
462 return copyFrom(src
);
// Assignment that forwards to copyFrom(src, TRUE), i.e. with fastCopy enabled.
466 UnicodeString::fastCopyFrom(const UnicodeString
&src
) {
467 return copyFrom(src
, TRUE
);
// Assigns the contents of src to this string; shared implementation behind
// operator= and fastCopyFrom().
// After the early checks (self-assignment, bogus source, empty source) the
// flags word is copied from src and its storage bits (kAllStorageFlags)
// select the strategy:
//   - stack buffer: copy getShortLength() code units into the local buffer;
//   - reference-counted buffer: addRef() on src, then share fArray/fCapacity;
//   - read-only alias: copy the alias fields; without fastCopy control falls
//     through to the writable-alias case instead;
//   - writable alias: allocate() a fresh buffer and copy src.length() units.
// fLength is copied separately only when hasShortLength() is false.
// The final assignments set this object to bogus (null array, zero capacity);
// they are reached for a bogus src and when allocation fails.
471 UnicodeString::copyFrom(const UnicodeString
&src
, UBool fastCopy
) {
472 // if assigning to ourselves, do nothing
477 // is the right side bogus?
483 // delete the current contents
487 // empty string - use the stack buffer
492 // fLength>0 and not an "open" src.getBuffer(minCapacity)
493 fUnion
.fFields
.fLengthAndFlags
= src
.fUnion
.fFields
.fLengthAndFlags
;
494 switch(src
.fUnion
.fFields
.fLengthAndFlags
& kAllStorageFlags
) {
496 // short string using the stack buffer, do the same
497 uprv_memcpy(fUnion
.fStackFields
.fBuffer
, src
.fUnion
.fStackFields
.fBuffer
,
498 getShortLength() * U_SIZEOF_UCHAR
);
501 // src uses a refCounted string buffer, use that buffer with refCount
502 // src is const, use a cast - we don't actually change it
503 ((UnicodeString
&)src
).addRef();
504 // copy all fields, share the reference-counted buffer
505 fUnion
.fFields
.fArray
= src
.fUnion
.fFields
.fArray
;
506 fUnion
.fFields
.fCapacity
= src
.fUnion
.fFields
.fCapacity
;
507 if(!hasShortLength()) {
508 fUnion
.fFields
.fLength
= src
.fUnion
.fFields
.fLength
;
513 // src is a readonly alias, do the same
514 // -> maintain the readonly alias as such
515 fUnion
.fFields
.fArray
= src
.fUnion
.fFields
.fArray
;
516 fUnion
.fFields
.fCapacity
= src
.fUnion
.fFields
.fCapacity
;
517 if(!hasShortLength()) {
518 fUnion
.fFields
.fLength
= src
.fUnion
.fFields
.fLength
;
522 // else if(!fastCopy) fall through to case kWritableAlias
523 // -> allocate a new buffer and copy the contents
525 case kWritableAlias
: {
526 // src is a writable alias; we make a copy of that instead
527 int32_t srcLength
= src
.length();
528 if(allocate(srcLength
)) {
529 uprv_memcpy(getArrayStart(), src
.getArrayStart(), srcLength
* U_SIZEOF_UCHAR
);
530 setLength(srcLength
);
533 // if there is not enough memory, then fall through to setting to bogus
537 // if src is bogus, set ourselves to bogus
538 // do not call setToBogus() here because fArray and flags are not consistent here
539 fUnion
.fFields
.fLengthAndFlags
= kIsBogus
;
540 fUnion
.fFields
.fArray
= 0;
541 fUnion
.fFields
.fCapacity
= 0;
// Move assignment helper, declared U_NOEXCEPT: takes over the fields of src
// through copyFieldsFrom(src, TRUE), which also asks for src to be set to bogus.
// NOTE(review): the current buffer is presumably released before the fields
// are overwritten - confirm.
548 UnicodeString
&UnicodeString::moveFrom(UnicodeString
&src
) U_NOEXCEPT
{
549 // No explicit check for self move assignment, consistent with standard library.
550 // Self move assignment causes no crash nor leak but might make the object bogus.
552 copyFieldsFrom(src
, TRUE
);
556 // Same as moveFrom() except without memory management.
// Raw field transfer from src into this object; no reference counting and no
// release of the previous buffer happens here.
// The flags word is copied first. With kUsingStackBuffer the short contents
// are copied; otherwise fArray, fCapacity and, when hasShortLength() is
// false, fLength are copied.
// The closing assignments turn src into a bogus string without freeing anything.
// NOTE(review): they are presumably guarded by setSrcToBogus - confirm.
557 void UnicodeString::copyFieldsFrom(UnicodeString
&src
, UBool setSrcToBogus
) U_NOEXCEPT
{
558 int16_t lengthAndFlags
= fUnion
.fFields
.fLengthAndFlags
= src
.fUnion
.fFields
.fLengthAndFlags
;
559 if(lengthAndFlags
& kUsingStackBuffer
) {
560 // Short string using the stack buffer, copy the contents.
561 // Check for self assignment to prevent "overlap in memcpy" warnings,
562 // although it should be harmless to copy a buffer to itself exactly.
564 uprv_memcpy(fUnion
.fStackFields
.fBuffer
, src
.fUnion
.fStackFields
.fBuffer
,
565 getShortLength() * U_SIZEOF_UCHAR
);
568 // In all other cases, copy all fields.
569 fUnion
.fFields
.fArray
= src
.fUnion
.fFields
.fArray
;
570 fUnion
.fFields
.fCapacity
= src
.fUnion
.fFields
.fCapacity
;
571 if(!hasShortLength()) {
572 fUnion
.fFields
.fLength
= src
.fUnion
.fFields
.fLength
;
575 // Set src to bogus without releasing any memory.
576 src
.fUnion
.fFields
.fLengthAndFlags
= kIsBogus
;
577 src
.fUnion
.fFields
.fArray
= NULL
;
578 src
.fUnion
.fFields
.fCapacity
= 0;
// Exchanges the contents of this string and other, declared U_NOEXCEPT.
// The exchange is a three-step raw field rotation through temp
// (this -> temp, other -> this, temp -> other), each step using
// copyFieldsFrom(..., FALSE) so that no source is reset in between.
// temp is finally reset to an empty short string so that it does not act on
// the buffer that now belongs to other.
583 void UnicodeString::swap(UnicodeString
&other
) U_NOEXCEPT
{
584 UnicodeString temp
; // Empty short string: Known not to need releaseArray().
585 // Copy fields without resetting source values in between.
586 temp
.copyFieldsFrom(*this, FALSE
);
587 this->copyFieldsFrom(other
, FALSE
);
588 other
.copyFieldsFrom(temp
, FALSE
);
589 // Set temp to an empty string so that other's memory is not released twice.
590 temp
.fUnion
.fFields
.fLengthAndFlags
= kShortString
;
593 //========================================
594 // Miscellaneous operations
595 //========================================
// Returns a copy of this string with backslash escape sequences decoded.
// result is pre-sized to length() code units. The scan copies literal text in
// runs: when a 0x5C (backslash) is found at i-1, the run from prev up to
// (but excluding) the backslash is appended and unescapeAt(i) decodes the
// escape, advancing i. At the end of the input the remaining run from prev to
// len is appended. An invalid escape empties result and stops the scan, so an
// empty string is returned in that case.
597 UnicodeString
UnicodeString::unescape() const {
598 UnicodeString
result(length(), (UChar32
)0, (int32_t)0); // construct with capacity
599 if (result
.isBogus()) {
602 const UChar
*array
= getBuffer();
603 int32_t len
= length();
605 for (int32_t i
=0;;) {
607 result
.append(array
, prev
, len
- prev
);
610 if (array
[i
++] == 0x5C /*'\\'*/) {
611 result
.append(array
, prev
, (i
- 1) - prev
);
612 UChar32 c
= unescapeAt(i
); // advances i
614 result
.remove(); // return empty string
615 break; // invalid escape sequence
// Decodes one escape sequence starting at offset by delegating to
// u_unescapeAt(), with UnicodeString_charAt as the character source and this
// string as its context. offset is passed by address, so the callee can
// update it.
624 UChar32
UnicodeString::unescapeAt(int32_t &offset
) const {
625 return u_unescapeAt(UnicodeString_charAt
, &offset
, length(), (void*)this);
628 //========================================
629 // Read-only implementation
630 //========================================
// Equality helper: compares len code units of both buffers with
// uprv_memcmp() and reports whether they are identical. The caller must
// already have established the preconditions listed below.
632 UnicodeString::doEquals(const UnicodeString
&text
, int32_t len
) const {
633 // Requires: this & text not bogus and have same lengths.
634 // Byte-wise comparison works for equality regardless of endianness.
635 return uprv_memcmp(getArrayStart(), text
.getArrayStart(), len
* U_SIZEOF_UCHAR
) == 0;
// Compares the start/length range of this string with srcChars in code unit
// order and produces a small signed result.
// A NULL srcChars is treated as an empty string: the result is 0 when the
// pinned length is 0, else 1. Otherwise the common prefix (minLength units)
// is compared, bytewise with uprv_memcmp() in the big-endian variant and unit
// by unit in the little-endian variant. A non-zero difference is reduced
// with (result >> 15 | 1), which keeps the sign and can never be 0.
// NOTE(review): srcChars is advanced by srcStart below, and the later
// u_strlen(srcChars + srcStart) adds srcStart a second time; for
// srcLength < 0 with srcStart != 0 this looks like a double offset - confirm.
639 UnicodeString::doCompare( int32_t start
,
641 const UChar
*srcChars
,
643 int32_t srcLength
) const
645 // compare illegal string values
650 // pin indices to legal values
651 pinIndices(start
, length
);
653 if(srcChars
== NULL
) {
654 // treat const UChar *srcChars==NULL as an empty string
655 return length
== 0 ? 0 : 1;
658 // get the correct pointer
659 const UChar
*chars
= getArrayStart();
662 srcChars
+= srcStart
;
667 // get the srcLength if necessary
669 srcLength
= u_strlen(srcChars
+ srcStart
);
672 // are we comparing different lengths?
673 if(length
!= srcLength
) {
674 if(length
< srcLength
) {
678 minLength
= srcLength
;
687 * note that uprv_memcmp() returns an int but we return an int8_t;
688 * we need to take care not to truncate the result -
689 * one way to do this is to right-shift the value to
690 * move the sign bit into the lower 8 bits and making sure that this
691 * does not become 0 itself
694 if(minLength
> 0 && chars
!= srcChars
) {
698 // big-endian: byte comparison works
699 result
= uprv_memcmp(chars
, srcChars
, minLength
* sizeof(UChar
));
701 return (int8_t)(result
>> 15 | 1);
704 // little-endian: compare UChar units
706 result
= ((int32_t)*(chars
++) - (int32_t)*(srcChars
++));
708 return (int8_t)(result
>> 15 | 1);
710 } while(--minLength
> 0);
716 /* String compare in code point order - doCompare() compares in code unit order. */
// Compares the start/length range of this string with srcChars in code point
// order by delegating to uprv_strCompare() (last two arguments FALSE, TRUE).
// A NULL srcChars is treated as empty: srcStart and srcLength are zeroed and
// NULL is passed through. The 32-bit difference is reduced with
// (diff >> 15 | 1), which keeps the sign and can never be 0.
// NOTE(review): a zero diff is presumably returned as 0 by a separate branch - confirm.
718 UnicodeString::doCompareCodePointOrder(int32_t start
,
720 const UChar
*srcChars
,
722 int32_t srcLength
) const
724 // compare illegal string values
725 // treat const UChar *srcChars==NULL as an empty string
730 // pin indices to legal values
731 pinIndices(start
, length
);
733 if(srcChars
== NULL
) {
734 srcStart
= srcLength
= 0;
737 int32_t diff
= uprv_strCompare(getArrayStart() + start
, length
, (srcChars
!=NULL
)?(srcChars
+ srcStart
):NULL
, srcLength
, FALSE
, TRUE
);
738 /* translate the 32-bit result into an 8-bit one */
740 return (int8_t)(diff
>> 15 | 1);
747 UnicodeString::getLength() const {
// Thin forwarder that returns charAt(offset).
752 UnicodeString::getCharAt(int32_t offset
) const {
753 return charAt(offset
);
// Thin forwarder that returns char32At(offset).
757 UnicodeString::getChar32At(int32_t offset
) const {
758 return char32At(offset
);
// Returns the code point that covers the code unit at offset.
// The unsigned comparison rejects negative offsets and offsets >= length()
// in a single test. In range, U16_GET reads the code point c from the buffer
// with bounds 0 and len; out of range, kInvalidUChar is returned.
762 UnicodeString::char32At(int32_t offset
) const
764 int32_t len
= length();
765 if((uint32_t)offset
< (uint32_t)len
) {
766 const UChar
*array
= getArrayStart();
768 U16_GET(array
, 0, offset
, len
, c
);
771 return kInvalidUChar
;
// For an offset inside the string (checked with one unsigned comparison),
// U16_SET_CP_START adjusts offset to the start of the code point containing
// it, using 0 as the lower bound.
// NOTE(review): the result for out-of-range offsets is not visible here - confirm.
776 UnicodeString::getChar32Start(int32_t offset
) const {
777 if((uint32_t)offset
< (uint32_t)length()) {
778 const UChar
*array
= getArrayStart();
779 U16_SET_CP_START(array
, 0, offset
);
// For an offset inside the string (checked with one unsigned comparison),
// U16_SET_CP_LIMIT adjusts offset to the limit (end) of the code point
// around it, using 0 and len as bounds.
// NOTE(review): the result for out-of-range offsets is not visible here - confirm.
787 UnicodeString::getChar32Limit(int32_t offset
) const {
788 int32_t len
= length();
789 if((uint32_t)offset
< (uint32_t)len
) {
790 const UChar
*array
= getArrayStart();
791 U16_SET_CP_LIMIT(array
, 0, offset
, len
);
// Counts the code points in the given range: start and length are first
// pinned to the string bounds, then the count comes from u_countChar32().
799 UnicodeString::countChar32(int32_t start
, int32_t length
) const {
800 pinIndices(start
, length
);
801 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
802 return u_countChar32(getArrayStart()+start
, length
);
// Asks whether the given range contains more than number code points: start
// and length are pinned to the string bounds and the answer comes from
// u_strHasMoreChar32Than().
806 UnicodeString::hasMoreChar32Than(int32_t start
, int32_t length
, int32_t number
) const {
807 pinIndices(start
, length
);
808 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
809 return u_strHasMoreChar32Than(getArrayStart()+start
, length
, number
);
// Moves index by delta code points. Forward movement uses
// U16_FWD_N(array, index, len, delta); backward movement uses
// U16_BACK_N(array, 0, index, -delta).
// NOTE(review): index is presumably clamped to 0..len first and forward is
// presumably chosen for delta > 0 - confirm.
813 UnicodeString::moveIndex32(int32_t index
, int32_t delta
) const {
815 int32_t len
= length();
818 } else if(index
>len
) {
822 const UChar
*array
= getArrayStart();
824 U16_FWD_N(array
, index
, len
, delta
);
826 U16_BACK_N(array
, 0, index
, -delta
);
// Copies the pinned start/length range of this string into dst at dstStart
// with us_arrayCopy(). The copy is skipped when the source address
// array + start equals the destination address dst + dstStart.
833 UnicodeString::doExtract(int32_t start
,
836 int32_t dstStart
) const
838 // pin indices to legal values
839 pinIndices(start
, length
);
841 // do not copy anything if we alias dst itself
842 const UChar
*array
= getArrayStart();
843 if(array
+ start
!= dst
+ dstStart
) {
844 us_arrayCopy(array
, start
, dst
, dstStart
, length
);
// Copies the whole string into dest and terminates it through
// u_terminateUChars(), whose return value is returned.
// Nothing is done if errorCode already signals failure. A bogus string, a
// negative destCapacity or a null dest with positive capacity sets
// U_ILLEGAL_ARGUMENT_ERROR. The copy itself happens only when the contents
// fit (0 < len <= destCapacity) and dest is not this string's own buffer.
849 UnicodeString::extract(UChar
*dest
, int32_t destCapacity
,
850 UErrorCode
&errorCode
) const {
851 int32_t len
= length();
852 if(U_SUCCESS(errorCode
)) {
853 if(isBogus() || destCapacity
<0 || (destCapacity
>0 && dest
==0)) {
854 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
856 const UChar
*array
= getArrayStart();
857 if(len
>0 && len
<=destCapacity
&& array
!=dest
) {
858 uprv_memcpy(dest
, array
, len
*U_SIZEOF_UCHAR
);
860 return u_terminateUChars(dest
, destCapacity
, len
, &errorCode
);
// Extracts the pinned start/length range into the char buffer target using
// u_UCharsToChars(). Illegal arguments (negative capacity, or a null target
// with positive capacity) take the do-nothing branch. The conversion runs
// only when length <= targetCapacity. The return value comes from
// u_terminateChars(), called with a local status that is not reported back
// to the caller.
868 UnicodeString::extract(int32_t start
,
871 int32_t targetCapacity
,
872 enum EInvariant
) const
874 // if the arguments are illegal, then do nothing
875 if(targetCapacity
< 0 || (targetCapacity
> 0 && target
== NULL
)) {
879 // pin the indices to legal values
880 pinIndices(start
, length
);
882 if(length
<= targetCapacity
) {
883 u_UCharsToChars(getArrayStart() + start
, target
, length
);
885 UErrorCode status
= U_ZERO_ERROR
;
886 return u_terminateChars(target
, targetCapacity
, length
, &status
);
// Returns a read-only alias (aliasing constructor with isTerminated FALSE)
// covering the pinned start/len range of this string; no text is copied.
// In the fallback branch array is pointed at the in-object buffer (any
// non-NULL value) and len is forced to -2, which the aliasing constructor
// rejects, so the result is a bogus string instead of an empty one.
// NOTE(review): that branch is presumably taken when getBuffer() returns NULL - confirm.
890 UnicodeString::tempSubString(int32_t start
, int32_t len
) const {
891 pinIndices(start
, len
);
892 const UChar
*array
= getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer
894 array
=fUnion
.fStackFields
.fBuffer
; // anything not NULL because that would make an empty string
895 len
=-2; // bogus result string
897 return UnicodeString(FALSE
, array
+ start
, len
);
// Converts the pinned start/len range to UTF-8 in target (at most capacity
// bytes) with u_strToUTF8WithSub(), substituting 0xFFFD for bad input.
// The converted length is received in length8; errorCode is a local.
// NOTE(review): length8 is presumably the return value - confirm.
901 UnicodeString::toUTF8(int32_t start
, int32_t len
,
902 char *target
, int32_t capacity
) const {
903 pinIndices(start
, len
);
905 UErrorCode errorCode
= U_ZERO_ERROR
;
906 u_strToUTF8WithSub(target
, capacity
, &length8
,
907 getBuffer() + start
, len
,
908 0xFFFD, // Standard substitution character.
909 NULL
, // Don't care about number of substitutions.
914 #if U_CHARSET_IS_UTF8
// Variant compiled when U_CHARSET_IS_UTF8 is set: extracts the range as UTF-8
// by forwarding to toUTF8(). dstSize is unsigned, so the negative check is
// commented out; values above 0x7fffffff are clamped to 0x7fffffff before the
// conversion to int32_t. A null target with positive dstSize is rejected.
917 UnicodeString::extract(int32_t start
, int32_t len
,
918 char *target
, uint32_t dstSize
) const {
919 // if the arguments are illegal, then do nothing
920 if(/*dstSize < 0 || */(dstSize
> 0 && target
== 0)) {
923 return toUTF8(start
, len
, target
, dstSize
<= 0x7fffffff ? (int32_t)dstSize
: 0x7fffffff);
926 // else see unistr_cnv.cpp
// Extracts the text between start and limit into target by calling
// doExtract() with the length limit - start.
// NOTE(review): start and limit are presumably pinned before the subtraction - confirm.
930 UnicodeString::extractBetween(int32_t start
,
932 UnicodeString
& target
) const {
935 doExtract(start
, limit
- start
, target
);
938 // When converting from UTF-16 to UTF-8, the result will have at most 3 times
939 // as many bytes as the source has UChars.
940 // The "worst cases" are writing systems like Indic, Thai and CJK with
// Converts the whole string to UTF-8 and hands the bytes to sink.
// A first attempt converts into a buffer obtained from
// sink.GetAppendBuffer(), with the 1024-byte stackBuffer as scratch space.
// If that reports U_BUFFER_OVERFLOW_ERROR, a heap buffer of exactly length8
// bytes (the size reported by the first attempt) is allocated and the
// conversion is repeated; a failed allocation becomes
// U_MEMORY_ALLOCATION_ERROR. On success the bytes are passed to sink.Append().
// NOTE(review): utf8IsOwned presumably records that the heap buffer must be
// freed afterwards - confirm.
943 UnicodeString::toUTF8(ByteSink
&sink
) const {
944 int32_t length16
= length();
946 char stackBuffer
[1024];
947 int32_t capacity
= (int32_t)sizeof(stackBuffer
);
948 UBool utf8IsOwned
= FALSE
;
949 char *utf8
= sink
.GetAppendBuffer(length16
< capacity
? length16
: capacity
,
951 stackBuffer
, capacity
,
954 UErrorCode errorCode
= U_ZERO_ERROR
;
955 u_strToUTF8WithSub(utf8
, capacity
, &length8
,
956 getBuffer(), length16
,
957 0xFFFD, // Standard substitution character.
958 NULL
, // Don't care about number of substitutions.
960 if(errorCode
== U_BUFFER_OVERFLOW_ERROR
) {
961 utf8
= (char *)uprv_malloc(length8
);
964 errorCode
= U_ZERO_ERROR
;
965 u_strToUTF8WithSub(utf8
, length8
, &length8
,
966 getBuffer(), length16
,
967 0xFFFD, // Standard substitution character.
968 NULL
, // Don't care about number of substitutions.
971 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
974 if(U_SUCCESS(errorCode
)) {
975 sink
.Append(utf8
, length8
);
// Converts the whole string to UTF-32 in utf32 (at most capacity code
// points) with u_strToUTF32WithSub(), substituting 0xfffd for bad input.
// The conversion is attempted only when errorCode does not already signal
// failure; the converted length is received in length32.
// NOTE(review): length32 is presumably the return value - confirm.
985 UnicodeString::toUTF32(UChar32
*utf32
, int32_t capacity
, UErrorCode
&errorCode
) const {
987 if(U_SUCCESS(errorCode
)) {
988 // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
989 u_strToUTF32WithSub(utf32
, capacity
, &length32
,
990 getBuffer(), length(),
991 0xfffd, // Substitution character.
992 NULL
, // Don't care about number of substitutions.
// Finds the first occurrence of a UChar substring inside the pinned
// start/length range of this string.
// The first guard covers a bogus string, a null srcChars, a negative
// srcStart and an empty pattern (srcLength == 0); the second covers a
// NUL-terminated pattern (srcLength < 0) that is empty.
// The search is done by u_strFindFirst(); a match is converted to an index
// relative to the start of the whole buffer (match - array).
// NOTE(review): the guards and the no-match case presumably return -1 - confirm.
999 UnicodeString::indexOf(const UChar
*srcChars
,
1003 int32_t length
) const
1005 if(isBogus() || srcChars
== 0 || srcStart
< 0 || srcLength
== 0) {
1009 // UnicodeString does not find empty substrings
1010 if(srcLength
< 0 && srcChars
[srcStart
] == 0) {
1014 // get the indices within bounds
1015 pinIndices(start
, length
);
1017 // find the first occurrence of the substring
1018 const UChar
*array
= getArrayStart();
1019 const UChar
*match
= u_strFindFirst(array
+ start
, length
, srcChars
+ srcStart
, srcLength
);
1023 return (int32_t)(match
- array
);
// Finds the first occurrence of the code unit c inside the pinned
// start/length range using u_memchr(); a match is returned as an index
// relative to the start of the whole buffer (match - array).
// NOTE(review): the no-match case presumably returns -1 - confirm.
1028 UnicodeString::doIndexOf(UChar c
,
1030 int32_t length
) const
1033 pinIndices(start
, length
);
1035 // find the first occurrence of c
1036 const UChar
*array
= getArrayStart();
1037 const UChar
*match
= u_memchr(array
+ start
, c
, length
);
1041 return (int32_t)(match
- array
);
// Finds the first occurrence of the code point c inside the pinned
// start/length range using u_memchr32(); a match is returned as an index
// relative to the start of the whole buffer (match - array).
// NOTE(review): the no-match case presumably returns -1 - confirm.
1046 UnicodeString::doIndexOf(UChar32 c
,
1048 int32_t length
) const {
1050 pinIndices(start
, length
);
1052 // find the first occurrence of c
1053 const UChar
*array
= getArrayStart();
1054 const UChar
*match
= u_memchr32(array
+ start
, c
, length
);
1058 return (int32_t)(match
- array
);
// Finds the last occurrence of a UChar substring inside the pinned
// start/length range of this string. The guards mirror indexOf(): bogus
// string, null srcChars, negative srcStart, or an empty pattern (explicitly
// sized or NUL-terminated). The search is done by u_strFindLast(); a match is
// converted to an index relative to the start of the whole buffer.
// NOTE(review): the guards and the no-match case presumably return -1 - confirm.
1063 UnicodeString::lastIndexOf(const UChar
*srcChars
,
1067 int32_t length
) const
1069 if(isBogus() || srcChars
== 0 || srcStart
< 0 || srcLength
== 0) {
1073 // UnicodeString does not find empty substrings
1074 if(srcLength
< 0 && srcChars
[srcStart
] == 0) {
1078 // get the indices within bounds
1079 pinIndices(start
, length
);
1081 // find the last occurrence of the substring
1082 const UChar
*array
= getArrayStart();
1083 const UChar
*match
= u_strFindLast(array
+ start
, length
, srcChars
+ srcStart
, srcLength
);
1087 return (int32_t)(match
- array
);
// Finds the last occurrence of the code unit c inside the pinned
// start/length range using u_memrchr(); a match is returned as an index
// relative to the start of the whole buffer (match - array).
// NOTE(review): the no-match case presumably returns -1 - confirm.
1092 UnicodeString::doLastIndexOf(UChar c
,
1094 int32_t length
) const
1101 pinIndices(start
, length
);
1103 // find the last occurrence of c
1104 const UChar
*array
= getArrayStart();
1105 const UChar
*match
= u_memrchr(array
+ start
, c
, length
);
1109 return (int32_t)(match
- array
);
// Finds the last occurrence of the code point c inside the pinned
// start/length range using u_memrchr32(); a match is returned as an index
// relative to the start of the whole buffer (match - array).
// NOTE(review): the no-match case presumably returns -1 - confirm.
1114 UnicodeString::doLastIndexOf(UChar32 c
,
1116 int32_t length
) const {
1118 pinIndices(start
, length
);
1120 // find the last occurrence of c
1121 const UChar
*array
= getArrayStart();
1122 const UChar
*match
= u_memrchr32(array
+ start
, c
, length
);
1126 return (int32_t)(match
- array
);
1130 //========================================
1131 // Write implementation
1132 //========================================
// Replaces occurrences of a range of oldText with a range of newText inside
// the start/length window of this string.
// The first guard is taken when any of the three strings is bogus. All three
// ranges are pinned to their strings, and an empty oldText range has its own
// early branch. The loop runs while the remaining window is non-empty and at
// least oldLength long: indexOf() locates the next match, replace()
// substitutes it, the window is shortened by the consumed part
// (pos + oldLength - start) and start moves to just after the inserted text
// (pos + newLength), so replacement text is never rescanned.
1135 UnicodeString::findAndReplace(int32_t start
,
1137 const UnicodeString
& oldText
,
1140 const UnicodeString
& newText
,
1144 if(isBogus() || oldText
.isBogus() || newText
.isBogus()) {
1148 pinIndices(start
, length
);
1149 oldText
.pinIndices(oldStart
, oldLength
);
1150 newText
.pinIndices(newStart
, newLength
);
1152 if(oldLength
== 0) {
1156 while(length
> 0 && length
>= oldLength
) {
1157 int32_t pos
= indexOf(oldText
, oldStart
, oldLength
, start
, length
);
1159 // no more oldText's here: done
1162 // we found oldText, replace it by newText and go beyond it
1163 replace(pos
, oldLength
, newText
, newStart
, newLength
);
1164 length
-= pos
+ oldLength
- start
;
1165 start
= pos
+ newLength
;
// Puts the string into the bogus state: flags become kIsBogus, the array
// pointer becomes 0 and the capacity becomes 0.
// NOTE(review): the previous buffer is presumably released first - confirm.
1174 UnicodeString::setToBogus()
1178 fUnion
.fFields
.fLengthAndFlags
= kIsBogus
;
1179 fUnion
.fFields
.fArray
= 0;
1180 fUnion
.fFields
.fCapacity
= 0;
1183 // turn a bogus string into an empty one
// Acts only when the kIsBogus flag is set; the comment preceding this
// function says the bogus string is then turned into an empty one.
1185 UnicodeString::unBogus() {
1186 if(fUnion
.fFields
.fLengthAndFlags
& kIsBogus
) {
// Provides the string buffer with a NUL code unit after the contents.
// When len < getCapacity() there is room for the terminator in place:
//   - for a read-only buffer (kBufferIsReadonly) nothing may be written, so
//     only an already present NUL at array[len] is accepted;
//   - for a buffer that is not shared (not kRefCounted, or refCount() == 1)
//     the comments explain that the NUL can be written directly.
// Otherwise cloneArrayIfNeeded(len+1) is used to obtain a writable buffer
// with room for the terminator, and array is refreshed from it.
1192 UnicodeString::getTerminatedBuffer() {
1196 UChar
*array
= getArrayStart();
1197 int32_t len
= length();
1198 if(len
< getCapacity()) {
1199 if(fUnion
.fFields
.fLengthAndFlags
& kBufferIsReadonly
) {
1200 // If len<capacity on a read-only alias, then array[len] is
1201 // either the original NUL (if constructed with (TRUE, s, length))
1202 // or one of the original string contents characters (if later truncated),
1203 // therefore we can assume that array[len] is initialized memory.
1204 if(array
[len
] == 0) {
1207 } else if(((fUnion
.fFields
.fLengthAndFlags
& kRefCounted
) == 0 || refCount() == 1)) {
1208 // kRefCounted: Do not write the NUL if the buffer is shared.
1209 // That is mostly safe, except when the length of one copy was modified
1210 // without copy-on-write, e.g., via truncate(newLength) or remove(void).
1211 // Then the NUL would be written into the middle of another copy's string.
1213 // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL.
1214 // Do not test if there is a NUL already because it might be uninitialized memory.
1215 // (That would be safe, but tools like valgrind & Purify would complain.)
1220 if(cloneArrayIfNeeded(len
+1)) {
1221 array
= getArrayStart();
1229 // setTo() analogous to the readonly-aliasing constructor with the same signature
// Re-targets this string as a read-only alias of caller-owned text.
// A string with an open getBuffer() (kOpenGetBuffer) is left untouched.
// The argument check matches the read-only aliasing constructor:
// textLength < -1, textLength == -1 without isTerminated, or isTerminated
// with text[textLength] not NUL are rejected. A textLength of -1 is resolved
// with u_strlen(). The flags then become kReadonlyAlias and setArray()
// receives a capacity of textLength+1 when isTerminated, else textLength.
// NOTE(review): the rejected case presumably makes the string bogus - confirm.
1231 UnicodeString::setTo(UBool isTerminated
,
1235 if(fUnion
.fFields
.fLengthAndFlags
& kOpenGetBuffer
) {
1236 // do not modify a string that has an "open" getBuffer(minCapacity)
1241 // treat as an empty string, do not alias
1247 if( textLength
< -1 ||
1248 (textLength
== -1 && !isTerminated
) ||
1249 (textLength
>= 0 && isTerminated
&& text
[textLength
] != 0)
1257 if(textLength
== -1) {
1258 // text is terminated, or else it would have failed the above test
1259 textLength
= u_strlen(text
);
1261 fUnion
.fFields
.fLengthAndFlags
= kReadonlyAlias
;
1262 setArray((UChar
*)text
, textLength
, isTerminated
? textLength
+ 1 : textLength
);
1266 // setTo() analogous to the writable-aliasing constructor with the same signature
// Re-targets this string as a writable alias of the caller-owned buffer.
// A string with an open getBuffer() (kOpenGetBuffer) is left untouched and a
// NULL buffer is treated as an empty string. buffLength < -1,
// buffCapacity < 0 and buffLength > buffCapacity are rejected. With
// buffLength == -1 the length is found by scanning for a NUL, never past
// buffer + buffCapacity. The flags then become kWritableAlias and setArray()
// adopts the buffer.
// NOTE(review): the rejected case presumably makes the string bogus - confirm.
1268 UnicodeString::setTo(UChar
*buffer
,
1270 int32_t buffCapacity
) {
1271 if(fUnion
.fFields
.fLengthAndFlags
& kOpenGetBuffer
) {
1272 // do not modify a string that has an "open" getBuffer(minCapacity)
1276 if(buffer
== NULL
) {
1277 // treat as an empty string, do not alias
1283 if(buffLength
< -1 || buffCapacity
< 0 || buffLength
> buffCapacity
) {
1286 } else if(buffLength
== -1) {
1287 // buffLength = u_strlen(buff); but do not look beyond buffCapacity
1288 const UChar
*p
= buffer
, *limit
= buffer
+ buffCapacity
;
1289 while(p
!= limit
&& *p
!= 0) {
1292 buffLength
= (int32_t)(p
- buffer
);
1297 fUnion
.fFields
.fLengthAndFlags
= kWritableAlias
;
1298 setArray(buffer
, buffLength
, buffCapacity
);
// Replaces the contents with the UTF-16 conversion of the UTF-8 bytes in utf8.
// Because the UTF-16 form is never longer (in units) than the UTF-8 input,
// the buffer is sized from the byte length: US_STACKBUF_SIZE for short input,
// otherwise length + 1. The conversion writes directly into the buffer from
// getBuffer(capacity), substitutes 0xfffd for bad input, and
// releaseBuffer(length16) fixes the final length.
// NOTE(review): a conversion failure presumably makes the string bogus - confirm.
1302 UnicodeString
&UnicodeString::setToUTF8(const StringPiece
&utf8
) {
1304 int32_t length
= utf8
.length();
1306 // The UTF-16 string will be at most as long as the UTF-8 string.
1307 if(length
<= US_STACKBUF_SIZE
) {
1308 capacity
= US_STACKBUF_SIZE
;
1310 capacity
= length
+ 1; // +1 for the terminating NUL.
1312 UChar
*utf16
= getBuffer(capacity
);
1314 UErrorCode errorCode
= U_ZERO_ERROR
;
1315 u_strFromUTF8WithSub(utf16
, getCapacity(), &length16
,
1316 utf8
.data(), length
,
1317 0xfffd, // Substitution character.
1318 NULL
, // Don't care about number of substitutions.
1320 releaseBuffer(length16
);
1321 if(U_FAILURE(errorCode
)) {
// Stores c at offset. The store happens only after cloneArrayIfNeeded()
// succeeded and only for a non-empty string.
// NOTE(review): the offset >= len branch suggests out-of-range offsets are
// adjusted before the store - confirm how.
1328 UnicodeString::setCharAt(int32_t offset
,
1331 int32_t len
= length();
1332 if(cloneArrayIfNeeded() && len
> 0) {
1335 } else if(offset
>= len
) {
1339 getArrayStart()[offset
] = c
;
// Replaces the start/_length range with the single code point srcChar.
// srcChar is encoded into a local buffer of U16_MAX_LENGTH units with
// U16_APPEND, which advances count and sets isError for an invalid code
// point. doReplace() then receives the encoded units, or 0 units on error, in
// which case the range is simply removed.
1345 UnicodeString::replace(int32_t start
,
1348 UChar buffer
[U16_MAX_LENGTH
];
1350 UBool isError
= FALSE
;
1351 U16_APPEND(buffer
, count
, U16_MAX_LENGTH
, srcChar
, isError
);
1352 // We test isError so that the compiler does not complain that we don't.
1353 // If isError (srcChar is not a valid code point) then count==0 which means
1354 // we remove the source segment rather than replacing it with srcChar.
1355 return doReplace(start
, _length
, buffer
, 0, isError
? 0 : count
);
// Appends the single code point srcChar. It is encoded into a local buffer
// of U16_MAX_LENGTH units with U16_APPEND, which advances _length and sets
// isError for an invalid code point. On error *this is returned unchanged;
// otherwise the encoded units are appended through doAppend().
1359 UnicodeString::append(UChar32 srcChar
) {
1360 UChar buffer
[U16_MAX_LENGTH
];
1361 int32_t _length
= 0;
1362 UBool isError
= FALSE
;
1363 U16_APPEND(buffer
, _length
, U16_MAX_LENGTH
, srcChar
, isError
);
1364 // We test isError so that the compiler does not complain that we don't.
1365 // If isError then _length==0 which turns the doAppend() into a no-op anyway.
1366 return isError
? *this : doAppend(buffer
, 0, _length
);
// Replaces the start/length range of this string with a range of src.
// srcStart and srcLength are pinned to the bounds of src, then the work is
// forwarded to the UChar-pointer overload of doReplace() using
// src.getArrayStart() as the source text.
1370 UnicodeString::doReplace( int32_t start
,
1372 const UnicodeString
& src
,
1376 // pin the indices to legal values
1377 src
.pinIndices(srcStart
, srcLength
);
1379 // get the characters from src
1380 // and replace the range in ourselves with them
1381 return doReplace(start
, length
, src
.getArrayStart(), srcStart
, srcLength
);
// Core replace operation: substitutes srcLength code units taken from
// srcChars + srcStart for the range (start, length) of this string.
//
// Shortcuts visible below:
//  - with a read-only buffer and srcLength == 0, a prefix or suffix is removed
//    by adjusting the array pointer / length instead of copying;
//  - when start equals the old length, the work is delegated to doAppend().
// Otherwise the array is cloned or grown as needed, the unchanged parts are
// moved into place, and the new text is copied into the resulting hole.
1385 UnicodeString::doReplace(int32_t start
,
1387 const UChar
*srcChars
,
1395 int32_t oldLength
= this->length();
1397 // optimize (read-only alias).remove(0, start) and .remove(start, end)
1398 if((fUnion
.fFields
.fLengthAndFlags
&kBufferIsReadonly
) && srcLength
== 0) {
1400 // remove prefix by adjusting the array pointer
1402 fUnion
.fFields
.fArray
+= length
;
1403 fUnion
.fFields
.fCapacity
-= length
;
1404 setLength(oldLength
- length
);
// The removed range reaches (or passes) the end of the string.
1408 if(length
>= (oldLength
- start
)) {
1409 // remove suffix by reducing the length (like truncate())
1411 fUnion
.fFields
.fCapacity
= start
; // not NUL-terminated any more
// Replacing at the very end is an append.
1417 if(start
== oldLength
) {
1418 return doAppend(srcChars
, srcStart
, srcLength
);
// Zero-length source: nothing will be inserted.
1422 srcStart
= srcLength
= 0;
1423 } else if(srcLength
< 0) {
1424 // get the srcLength if necessary
1425 srcLength
= u_strlen(srcChars
+ srcStart
);
1428 // pin the indices to legal values
1429 pinIndices(start
, length
);
1431 // calculate the size of the string after the replace
// NOTE(review): this is plain int32_t arithmetic with no visible overflow
// check; confirm that callers bound srcLength.
1432 int32_t newLength
= oldLength
- length
+ srcLength
;
1434 // cloneArrayIfNeeded(doCopyArray=FALSE) may change fArray but will not copy the current contents;
1435 // therefore we need to keep the current fArray
1436 UChar oldStackBuffer
[US_STACKBUF_SIZE
];
1438 if((fUnion
.fFields
.fLengthAndFlags
&kUsingStackBuffer
) && (newLength
> US_STACKBUF_SIZE
)) {
1439 // copy the stack buffer contents because it will be overwritten with
1440 // fUnion.fFields values
1441 u_memcpy(oldStackBuffer
, fUnion
.fStackFields
.fBuffer
, oldLength
);
1442 oldArray
= oldStackBuffer
;
1444 oldArray
= getArrayStart();
1447 // clone our array and allocate a bigger array if needed
// Requested capacity is newLength, with a preferred grown capacity of
// newLength + newLength/4 + kGrowSize. FALSE means the old contents are not
// copied by the callee, and bufferToDelete receives any old buffer whose
// release must be delayed (see the end of this function).
1448 int32_t *bufferToDelete
= 0;
1449 if(!cloneArrayIfNeeded(newLength
, newLength
+ (newLength
>> 2) + kGrowSize
,
1450 FALSE
, &bufferToDelete
)
1455 // now do the replace
1457 UChar
*newArray
= getArrayStart();
1458 if(newArray
!= oldArray
) {
1459 // if fArray changed, then we need to copy everything except what will change
1460 us_arrayCopy(oldArray
, 0, newArray
, 0, start
);
1461 us_arrayCopy(oldArray
, start
+ length
,
1462 newArray
, start
+ srcLength
,
1463 oldLength
- (start
+ length
));
1464 } else if(length
!= srcLength
) {
1465 // fArray did not change; copy only the portion that isn't changing, leaving a hole
1466 us_arrayCopy(oldArray
, start
+ length
,
1467 newArray
, start
+ srcLength
,
1468 oldLength
- (start
+ length
));
1471 // now fill in the hole with the new string
1472 us_arrayCopy(srcChars
, srcStart
, newArray
, start
, srcLength
);
1474 setLength(newLength
);
1476 // delayed delete in case srcChars == fArray when we started, and
1477 // to keep oldArray alive for the above operations
1478 if (bufferToDelete
) {
1479 uprv_free(bufferToDelete
);
1485 // Versions of doReplace() only for append() variants.
1486 // doReplace() and doAppend() optimize for different cases.
// doAppend() overload whose text comes from another UnicodeString.
// A srcLength of 0 is special-cased up front; otherwise srcStart/srcLength are
// pinned against src and the call is forwarded to the const UChar * overload.
1489 UnicodeString::doAppend(const UnicodeString
& src
, int32_t srcStart
, int32_t srcLength
) {
1490 if(srcLength
== 0) {
1494 // pin the indices to legal values
1495 src
.pinIndices(srcStart
, srcLength
);
1496 return doAppend(src
.getArrayStart(), srcStart
, srcLength
);
// Appends srcLength code units starting at srcChars + srcStart to this string.
// The first check guards against a non-writable string, an empty source
// (srcLength == 0) and a NULL srcChars. The source length can also be
// determined with u_strlen() (see below).
1500 UnicodeString::doAppend(const UChar
*srcChars
, int32_t srcStart
, int32_t srcLength
) {
1501 if(!isWritable() || srcLength
== 0 || srcChars
== NULL
) {
1506 // get the srcLength if necessary
1507 if((srcLength
= u_strlen(srcChars
+ srcStart
)) == 0) {
1512 int32_t oldLength
= length();
// NOTE(review): oldLength + srcLength has no visible overflow check; confirm
// that callers bound srcLength.
1513 int32_t newLength
= oldLength
+ srcLength
;
1514 // optimize append() onto a large-enough, owned string
// Otherwise cloneArrayIfNeeded() is asked for newLength, with a preferred
// grown capacity of newLength + newLength/4 + kGrowSize.
1515 if((newLength
<= getCapacity() && isBufferWritable()) ||
1516 cloneArrayIfNeeded(newLength
, newLength
+ (newLength
>> 2) + kGrowSize
)) {
1517 UChar
*newArray
= getArrayStart();
1518 // Do not copy characters when
1519 // UChar *buffer=str.getAppendBuffer(...);
1521 // str.append(buffer, length);
1523 // str.appendString(buffer, length)
// i.e. skip the copy when the source already sits exactly at the append position.
1525 if(srcChars
+ srcStart
!= newArray
+ oldLength
) {
1526 us_arrayCopy(srcChars
, srcStart
, newArray
, oldLength
, srcLength
);
1528 setLength(newLength
);
// Forwards directly to replaceBetween(start, limit, text).
1537 UnicodeString::handleReplaceBetween(int32_t start
,
1539 const UnicodeString
& text
) {
1540 replaceBetween(start
, limit
, text
);
// Copies the range [start, limit) of this string and inserts the copy at
// index dest. The range is first extracted into a temporary heap buffer of
// (limit - start) UChars and then inserted from that buffer.
1547 UnicodeString::copy(int32_t start
, int32_t limit
, int32_t dest
) {
1548 if (limit
<= start
) {
1549 return; // Nothing to do; avoid bogus malloc call
// Temporary buffer holding the extracted range.
1551 UChar
* text
= (UChar
*) uprv_malloc( sizeof(UChar
) * (limit
- start
) );
1552 // Check to make sure text is not null.
1554 extractBetween(start
, limit
, text
, 0);
1555 insert(dest
, text
, 0, limit
- start
);
1563 * NOTE: This is for the Replaceable class. There is no rep.cpp,
1564 * so we implement this function here.
// Out-of-line definition of Replaceable::hasMetaData().
1566 UBool
Replaceable::hasMetaData() const {
// UnicodeString's definition of hasMetaData().
1573 UBool
UnicodeString::hasMetaData() const {
// Reverses the code units in the range (start, length) of this string.
// Pass 1 swaps code units from both ends towards the middle and records
// whether any lead surrogate was seen. Pass 2 runs only if one was seen: it
// looks for a trail surrogate immediately followed by a lead surrogate, which
// is a surrogate pair left in the wrong order by pass 1.
1578 UnicodeString::doReverse(int32_t start
, int32_t length
) {
// Ranges of fewer than two code units need no work; the array must also be writable.
1579 if(length
<= 1 || !cloneArrayIfNeeded()) {
1583 // pin the indices to legal values
1584 pinIndices(start
, length
);
1585 if(length
<= 1) { // pinIndices() might have shrunk the length
1589 UChar
*left
= getArrayStart() + start
;
1590 UChar
*right
= left
+ length
- 1; // -1 for inclusive boundary (length>=2)
1592 UBool hasSupplementary
= FALSE
;
1594 // Before the loop we know left<right because length>=2.
// Both code units involved in each swap are tested with U16_IS_LEAD.
1596 hasSupplementary
|= (UBool
)U16_IS_LEAD(swap
= *left
);
1597 hasSupplementary
|= (UBool
)U16_IS_LEAD(*left
++ = *right
);
1599 } while(left
< right
);
1600 // Make sure to test the middle code unit of an odd-length string.
1601 // Redundant if the length is even.
1602 hasSupplementary
|= (UBool
)U16_IS_LEAD(*left
);
1604 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1605 if(hasSupplementary
) {
1608 left
= getArrayStart() + start
;
1609 right
= left
+ length
- 1; // -1 so that we can look at *(left+1) if left<right
1610 while(left
< right
) {
// A trail surrogate directly followed by a lead surrogate is a reversed pair.
1611 if(U16_IS_TRAIL(swap
= *left
) && U16_IS_LEAD(swap2
= *(left
+ 1))) {
// Pads this string on the left with padChar until it is targetLength code
// units long. The padding steps are only reached when the string is shorter
// than targetLength and cloneArrayIfNeeded(targetLength) succeeds.
1624 UnicodeString::padLeading(int32_t targetLength
,
1627 int32_t oldLength
= length();
1628 if(oldLength
>= targetLength
|| !cloneArrayIfNeeded(targetLength
)) {
1631 // move contents up by padding width
1632 UChar
*array
= getArrayStart();
// start is the padding width, i.e. the new index of the old first code unit.
1633 int32_t start
= targetLength
- oldLength
;
1634 us_arrayCopy(array
, 0, array
, start
, oldLength
);
1636 // fill in padding character
// Fills indices start-1 down to 0.
1637 while(--start
>= 0) {
1638 array
[start
] = padChar
;
1640 setLength(targetLength
);
// Pads this string on the right with padChar until it is targetLength code
// units long. The padding steps are only reached when the string is shorter
// than targetLength and cloneArrayIfNeeded(targetLength) succeeds.
1646 UnicodeString::padTrailing(int32_t targetLength
,
1649 int32_t oldLength
= length();
1650 if(oldLength
>= targetLength
|| !cloneArrayIfNeeded(targetLength
)) {
1653 // fill in padding character
1654 UChar
*array
= getArrayStart();
// Fills indices targetLength-1 down to oldLength.
1655 int32_t length
= targetLength
;
1656 while(--length
>= oldLength
) {
1657 array
[length
] = padChar
;
1659 setLength(targetLength
);
1664 //========================================
1666 //========================================
// Computes the hash code of this string from its code units.
// A computed value equal to kInvalidHashCode is replaced by kEmptyHashCode.
1668 UnicodeString::doHashCode() const
1670 /* Delegate hash computation to uhash. This makes UnicodeString
1671 * hashing consistent with UChar* hashing. */
1672 int32_t hashCode
= ustr_hashUCharsN(getArrayStart(), length());
1673 if (hashCode
== kInvalidHashCode
) {
1674 hashCode
= kEmptyHashCode
;
1679 //========================================
1681 //========================================
// Opens this string's internal array for direct access by the caller.
// Requires minCapacity >= -1 and a successful cloneArrayIfNeeded(minCapacity);
// the string is then flagged with kOpenGetBuffer and the array start is returned.
1684 UnicodeString::getBuffer(int32_t minCapacity
) {
1685 if(minCapacity
>=-1 && cloneArrayIfNeeded(minCapacity
)) {
// Mark the buffer as open; releaseBuffer() clears this flag again.
1686 fUnion
.fFields
.fLengthAndFlags
|=kOpenGetBuffer
;
1688 return getArrayStart();
// Closes a buffer previously opened with getBuffer() and commits the new length.
// Only acts when kOpenGetBuffer is set and newLength >= -1. The committed
// length is either found by scanning for a NUL within the capacity or taken
// from newLength, which is compared against the capacity. kOpenGetBuffer is
// cleared afterwards.
1695 UnicodeString::releaseBuffer(int32_t newLength
) {
1696 if(fUnion
.fFields
.fLengthAndFlags
&kOpenGetBuffer
&& newLength
>=-1) {
1697 // set the new fLength
1698 int32_t capacity
=getCapacity();
1700 // the new length is the string length, capped by fCapacity
1701 const UChar
*array
=getArrayStart(), *p
=array
, *limit
=array
+capacity
;
// Scan for the first NUL without reading past the capacity.
1702 while(p
<limit
&& *p
!=0) {
1705 newLength
=(int32_t)(p
-array
);
1706 } else if(newLength
>capacity
) {
1709 setLength(newLength
);
// The buffer is no longer open.
1710 fUnion
.fFields
.fLengthAndFlags
&=~kOpenGetBuffer
;
1714 //========================================
1716 //========================================
// Makes sure this string has its own writable array that can hold at least
// newCapacity code units.
//
// As described further down, a new array is needed when the buffer is
// read-only, when it is ref-counted and shared (refCount() > 1), or when it is
// too small. Allocation is tried with growCapacity first and, if that fails
// and newCapacity is smaller, again with newCapacity.
//
// newCapacity == -1 means "use the current capacity"; growCapacity < 0 means
// "same as newCapacity". doCopyArray is tested before the stack buffer
// contents are saved for copying. If pBufferToDelete is non-null, an old
// ref-counted buffer whose count drops to 0 is handed back through it instead
// of being freed here.
1718 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity
,
1719 int32_t growCapacity
,
1721 int32_t **pBufferToDelete
,
1723 // default parameters need to be static, therefore
1724 // the defaults are -1 to have convenience defaults
1725 if(newCapacity
== -1) {
1726 newCapacity
= getCapacity();
1729 // while a getBuffer(minCapacity) is "open",
1730 // prevent any modifications of the string by returning FALSE here
1731 // if the string is bogus, then only an assignment or similar can revive it
1737 * We need to make a copy of the array if
1738 * the buffer is read-only, or
1739 * the buffer is refCounted (shared), and refCount>1, or
1740 * the buffer is too small.
1741 * Return FALSE if memory could not be allocated.
1744 fUnion
.fFields
.fLengthAndFlags
& kBufferIsReadonly
||
1745 (fUnion
.fFields
.fLengthAndFlags
& kRefCounted
&& refCount() > 1) ||
1746 newCapacity
> getCapacity()
1748 // check growCapacity for default value and use of the stack buffer
1749 if(growCapacity
< 0) {
1750 growCapacity
= newCapacity
;
1751 } else if(newCapacity
<= US_STACKBUF_SIZE
&& growCapacity
> US_STACKBUF_SIZE
) {
// Do not grow beyond the stack buffer when the required size fits into it.
1752 growCapacity
= US_STACKBUF_SIZE
;
// Remember the current state so that contents can be copied and, on
// allocation failure, the previous fields can be restored.
1756 UChar oldStackBuffer
[US_STACKBUF_SIZE
];
1758 int32_t oldLength
= length();
1759 int16_t flags
= fUnion
.fFields
.fLengthAndFlags
;
1761 if(flags
&kUsingStackBuffer
) {
1762 U_ASSERT(!(flags
&kRefCounted
)); /* kRefCounted and kUsingStackBuffer are mutually exclusive */
1763 if(doCopyArray
&& growCapacity
> US_STACKBUF_SIZE
) {
1764 // copy the stack buffer contents because it will be overwritten with
1765 // fUnion.fFields values
1766 us_arrayCopy(fUnion
.fStackFields
.fBuffer
, 0, oldStackBuffer
, 0, oldLength
);
1767 oldArray
= oldStackBuffer
;
1769 oldArray
= NULL
; // no need to copy from the stack buffer to itself
1772 oldArray
= fUnion
.fFields
.fArray
;
1773 U_ASSERT(oldArray
!=NULL
); /* when stack buffer is not used, oldArray must have a non-NULL reference */
1776 // allocate a new array
// First choice is growCapacity; newCapacity is the fallback when it is smaller.
1777 if(allocate(growCapacity
) ||
1778 (newCapacity
< growCapacity
&& allocate(newCapacity
))
1781 // copy the contents
1782 // do not copy more than what fits - it may be smaller than before
1783 int32_t minLength
= oldLength
;
1784 newCapacity
= getCapacity();
1785 if(newCapacity
< minLength
) {
1786 minLength
= newCapacity
;
1788 if(oldArray
!= NULL
) {
1789 us_arrayCopy(oldArray
, 0, getArrayStart(), 0, minLength
);
1791 setLength(minLength
);
1796 // release the old array
1797 if(flags
& kRefCounted
) {
1798 // the array is refCounted; decrement and release if 0
// The reference counter is stored immediately before the array.
1799 u_atomic_int32_t
*pRefCount
= ((u_atomic_int32_t
*)oldArray
- 1);
1800 if(umtx_atomic_dec(pRefCount
) == 0) {
1801 if(pBufferToDelete
== 0) {
1802 // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t
1803 // is defined as volatile. (Volatile has useful non-standard behavior
1804 // with this compiler.)
1805 uprv_free((void *)pRefCount
);
1807 // the caller requested to delete it himself
1808 *pBufferToDelete
= (int32_t *)pRefCount
;
1813 // not enough memory for growCapacity and not even for the smaller newCapacity
1814 // reset the old values for setToBogus() to release the array
1815 if(!(flags
&kUsingStackBuffer
)) {
1816 fUnion
.fFields
.fArray
= oldArray
;
1818 fUnion
.fFields
.fLengthAndFlags
= flags
;
1826 // UnicodeStringAppendable ------------------------------------------------- ***
// Out-of-line destructor with an empty body.
1828 UnicodeStringAppendable::~UnicodeStringAppendable() {}
// Appends the single code unit c to the wrapped string str.
// The result is whether str is writable after the append.
1831 UnicodeStringAppendable::appendCodeUnit(UChar c
) {
1832 return str
.doAppend(&c
, 0, 1).isWritable();
// Appends the code point c to the wrapped string str.
// c is encoded as UTF-16 into a local buffer first. The result is false if
// the encoding reported an error, otherwise whether str is writable after the append.
1836 UnicodeStringAppendable::appendCodePoint(UChar32 c
) {
1837 UChar buffer
[U16_MAX_LENGTH
];
1838 int32_t cLength
= 0;
1839 UBool isError
= FALSE
;
// Encode c; cLength becomes the number of code units to append.
1840 U16_APPEND(buffer
, cLength
, U16_MAX_LENGTH
, c
, isError
);
1841 return !isError
&& str
.doAppend(buffer
, 0, cLength
).isWritable();
// Appends length code units from s to the wrapped string str.
// The result is whether str is writable after the append.
1845 UnicodeStringAppendable::appendString(const UChar
*s
, int32_t length
) {
1846 return str
.doAppend(s
, 0, length
).isWritable();
// Asks the wrapped string to provide room for appendCapacity more code units
// beyond its current length; returns the result of cloneArrayIfNeeded().
// NOTE(review): str.length() + appendCapacity has no visible overflow check;
// confirm that callers bound appendCapacity.
1850 UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity
) {
1851 return str
.cloneArrayIfNeeded(str
.length() + appendCapacity
);
// Provides a buffer into which the caller can write code units to be appended.
//
// Argument check: minCapacity must be at least 1 and scratchCapacity must be at
// least minCapacity, otherwise *resultCapacity is set to 0.
// Preferred result: the unused tail of the wrapped string's own array (starting
// at its current length), with *resultCapacity set to the space available there.
// Otherwise *resultCapacity is set to scratchCapacity.
1855 UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity
,
1856 int32_t desiredCapacityHint
,
1857 UChar
*scratch
, int32_t scratchCapacity
,
1858 int32_t *resultCapacity
) {
1859 if(minCapacity
< 1 || scratchCapacity
< minCapacity
) {
1860 *resultCapacity
= 0;
1863 int32_t oldLength
= str
.length();
// Required capacity is oldLength + minCapacity; oldLength + desiredCapacityHint
// is passed as the preferred grown capacity.
// NOTE(review): neither sum has a visible overflow check; confirm that callers
// bound these values.
1864 if(str
.cloneArrayIfNeeded(oldLength
+ minCapacity
, oldLength
+ desiredCapacityHint
)) {
1865 *resultCapacity
= str
.getCapacity() - oldLength
;
1866 return str
.getArrayStart() + oldLength
;
1868 *resultCapacity
= scratchCapacity
;
// Hash function for keys whose pointer member refers to a UnicodeString.
// Returns 0 for a NULL pointer, otherwise the string's hashCode().
1876 U_CAPI
int32_t U_EXPORT2
1877 uhash_hashUnicodeString(const UElement key
) {
1878 const UnicodeString
*str
= (const UnicodeString
*) key
.pointer
;
1879 return (str
== NULL
) ? 0 : str
->hashCode();
1882 // Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
1883 // does not depend on hashtable code.
// Comparison function for keys whose pointer members refer to UnicodeString objects.
// NULL pointers are checked before dereferencing; two non-NULL strings are
// compared by value with operator==.
1884 U_CAPI UBool U_EXPORT2
1885 uhash_compareUnicodeString(const UElement key1
, const UElement key2
) {
1886 const UnicodeString
*str1
= (const UnicodeString
*) key1
.pointer
;
1887 const UnicodeString
*str2
= (const UnicodeString
*) key2
.pointer
;
// At least one key is NULL, so the strings cannot be dereferenced.
1891 if (str1
== NULL
|| str2
== NULL
) {
1894 return *str1
== *str2
;
1897 #ifdef U_STATIC_IMPLEMENTATION
1899 This should never be called. It is defined here to make sure that the
1900 virtual vector deleting destructor is defined within unistr.cpp.
1901 The vector deleting destructor is already a part of UObject,
1902 but defining it here makes sure that it is included with this object file.
1903 This makes sure that static library dependencies are kept to a minimum.
1905 static void uprv_UnicodeStringDummy(void) {
1906 delete [] (new UnicodeString
[2]);