1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 ******************************************************************************
5 * Copyright (C) 1999-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ******************************************************************************
11 * Modification History:
13 * Date Name Description
14 * 09/25/98 stephen Creation.
15 * 04/20/99 stephen Overhauled per 4/16 code review.
16 * 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX
17 * 11/18/99 aliu Added handleReplaceBetween() to make inherit from
19 * 06/25/01 grhoten Removed the dependency on iostream
20 ******************************************************************************
23 #include "unicode/utypes.h"
24 #include "unicode/appendable.h"
25 #include "unicode/putil.h"
28 #include "unicode/ustring.h"
29 #include "unicode/unistr.h"
30 #include "unicode/utf.h"
31 #include "unicode/utf16.h"
44 print(const UnicodeString
& s
,
49 for(int i
= 0; i
< s
.length(); ++i
) {
51 if(c
>= 0x007E || c
< 0x0020)
52 cout
<< "[0x" << hex
<< s
[i
] << "]";
66 for(int i
= 0; i
< len
; ++i
) {
68 if(c
>= 0x007E || c
< 0x0020)
69 cout
<< "[0x" << hex
<< s
[i
] << "]";
78 // Local function definitions for now
80 // need to copy areas that may overlap
83 us_arrayCopy(const UChar
*src
, int32_t srcStart
,
84 UChar
*dst
, int32_t dstStart
, int32_t count
)
87 uprv_memmove(dst
+dstStart
, src
+srcStart
, (size_t)count
*sizeof(*src
));
91 // u_unescapeAt() callback to get a UChar from a UnicodeString
93 static UChar U_CALLCONV
94 UnicodeString_charAt(int32_t offset
, void *context
) {
95 return ((icu::UnicodeString
*) context
)->charAt(offset
);
101 /* The Replaceable virtual destructor can't be defined in the header
102 due to how AIX works with multiple definitions of virtual functions.
104 Replaceable::~Replaceable() {}
106 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString
)
108 UnicodeString U_EXPORT2
109 operator+ (const UnicodeString
&s1
, const UnicodeString
&s2
) {
111 UnicodeString(s1
.length()+s2
.length()+1, (UChar32
)0, 0).
116 //========================================
117 // Reference Counting functions, put at top of file so that optimizing compilers
118 // have a chance to automatically inline.
119 //========================================
122 UnicodeString::addRef() {
123 umtx_atomic_inc((u_atomic_int32_t
*)fUnion
.fFields
.fArray
- 1);
127 UnicodeString::removeRef() {
128 return umtx_atomic_dec((u_atomic_int32_t
*)fUnion
.fFields
.fArray
- 1);
132 UnicodeString::refCount() const {
133 return umtx_loadAcquire(*((u_atomic_int32_t
*)fUnion
.fFields
.fArray
- 1));
137 UnicodeString::releaseArray() {
138 if((fUnion
.fFields
.fLengthAndFlags
& kRefCounted
) && removeRef() == 0) {
139 uprv_free((int32_t *)fUnion
.fFields
.fArray
- 1);
145 //========================================
147 //========================================
149 // The default constructor is inline in unistr.h.
151 UnicodeString::UnicodeString(int32_t capacity
, UChar32 c
, int32_t count
) {
152 fUnion
.fFields
.fLengthAndFlags
= 0;
153 if(count
<= 0 || (uint32_t)c
> 0x10ffff) {
154 // just allocate and do not do anything else
156 } else if(c
<= 0xffff) {
157 int32_t length
= count
;
158 if(capacity
< length
) {
161 if(allocate(capacity
)) {
162 UChar
*array
= getArrayStart();
163 UChar unit
= (UChar
)c
;
164 for(int32_t i
= 0; i
< length
; ++i
) {
169 } else { // supplementary code point, write surrogate pairs
170 if(count
> (INT32_MAX
/ 2)) {
171 // We would get more than 2G UChars.
175 int32_t length
= count
* 2;
176 if(capacity
< length
) {
179 if(allocate(capacity
)) {
180 UChar
*array
= getArrayStart();
181 UChar lead
= U16_LEAD(c
);
182 UChar trail
= U16_TRAIL(c
);
183 for(int32_t i
= 0; i
< length
; i
+= 2) {
185 array
[i
+ 1] = trail
;
192 UnicodeString::UnicodeString(UChar ch
) {
193 fUnion
.fFields
.fLengthAndFlags
= kLength1
| kShortString
;
194 fUnion
.fStackFields
.fBuffer
[0] = ch
;
197 UnicodeString::UnicodeString(UChar32 ch
) {
198 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
200 UBool isError
= FALSE
;
201 U16_APPEND(fUnion
.fStackFields
.fBuffer
, i
, US_STACKBUF_SIZE
, ch
, isError
);
202 // We test isError so that the compiler does not complain that we don't.
203 // If isError then i==0 which is what we want anyway.
209 UnicodeString::UnicodeString(const UChar
*text
) {
210 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
211 doAppend(text
, 0, -1);
214 UnicodeString::UnicodeString(const UChar
*text
,
215 int32_t textLength
) {
216 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
217 doAppend(text
, 0, textLength
);
220 UnicodeString::UnicodeString(UBool isTerminated
,
221 ConstChar16Ptr textPtr
,
222 int32_t textLength
) {
223 fUnion
.fFields
.fLengthAndFlags
= kReadonlyAlias
;
224 const UChar
*text
= textPtr
;
226 // treat as an empty string, do not alias
228 } else if(textLength
< -1 ||
229 (textLength
== -1 && !isTerminated
) ||
230 (textLength
>= 0 && isTerminated
&& text
[textLength
] != 0)
234 if(textLength
== -1) {
235 // text is terminated, or else it would have failed the above test
236 textLength
= u_strlen(text
);
238 setArray(const_cast<UChar
*>(text
), textLength
,
239 isTerminated
? textLength
+ 1 : textLength
);
243 UnicodeString::UnicodeString(UChar
*buff
,
245 int32_t buffCapacity
) {
246 fUnion
.fFields
.fLengthAndFlags
= kWritableAlias
;
248 // treat as an empty string, do not alias
250 } else if(buffLength
< -1 || buffCapacity
< 0 || buffLength
> buffCapacity
) {
253 if(buffLength
== -1) {
254 // fLength = u_strlen(buff); but do not look beyond buffCapacity
255 const UChar
*p
= buff
, *limit
= buff
+ buffCapacity
;
256 while(p
!= limit
&& *p
!= 0) {
259 buffLength
= (int32_t)(p
- buff
);
261 setArray(buff
, buffLength
, buffCapacity
);
265 UnicodeString::UnicodeString(const char *src
, int32_t length
, EInvariant
) {
266 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
268 // treat as an empty string
271 length
=(int32_t)uprv_strlen(src
);
273 if(cloneArrayIfNeeded(length
, length
, FALSE
)) {
274 u_charsToUChars(src
, getArrayStart(), length
);
282 #if U_CHARSET_IS_UTF8
284 UnicodeString::UnicodeString(const char *codepageData
) {
285 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
286 if(codepageData
!= 0) {
287 setToUTF8(codepageData
);
291 UnicodeString::UnicodeString(const char *codepageData
, int32_t dataLength
) {
292 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
293 // if there's nothing to convert, do nothing
294 if(codepageData
== 0 || dataLength
== 0 || dataLength
< -1) {
297 if(dataLength
== -1) {
298 dataLength
= (int32_t)uprv_strlen(codepageData
);
300 setToUTF8(StringPiece(codepageData
, dataLength
));
303 // else see unistr_cnv.cpp
306 UnicodeString::UnicodeString(const UnicodeString
& that
) {
307 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
311 UnicodeString::UnicodeString(UnicodeString
&&src
) U_NOEXCEPT
{
312 copyFieldsFrom(src
, TRUE
);
315 UnicodeString::UnicodeString(const UnicodeString
& that
,
317 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
318 setTo(that
, srcStart
);
321 UnicodeString::UnicodeString(const UnicodeString
& that
,
324 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
325 setTo(that
, srcStart
, srcLength
);
328 // Replaceable base class clone() default implementation, does not clone
330 Replaceable::clone() const {
334 // UnicodeString overrides clone() with a real implementation
336 UnicodeString::clone() const {
337 return new UnicodeString(*this);
340 //========================================
342 //========================================
// Fixed part of the extra room added when a buffer has to grow.
const int32_t kGrowSize = 128;

// Upper bound for a string capacity.
// One int32_t reference counter plus `capacity` UChars must fit into a
// 32-bit size_t (at least on a 32-bit platform). One more UChar is added
// for the NUL terminator, to avoid reallocation in getTerminatedBuffer(),
// and the byte count is rounded up to a multiple of 16.
// Hence capacity <= (0xfffffff0 - 4) / 2 - 1 = 0x7ffffff5.
// (More complicated checks could reach 0x7ffffffd without the rounding,
// but that does not seem worth it.)
const int32_t kMaxCapacity = 0x7ffffff5;

// Returns the capacity to request for a string of `newLength` units:
// newLength plus a quarter of it plus kGrowSize, capped at kMaxCapacity.
int32_t getGrowCapacity(int32_t newLength) {
  const int32_t extra = (newLength >> 2) + kGrowSize;
  const int32_t headroom = kMaxCapacity - newLength;
  return extra <= headroom ? newLength + extra : kMaxCapacity;
}
369 UnicodeString::allocate(int32_t capacity
) {
370 if(capacity
<= US_STACKBUF_SIZE
) {
371 fUnion
.fFields
.fLengthAndFlags
= kShortString
;
374 if(capacity
<= kMaxCapacity
) {
375 ++capacity
; // for the NUL
376 // Switch to size_t which is unsigned so that we can allocate up to 4GB.
377 // Reference counter + UChars.
378 size_t numBytes
= sizeof(int32_t) + (size_t)capacity
* U_SIZEOF_UCHAR
;
379 // Round up to a multiple of 16.
380 numBytes
= (numBytes
+ 15) & ~15;
381 int32_t *array
= (int32_t *) uprv_malloc(numBytes
);
383 // set initial refCount and point behind the refCount
385 numBytes
-= sizeof(int32_t);
387 // have fArray point to the first UChar
388 fUnion
.fFields
.fArray
= (UChar
*)array
;
389 fUnion
.fFields
.fCapacity
= (int32_t)(numBytes
/ U_SIZEOF_UCHAR
);
390 fUnion
.fFields
.fLengthAndFlags
= kLongString
;
394 fUnion
.fFields
.fLengthAndFlags
= kIsBogus
;
395 fUnion
.fFields
.fArray
= 0;
396 fUnion
.fFields
.fCapacity
= 0;
400 //========================================
402 //========================================
404 #ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
405 static u_atomic_int32_t finalLengthCounts
[0x400]; // UnicodeString::kMaxShortLength+1
406 static u_atomic_int32_t
beyondCount(0);
408 U_CAPI
void unistr_printLengths() {
410 for(i
= 0; i
<= 59; ++i
) {
411 printf("%2d, %9d\n", i
, (int32_t)finalLengthCounts
[i
]);
413 int32_t beyond
= beyondCount
;
414 for(; i
< UPRV_LENGTHOF(finalLengthCounts
); ++i
) {
415 beyond
+= finalLengthCounts
[i
];
417 printf(">59, %9d\n", beyond
);
421 UnicodeString::~UnicodeString()
423 #ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
424 // Count lengths of strings at the end of their lifetime.
425 // Useful for discussion of a desirable stack buffer size.
426 // Count the contents length, not the optional NUL terminator nor further capacity.
427 // Ignore open-buffer strings and strings which alias external storage.
428 if((fUnion
.fFields
.fLengthAndFlags
&(kOpenGetBuffer
|kReadonlyAlias
|kWritableAlias
)) == 0) {
429 if(hasShortLength()) {
430 umtx_atomic_inc(finalLengthCounts
+ getShortLength());
432 umtx_atomic_inc(&beyondCount
);
440 //========================================
442 //========================================
444 UnicodeString
UnicodeString::fromUTF8(StringPiece utf8
) {
445 UnicodeString result
;
446 result
.setToUTF8(utf8
);
450 UnicodeString
UnicodeString::fromUTF32(const UChar32
*utf32
, int32_t length
) {
451 UnicodeString result
;
453 // Most UTF-32 strings will be BMP-only and result in a same-length
454 // UTF-16 string. We overestimate the capacity just slightly,
455 // just in case there are a few supplementary characters.
456 if(length
<= US_STACKBUF_SIZE
) {
457 capacity
= US_STACKBUF_SIZE
;
459 capacity
= length
+ (length
>> 4) + 4;
462 UChar
*utf16
= result
.getBuffer(capacity
);
464 UErrorCode errorCode
= U_ZERO_ERROR
;
465 u_strFromUTF32WithSub(utf16
, result
.getCapacity(), &length16
,
467 0xfffd, // Substitution character.
468 NULL
, // Don't care about number of substitutions.
470 result
.releaseBuffer(length16
);
471 if(errorCode
== U_BUFFER_OVERFLOW_ERROR
) {
472 capacity
= length16
+ 1; // +1 for the terminating NUL.
474 } else if(U_FAILURE(errorCode
)) {
482 //========================================
484 //========================================
487 UnicodeString::operator=(const UnicodeString
&src
) {
488 return copyFrom(src
);
492 UnicodeString::fastCopyFrom(const UnicodeString
&src
) {
493 return copyFrom(src
, TRUE
);
497 UnicodeString::copyFrom(const UnicodeString
&src
, UBool fastCopy
) {
498 // if assigning to ourselves, do nothing
503 // is the right side bogus?
509 // delete the current contents
513 // empty string - use the stack buffer
518 // fLength>0 and not an "open" src.getBuffer(minCapacity)
519 fUnion
.fFields
.fLengthAndFlags
= src
.fUnion
.fFields
.fLengthAndFlags
;
520 switch(src
.fUnion
.fFields
.fLengthAndFlags
& kAllStorageFlags
) {
522 // short string using the stack buffer, do the same
523 uprv_memcpy(fUnion
.fStackFields
.fBuffer
, src
.fUnion
.fStackFields
.fBuffer
,
524 getShortLength() * U_SIZEOF_UCHAR
);
527 // src uses a refCounted string buffer, use that buffer with refCount
528 // src is const, use a cast - we don't actually change it
529 ((UnicodeString
&)src
).addRef();
530 // copy all fields, share the reference-counted buffer
531 fUnion
.fFields
.fArray
= src
.fUnion
.fFields
.fArray
;
532 fUnion
.fFields
.fCapacity
= src
.fUnion
.fFields
.fCapacity
;
533 if(!hasShortLength()) {
534 fUnion
.fFields
.fLength
= src
.fUnion
.fFields
.fLength
;
539 // src is a readonly alias, do the same
540 // -> maintain the readonly alias as such
541 fUnion
.fFields
.fArray
= src
.fUnion
.fFields
.fArray
;
542 fUnion
.fFields
.fCapacity
= src
.fUnion
.fFields
.fCapacity
;
543 if(!hasShortLength()) {
544 fUnion
.fFields
.fLength
= src
.fUnion
.fFields
.fLength
;
548 // else if(!fastCopy) fall through to case kWritableAlias
549 // -> allocate a new buffer and copy the contents
551 case kWritableAlias
: {
552 // src is a writable alias; we make a copy of that instead
553 int32_t srcLength
= src
.length();
554 if(allocate(srcLength
)) {
555 u_memcpy(getArrayStart(), src
.getArrayStart(), srcLength
);
556 setLength(srcLength
);
559 // if there is not enough memory, then fall through to setting to bogus
563 // if src is bogus, set ourselves to bogus
564 // do not call setToBogus() here because fArray and flags are not consistent here
565 fUnion
.fFields
.fLengthAndFlags
= kIsBogus
;
566 fUnion
.fFields
.fArray
= 0;
567 fUnion
.fFields
.fCapacity
= 0;
574 UnicodeString
&UnicodeString::operator=(UnicodeString
&&src
) U_NOEXCEPT
{
575 // No explicit check for self move assignment, consistent with standard library.
576 // Self move assignment causes no crash nor leak but might make the object bogus.
578 copyFieldsFrom(src
, TRUE
);
582 // Same as move assignment except without memory management.
583 void UnicodeString::copyFieldsFrom(UnicodeString
&src
, UBool setSrcToBogus
) U_NOEXCEPT
{
584 int16_t lengthAndFlags
= fUnion
.fFields
.fLengthAndFlags
= src
.fUnion
.fFields
.fLengthAndFlags
;
585 if(lengthAndFlags
& kUsingStackBuffer
) {
586 // Short string using the stack buffer, copy the contents.
587 // Check for self assignment to prevent "overlap in memcpy" warnings,
588 // although it should be harmless to copy a buffer to itself exactly.
590 uprv_memcpy(fUnion
.fStackFields
.fBuffer
, src
.fUnion
.fStackFields
.fBuffer
,
591 getShortLength() * U_SIZEOF_UCHAR
);
594 // In all other cases, copy all fields.
595 fUnion
.fFields
.fArray
= src
.fUnion
.fFields
.fArray
;
596 fUnion
.fFields
.fCapacity
= src
.fUnion
.fFields
.fCapacity
;
597 if(!hasShortLength()) {
598 fUnion
.fFields
.fLength
= src
.fUnion
.fFields
.fLength
;
601 // Set src to bogus without releasing any memory.
602 src
.fUnion
.fFields
.fLengthAndFlags
= kIsBogus
;
603 src
.fUnion
.fFields
.fArray
= NULL
;
604 src
.fUnion
.fFields
.fCapacity
= 0;
609 void UnicodeString::swap(UnicodeString
&other
) U_NOEXCEPT
{
610 UnicodeString temp
; // Empty short string: Known not to need releaseArray().
611 // Copy fields without resetting source values in between.
612 temp
.copyFieldsFrom(*this, FALSE
);
613 this->copyFieldsFrom(other
, FALSE
);
614 other
.copyFieldsFrom(temp
, FALSE
);
615 // Set temp to an empty string so that other's memory is not released twice.
616 temp
.fUnion
.fFields
.fLengthAndFlags
= kShortString
;
619 //========================================
620 // Miscellaneous operations
621 //========================================
623 UnicodeString
UnicodeString::unescape() const {
624 UnicodeString
result(length(), (UChar32
)0, (int32_t)0); // construct with capacity
625 if (result
.isBogus()) {
628 const UChar
*array
= getBuffer();
629 int32_t len
= length();
631 for (int32_t i
=0;;) {
633 result
.append(array
, prev
, len
- prev
);
636 if (array
[i
++] == 0x5C /*'\\'*/) {
637 result
.append(array
, prev
, (i
- 1) - prev
);
638 UChar32 c
= unescapeAt(i
); // advances i
640 result
.remove(); // return empty string
641 break; // invalid escape sequence
650 UChar32
UnicodeString::unescapeAt(int32_t &offset
) const {
651 return u_unescapeAt(UnicodeString_charAt
, &offset
, length(), (void*)this);
654 //========================================
655 // Read-only implementation
656 //========================================
658 UnicodeString::doEquals(const UnicodeString
&text
, int32_t len
) const {
659 // Requires: this & text not bogus and have same lengths.
660 // Byte-wise comparison works for equality regardless of endianness.
661 return uprv_memcmp(getArrayStart(), text
.getArrayStart(), len
* U_SIZEOF_UCHAR
) == 0;
665 UnicodeString::doCompare( int32_t start
,
667 const UChar
*srcChars
,
669 int32_t srcLength
) const
671 // compare illegal string values
676 // pin indices to legal values
677 pinIndices(start
, length
);
679 if(srcChars
== NULL
) {
680 // treat const UChar *srcChars==NULL as an empty string
681 return length
== 0 ? 0 : 1;
684 // get the correct pointer
685 const UChar
*chars
= getArrayStart();
688 srcChars
+= srcStart
;
693 // get the srcLength if necessary
695 srcLength
= u_strlen(srcChars
+ srcStart
);
698 // are we comparing different lengths?
699 if(length
!= srcLength
) {
700 if(length
< srcLength
) {
704 minLength
= srcLength
;
713 * note that uprv_memcmp() returns an int but we return an int8_t;
714 * we need to take care not to truncate the result -
715 * one way to do this is to right-shift the value to
716 * move the sign bit into the lower 8 bits and making sure that this
717 * does not become 0 itself
720 if(minLength
> 0 && chars
!= srcChars
) {
724 // big-endian: byte comparison works
725 result
= uprv_memcmp(chars
, srcChars
, minLength
* sizeof(UChar
));
727 return (int8_t)(result
>> 15 | 1);
730 // little-endian: compare UChar units
732 result
= ((int32_t)*(chars
++) - (int32_t)*(srcChars
++));
734 return (int8_t)(result
>> 15 | 1);
736 } while(--minLength
> 0);
742 /* String compare in code point order - doCompare() compares in code unit order. */
744 UnicodeString::doCompareCodePointOrder(int32_t start
,
746 const UChar
*srcChars
,
748 int32_t srcLength
) const
750 // compare illegal string values
751 // treat const UChar *srcChars==NULL as an empty string
756 // pin indices to legal values
757 pinIndices(start
, length
);
759 if(srcChars
== NULL
) {
760 srcStart
= srcLength
= 0;
763 int32_t diff
= uprv_strCompare(getArrayStart() + start
, length
, (srcChars
!=NULL
)?(srcChars
+ srcStart
):NULL
, srcLength
, FALSE
, TRUE
);
764 /* translate the 32-bit result into an 8-bit one */
766 return (int8_t)(diff
>> 15 | 1);
773 UnicodeString::getLength() const {
778 UnicodeString::getCharAt(int32_t offset
) const {
779 return charAt(offset
);
783 UnicodeString::getChar32At(int32_t offset
) const {
784 return char32At(offset
);
788 UnicodeString::char32At(int32_t offset
) const
790 int32_t len
= length();
791 if((uint32_t)offset
< (uint32_t)len
) {
792 const UChar
*array
= getArrayStart();
794 U16_GET(array
, 0, offset
, len
, c
);
797 return kInvalidUChar
;
802 UnicodeString::getChar32Start(int32_t offset
) const {
803 if((uint32_t)offset
< (uint32_t)length()) {
804 const UChar
*array
= getArrayStart();
805 U16_SET_CP_START(array
, 0, offset
);
813 UnicodeString::getChar32Limit(int32_t offset
) const {
814 int32_t len
= length();
815 if((uint32_t)offset
< (uint32_t)len
) {
816 const UChar
*array
= getArrayStart();
817 U16_SET_CP_LIMIT(array
, 0, offset
, len
);
825 UnicodeString::countChar32(int32_t start
, int32_t length
) const {
826 pinIndices(start
, length
);
827 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
828 return u_countChar32(getArrayStart()+start
, length
);
832 UnicodeString::hasMoreChar32Than(int32_t start
, int32_t length
, int32_t number
) const {
833 pinIndices(start
, length
);
834 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
835 return u_strHasMoreChar32Than(getArrayStart()+start
, length
, number
);
839 UnicodeString::moveIndex32(int32_t index
, int32_t delta
) const {
841 int32_t len
= length();
844 } else if(index
>len
) {
848 const UChar
*array
= getArrayStart();
850 U16_FWD_N(array
, index
, len
, delta
);
852 U16_BACK_N(array
, 0, index
, -delta
);
859 UnicodeString::doExtract(int32_t start
,
862 int32_t dstStart
) const
864 // pin indices to legal values
865 pinIndices(start
, length
);
867 // do not copy anything if we alias dst itself
868 const UChar
*array
= getArrayStart();
869 if(array
+ start
!= dst
+ dstStart
) {
870 us_arrayCopy(array
, start
, dst
, dstStart
, length
);
875 UnicodeString::extract(Char16Ptr dest
, int32_t destCapacity
,
876 UErrorCode
&errorCode
) const {
877 int32_t len
= length();
878 if(U_SUCCESS(errorCode
)) {
879 if(isBogus() || destCapacity
<0 || (destCapacity
>0 && dest
==0)) {
880 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
882 const UChar
*array
= getArrayStart();
883 if(len
>0 && len
<=destCapacity
&& array
!=dest
) {
884 u_memcpy(dest
, array
, len
);
886 return u_terminateUChars(dest
, destCapacity
, len
, &errorCode
);
894 UnicodeString::extract(int32_t start
,
897 int32_t targetCapacity
,
898 enum EInvariant
) const
900 // if the arguments are illegal, then do nothing
901 if(targetCapacity
< 0 || (targetCapacity
> 0 && target
== NULL
)) {
905 // pin the indices to legal values
906 pinIndices(start
, length
);
908 if(length
<= targetCapacity
) {
909 u_UCharsToChars(getArrayStart() + start
, target
, length
);
911 UErrorCode status
= U_ZERO_ERROR
;
912 return u_terminateChars(target
, targetCapacity
, length
, &status
);
916 UnicodeString::tempSubString(int32_t start
, int32_t len
) const {
917 pinIndices(start
, len
);
918 const UChar
*array
= getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer
920 array
=fUnion
.fStackFields
.fBuffer
; // anything not NULL because that would make an empty string
921 len
=-2; // bogus result string
923 return UnicodeString(FALSE
, array
+ start
, len
);
927 UnicodeString::toUTF8(int32_t start
, int32_t len
,
928 char *target
, int32_t capacity
) const {
929 pinIndices(start
, len
);
931 UErrorCode errorCode
= U_ZERO_ERROR
;
932 u_strToUTF8WithSub(target
, capacity
, &length8
,
933 getBuffer() + start
, len
,
934 0xFFFD, // Standard substitution character.
935 NULL
, // Don't care about number of substitutions.
940 #if U_CHARSET_IS_UTF8
943 UnicodeString::extract(int32_t start
, int32_t len
,
944 char *target
, uint32_t dstSize
) const {
945 // if the arguments are illegal, then do nothing
946 if(/*dstSize < 0 || */(dstSize
> 0 && target
== 0)) {
949 return toUTF8(start
, len
, target
, dstSize
<= 0x7fffffff ? (int32_t)dstSize
: 0x7fffffff);
952 // else see unistr_cnv.cpp
956 UnicodeString::extractBetween(int32_t start
,
958 UnicodeString
& target
) const {
961 doExtract(start
, limit
- start
, target
);
964 // When converting from UTF-16 to UTF-8, the result will have at most 3 times
965 // as many bytes as the source has UChars.
966 // The "worst cases" are writing systems like Indic, Thai and CJK with
969 UnicodeString::toUTF8(ByteSink
&sink
) const {
970 int32_t length16
= length();
972 char stackBuffer
[1024];
973 int32_t capacity
= (int32_t)sizeof(stackBuffer
);
974 UBool utf8IsOwned
= FALSE
;
975 char *utf8
= sink
.GetAppendBuffer(length16
< capacity
? length16
: capacity
,
977 stackBuffer
, capacity
,
980 UErrorCode errorCode
= U_ZERO_ERROR
;
981 u_strToUTF8WithSub(utf8
, capacity
, &length8
,
982 getBuffer(), length16
,
983 0xFFFD, // Standard substitution character.
984 NULL
, // Don't care about number of substitutions.
986 if(errorCode
== U_BUFFER_OVERFLOW_ERROR
) {
987 utf8
= (char *)uprv_malloc(length8
);
990 errorCode
= U_ZERO_ERROR
;
991 u_strToUTF8WithSub(utf8
, length8
, &length8
,
992 getBuffer(), length16
,
993 0xFFFD, // Standard substitution character.
994 NULL
, // Don't care about number of substitutions.
997 errorCode
= U_MEMORY_ALLOCATION_ERROR
;
1000 if(U_SUCCESS(errorCode
)) {
1001 sink
.Append(utf8
, length8
);
1011 UnicodeString::toUTF32(UChar32
*utf32
, int32_t capacity
, UErrorCode
&errorCode
) const {
1013 if(U_SUCCESS(errorCode
)) {
1014 // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
1015 u_strToUTF32WithSub(utf32
, capacity
, &length32
,
1016 getBuffer(), length(),
1017 0xfffd, // Substitution character.
1018 NULL
, // Don't care about number of substitutions.
1025 UnicodeString::indexOf(const UChar
*srcChars
,
1029 int32_t length
) const
1031 if(isBogus() || srcChars
== 0 || srcStart
< 0 || srcLength
== 0) {
1035 // UnicodeString does not find empty substrings
1036 if(srcLength
< 0 && srcChars
[srcStart
] == 0) {
1040 // get the indices within bounds
1041 pinIndices(start
, length
);
1043 // find the first occurrence of the substring
1044 const UChar
*array
= getArrayStart();
1045 const UChar
*match
= u_strFindFirst(array
+ start
, length
, srcChars
+ srcStart
, srcLength
);
1049 return (int32_t)(match
- array
);
1054 UnicodeString::doIndexOf(UChar c
,
1056 int32_t length
) const
1059 pinIndices(start
, length
);
1061 // find the first occurrence of c
1062 const UChar
*array
= getArrayStart();
1063 const UChar
*match
= u_memchr(array
+ start
, c
, length
);
1067 return (int32_t)(match
- array
);
1072 UnicodeString::doIndexOf(UChar32 c
,
1074 int32_t length
) const {
1076 pinIndices(start
, length
);
1078 // find the first occurrence of c
1079 const UChar
*array
= getArrayStart();
1080 const UChar
*match
= u_memchr32(array
+ start
, c
, length
);
1084 return (int32_t)(match
- array
);
1089 UnicodeString::lastIndexOf(const UChar
*srcChars
,
1093 int32_t length
) const
1095 if(isBogus() || srcChars
== 0 || srcStart
< 0 || srcLength
== 0) {
1099 // UnicodeString does not find empty substrings
1100 if(srcLength
< 0 && srcChars
[srcStart
] == 0) {
1104 // get the indices within bounds
1105 pinIndices(start
, length
);
1107 // find the last occurrence of the substring
1108 const UChar
*array
= getArrayStart();
1109 const UChar
*match
= u_strFindLast(array
+ start
, length
, srcChars
+ srcStart
, srcLength
);
1113 return (int32_t)(match
- array
);
1118 UnicodeString::doLastIndexOf(UChar c
,
1120 int32_t length
) const
1127 pinIndices(start
, length
);
1129 // find the last occurrence of c
1130 const UChar
*array
= getArrayStart();
1131 const UChar
*match
= u_memrchr(array
+ start
, c
, length
);
1135 return (int32_t)(match
- array
);
1140 UnicodeString::doLastIndexOf(UChar32 c
,
1142 int32_t length
) const {
1144 pinIndices(start
, length
);
1146 // find the last occurrence of c
1147 const UChar
*array
= getArrayStart();
1148 const UChar
*match
= u_memrchr32(array
+ start
, c
, length
);
1152 return (int32_t)(match
- array
);
1156 //========================================
1157 // Write implementation
1158 //========================================
1161 UnicodeString::findAndReplace(int32_t start
,
1163 const UnicodeString
& oldText
,
1166 const UnicodeString
& newText
,
1170 if(isBogus() || oldText
.isBogus() || newText
.isBogus()) {
1174 pinIndices(start
, length
);
1175 oldText
.pinIndices(oldStart
, oldLength
);
1176 newText
.pinIndices(newStart
, newLength
);
1178 if(oldLength
== 0) {
1182 while(length
> 0 && length
>= oldLength
) {
1183 int32_t pos
= indexOf(oldText
, oldStart
, oldLength
, start
, length
);
1185 // no more oldText's here: done
1188 // we found oldText, replace it by newText and go beyond it
1189 replace(pos
, oldLength
, newText
, newStart
, newLength
);
1190 length
-= pos
+ oldLength
- start
;
1191 start
= pos
+ newLength
;
1200 UnicodeString::setToBogus()
1204 fUnion
.fFields
.fLengthAndFlags
= kIsBogus
;
1205 fUnion
.fFields
.fArray
= 0;
1206 fUnion
.fFields
.fCapacity
= 0;
1209 // turn a bogus string into an empty one
1211 UnicodeString::unBogus() {
1212 if(fUnion
.fFields
.fLengthAndFlags
& kIsBogus
) {
1218 UnicodeString::getTerminatedBuffer() {
1222 UChar
*array
= getArrayStart();
1223 int32_t len
= length();
1224 if(len
< getCapacity()) {
1225 if(fUnion
.fFields
.fLengthAndFlags
& kBufferIsReadonly
) {
1226 // If len<capacity on a read-only alias, then array[len] is
1227 // either the original NUL (if constructed with (TRUE, s, length))
1228 // or one of the original string contents characters (if later truncated),
1229 // therefore we can assume that array[len] is initialized memory.
1230 if(array
[len
] == 0) {
1233 } else if(((fUnion
.fFields
.fLengthAndFlags
& kRefCounted
) == 0 || refCount() == 1)) {
1234 // kRefCounted: Do not write the NUL if the buffer is shared.
1235 // That is mostly safe, except when the length of one copy was modified
1236 // without copy-on-write, e.g., via truncate(newLength) or remove(void).
1237 // Then the NUL would be written into the middle of another copy's string.
1239 // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL.
1240 // Do not test if there is a NUL already because it might be uninitialized memory.
1241 // (That would be safe, but tools like valgrind & Purify would complain.)
1246 if(len
<INT32_MAX
&& cloneArrayIfNeeded(len
+1)) {
1247 array
= getArrayStart();
1255 // setTo() analogous to the readonly-aliasing constructor with the same signature
1257 UnicodeString::setTo(UBool isTerminated
,
1258 ConstChar16Ptr textPtr
,
1261 if(fUnion
.fFields
.fLengthAndFlags
& kOpenGetBuffer
) {
1262 // do not modify a string that has an "open" getBuffer(minCapacity)
1266 const UChar
*text
= textPtr
;
1268 // treat as an empty string, do not alias
1274 if( textLength
< -1 ||
1275 (textLength
== -1 && !isTerminated
) ||
1276 (textLength
>= 0 && isTerminated
&& text
[textLength
] != 0)
1284 if(textLength
== -1) {
1285 // text is terminated, or else it would have failed the above test
1286 textLength
= u_strlen(text
);
1288 fUnion
.fFields
.fLengthAndFlags
= kReadonlyAlias
;
1289 setArray((UChar
*)text
, textLength
, isTerminated
? textLength
+ 1 : textLength
);
1293 // setTo() analogous to the writable-aliasing constructor with the same signature
1295 UnicodeString::setTo(UChar
*buffer
,
1297 int32_t buffCapacity
) {
1298 if(fUnion
.fFields
.fLengthAndFlags
& kOpenGetBuffer
) {
1299 // do not modify a string that has an "open" getBuffer(minCapacity)
1303 if(buffer
== NULL
) {
1304 // treat as an empty string, do not alias
1310 if(buffLength
< -1 || buffCapacity
< 0 || buffLength
> buffCapacity
) {
1313 } else if(buffLength
== -1) {
1314 // buffLength = u_strlen(buff); but do not look beyond buffCapacity
1315 const UChar
*p
= buffer
, *limit
= buffer
+ buffCapacity
;
1316 while(p
!= limit
&& *p
!= 0) {
1319 buffLength
= (int32_t)(p
- buffer
);
1324 fUnion
.fFields
.fLengthAndFlags
= kWritableAlias
;
1325 setArray(buffer
, buffLength
, buffCapacity
);
1329 UnicodeString
&UnicodeString::setToUTF8(StringPiece utf8
) {
1331 int32_t length
= utf8
.length();
1333 // The UTF-16 string will be at most as long as the UTF-8 string.
1334 if(length
<= US_STACKBUF_SIZE
) {
1335 capacity
= US_STACKBUF_SIZE
;
1337 capacity
= length
+ 1; // +1 for the terminating NUL.
1339 UChar
*utf16
= getBuffer(capacity
);
1341 UErrorCode errorCode
= U_ZERO_ERROR
;
1342 u_strFromUTF8WithSub(utf16
, getCapacity(), &length16
,
1343 utf8
.data(), length
,
1344 0xfffd, // Substitution character.
1345 NULL
, // Don't care about number of substitutions.
1347 releaseBuffer(length16
);
1348 if(U_FAILURE(errorCode
)) {
1355 UnicodeString::setCharAt(int32_t offset
,
1358 int32_t len
= length();
1359 if(cloneArrayIfNeeded() && len
> 0) {
1362 } else if(offset
>= len
) {
1366 getArrayStart()[offset
] = c
;
1372 UnicodeString::replace(int32_t start
,
1375 UChar buffer
[U16_MAX_LENGTH
];
1377 UBool isError
= FALSE
;
1378 U16_APPEND(buffer
, count
, U16_MAX_LENGTH
, srcChar
, isError
);
1379 // We test isError so that the compiler does not complain that we don't.
1380 // If isError (srcChar is not a valid code point) then count==0 which means
1381 // we remove the source segment rather than replacing it with srcChar.
1382 return doReplace(start
, _length
, buffer
, 0, isError
? 0 : count
);
1386 UnicodeString::append(UChar32 srcChar
) {
1387 UChar buffer
[U16_MAX_LENGTH
];
1388 int32_t _length
= 0;
1389 UBool isError
= FALSE
;
1390 U16_APPEND(buffer
, _length
, U16_MAX_LENGTH
, srcChar
, isError
);
1391 // We test isError so that the compiler does not complain that we don't.
1392 // If isError then _length==0 which turns the doAppend() into a no-op anyway.
1393 return isError
? *this : doAppend(buffer
, 0, _length
);
1397 UnicodeString::doReplace( int32_t start
,
1399 const UnicodeString
& src
,
1403 // pin the indices to legal values
1404 src
.pinIndices(srcStart
, srcLength
);
1406 // get the characters from src
1407 // and replace the range in ourselves with them
1408 return doReplace(start
, length
, src
.getArrayStart(), srcStart
, srcLength
);
1412 UnicodeString::doReplace(int32_t start
,
1414 const UChar
*srcChars
,
1422 int32_t oldLength
= this->length();
1424 // optimize (read-only alias).remove(0, start) and .remove(start, end)
1425 if((fUnion
.fFields
.fLengthAndFlags
&kBufferIsReadonly
) && srcLength
== 0) {
1427 // remove prefix by adjusting the array pointer
1429 fUnion
.fFields
.fArray
+= length
;
1430 fUnion
.fFields
.fCapacity
-= length
;
1431 setLength(oldLength
- length
);
1435 if(length
>= (oldLength
- start
)) {
1436 // remove suffix by reducing the length (like truncate())
1438 fUnion
.fFields
.fCapacity
= start
; // not NUL-terminated any more
1444 if(start
== oldLength
) {
1445 return doAppend(srcChars
, srcStart
, srcLength
);
1451 // Perform all remaining operations relative to srcChars + srcStart.
1452 // From this point forward, do not use srcStart.
1453 srcChars
+= srcStart
;
1454 if (srcLength
< 0) {
1455 // get the srcLength if necessary
1456 srcLength
= u_strlen(srcChars
);
1460 // pin the indices to legal values
1461 pinIndices(start
, length
);
1463 // Calculate the size of the string after the replace.
1464 // Avoid int32_t overflow.
1465 int32_t newLength
= oldLength
- length
;
1466 if(srcLength
> (INT32_MAX
- newLength
)) {
1470 newLength
+= srcLength
;
1472 // Check for insertion into ourself
1473 const UChar
*oldArray
= getArrayStart();
1474 if (isBufferWritable() &&
1475 oldArray
< srcChars
+ srcLength
&&
1476 srcChars
< oldArray
+ oldLength
) {
1477 // Copy into a new UnicodeString and start over
1478 UnicodeString
copy(srcChars
, srcLength
);
1479 if (copy
.isBogus()) {
1483 return doReplace(start
, length
, copy
.getArrayStart(), 0, srcLength
);
1486 // cloneArrayIfNeeded(doCopyArray=FALSE) may change fArray but will not copy the current contents;
1487 // therefore we need to keep the current fArray
1488 UChar oldStackBuffer
[US_STACKBUF_SIZE
];
1489 if((fUnion
.fFields
.fLengthAndFlags
&kUsingStackBuffer
) && (newLength
> US_STACKBUF_SIZE
)) {
1490 // copy the stack buffer contents because it will be overwritten with
1491 // fUnion.fFields values
1492 u_memcpy(oldStackBuffer
, oldArray
, oldLength
);
1493 oldArray
= oldStackBuffer
;
1496 // clone our array and allocate a bigger array if needed
1497 int32_t *bufferToDelete
= 0;
1498 if(!cloneArrayIfNeeded(newLength
, getGrowCapacity(newLength
),
1499 FALSE
, &bufferToDelete
)
1504 // now do the replace
1506 UChar
*newArray
= getArrayStart();
1507 if(newArray
!= oldArray
) {
1508 // if fArray changed, then we need to copy everything except what will change
1509 us_arrayCopy(oldArray
, 0, newArray
, 0, start
);
1510 us_arrayCopy(oldArray
, start
+ length
,
1511 newArray
, start
+ srcLength
,
1512 oldLength
- (start
+ length
));
1513 } else if(length
!= srcLength
) {
1514 // fArray did not change; copy only the portion that isn't changing, leaving a hole
1515 us_arrayCopy(oldArray
, start
+ length
,
1516 newArray
, start
+ srcLength
,
1517 oldLength
- (start
+ length
));
1520 // now fill in the hole with the new string
1521 us_arrayCopy(srcChars
, 0, newArray
, start
, srcLength
);
1523 setLength(newLength
);
1525 // delayed delete in case srcChars == fArray when we started, and
1526 // to keep oldArray alive for the above operations
1527 if (bufferToDelete
) {
1528 uprv_free(bufferToDelete
);
1534 // Versions of doReplace() only for append() variants.
1535 // doReplace() and doAppend() optimize for different cases.
1538 UnicodeString::doAppend(const UnicodeString
& src
, int32_t srcStart
, int32_t srcLength
) {
1539 if(srcLength
== 0) {
1543 // pin the indices to legal values
1544 src
.pinIndices(srcStart
, srcLength
);
1545 return doAppend(src
.getArrayStart(), srcStart
, srcLength
);
1549 UnicodeString::doAppend(const UChar
*srcChars
, int32_t srcStart
, int32_t srcLength
) {
1550 if(!isWritable() || srcLength
== 0 || srcChars
== NULL
) {
1554 // Perform all remaining operations relative to srcChars + srcStart.
1555 // From this point forward, do not use srcStart.
1556 srcChars
+= srcStart
;
1559 // get the srcLength if necessary
1560 if((srcLength
= u_strlen(srcChars
)) == 0) {
1565 int32_t oldLength
= length();
1566 int32_t newLength
= oldLength
+ srcLength
;
1568 // Check for append onto ourself
1569 const UChar
* oldArray
= getArrayStart();
1570 if (isBufferWritable() &&
1571 oldArray
< srcChars
+ srcLength
&&
1572 srcChars
< oldArray
+ oldLength
) {
1573 // Copy into a new UnicodeString and start over
1574 UnicodeString
copy(srcChars
, srcLength
);
1575 if (copy
.isBogus()) {
1579 return doAppend(copy
.getArrayStart(), 0, srcLength
);
1582 // optimize append() onto a large-enough, owned string
1583 if((newLength
<= getCapacity() && isBufferWritable()) ||
1584 cloneArrayIfNeeded(newLength
, getGrowCapacity(newLength
))) {
1585 UChar
*newArray
= getArrayStart();
1586 // Do not copy characters when
1587 // UChar *buffer=str.getAppendBuffer(...);
1589 // str.append(buffer, length);
1591 // str.appendString(buffer, length)
1593 if(srcChars
!= newArray
+ oldLength
) {
1594 us_arrayCopy(srcChars
, 0, newArray
, oldLength
, srcLength
);
1596 setLength(newLength
);
1605 UnicodeString::handleReplaceBetween(int32_t start
,
1607 const UnicodeString
& text
) {
1608 replaceBetween(start
, limit
, text
);
1615 UnicodeString::copy(int32_t start
, int32_t limit
, int32_t dest
) {
1616 if (limit
<= start
) {
1617 return; // Nothing to do; avoid bogus malloc call
1619 UChar
* text
= (UChar
*) uprv_malloc( sizeof(UChar
) * (limit
- start
) );
1620 // Check to make sure text is not null.
1622 extractBetween(start
, limit
, text
, 0);
1623 insert(dest
, text
, 0, limit
- start
);
1631 * NOTE: This is for the Replaceable class. There is no rep.cpp,
1632 * so we implement this function here.
1634 UBool
Replaceable::hasMetaData() const {
1641 UBool
UnicodeString::hasMetaData() const {
1646 UnicodeString::doReverse(int32_t start
, int32_t length
) {
1647 if(length
<= 1 || !cloneArrayIfNeeded()) {
1651 // pin the indices to legal values
1652 pinIndices(start
, length
);
1653 if(length
<= 1) { // pinIndices() might have shrunk the length
1657 UChar
*left
= getArrayStart() + start
;
1658 UChar
*right
= left
+ length
- 1; // -1 for inclusive boundary (length>=2)
1660 UBool hasSupplementary
= FALSE
;
1662 // Before the loop we know left<right because length>=2.
1664 hasSupplementary
|= (UBool
)U16_IS_LEAD(swap
= *left
);
1665 hasSupplementary
|= (UBool
)U16_IS_LEAD(*left
++ = *right
);
1667 } while(left
< right
);
1668 // Make sure to test the middle code unit of an odd-length string.
1669 // Redundant if the length is even.
1670 hasSupplementary
|= (UBool
)U16_IS_LEAD(*left
);
1672 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1673 if(hasSupplementary
) {
1676 left
= getArrayStart() + start
;
1677 right
= left
+ length
- 1; // -1 so that we can look at *(left+1) if left<right
1678 while(left
< right
) {
1679 if(U16_IS_TRAIL(swap
= *left
) && U16_IS_LEAD(swap2
= *(left
+ 1))) {
1692 UnicodeString::padLeading(int32_t targetLength
,
1695 int32_t oldLength
= length();
1696 if(oldLength
>= targetLength
|| !cloneArrayIfNeeded(targetLength
)) {
1699 // move contents up by padding width
1700 UChar
*array
= getArrayStart();
1701 int32_t start
= targetLength
- oldLength
;
1702 us_arrayCopy(array
, 0, array
, start
, oldLength
);
1704 // fill in padding character
1705 while(--start
>= 0) {
1706 array
[start
] = padChar
;
1708 setLength(targetLength
);
1714 UnicodeString::padTrailing(int32_t targetLength
,
1717 int32_t oldLength
= length();
1718 if(oldLength
>= targetLength
|| !cloneArrayIfNeeded(targetLength
)) {
1721 // fill in padding character
1722 UChar
*array
= getArrayStart();
1723 int32_t length
= targetLength
;
1724 while(--length
>= oldLength
) {
1725 array
[length
] = padChar
;
1727 setLength(targetLength
);
1732 //========================================
1734 //========================================
1736 UnicodeString::doHashCode() const
1738 /* Delegate hash computation to uhash. This makes UnicodeString
1739 * hashing consistent with UChar* hashing. */
1740 int32_t hashCode
= ustr_hashUCharsN(getArrayStart(), length());
1741 if (hashCode
== kInvalidHashCode
) {
1742 hashCode
= kEmptyHashCode
;
1747 //========================================
1749 //========================================
1752 UnicodeString::getBuffer(int32_t minCapacity
) {
1753 if(minCapacity
>=-1 && cloneArrayIfNeeded(minCapacity
)) {
1754 fUnion
.fFields
.fLengthAndFlags
|=kOpenGetBuffer
;
1756 return getArrayStart();
1763 UnicodeString::releaseBuffer(int32_t newLength
) {
1764 if(fUnion
.fFields
.fLengthAndFlags
&kOpenGetBuffer
&& newLength
>=-1) {
1765 // set the new fLength
1766 int32_t capacity
=getCapacity();
1768 // the new length is the string length, capped by fCapacity
1769 const UChar
*array
=getArrayStart(), *p
=array
, *limit
=array
+capacity
;
1770 while(p
<limit
&& *p
!=0) {
1773 newLength
=(int32_t)(p
-array
);
1774 } else if(newLength
>capacity
) {
1777 setLength(newLength
);
1778 fUnion
.fFields
.fLengthAndFlags
&=~kOpenGetBuffer
;
1782 //========================================
1784 //========================================
1786 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity
,
1787 int32_t growCapacity
,
1789 int32_t **pBufferToDelete
,
1791 // default parameters need to be static, therefore
1792 // the defaults are -1 to have convenience defaults
1793 if(newCapacity
== -1) {
1794 newCapacity
= getCapacity();
1797 // while a getBuffer(minCapacity) is "open",
1798 // prevent any modifications of the string by returning FALSE here
1799 // if the string is bogus, then only an assignment or similar can revive it
1805 * We need to make a copy of the array if
1806 * the buffer is read-only, or
1807 * the buffer is refCounted (shared), and refCount>1, or
1808 * the buffer is too small.
1809 * Return FALSE if memory could not be allocated.
1812 fUnion
.fFields
.fLengthAndFlags
& kBufferIsReadonly
||
1813 (fUnion
.fFields
.fLengthAndFlags
& kRefCounted
&& refCount() > 1) ||
1814 newCapacity
> getCapacity()
1816 // check growCapacity for default value and use of the stack buffer
1817 if(growCapacity
< 0) {
1818 growCapacity
= newCapacity
;
1819 } else if(newCapacity
<= US_STACKBUF_SIZE
&& growCapacity
> US_STACKBUF_SIZE
) {
1820 growCapacity
= US_STACKBUF_SIZE
;
1824 UChar oldStackBuffer
[US_STACKBUF_SIZE
];
1826 int32_t oldLength
= length();
1827 int16_t flags
= fUnion
.fFields
.fLengthAndFlags
;
1829 if(flags
&kUsingStackBuffer
) {
1830 U_ASSERT(!(flags
&kRefCounted
)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */
1831 if(doCopyArray
&& growCapacity
> US_STACKBUF_SIZE
) {
1832 // copy the stack buffer contents because it will be overwritten with
1833 // fUnion.fFields values
1834 us_arrayCopy(fUnion
.fStackFields
.fBuffer
, 0, oldStackBuffer
, 0, oldLength
);
1835 oldArray
= oldStackBuffer
;
1837 oldArray
= NULL
; // no need to copy from the stack buffer to itself
1840 oldArray
= fUnion
.fFields
.fArray
;
1841 U_ASSERT(oldArray
!=NULL
); /* when stack buffer is not used, oldArray must have a non-NULL reference */
1844 // allocate a new array
1845 if(allocate(growCapacity
) ||
1846 (newCapacity
< growCapacity
&& allocate(newCapacity
))
1849 // copy the contents
1850 // do not copy more than what fits - it may be smaller than before
1851 int32_t minLength
= oldLength
;
1852 newCapacity
= getCapacity();
1853 if(newCapacity
< minLength
) {
1854 minLength
= newCapacity
;
1856 if(oldArray
!= NULL
) {
1857 us_arrayCopy(oldArray
, 0, getArrayStart(), 0, minLength
);
1859 setLength(minLength
);
1864 // release the old array
1865 if(flags
& kRefCounted
) {
1866 // the array is refCounted; decrement and release if 0
1867 u_atomic_int32_t
*pRefCount
= ((u_atomic_int32_t
*)oldArray
- 1);
1868 if(umtx_atomic_dec(pRefCount
) == 0) {
1869 if(pBufferToDelete
== 0) {
1870 // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t
1871 // is defined as volatile. (Volatile has useful non-standard behavior
1872 // with this compiler.)
1873 uprv_free((void *)pRefCount
);
1875 // the caller requested to delete it himself
1876 *pBufferToDelete
= (int32_t *)pRefCount
;
1881 // not enough memory for growCapacity and not even for the smaller newCapacity
1882 // reset the old values for setToBogus() to release the array
1883 if(!(flags
&kUsingStackBuffer
)) {
1884 fUnion
.fFields
.fArray
= oldArray
;
1886 fUnion
.fFields
.fLengthAndFlags
= flags
;
1894 // UnicodeStringAppendable ------------------------------------------------- ***
1896 UnicodeStringAppendable::~UnicodeStringAppendable() {}
1899 UnicodeStringAppendable::appendCodeUnit(UChar c
) {
1900 return str
.doAppend(&c
, 0, 1).isWritable();
1904 UnicodeStringAppendable::appendCodePoint(UChar32 c
) {
1905 UChar buffer
[U16_MAX_LENGTH
];
1906 int32_t cLength
= 0;
1907 UBool isError
= FALSE
;
1908 U16_APPEND(buffer
, cLength
, U16_MAX_LENGTH
, c
, isError
);
1909 return !isError
&& str
.doAppend(buffer
, 0, cLength
).isWritable();
1913 UnicodeStringAppendable::appendString(const UChar
*s
, int32_t length
) {
1914 return str
.doAppend(s
, 0, length
).isWritable();
1918 UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity
) {
1919 return str
.cloneArrayIfNeeded(str
.length() + appendCapacity
);
1923 UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity
,
1924 int32_t desiredCapacityHint
,
1925 UChar
*scratch
, int32_t scratchCapacity
,
1926 int32_t *resultCapacity
) {
1927 if(minCapacity
< 1 || scratchCapacity
< minCapacity
) {
1928 *resultCapacity
= 0;
1931 int32_t oldLength
= str
.length();
1932 if(minCapacity
<= (kMaxCapacity
- oldLength
) &&
1933 desiredCapacityHint
<= (kMaxCapacity
- oldLength
) &&
1934 str
.cloneArrayIfNeeded(oldLength
+ minCapacity
, oldLength
+ desiredCapacityHint
)) {
1935 *resultCapacity
= str
.getCapacity() - oldLength
;
1936 return str
.getArrayStart() + oldLength
;
1938 *resultCapacity
= scratchCapacity
;
1946 U_CAPI
int32_t U_EXPORT2
1947 uhash_hashUnicodeString(const UElement key
) {
1948 const UnicodeString
*str
= (const UnicodeString
*) key
.pointer
;
1949 return (str
== NULL
) ? 0 : str
->hashCode();
1952 // Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
1953 // does not depend on hashtable code.
1954 U_CAPI UBool U_EXPORT2
1955 uhash_compareUnicodeString(const UElement key1
, const UElement key2
) {
1956 const UnicodeString
*str1
= (const UnicodeString
*) key1
.pointer
;
1957 const UnicodeString
*str2
= (const UnicodeString
*) key2
.pointer
;
1961 if (str1
== NULL
|| str2
== NULL
) {
1964 return *str1
== *str2
;
1967 #ifdef U_STATIC_IMPLEMENTATION
1969 This should never be called. It is defined here to make sure that the
1970 virtual vector deleting destructor is defined within unistr.cpp.
1971 The vector deleting destructor is already a part of UObject,
1972 but defining it here makes sure that it is included with this object file.
1973 This makes sure that static library dependencies are kept to a minimum.
1975 static void uprv_UnicodeStringDummy(void) {
1976 delete [] (new UnicodeString
[2]);