2 ******************************************************************************
3 * Copyright (C) 1999-2008, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 ******************************************************************************
9 * Modification History:
11 * Date Name Description
12 * 09/25/98 stephen Creation.
13 * 04/20/99 stephen Overhauled per 4/16 code review.
14 * 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX
15 * 11/18/99 aliu Added handleReplaceBetween() to make inherit from
17 * 06/25/01 grhoten Removed the dependency on iostream
18 ******************************************************************************
21 #include "unicode/utypes.h"
22 #include "unicode/putil.h"
25 #include "unicode/ustring.h"
26 #include "unicode/unistr.h"
33 #if U_IOSTREAM_SOURCE >= 199711
36 #elif U_IOSTREAM_SOURCE >= 198506
42 print(const UnicodeString
& s
,
47 for(int i
= 0; i
< s
.length(); ++i
) {
49 if(c
>= 0x007E || c
< 0x0020)
50 cout
<< "[0x" << hex
<< s
[i
] << "]";
64 for(int i
= 0; i
< len
; ++i
) {
66 if(c
>= 0x007E || c
< 0x0020)
67 cout
<< "[0x" << hex
<< s
[i
] << "]";
76 // Local function definitions for now
// Copies `count` UChars from src+srcStart to dst+dstStart.
// The copy goes through uprv_memmove() with a byte count of
// count*sizeof(*src), since (as noted below) the areas may overlap.
78 // need to copy areas that may overlap
81 us_arrayCopy(const UChar
*src
, int32_t srcStart
,
82 UChar
*dst
, int32_t dstStart
, int32_t count
)
85 uprv_memmove(dst
+dstStart
, src
+srcStart
, (size_t)(count
*sizeof(*src
)));
89 // u_unescapeAt() callback to get a UChar from a UnicodeString
91 static UChar U_CALLCONV
92 UnicodeString_charAt(int32_t offset
, void *context
) {
93 return ((U_NAMESPACE_QUALIFIER UnicodeString
*) context
)->charAt(offset
);
/* The Replaceable virtual destructor can't be defined in the header
   due to how AIX works with multiple definitions of virtual functions.
*/
102 Replaceable::~Replaceable() {}
103 Replaceable::Replaceable() {}
104 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString
)
// Binary operator+ taking two UnicodeStrings.
// The visible statement builds a temporary UnicodeString through the
// (capacity, UChar32, count) constructor with capacity
// s1.length()+s2.length()+1, fill character 0 and count 0; the expression
// continues past the trailing '.' on lines not shown here.
106 UnicodeString U_EXPORT2
107 operator+ (const UnicodeString
&s1
, const UnicodeString
&s2
) {
109 UnicodeString(s1
.length()+s2
.length()+1, (UChar32
)0, 0).
114 //========================================
115 // Reference Counting functions, put at top of file so that optimizing compilers
116 // have a chance to automatically inline.
117 //========================================
120 UnicodeString::addRef()
121 { umtx_atomic_inc((int32_t *)fUnion
.fFields
.fArray
- 1);}
124 UnicodeString::removeRef()
125 { return umtx_atomic_dec((int32_t *)fUnion
.fFields
.fArray
- 1);}
// Reads the reference counter: the int32_t located directly in front of
// fUnion.fFields.fArray (the same slot that addRef()/removeRef() update).
128 UnicodeString::refCount() const
131 // Note: without the lock to force a memory barrier, we might see a very
132 // stale value on some multi-processor systems.
133 int32_t count
= *((int32_t *)fUnion
.fFields
.fArray
- 1);
139 UnicodeString::releaseArray() {
140 if((fFlags
& kRefCounted
) && removeRef() == 0) {
141 uprv_free((int32_t *)fUnion
.fFields
.fArray
- 1);
147 //========================================
149 //========================================
150 UnicodeString::UnicodeString()
155 UnicodeString::UnicodeString(int32_t capacity
, UChar32 c
, int32_t count
)
159 if(count
<= 0 || (uint32_t)c
> 0x10ffff) {
160 // just allocate and do not do anything else
163 // count > 0, allocate and fill the new string with count c's
164 int32_t unitCount
= UTF_CHAR_LENGTH(c
), length
= count
* unitCount
;
165 if(capacity
< length
) {
168 if(allocate(capacity
)) {
169 UChar
*array
= getArrayStart();
172 // fill the new string with c
174 // fill with length UChars
176 array
[i
++] = (UChar
)c
;
179 // get the code units for c
180 UChar units
[UTF_MAX_CHAR_LENGTH
];
181 UTF_APPEND_CHAR_UNSAFE(units
, i
, c
);
183 // now it must be i==unitCount
186 // for Unicode, unitCount can only be 1, 2, 3, or 4
187 // 1 is handled above
190 while(unitIdx
< unitCount
) {
191 array
[i
++]=units
[unitIdx
++];
200 UnicodeString::UnicodeString(UChar ch
)
204 fUnion
.fStackBuffer
[0] = ch
;
207 UnicodeString::UnicodeString(UChar32 ch
)
212 UBool isError
= FALSE
;
213 U16_APPEND(fUnion
.fStackBuffer
, i
, US_STACKBUF_SIZE
, ch
, isError
);
214 fShortLength
= (int8_t)i
;
217 UnicodeString::UnicodeString(const UChar
*text
)
221 doReplace(0, 0, text
, 0, -1);
224 UnicodeString::UnicodeString(const UChar
*text
,
229 doReplace(0, 0, text
, 0, textLength
);
232 UnicodeString::UnicodeString(UBool isTerminated
,
236 fFlags(kReadonlyAlias
)
239 // treat as an empty string, do not alias
241 } else if(textLength
< -1 ||
242 (textLength
== -1 && !isTerminated
) ||
243 (textLength
>= 0 && isTerminated
&& text
[textLength
] != 0)
247 if(textLength
== -1) {
248 // text is terminated, or else it would have failed the above test
249 textLength
= u_strlen(text
);
251 setArray((UChar
*)text
, textLength
, isTerminated
? textLength
+ 1 : textLength
);
255 UnicodeString::UnicodeString(UChar
*buff
,
257 int32_t buffCapacity
)
259 fFlags(kWritableAlias
)
262 // treat as an empty string, do not alias
264 } else if(buffLength
< -1 || buffCapacity
< 0 || buffLength
> buffCapacity
) {
267 if(buffLength
== -1) {
268 // fLength = u_strlen(buff); but do not look beyond buffCapacity
269 const UChar
*p
= buff
, *limit
= buff
+ buffCapacity
;
270 while(p
!= limit
&& *p
!= 0) {
273 buffLength
= (int32_t)(p
- buff
);
275 setArray(buff
, buffLength
, buffCapacity
);
279 UnicodeString::UnicodeString(const char *src
, int32_t length
, EInvariant
)
284 // treat as an empty string
287 length
=(int32_t)uprv_strlen(src
);
289 if(cloneArrayIfNeeded(length
, length
, FALSE
)) {
290 u_charsToUChars(src
, getArrayStart(), length
);
298 UnicodeString::UnicodeString(const UnicodeString
& that
)
306 UnicodeString::UnicodeString(const UnicodeString
& that
,
312 setTo(that
, srcStart
);
315 UnicodeString::UnicodeString(const UnicodeString
& that
,
322 setTo(that
, srcStart
, srcLength
);
325 // Replaceable base class clone() default implementation, does not clone
327 Replaceable::clone() const {
331 // UnicodeString overrides clone() with a real implementation
333 UnicodeString::clone() const {
334 return new UnicodeString(*this);
337 //========================================
339 //========================================
// Sets up storage for `capacity` UChars.
// A capacity of at most US_STACKBUF_SIZE selects kShortString. Larger
// requests uprv_malloc() a block of int32_t words sized as described in the
// comments below, point fArray at it, derive fCapacity from the word count
// minus one (the refCount word), and select kLongString.
// The last two assignments reset fArray and fCapacity to 0.
// NOTE(review): presumably that reset is the allocation-failure path - the
// lines that decide this are not visible here; confirm.
342 UnicodeString::allocate(int32_t capacity
) {
343 if(capacity
<= US_STACKBUF_SIZE
) {
344 fFlags
= kShortString
;
346 // count bytes for the refCounter and the string capacity, and
347 // round up to a multiple of 16; then divide by 4 and allocate int32_t's
348 // to be safely aligned for the refCount
349 int32_t words
= (int32_t)(((sizeof(int32_t) + capacity
* U_SIZEOF_UCHAR
+ 15) & ~15) >> 2);
350 int32_t *array
= (int32_t*) uprv_malloc( sizeof(int32_t) * words
);
352 // set initial refCount and point behind the refCount
355 // have fArray point to the first UChar
356 fUnion
.fFields
.fArray
= (UChar
*)array
;
357 fUnion
.fFields
.fCapacity
= (int32_t)((words
- 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR
));
358 fFlags
= kLongString
;
361 fUnion
.fFields
.fArray
= 0;
362 fUnion
.fFields
.fCapacity
= 0;
370 //========================================
372 //========================================
373 UnicodeString::~UnicodeString()
379 //========================================
381 //========================================
384 UnicodeString::operator=(const UnicodeString
&src
) {
385 return copyFrom(src
);
389 UnicodeString::fastCopyFrom(const UnicodeString
&src
) {
390 return copyFrom(src
, TRUE
);
// Makes this string a copy of src; shared implementation behind
// operator=() and fastCopyFrom().
// Self-assignment is a no-op and a bogus src is handled separately (see
// the comments below). Otherwise the length is copied first and the
// storage is then set up according to how src holds its buffer:
//  - stack buffer: the UChars are memcpy'd into our own stack buffer
//  - refCounted buffer: the buffer is shared and its refCount incremented
//  - readonly alias: the alias is kept (per the comments, only if fastCopy)
//  - writable alias: a new buffer is allocated and the contents copied
// When allocation fails, or src is bogus, fArray/fCapacity are zeroed.
394 UnicodeString::copyFrom(const UnicodeString
&src
, UBool fastCopy
) {
// NOTE(review): in well-defined C++ `this` is never null, so compilers may
// remove the `this == 0` test; do not rely on it.
395 // if assigning to ourselves, do nothing
396 if(this == 0 || this == &src
) {
// NOTE(review): likewise a reference cannot legally be null, so the
// `&src == 0` test may be optimized away.
400 // is the right side bogus?
401 if(&src
== 0 || src
.isBogus()) {
406 // delete the current contents
410 // empty string - use the stack buffer
415 // we always copy the length
416 int32_t srcLength
= src
.length();
417 setLength(srcLength
);
419 // fLength>0 and not an "open" src.getBuffer(minCapacity)
422 // short string using the stack buffer, do the same
423 fFlags
= kShortString
;
424 uprv_memcpy(fUnion
.fStackBuffer
, src
.fUnion
.fStackBuffer
, fShortLength
* U_SIZEOF_UCHAR
);
427 // src uses a refCounted string buffer, use that buffer with refCount
428 // src is const, use a cast - we don't really change it
429 ((UnicodeString
&)src
).addRef();
430 // copy all fields, share the reference-counted buffer
431 fUnion
.fFields
.fArray
= src
.fUnion
.fFields
.fArray
;
432 fUnion
.fFields
.fCapacity
= src
.fUnion
.fFields
.fCapacity
;
437 // src is a readonly alias, do the same
438 // -> maintain the readonly alias as such
439 fUnion
.fFields
.fArray
= src
.fUnion
.fFields
.fArray
;
440 fUnion
.fFields
.fCapacity
= src
.fUnion
.fFields
.fCapacity
;
444 // else if(!fastCopy) fall through to case kWritableAlias
445 // -> allocate a new buffer and copy the contents
447 // src is a writable alias; we make a copy of that instead
448 if(allocate(srcLength
)) {
449 uprv_memcpy(getArrayStart(), src
.getArrayStart(), srcLength
* U_SIZEOF_UCHAR
);
452 // if there is not enough memory, then fall through to setting to bogus
454 // if src is bogus, set ourselves to bogus
455 // do not call setToBogus() here because fArray and fFlags are not consistent here
457 fUnion
.fFields
.fArray
= 0;
458 fUnion
.fFields
.fCapacity
= 0;
466 //========================================
467 // Miscellaneous operations
468 //========================================
// Returns a copy of this string in which backslash escape sequences have
// been processed.
// The result is pre-sized to length(). Runs of text between backslashes
// (0x5C) are appended unchanged; each escape is handed to unescapeAt(),
// which advances the index. On an invalid escape sequence the result is
// emptied and the loop stops.
470 UnicodeString
UnicodeString::unescape() const {
471 UnicodeString
result(length(), (UChar32
)0, (int32_t)0); // construct with capacity
472 const UChar
*array
= getBuffer();
473 int32_t len
= length();
475 for (int32_t i
=0;;) {
477 result
.append(array
, prev
, len
- prev
);
480 if (array
[i
++] == 0x5C /*'\\'*/) {
481 result
.append(array
, prev
, (i
- 1) - prev
);
482 UChar32 c
= unescapeAt(i
); // advances i
484 result
.remove(); // return empty string
485 break; // invalid escape sequence
494 UChar32
UnicodeString::unescapeAt(int32_t &offset
) const {
495 return u_unescapeAt(UnicodeString_charAt
, &offset
, length(), (void*)this);
498 //========================================
499 // Read-only implementation
500 //========================================
502 UnicodeString::doCompare( int32_t start
,
504 const UChar
*srcChars
,
506 int32_t srcLength
) const
508 // compare illegal string values
509 // treat const UChar *srcChars==NULL as an empty string
514 // pin indices to legal values
515 pinIndices(start
, length
);
517 if(srcChars
== NULL
) {
518 srcStart
= srcLength
= 0;
521 // get the correct pointer
522 const UChar
*chars
= getArrayStart();
525 srcChars
+= srcStart
;
530 // get the srcLength if necessary
532 srcLength
= u_strlen(srcChars
+ srcStart
);
535 // are we comparing different lengths?
536 if(length
!= srcLength
) {
537 if(length
< srcLength
) {
541 minLength
= srcLength
;
550 * note that uprv_memcmp() returns an int but we return an int8_t;
551 * we need to take care not to truncate the result -
552 * one way to do this is to right-shift the value to
553 * move the sign bit into the lower 8 bits and making sure that this
554 * does not become 0 itself
557 if(minLength
> 0 && chars
!= srcChars
) {
561 // big-endian: byte comparison works
562 result
= uprv_memcmp(chars
, srcChars
, minLength
* sizeof(UChar
));
564 return (int8_t)(result
>> 15 | 1);
567 // little-endian: compare UChar units
569 result
= ((int32_t)*(chars
++) - (int32_t)*(srcChars
++));
571 return (int8_t)(result
>> 15 | 1);
573 } while(--minLength
> 0);
579 /* String compare in code point order - doCompare() compares in code unit order. */
581 UnicodeString::doCompareCodePointOrder(int32_t start
,
583 const UChar
*srcChars
,
585 int32_t srcLength
) const
587 // compare illegal string values
588 // treat const UChar *srcChars==NULL as an empty string
593 // pin indices to legal values
594 pinIndices(start
, length
);
596 if(srcChars
== NULL
) {
597 srcStart
= srcLength
= 0;
600 int32_t diff
= uprv_strCompare(getArrayStart() + start
, length
, srcChars
+ srcStart
, srcLength
, FALSE
, TRUE
);
601 /* translate the 32-bit result into an 8-bit one */
603 return (int8_t)(diff
>> 15 | 1);
610 UnicodeString::getLength() const {
615 UnicodeString::getCharAt(int32_t offset
) const {
616 return charAt(offset
);
620 UnicodeString::getChar32At(int32_t offset
) const {
621 return char32At(offset
);
625 UnicodeString::countChar32(int32_t start
, int32_t length
) const {
626 pinIndices(start
, length
);
627 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
628 return u_countChar32(getArrayStart()+start
, length
);
632 UnicodeString::hasMoreChar32Than(int32_t start
, int32_t length
, int32_t number
) const {
633 pinIndices(start
, length
);
634 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
635 return u_strHasMoreChar32Than(getArrayStart()+start
, length
, number
);
639 UnicodeString::moveIndex32(int32_t index
, int32_t delta
) const {
641 int32_t len
= length();
644 } else if(index
>len
) {
648 const UChar
*array
= getArrayStart();
650 UTF_FWD_N(array
, index
, len
, delta
);
652 UTF_BACK_N(array
, 0, index
, -delta
);
// Copies `length` UChars of this string, beginning at `start`, into dst at
// offset dstStart. The range is pinned to the string first, and the copy
// is skipped when source and destination are the same address.
659 UnicodeString::doExtract(int32_t start
,
662 int32_t dstStart
) const
664 // pin indices to legal values
665 pinIndices(start
, length
);
667 // do not copy anything if we alias dst itself
668 const UChar
*array
= getArrayStart();
669 if(array
+ start
!= dst
+ dstStart
) {
670 us_arrayCopy(array
, start
, dst
, dstStart
, length
);
// Extracts the whole string into dest, which has room for destCapacity
// UChars.
// Nothing is done unless errorCode indicates success on entry. A bogus
// string, a negative capacity, or a NULL dest with a positive capacity sets
// U_ILLEGAL_ARGUMENT_ERROR. Otherwise the contents are copied when they fit
// and dest is not this string's own array, and the value returned is that
// of u_terminateUChars(dest, destCapacity, len, &errorCode).
675 UnicodeString::extract(UChar
*dest
, int32_t destCapacity
,
676 UErrorCode
&errorCode
) const {
677 int32_t len
= length();
678 if(U_SUCCESS(errorCode
)) {
679 if(isBogus() || destCapacity
<0 || (destCapacity
>0 && dest
==0)) {
680 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
682 const UChar
*array
= getArrayStart();
683 if(len
>0 && len
<=destCapacity
&& array
!=dest
) {
684 uprv_memcpy(dest
, array
, len
*U_SIZEOF_UCHAR
);
686 return u_terminateUChars(dest
, destCapacity
, len
, &errorCode
);
// Extracts the range [start, start+length) into the char buffer `target`
// (capacity targetCapacity) via u_UCharsToChars(); this overload is
// selected by the unnamed EInvariant tag parameter.
// A negative capacity, or a NULL target with a positive capacity, is
// rejected before anything else happens. The characters are converted only
// when they fit into the buffer; the value returned is that of
// u_terminateChars(), called with a local status code.
694 UnicodeString::extract(int32_t start
,
697 int32_t targetCapacity
,
698 enum EInvariant
) const
700 // if the arguments are illegal, then do nothing
701 if(targetCapacity
< 0 || (targetCapacity
> 0 && target
== NULL
)) {
705 // pin the indices to legal values
706 pinIndices(start
, length
);
708 if(length
<= targetCapacity
) {
709 u_UCharsToChars(getArrayStart() + start
, target
, length
);
711 UErrorCode status
= U_ZERO_ERROR
;
712 return u_terminateChars(target
, targetCapacity
, length
, &status
);
716 UnicodeString::extractBetween(int32_t start
,
718 UnicodeString
& target
) const {
721 doExtract(start
, limit
- start
, target
);
725 UnicodeString::indexOf(const UChar
*srcChars
,
729 int32_t length
) const
731 if(isBogus() || srcChars
== 0 || srcStart
< 0 || srcLength
== 0) {
735 // UnicodeString does not find empty substrings
736 if(srcLength
< 0 && srcChars
[srcStart
] == 0) {
740 // get the indices within bounds
741 pinIndices(start
, length
);
743 // find the first occurrence of the substring
744 const UChar
*array
= getArrayStart();
745 const UChar
*match
= u_strFindFirst(array
+ start
, length
, srcChars
+ srcStart
, srcLength
);
749 return (int32_t)(match
- array
);
754 UnicodeString::doIndexOf(UChar c
,
756 int32_t length
) const
759 pinIndices(start
, length
);
761 // find the first occurrence of c
762 const UChar
*array
= getArrayStart();
763 const UChar
*match
= u_memchr(array
+ start
, c
, length
);
767 return (int32_t)(match
- array
);
772 UnicodeString::doIndexOf(UChar32 c
,
774 int32_t length
) const {
776 pinIndices(start
, length
);
778 // find the first occurrence of c
779 const UChar
*array
= getArrayStart();
780 const UChar
*match
= u_memchr32(array
+ start
, c
, length
);
784 return (int32_t)(match
- array
);
789 UnicodeString::lastIndexOf(const UChar
*srcChars
,
793 int32_t length
) const
795 if(isBogus() || srcChars
== 0 || srcStart
< 0 || srcLength
== 0) {
799 // UnicodeString does not find empty substrings
800 if(srcLength
< 0 && srcChars
[srcStart
] == 0) {
804 // get the indices within bounds
805 pinIndices(start
, length
);
807 // find the last occurrence of the substring
808 const UChar
*array
= getArrayStart();
809 const UChar
*match
= u_strFindLast(array
+ start
, length
, srcChars
+ srcStart
, srcLength
);
813 return (int32_t)(match
- array
);
818 UnicodeString::doLastIndexOf(UChar c
,
820 int32_t length
) const
827 pinIndices(start
, length
);
829 // find the last occurrence of c
830 const UChar
*array
= getArrayStart();
831 const UChar
*match
= u_memrchr(array
+ start
, c
, length
);
835 return (int32_t)(match
- array
);
840 UnicodeString::doLastIndexOf(UChar32 c
,
842 int32_t length
) const {
844 pinIndices(start
, length
);
846 // find the last occurrence of c
847 const UChar
*array
= getArrayStart();
848 const UChar
*match
= u_memrchr32(array
+ start
, c
, length
);
852 return (int32_t)(match
- array
);
856 //========================================
857 // Write implementation
858 //========================================
861 UnicodeString::findAndReplace(int32_t start
,
863 const UnicodeString
& oldText
,
866 const UnicodeString
& newText
,
870 if(isBogus() || oldText
.isBogus() || newText
.isBogus()) {
874 pinIndices(start
, length
);
875 oldText
.pinIndices(oldStart
, oldLength
);
876 newText
.pinIndices(newStart
, newLength
);
882 while(length
> 0 && length
>= oldLength
) {
883 int32_t pos
= indexOf(oldText
, oldStart
, oldLength
, start
, length
);
885 // no more oldText's here: done
888 // we found oldText, replace it by newText and go beyond it
889 replace(pos
, oldLength
, newText
, newStart
, newLength
);
890 length
-= pos
+ oldLength
- start
;
891 start
= pos
+ newLength
;
900 UnicodeString::setToBogus()
905 fUnion
.fFields
.fArray
= 0;
906 fUnion
.fFields
.fCapacity
= 0;
910 // turn a bogus string into an empty one
912 UnicodeString::unBogus() {
913 if(fFlags
& kIsBogus
) {
918 // setTo() analogous to the readonly-aliasing constructor with the same signature
920 UnicodeString::setTo(UBool isTerminated
,
924 if(fFlags
& kOpenGetBuffer
) {
925 // do not modify a string that has an "open" getBuffer(minCapacity)
930 // treat as an empty string, do not alias
936 if( textLength
< -1 ||
937 (textLength
== -1 && !isTerminated
) ||
938 (textLength
>= 0 && isTerminated
&& text
[textLength
] != 0)
946 if(textLength
== -1) {
947 // text is terminated, or else it would have failed the above test
948 textLength
= u_strlen(text
);
950 setArray((UChar
*)text
, textLength
, isTerminated
? textLength
+ 1 : textLength
);
952 fFlags
= kReadonlyAlias
;
956 // setTo() analogous to the writable-aliasing constructor with the same signature
958 UnicodeString::setTo(UChar
*buffer
,
960 int32_t buffCapacity
) {
961 if(fFlags
& kOpenGetBuffer
) {
962 // do not modify a string that has an "open" getBuffer(minCapacity)
967 // treat as an empty string, do not alias
973 if(buffLength
< -1 || buffCapacity
< 0 || buffLength
> buffCapacity
) {
976 } else if(buffLength
== -1) {
977 // buffLength = u_strlen(buff); but do not look beyond buffCapacity
978 const UChar
*p
= buffer
, *limit
= buffer
+ buffCapacity
;
979 while(p
!= limit
&& *p
!= 0) {
982 buffLength
= (int32_t)(p
- buffer
);
987 setArray(buffer
, buffLength
, buffCapacity
);
988 fFlags
= kWritableAlias
;
993 UnicodeString::setCharAt(int32_t offset
,
996 int32_t len
= length();
997 if(cloneArrayIfNeeded() && len
> 0) {
1000 } else if(offset
>= len
) {
1004 getArrayStart()[offset
] = c
;
1010 UnicodeString::doReplace( int32_t start
,
1012 const UnicodeString
& src
,
1016 if(!src
.isBogus()) {
1017 // pin the indices to legal values
1018 src
.pinIndices(srcStart
, srcLength
);
1020 // get the characters from src
1021 // and replace the range in ourselves with them
1022 return doReplace(start
, length
, src
.getArrayStart(), srcStart
, srcLength
);
1025 return doReplace(start
, length
, 0, 0, 0);
1030 UnicodeString::doReplace(int32_t start
,
1032 const UChar
*srcChars
,
1041 srcStart
= srcLength
= 0;
1042 } else if(srcLength
< 0) {
1043 // get the srcLength if necessary
1044 srcLength
= u_strlen(srcChars
+ srcStart
);
1047 int32_t oldLength
= this->length();
1049 // calculate the size of the string after the replace
1052 // optimize append() onto a large-enough, owned string
1053 if(start
>= oldLength
) {
1054 newSize
= oldLength
+ srcLength
;
1055 if(newSize
<= getCapacity() && isBufferWritable()) {
1056 us_arrayCopy(srcChars
, srcStart
, getArrayStart(), oldLength
, srcLength
);
1060 // pin the indices to legal values
1065 // pin the indices to legal values
1066 pinIndices(start
, length
);
1068 newSize
= oldLength
- length
+ srcLength
;
1071 // the following may change fArray but will not copy the current contents;
1072 // therefore we need to keep the current fArray
1073 UChar oldStackBuffer
[US_STACKBUF_SIZE
];
1075 if((fFlags
&kUsingStackBuffer
) && (newSize
> US_STACKBUF_SIZE
)) {
1076 // copy the stack buffer contents because it will be overwritten with
1077 // fUnion.fFields values
1078 u_memcpy(oldStackBuffer
, fUnion
.fStackBuffer
, oldLength
);
1079 oldArray
= oldStackBuffer
;
1081 oldArray
= getArrayStart();
1084 // clone our array and allocate a bigger array if needed
1085 int32_t *bufferToDelete
= 0;
1086 if(!cloneArrayIfNeeded(newSize
, newSize
+ (newSize
>> 2) + kGrowSize
,
1087 FALSE
, &bufferToDelete
)
1092 // now do the replace
1094 UChar
*newArray
= getArrayStart();
1095 if(newArray
!= oldArray
) {
1096 // if fArray changed, then we need to copy everything except what will change
1097 us_arrayCopy(oldArray
, 0, newArray
, 0, start
);
1098 us_arrayCopy(oldArray
, start
+ length
,
1099 newArray
, start
+ srcLength
,
1100 oldLength
- (start
+ length
));
1101 } else if(length
!= srcLength
) {
1102 // fArray did not change; copy only the portion that isn't changing, leaving a hole
1103 us_arrayCopy(oldArray
, start
+ length
,
1104 newArray
, start
+ srcLength
,
1105 oldLength
- (start
+ length
));
1108 // now fill in the hole with the new string
1109 us_arrayCopy(srcChars
, srcStart
, newArray
, start
, srcLength
);
1113 // delayed delete in case srcChars == fArray when we started, and
1114 // to keep oldArray alive for the above operations
1115 if (bufferToDelete
) {
1116 uprv_free(bufferToDelete
);
1126 UnicodeString::handleReplaceBetween(int32_t start
,
1128 const UnicodeString
& text
) {
1129 replaceBetween(start
, limit
, text
);
// Copies the range [start, limit) of this string and inserts the copy at
// index `dest`, going through a temporary buffer of (limit - start) UChars
// obtained from uprv_malloc().
// An empty or inverted range (limit <= start) returns immediately.
// NOTE(review): the release of `text` is not visible in this excerpt - make
// sure the buffer is handed to uprv_free() after insert().
1136 UnicodeString::copy(int32_t start
, int32_t limit
, int32_t dest
) {
1137 if (limit
<= start
) {
1138 return; // Nothing to do; avoid bogus malloc call
1140 UChar
* text
= (UChar
*) uprv_malloc( sizeof(UChar
) * (limit
- start
) );
1141 // Check to make sure text is not null.
1143 extractBetween(start
, limit
, text
, 0);
1144 insert(dest
, text
, 0, limit
- start
);
1152 * NOTE: This is for the Replaceable class. There is no rep.cpp,
1153 * so we implement this function here.
1155 UBool
Replaceable::hasMetaData() const {
1162 UBool
UnicodeString::hasMetaData() const {
1167 UnicodeString::doReverse(int32_t start
,
1170 if(this->length() <= 1 || !cloneArrayIfNeeded()) {
1174 // pin the indices to legal values
1175 pinIndices(start
, length
);
1177 UChar
*left
= getArrayStart() + start
;
1178 UChar
*right
= left
+ length
;
1180 UBool hasSupplementary
= FALSE
;
1182 while(left
< --right
) {
1183 hasSupplementary
|= (UBool
)UTF_IS_LEAD(swap
= *left
);
1184 hasSupplementary
|= (UBool
)UTF_IS_LEAD(*left
++ = *right
);
1188 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1189 if(hasSupplementary
) {
1192 left
= getArrayStart() + start
;
1193 right
= left
+ length
- 1; // -1 so that we can look at *(left+1) if left<right
1194 while(left
< right
) {
1195 if(UTF_IS_TRAIL(swap
= *left
) && UTF_IS_LEAD(swap2
= *(left
+ 1))) {
1208 UnicodeString::padLeading(int32_t targetLength
,
1211 int32_t oldLength
= length();
1212 if(oldLength
>= targetLength
|| !cloneArrayIfNeeded(targetLength
)) {
1215 // move contents up by padding width
1216 UChar
*array
= getArrayStart();
1217 int32_t start
= targetLength
- oldLength
;
1218 us_arrayCopy(array
, 0, array
, start
, oldLength
);
1220 // fill in padding character
1221 while(--start
>= 0) {
1222 array
[start
] = padChar
;
1224 setLength(targetLength
);
1230 UnicodeString::padTrailing(int32_t targetLength
,
1233 int32_t oldLength
= length();
1234 if(oldLength
>= targetLength
|| !cloneArrayIfNeeded(targetLength
)) {
1237 // fill in padding character
1238 UChar
*array
= getArrayStart();
1239 int32_t length
= targetLength
;
1240 while(--length
>= oldLength
) {
1241 array
[length
] = padChar
;
1243 setLength(targetLength
);
1248 //========================================
1250 //========================================
// Computes the hash code of the string contents with uhash_hashUCharsN()
// over getArrayStart()/length(). A result equal to kInvalidHashCode is
// replaced by kEmptyHashCode.
1252 UnicodeString::doHashCode() const
1254 /* Delegate hash computation to uhash. This makes UnicodeString
1255 * hashing consistent with UChar* hashing. */
1256 int32_t hashCode
= uhash_hashUCharsN(getArrayStart(), length());
1257 if (hashCode
== kInvalidHashCode
) {
1258 hashCode
= kEmptyHashCode
;
1263 //========================================
1265 //========================================
// Opens the string's buffer for the caller.
// When minCapacity is at least -1 and cloneArrayIfNeeded(minCapacity)
// succeeds, the kOpenGetBuffer flag is set and the start of the array is
// returned. The flag stays set until releaseBuffer() clears it.
1268 UnicodeString::getBuffer(int32_t minCapacity
) {
1269 if(minCapacity
>=-1 && cloneArrayIfNeeded(minCapacity
)) {
1270 fFlags
|=kOpenGetBuffer
;
1272 return getArrayStart();
// Closes a buffer that was opened with getBuffer(minCapacity) and records
// the new string length.
// Does nothing unless kOpenGetBuffer is set and newLength is at least -1.
// The visible code can derive the length by scanning for a 0 UChar within
// the capacity, treats newLength > capacity as a separate case, then stores
// the length and clears kOpenGetBuffer.
// NOTE(review): presumably the scan is used for newLength == -1 and an
// oversized newLength is capped to the capacity - those lines are not
// visible here; confirm.
1279 UnicodeString::releaseBuffer(int32_t newLength
) {
1280 if(fFlags
&kOpenGetBuffer
&& newLength
>=-1) {
1281 // set the new fLength
1282 int32_t capacity
=getCapacity();
1284 // the new length is the string length, capped by fCapacity
1285 const UChar
*array
=getArrayStart(), *p
=array
, *limit
=array
+capacity
;
1286 while(p
<limit
&& *p
!=0) {
1289 newLength
=(int32_t)(p
-array
);
1290 } else if(newLength
>capacity
) {
1293 setLength(newLength
);
1294 fFlags
&=~kOpenGetBuffer
;
1298 //========================================
1300 //========================================
1302 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity
,
1303 int32_t growCapacity
,
1305 int32_t **pBufferToDelete
,
1307 // default parameters need to be static, therefore
1308 // the defaults are -1 to have convenience defaults
1309 if(newCapacity
== -1) {
1310 newCapacity
= getCapacity();
1313 // while a getBuffer(minCapacity) is "open",
1314 // prevent any modifications of the string by returning FALSE here
1315 // if the string is bogus, then only an assignment or similar can revive it
1321 * We need to make a copy of the array if
1322 * the buffer is read-only, or
1323 * the buffer is refCounted (shared), and refCount>1, or
1324 * the buffer is too small.
1325 * Return FALSE if memory could not be allocated.
1328 fFlags
& kBufferIsReadonly
||
1329 fFlags
& kRefCounted
&& refCount() > 1 ||
1330 newCapacity
> getCapacity()
1332 // check growCapacity for default value and use of the stack buffer
1333 if(growCapacity
== -1) {
1334 growCapacity
= newCapacity
;
1335 } else if(newCapacity
<= US_STACKBUF_SIZE
&& growCapacity
> US_STACKBUF_SIZE
) {
1336 growCapacity
= US_STACKBUF_SIZE
;
1340 UChar oldStackBuffer
[US_STACKBUF_SIZE
];
1342 uint8_t flags
= fFlags
;
1344 if(flags
&kUsingStackBuffer
) {
1345 if(doCopyArray
&& growCapacity
> US_STACKBUF_SIZE
) {
1346 // copy the stack buffer contents because it will be overwritten with
1347 // fUnion.fFields values
1348 us_arrayCopy(fUnion
.fStackBuffer
, 0, oldStackBuffer
, 0, fShortLength
);
1349 oldArray
= oldStackBuffer
;
1351 oldArray
= 0; // no need to copy from stack buffer to itself
1354 oldArray
= fUnion
.fFields
.fArray
;
1357 // allocate a new array
1358 if(allocate(growCapacity
) ||
1359 newCapacity
< growCapacity
&& allocate(newCapacity
)
1361 if(doCopyArray
&& oldArray
!= 0) {
1362 // copy the contents
1363 // do not copy more than what fits - it may be smaller than before
1364 int32_t minLength
= length();
1365 newCapacity
= getCapacity();
1366 if(newCapacity
< minLength
) {
1367 minLength
= newCapacity
;
1368 setLength(minLength
);
1370 us_arrayCopy(oldArray
, 0, getArrayStart(), 0, minLength
);
1375 // release the old array
1376 if(flags
& kRefCounted
) {
1377 // the array is refCounted; decrement and release if 0
1378 int32_t *pRefCount
= ((int32_t *)oldArray
- 1);
1379 if(umtx_atomic_dec(pRefCount
) == 0) {
1380 if(pBufferToDelete
== 0) {
1381 uprv_free(pRefCount
);
1383 // the caller requested to delete it himself
1384 *pBufferToDelete
= pRefCount
;
1389 // not enough memory for growCapacity and not even for the smaller newCapacity
1390 // reset the old values for setToBogus() to release the array
1391 if(!(flags
&kUsingStackBuffer
)) {
1392 fUnion
.fFields
.fArray
= oldArray
;
1403 #ifdef U_STATIC_IMPLEMENTATION
1405 This should never be called. It is defined here to make sure that the
1406 virtual vector deleting destructor is defined within unistr.cpp.
1407 The vector deleting destructor is already a part of UObject,
1408 but defining it here makes sure that it is included with this object file.
1409 This makes sure that static library dependencies are kept to a minimum.
// Never meant to be called. The array new/delete expression below exists
// only so that the compiler emits UnicodeString's vector deleting
// destructor into this object file (see the explanation preceding this
// function).
1411 static void uprv_UnicodeStringDummy(void) {
1413 delete [] (new UnicodeString
[2]);