]>
git.saurik.com Git - apple/icu.git/blob - icuSources/common/unistr.cpp
2 ******************************************************************************
3 * Copyright (C) 1999-2005, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 ******************************************************************************
9 * Modification History:
11 * Date Name Description
12 * 09/25/98 stephen Creation.
13 * 04/20/99 stephen Overhauled per 4/16 code review.
14 * 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX
15 * 11/18/99 aliu Added handleReplaceBetween() to make inherit from
17 * 06/25/01 grhoten Removed the dependency on iostream
18 ******************************************************************************
21 #include "unicode/utypes.h"
22 #include "unicode/putil.h"
25 #include "unicode/ustring.h"
26 #include "unicode/unistr.h"
33 #if U_IOSTREAM_SOURCE >= 199711
36 #elif U_IOSTREAM_SOURCE >= 198506
42 print(const UnicodeString
& s
,
47 for(int i
= 0; i
< s
.length(); ++i
) {
49 if(c
>= 0x007E || c
< 0x0020)
50 cout
<< "[0x" << hex
<< s
[i
] << "]";
64 for(int i
= 0; i
< len
; ++i
) {
66 if(c
>= 0x007E || c
< 0x0020)
67 cout
<< "[0x" << hex
<< s
[i
] << "]";
76 // Local function definitions for now
78 // need to copy areas that may overlap
81 us_arrayCopy(const UChar
*src
, int32_t srcStart
,
82 UChar
*dst
, int32_t dstStart
, int32_t count
)
85 uprv_memmove(dst
+dstStart
, src
+srcStart
, (size_t)(count
*sizeof(*src
)));
89 // u_unescapeAt() callback to get a UChar from a UnicodeString
91 static UChar U_CALLCONV
92 UnicodeString_charAt(int32_t offset
, void *context
) {
93 return ((UnicodeString
*) context
)->charAt(offset
);
99 /* The Replaceable virtual destructor can't be defined in the header
100 due to how AIX works with multiple definitions of virtual functions.
102 Replaceable::~Replaceable() {}
103 Replaceable::Replaceable() {}
104 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString
)
106 UnicodeString U_EXPORT2
107 operator+ (const UnicodeString
&s1
, const UnicodeString
&s2
) {
109 UnicodeString(s1
.length()+s2
.length()+1, (UChar32
)0, 0).
114 //========================================
115 // Reference Counting functions, put at top of file so that optimizing compilers
116 // have a chance to automatically inline.
117 //========================================
120 UnicodeString::addRef()
121 { umtx_atomic_inc((int32_t *)fArray
- 1);}
124 UnicodeString::removeRef()
125 { return umtx_atomic_dec((int32_t *)fArray
- 1);}
128 UnicodeString::refCount() const
131 // Note: without the lock to force a memory barrier, we might see a very
132 // stale value on some multi-processor systems.
133 int32_t count
= *((int32_t *)fArray
- 1);
139 UnicodeString::releaseArray() {
140 if((fFlags
& kRefCounted
) && removeRef() == 0) {
141 uprv_free((int32_t *)fArray
- 1);
147 //========================================
149 //========================================
150 UnicodeString::UnicodeString()
152 fCapacity(US_STACKBUF_SIZE
),
153 fArray(fStackBuffer
),
157 UnicodeString::UnicodeString(int32_t capacity
, UChar32 c
, int32_t count
)
159 fCapacity(US_STACKBUF_SIZE
),
163 if(count
<= 0 || (uint32_t)c
> 0x10ffff) {
164 // just allocate and do not do anything else
167 // count > 0, allocate and fill the new string with count c's
168 int32_t unitCount
= UTF_CHAR_LENGTH(c
), length
= count
* unitCount
;
169 if(capacity
< length
) {
172 if(allocate(capacity
)) {
175 // fill the new string with c
177 // fill with length UChars
179 fArray
[i
++] = (UChar
)c
;
182 // get the code units for c
183 UChar units
[UTF_MAX_CHAR_LENGTH
];
184 UTF_APPEND_CHAR_UNSAFE(units
, i
, c
);
186 // now it must be i==unitCount
189 // for Unicode, unitCount can only be 1, 2, 3, or 4
190 // 1 is handled above
193 while(unitIdx
< unitCount
) {
194 fArray
[i
++]=units
[unitIdx
++];
203 UnicodeString::UnicodeString(UChar ch
)
205 fCapacity(US_STACKBUF_SIZE
),
206 fArray(fStackBuffer
),
209 fStackBuffer
[0] = ch
;
212 UnicodeString::UnicodeString(UChar32 ch
)
214 fCapacity(US_STACKBUF_SIZE
),
215 fArray(fStackBuffer
),
219 UBool isError
= FALSE
;
220 U16_APPEND(fStackBuffer
, i
, US_STACKBUF_SIZE
, ch
, isError
);
224 UnicodeString::UnicodeString(const UChar
*text
)
226 fCapacity(US_STACKBUF_SIZE
),
227 fArray(fStackBuffer
),
230 doReplace(0, 0, text
, 0, -1);
233 UnicodeString::UnicodeString(const UChar
*text
,
236 fCapacity(US_STACKBUF_SIZE
),
237 fArray(fStackBuffer
),
240 doReplace(0, 0, text
, 0, textLength
);
243 UnicodeString::UnicodeString(UBool isTerminated
,
246 : fLength(textLength
),
247 fCapacity(isTerminated
? textLength
+ 1 : textLength
),
248 fArray((UChar
*)text
),
249 fFlags(kReadonlyAlias
)
252 // treat as an empty string, do not alias
254 fCapacity
= US_STACKBUF_SIZE
;
255 fArray
= fStackBuffer
;
256 fFlags
= kShortString
;
257 } else if(textLength
< -1 ||
258 (textLength
== -1 && !isTerminated
) ||
259 (textLength
>= 0 && isTerminated
&& text
[textLength
] != 0)
262 } else if(textLength
== -1) {
263 // text is terminated, or else it would have failed the above test
264 fLength
= u_strlen(text
);
265 fCapacity
= fLength
+ 1;
269 UnicodeString::UnicodeString(UChar
*buff
,
271 int32_t buffCapacity
)
272 : fLength(buffLength
),
273 fCapacity(buffCapacity
),
275 fFlags(kWritableAlias
)
278 // treat as an empty string, do not alias
280 fCapacity
= US_STACKBUF_SIZE
;
281 fArray
= fStackBuffer
;
282 fFlags
= kShortString
;
283 } else if(buffLength
< -1 || buffCapacity
< 0 || buffLength
> buffCapacity
) {
285 } else if(buffLength
== -1) {
286 // fLength = u_strlen(buff); but do not look beyond buffCapacity
287 const UChar
*p
= buff
, *limit
= buff
+ buffCapacity
;
288 while(p
!= limit
&& *p
!= 0) {
291 fLength
= (int32_t)(p
- buff
);
295 UnicodeString::UnicodeString(const char *src
, int32_t length
, EInvariant
)
297 fCapacity(US_STACKBUF_SIZE
),
298 fArray(fStackBuffer
),
302 // treat as an empty string
305 length
=(int32_t)uprv_strlen(src
);
307 if(cloneArrayIfNeeded(length
, length
, FALSE
)) {
308 u_charsToUChars(src
, getArrayStart(), length
);
316 UnicodeString::UnicodeString(const UnicodeString
& that
)
319 fCapacity(US_STACKBUF_SIZE
),
320 fArray(fStackBuffer
),
326 UnicodeString::UnicodeString(const UnicodeString
& that
,
330 fCapacity(US_STACKBUF_SIZE
),
331 fArray(fStackBuffer
),
334 setTo(that
, srcStart
);
337 UnicodeString::UnicodeString(const UnicodeString
& that
,
342 fCapacity(US_STACKBUF_SIZE
),
343 fArray(fStackBuffer
),
346 setTo(that
, srcStart
, srcLength
);
349 // Replaceable base class clone() default implementation, does not clone
351 Replaceable::clone() const {
355 // UnicodeString overrides clone() with a real implementation
357 UnicodeString::clone() const {
358 return new UnicodeString(*this);
361 //========================================
363 //========================================
366 UnicodeString::allocate(int32_t capacity
) {
367 if(capacity
<= US_STACKBUF_SIZE
) {
368 fArray
= fStackBuffer
;
369 fCapacity
= US_STACKBUF_SIZE
;
370 fFlags
= kShortString
;
372 // count bytes for the refCounter and the string capacity, and
373 // round up to a multiple of 16; then divide by 4 and allocate int32_t's
374 // to be safely aligned for the refCount
375 int32_t words
= (int32_t)(((sizeof(int32_t) + capacity
* U_SIZEOF_UCHAR
+ 15) & ~15) >> 2);
376 int32_t *array
= (int32_t*) uprv_malloc( sizeof(int32_t) * words
);
378 // set initial refCount and point behind the refCount
381 // have fArray point to the first UChar
382 fArray
= (UChar
*)array
;
383 fCapacity
= (int32_t)((words
- 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR
));
384 fFlags
= kLongString
;
395 //========================================
397 //========================================
398 UnicodeString::~UnicodeString()
404 //========================================
406 //========================================
409 UnicodeString::operator=(const UnicodeString
&src
) {
410 return copyFrom(src
);
414 UnicodeString::fastCopyFrom(const UnicodeString
&src
) {
415 return copyFrom(src
, TRUE
);
419 UnicodeString::copyFrom(const UnicodeString
&src
, UBool fastCopy
) {
420 // if assigning to ourselves, do nothing
421 if(this == 0 || this == &src
) {
425 // is the right side bogus?
426 if(&src
== 0 || src
.isBogus()) {
431 // delete the current contents
434 // we always copy the length
435 fLength
= src
.fLength
;
437 // empty string - use the stack buffer
438 fArray
= fStackBuffer
;
439 fCapacity
= US_STACKBUF_SIZE
;
440 fFlags
= kShortString
;
444 // fLength>0 and not an "open" src.getBuffer(minCapacity)
447 // short string using the stack buffer, do the same
448 fArray
= fStackBuffer
;
449 fCapacity
= US_STACKBUF_SIZE
;
450 fFlags
= kShortString
;
451 uprv_memcpy(fStackBuffer
, src
.fArray
, fLength
* U_SIZEOF_UCHAR
);
454 // src uses a refCounted string buffer, use that buffer with refCount
455 // src is const, use a cast - we don't really change it
456 ((UnicodeString
&)src
).addRef();
457 // copy all fields, share the reference-counted buffer
459 fCapacity
= src
.fCapacity
;
464 // src is a readonly alias, do the same
465 // -> maintain the readonly alias as such
467 fCapacity
= src
.fCapacity
;
471 // else if(!fastCopy) fall through to case kWritableAlias
472 // -> allocate a new buffer and copy the contents
474 // src is a writable alias; we make a copy of that instead
475 if(allocate(fLength
)) {
476 uprv_memcpy(fArray
, src
.fArray
, fLength
* U_SIZEOF_UCHAR
);
479 // if there is not enough memory, then fall through to setting to bogus
481 // if src is bogus, set ourselves to bogus
482 // do not call setToBogus() here because fArray and fFlags are not consistent here
493 //========================================
494 // Miscellaneous operations
495 //========================================
497 UnicodeString
UnicodeString::unescape() const {
498 UnicodeString result
;
499 for (int32_t i
=0; i
<length(); ) {
500 UChar32 c
= charAt(i
++);
501 if (c
== 0x005C /*'\\'*/) {
502 c
= unescapeAt(i
); // advances i
503 if (c
== (UChar32
)0xFFFFFFFF) {
504 result
.remove(); // return empty string
505 break; // invalid escape sequence
513 UChar32
UnicodeString::unescapeAt(int32_t &offset
) const {
514 return u_unescapeAt(UnicodeString_charAt
, &offset
, length(), (void*)this);
517 //========================================
518 // Read-only implementation
519 //========================================
521 UnicodeString::doCompare( int32_t start
,
523 const UChar
*srcChars
,
525 int32_t srcLength
) const
527 // compare illegal string values
528 // treat const UChar *srcChars==NULL as an empty string
533 // pin indices to legal values
534 pinIndices(start
, length
);
536 if(srcChars
== NULL
) {
537 srcStart
= srcLength
= 0;
540 // get the correct pointer
541 const UChar
*chars
= getArrayStart();
544 srcChars
+= srcStart
;
549 // get the srcLength if necessary
551 srcLength
= u_strlen(srcChars
+ srcStart
);
554 // are we comparing different lengths?
555 if(length
!= srcLength
) {
556 if(length
< srcLength
) {
560 minLength
= srcLength
;
569 * note that uprv_memcmp() returns an int but we return an int8_t;
570 * we need to take care not to truncate the result -
571 * one way to do this is to right-shift the value to
572 * move the sign bit into the lower 8 bits and making sure that this
573 * does not become 0 itself
576 if(minLength
> 0 && chars
!= srcChars
) {
580 // big-endian: byte comparison works
581 result
= uprv_memcmp(chars
, srcChars
, minLength
* sizeof(UChar
));
583 return (int8_t)(result
>> 15 | 1);
586 // little-endian: compare UChar units
588 result
= ((int32_t)*(chars
++) - (int32_t)*(srcChars
++));
590 return (int8_t)(result
>> 15 | 1);
592 } while(--minLength
> 0);
598 /* String compare in code point order - doCompare() compares in code unit order. */
600 UnicodeString::doCompareCodePointOrder(int32_t start
,
602 const UChar
*srcChars
,
604 int32_t srcLength
) const
606 // compare illegal string values
607 // treat const UChar *srcChars==NULL as an empty string
612 // pin indices to legal values
613 pinIndices(start
, length
);
615 if(srcChars
== NULL
) {
616 srcStart
= srcLength
= 0;
619 int32_t diff
= uprv_strCompare(fArray
+ start
, length
, srcChars
+ srcStart
, srcLength
, FALSE
, TRUE
);
620 /* translate the 32-bit result into an 8-bit one */
622 return (int8_t)(diff
>> 15 | 1);
629 UnicodeString::getLength() const {
634 UnicodeString::getCharAt(int32_t offset
) const {
635 return charAt(offset
);
639 UnicodeString::getChar32At(int32_t offset
) const {
640 return char32At(offset
);
644 UnicodeString::countChar32(int32_t start
, int32_t length
) const {
645 pinIndices(start
, length
);
646 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
647 return u_countChar32(fArray
+start
, length
);
651 UnicodeString::hasMoreChar32Than(int32_t start
, int32_t length
, int32_t number
) const {
652 pinIndices(start
, length
);
653 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
654 return u_strHasMoreChar32Than(fArray
+start
, length
, number
);
658 UnicodeString::moveIndex32(int32_t index
, int32_t delta
) const {
662 } else if(index
>fLength
) {
667 UTF_FWD_N(fArray
, index
, fLength
, delta
);
669 UTF_BACK_N(fArray
, 0, index
, -delta
);
676 UnicodeString::doExtract(int32_t start
,
679 int32_t dstStart
) const
681 // pin indices to legal values
682 pinIndices(start
, length
);
684 // do not copy anything if we alias dst itself
685 if(fArray
+ start
!= dst
+ dstStart
) {
686 us_arrayCopy(getArrayStart(), start
, dst
, dstStart
, length
);
691 UnicodeString::extract(UChar
*dest
, int32_t destCapacity
,
692 UErrorCode
&errorCode
) const {
693 if(U_SUCCESS(errorCode
)) {
694 if(isBogus() || destCapacity
<0 || (destCapacity
>0 && dest
==0)) {
695 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
697 if(fLength
>0 && fLength
<=destCapacity
&& fArray
!=dest
) {
698 uprv_memcpy(dest
, fArray
, fLength
*U_SIZEOF_UCHAR
);
700 return u_terminateUChars(dest
, destCapacity
, fLength
, &errorCode
);
708 UnicodeString::extract(int32_t start
,
711 int32_t targetCapacity
,
712 enum EInvariant
) const
714 // if the arguments are illegal, then do nothing
715 if(targetCapacity
< 0 || (targetCapacity
> 0 && target
== NULL
)) {
719 // pin the indices to legal values
720 pinIndices(start
, length
);
722 if(length
<= targetCapacity
) {
723 u_UCharsToChars(getArrayStart() + start
, target
, length
);
725 UErrorCode status
= U_ZERO_ERROR
;
726 return u_terminateChars(target
, targetCapacity
, length
, &status
);
730 UnicodeString::extractBetween(int32_t start
,
732 UnicodeString
& target
) const {
735 doExtract(start
, limit
- start
, target
);
739 UnicodeString::indexOf(const UChar
*srcChars
,
743 int32_t length
) const
745 if(isBogus() || srcChars
== 0 || srcStart
< 0 || srcLength
== 0) {
749 // UnicodeString does not find empty substrings
750 if(srcLength
< 0 && srcChars
[srcStart
] == 0) {
754 // get the indices within bounds
755 pinIndices(start
, length
);
757 // find the first occurrence of the substring
758 const UChar
*match
= u_strFindFirst(fArray
+ start
, length
, srcChars
+ srcStart
, srcLength
);
762 return (int32_t)(match
- fArray
);
767 UnicodeString::doIndexOf(UChar c
,
769 int32_t length
) const
772 pinIndices(start
, length
);
774 // find the first occurrence of c
775 const UChar
*match
= u_memchr(fArray
+ start
, c
, length
);
779 return (int32_t)(match
- fArray
);
784 UnicodeString::doIndexOf(UChar32 c
,
786 int32_t length
) const {
788 pinIndices(start
, length
);
790 // find the first occurrence of c
791 const UChar
*match
= u_memchr32(fArray
+ start
, c
, length
);
795 return (int32_t)(match
- fArray
);
800 UnicodeString::lastIndexOf(const UChar
*srcChars
,
804 int32_t length
) const
806 if(isBogus() || srcChars
== 0 || srcStart
< 0 || srcLength
== 0) {
810 // UnicodeString does not find empty substrings
811 if(srcLength
< 0 && srcChars
[srcStart
] == 0) {
815 // get the indices within bounds
816 pinIndices(start
, length
);
818 // find the last occurrence of the substring
819 const UChar
*match
= u_strFindLast(fArray
+ start
, length
, srcChars
+ srcStart
, srcLength
);
823 return (int32_t)(match
- fArray
);
828 UnicodeString::doLastIndexOf(UChar c
,
830 int32_t length
) const
837 pinIndices(start
, length
);
839 // find the last occurrence of c
840 const UChar
*match
= u_memrchr(fArray
+ start
, c
, length
);
844 return (int32_t)(match
- fArray
);
849 UnicodeString::doLastIndexOf(UChar32 c
,
851 int32_t length
) const {
853 pinIndices(start
, length
);
855 // find the last occurrence of c
856 const UChar
*match
= u_memrchr32(fArray
+ start
, c
, length
);
860 return (int32_t)(match
- fArray
);
864 //========================================
865 // Write implementation
866 //========================================
869 UnicodeString::findAndReplace(int32_t start
,
871 const UnicodeString
& oldText
,
874 const UnicodeString
& newText
,
878 if(isBogus() || oldText
.isBogus() || newText
.isBogus()) {
882 pinIndices(start
, length
);
883 oldText
.pinIndices(oldStart
, oldLength
);
884 newText
.pinIndices(newStart
, newLength
);
890 while(length
> 0 && length
>= oldLength
) {
891 int32_t pos
= indexOf(oldText
, oldStart
, oldLength
, start
, length
);
893 // no more oldText's here: done
896 // we found oldText, replace it by newText and go beyond it
897 replace(pos
, oldLength
, newText
, newStart
, newLength
);
898 length
-= pos
+ oldLength
- start
;
899 start
= pos
+ newLength
;
908 UnicodeString::setToBogus()
913 fCapacity
= fLength
= 0;
917 // turn a bogus string into an empty one
919 UnicodeString::unBogus() {
920 if(fFlags
& kIsBogus
) {
921 fArray
= fStackBuffer
;
923 fCapacity
= US_STACKBUF_SIZE
;
924 fFlags
= kShortString
;
928 // setTo() analogous to the readonly-aliasing constructor with the same signature
930 UnicodeString::setTo(UBool isTerminated
,
934 if(fFlags
& kOpenGetBuffer
) {
935 // do not modify a string that has an "open" getBuffer(minCapacity)
940 // treat as an empty string, do not alias
943 fCapacity
= US_STACKBUF_SIZE
;
944 fArray
= fStackBuffer
;
945 fFlags
= kShortString
;
949 if( textLength
< -1 ||
950 (textLength
== -1 && !isTerminated
) ||
951 (textLength
>= 0 && isTerminated
&& text
[textLength
] != 0)
959 fArray
= (UChar
*)text
;
960 if(textLength
!= -1) {
961 fLength
= textLength
;
962 fCapacity
= isTerminated
? fLength
+ 1 : fLength
;
964 // text is terminated, or else it would have failed the above test
965 fLength
= u_strlen(text
);
966 fCapacity
= fLength
+ 1;
969 fFlags
= kReadonlyAlias
;
973 // setTo() analogous to the writable-aliasing constructor with the same signature
975 UnicodeString::setTo(UChar
*buffer
,
977 int32_t buffCapacity
) {
978 if(fFlags
& kOpenGetBuffer
) {
979 // do not modify a string that has an "open" getBuffer(minCapacity)
984 // treat as an empty string, do not alias
987 fCapacity
= US_STACKBUF_SIZE
;
988 fArray
= fStackBuffer
;
989 fFlags
= kShortString
;
993 if(buffLength
< -1 || buffCapacity
< 0 || buffLength
> buffCapacity
) {
996 } else if(buffLength
== -1) {
997 // buffLength = u_strlen(buff); but do not look beyond buffCapacity
998 const UChar
*p
= buffer
, *limit
= buffer
+ buffCapacity
;
999 while(p
!= limit
&& *p
!= 0) {
1002 buffLength
= (int32_t)(p
- buffer
);
1008 fLength
= buffLength
;
1009 fCapacity
= buffCapacity
;
1010 fFlags
= kWritableAlias
;
1015 UnicodeString::setCharAt(int32_t offset
,
1018 if(cloneArrayIfNeeded() && fLength
> 0) {
1021 } else if(offset
>= fLength
) {
1022 offset
= fLength
- 1;
1031 UnicodeString::doReplace( int32_t start
,
1033 const UnicodeString
& src
,
1037 if(!src
.isBogus()) {
1038 // pin the indices to legal values
1039 src
.pinIndices(srcStart
, srcLength
);
1041 // get the characters from src
1042 // and replace the range in ourselves with them
1043 return doReplace(start
, length
, src
.getArrayStart(), srcStart
, srcLength
);
1046 return doReplace(start
, length
, 0, 0, 0);
1051 UnicodeString::doReplace(int32_t start
,
1053 const UChar
*srcChars
,
1062 srcStart
= srcLength
= 0;
1063 } else if(srcLength
< 0) {
1064 // get the srcLength if necessary
1065 srcLength
= u_strlen(srcChars
+ srcStart
);
1068 int32_t *bufferToDelete
= 0;
1070 // the following may change fArray but will not copy the current contents;
1071 // therefore we need to keep the current fArray
1072 UChar
*oldArray
= fArray
;
1073 int32_t oldLength
= fLength
;
1075 // pin the indices to legal values
1076 pinIndices(start
, length
);
1078 // calculate the size of the string after the replace
1079 int32_t newSize
= oldLength
- length
+ srcLength
;
1081 // clone our array and allocate a bigger array if needed
1082 if(!cloneArrayIfNeeded(newSize
, newSize
+ (newSize
>> 2) + kGrowSize
,
1083 FALSE
, &bufferToDelete
)
1088 // now do the replace
1090 if(fArray
!= oldArray
) {
1091 // if fArray changed, then we need to copy everything except what will change
1092 us_arrayCopy(oldArray
, 0, fArray
, 0, start
);
1093 us_arrayCopy(oldArray
, start
+ length
,
1094 fArray
, start
+ srcLength
,
1095 oldLength
- (start
+ length
));
1096 } else if(length
!= srcLength
) {
1097 // fArray did not change; copy only the portion that isn't changing, leaving a hole
1098 us_arrayCopy(oldArray
, start
+ length
,
1099 fArray
, start
+ srcLength
,
1100 oldLength
- (start
+ length
));
1103 // now fill in the hole with the new string
1104 us_arrayCopy(srcChars
, srcStart
, getArrayStart(), start
, srcLength
);
1108 // delayed delete in case srcChars == fArray when we started, and
1109 // to keep oldArray alive for the above operations
1110 if (bufferToDelete
) {
1111 uprv_free(bufferToDelete
);
1121 UnicodeString::handleReplaceBetween(int32_t start
,
1123 const UnicodeString
& text
) {
1124 replaceBetween(start
, limit
, text
);
1131 UnicodeString::copy(int32_t start
, int32_t limit
, int32_t dest
) {
1132 if (limit
<= start
) {
1133 return; // Nothing to do; avoid bogus malloc call
1135 UChar
* text
= (UChar
*) uprv_malloc( sizeof(UChar
) * (limit
- start
) );
1136 extractBetween(start
, limit
, text
, 0);
1137 insert(dest
, text
, 0, limit
- start
);
1144 * NOTE: This is for the Replaceable class. There is no rep.cpp,
1145 * so we implement this function here.
1147 UBool
Replaceable::hasMetaData() const {
1154 UBool
UnicodeString::hasMetaData() const {
1159 UnicodeString::doReverse(int32_t start
,
1162 if(fLength
<= 1 || !cloneArrayIfNeeded()) {
1166 // pin the indices to legal values
1167 pinIndices(start
, length
);
1169 UChar
*left
= getArrayStart() + start
;
1170 UChar
*right
= getArrayStart() + start
+ length
;
1172 UBool hasSupplementary
= FALSE
;
1174 while(left
< --right
) {
1175 hasSupplementary
|= (UBool
)UTF_IS_LEAD(swap
= *left
);
1176 hasSupplementary
|= (UBool
)UTF_IS_LEAD(*left
++ = *right
);
1180 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1181 if(hasSupplementary
) {
1184 left
= getArrayStart() + start
;
1185 right
= getArrayStart() + start
+ length
- 1; // -1 so that we can look at *(left+1) if left<right
1186 while(left
< right
) {
1187 if(UTF_IS_TRAIL(swap
= *left
) && UTF_IS_LEAD(swap2
= *(left
+ 1))) {
1200 UnicodeString::padLeading(int32_t targetLength
,
1203 if(fLength
>= targetLength
|| !cloneArrayIfNeeded(targetLength
)) {
1206 // move contents up by padding width
1207 int32_t start
= targetLength
- fLength
;
1208 us_arrayCopy(fArray
, 0, fArray
, start
, fLength
);
1210 // fill in padding character
1211 while(--start
>= 0) {
1212 fArray
[start
] = padChar
;
1214 fLength
= targetLength
;
1220 UnicodeString::padTrailing(int32_t targetLength
,
1223 if(fLength
>= targetLength
|| !cloneArrayIfNeeded(targetLength
)) {
1226 // fill in padding character
1227 int32_t length
= targetLength
;
1228 while(--length
>= fLength
) {
1229 fArray
[length
] = padChar
;
1231 fLength
= targetLength
;
1236 //========================================
1238 //========================================
1240 UnicodeString::doHashCode() const
1242 /* Delegate hash computation to uhash. This makes UnicodeString
1243 * hashing consistent with UChar* hashing. */
1244 int32_t hashCode
= uhash_hashUCharsN(getArrayStart(), fLength
);
1245 if (hashCode
== kInvalidHashCode
) {
1246 hashCode
= kEmptyHashCode
;
1251 //========================================
1253 //========================================
1256 UnicodeString::getBuffer(int32_t minCapacity
) {
1257 if(minCapacity
>=-1 && cloneArrayIfNeeded(minCapacity
)) {
1258 fFlags
|=kOpenGetBuffer
;
1267 UnicodeString::releaseBuffer(int32_t newLength
) {
1268 if(fFlags
&kOpenGetBuffer
&& newLength
>=-1) {
1269 // set the new fLength
1271 // the new length is the string length, capped by fCapacity
1272 const UChar
*p
=fArray
, *limit
=fArray
+fCapacity
;
1273 while(p
<limit
&& *p
!=0) {
1276 fLength
=(int32_t)(p
-fArray
);
1277 } else if(newLength
<=fCapacity
) {
1282 fFlags
&=~kOpenGetBuffer
;
1286 //========================================
1288 //========================================
1290 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity
,
1291 int32_t growCapacity
,
1293 int32_t **pBufferToDelete
,
1295 // default parameters need to be static, therefore
1296 // the defaults are -1 to have convenience defaults
1297 if(newCapacity
== -1) {
1298 newCapacity
= fCapacity
;
1301 // while a getBuffer(minCapacity) is "open",
1302 // prevent any modifications of the string by returning FALSE here
1303 // if the string is bogus, then only an assignment or similar can revive it
1304 if((fFlags
&(kOpenGetBuffer
|kIsBogus
))!=0) {
1309 * We need to make a copy of the array if
1310 * the buffer is read-only, or
1311 * the buffer is refCounted (shared), and refCount>1, or
1312 * the buffer is too small.
1313 * Return FALSE if memory could not be allocated.
1316 fFlags
& kBufferIsReadonly
||
1317 fFlags
& kRefCounted
&& refCount() > 1 ||
1318 newCapacity
> fCapacity
1321 UChar
*array
= fArray
;
1322 uint16_t flags
= fFlags
;
1324 // check growCapacity for default value and use of the stack buffer
1325 if(growCapacity
== -1) {
1326 growCapacity
= newCapacity
;
1327 } else if(newCapacity
<= US_STACKBUF_SIZE
&& growCapacity
> US_STACKBUF_SIZE
) {
1328 growCapacity
= US_STACKBUF_SIZE
;
1331 // allocate a new array
1332 if(allocate(growCapacity
) ||
1333 newCapacity
< growCapacity
&& allocate(newCapacity
)
1336 // copy the contents
1337 // do not copy more than what fits - it may be smaller than before
1338 if(fCapacity
< fLength
) {
1339 fLength
= fCapacity
;
1341 us_arrayCopy(array
, 0, fArray
, 0, fLength
);
1346 // release the old array
1347 if(flags
& kRefCounted
) {
1348 // the array is refCounted; decrement and release if 0
1349 int32_t *pRefCount
= ((int32_t *)array
- 1);
1350 if(umtx_atomic_dec(pRefCount
) == 0) {
1351 if(pBufferToDelete
== 0) {
1352 uprv_free(pRefCount
);
1354 // the caller requested to delete it himself
1355 *pBufferToDelete
= pRefCount
;
1360 // not enough memory for growCapacity and not even for the smaller newCapacity
1361 // reset the old values for setToBogus() to release the array
1372 #ifdef U_STATIC_IMPLEMENTATION
1374 This should never be called. It is defined here to make sure that the
1375 virtual vector deleting destructor is defined within unistr.cpp.
1376 The vector deleting destructor is already a part of UObject,
1377 but defining it here makes sure that it is included with this object file.
1378 This makes sure that static library dependencies are kept to a minimum.
1380 static void uprv_UnicodeStringDummy(void) {
1382 delete [] (new UnicodeString
[2]);