]>
git.saurik.com Git - apple/icu.git/blob - icuSources/common/unistr.cpp
2 ******************************************************************************
3 * Copyright (C) 1999-2004, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 ******************************************************************************
9 * Modification History:
11 * Date Name Description
12 * 09/25/98 stephen Creation.
13 * 04/20/99 stephen Overhauled per 4/16 code review.
14 * 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX
15 * 11/18/99 aliu Added handleReplaceBetween() to make inherit from
17 * 06/25/01 grhoten Removed the dependency on iostream
18 ******************************************************************************
21 #include "unicode/utypes.h"
22 #include "unicode/putil.h"
25 #include "unicode/ustring.h"
26 #include "unicode/unistr.h"
33 #if U_IOSTREAM_SOURCE >= 199711
36 #elif U_IOSTREAM_SOURCE >= 198506
42 print(const UnicodeString
& s
,
47 for(int i
= 0; i
< s
.length(); ++i
) {
49 if(c
>= 0x007E || c
< 0x0020)
50 cout
<< "[0x" << hex
<< s
[i
] << "]";
64 for(int i
= 0; i
< len
; ++i
) {
66 if(c
>= 0x007E || c
< 0x0020)
67 cout
<< "[0x" << hex
<< s
[i
] << "]";
76 // Local function definitions for now
78 // need to copy areas that may overlap
81 us_arrayCopy(const UChar
*src
, int32_t srcStart
,
82 UChar
*dst
, int32_t dstStart
, int32_t count
)
85 uprv_memmove(dst
+dstStart
, src
+srcStart
, (size_t)(count
*sizeof(*src
)));
89 // u_unescapeAt() callback to get a UChar from a UnicodeString
91 static UChar U_CALLCONV
92 UnicodeString_charAt(int32_t offset
, void *context
) {
93 return ((UnicodeString
*) context
)->charAt(offset
);
99 /* The Replaceable virtual destructor can't be defined in the header
100 due to how AIX works with multiple definitions of virtual functions.
102 Replaceable::~Replaceable() {}
103 Replaceable::Replaceable() {}
104 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString
)
106 UnicodeString U_EXPORT2
107 operator+ (const UnicodeString
&s1
, const UnicodeString
&s2
) {
109 UnicodeString(s1
.length()+s2
.length()+1, (UChar32
)0, 0).
114 //========================================
115 // Reference Counting functions, put at top of file so that optimizing compilers
116 // have a chance to automatically inline.
117 //========================================
120 UnicodeString::addRef()
121 { umtx_atomic_inc((int32_t *)fArray
- 1);}
124 UnicodeString::removeRef()
125 { return umtx_atomic_dec((int32_t *)fArray
- 1);}
128 UnicodeString::refCount() const
131 // Note: without the lock to force a memory barrier, we might see a very
132 // stale value on some multi-processor systems.
133 int32_t count
= *((int32_t *)fArray
- 1);
139 UnicodeString::releaseArray() {
140 if((fFlags
& kRefCounted
) && removeRef() == 0) {
141 uprv_free((int32_t *)fArray
- 1);
147 //========================================
149 //========================================
150 UnicodeString::UnicodeString()
152 fCapacity(US_STACKBUF_SIZE
),
153 fArray(fStackBuffer
),
157 UnicodeString::UnicodeString(int32_t capacity
, UChar32 c
, int32_t count
)
159 fCapacity(US_STACKBUF_SIZE
),
163 if(count
<= 0 || (uint32_t)c
> 0x10ffff) {
164 // just allocate and do not do anything else
167 // count > 0, allocate and fill the new string with count c's
168 int32_t unitCount
= UTF_CHAR_LENGTH(c
), length
= count
* unitCount
;
169 if(capacity
< length
) {
172 if(allocate(capacity
)) {
175 // fill the new string with c
177 // fill with length UChars
179 fArray
[i
++] = (UChar
)c
;
182 // get the code units for c
183 UChar units
[UTF_MAX_CHAR_LENGTH
];
184 UTF_APPEND_CHAR_UNSAFE(units
, i
, c
);
186 // now it must be i==unitCount
189 // for Unicode, unitCount can only be 1, 2, 3, or 4
190 // 1 is handled above
193 while(unitIdx
< unitCount
) {
194 fArray
[i
++]=units
[unitIdx
++];
203 UnicodeString::UnicodeString(UChar ch
)
205 fCapacity(US_STACKBUF_SIZE
),
206 fArray(fStackBuffer
),
209 fStackBuffer
[0] = ch
;
212 UnicodeString::UnicodeString(UChar32 ch
)
214 fCapacity(US_STACKBUF_SIZE
),
215 fArray(fStackBuffer
),
219 UBool isError
= FALSE
;
220 U16_APPEND(fStackBuffer
, i
, US_STACKBUF_SIZE
, ch
, isError
);
224 UnicodeString::UnicodeString(const UChar
*text
)
226 fCapacity(US_STACKBUF_SIZE
),
227 fArray(fStackBuffer
),
230 doReplace(0, 0, text
, 0, -1);
233 UnicodeString::UnicodeString(const UChar
*text
,
236 fCapacity(US_STACKBUF_SIZE
),
237 fArray(fStackBuffer
),
240 doReplace(0, 0, text
, 0, textLength
);
243 UnicodeString::UnicodeString(UBool isTerminated
,
246 : fLength(textLength
),
247 fCapacity(isTerminated
? textLength
+ 1 : textLength
),
248 fArray((UChar
*)text
),
249 fFlags(kReadonlyAlias
)
252 // treat as an empty string, do not alias
254 fCapacity
= US_STACKBUF_SIZE
;
255 fArray
= fStackBuffer
;
256 fFlags
= kShortString
;
257 } else if(textLength
< -1 ||
258 (textLength
== -1 && !isTerminated
) ||
259 (textLength
>= 0 && isTerminated
&& text
[textLength
] != 0)
262 } else if(textLength
== -1) {
263 // text is terminated, or else it would have failed the above test
264 fLength
= u_strlen(text
);
265 fCapacity
= fLength
+ 1;
269 UnicodeString::UnicodeString(UChar
*buff
,
271 int32_t buffCapacity
)
272 : fLength(buffLength
),
273 fCapacity(buffCapacity
),
275 fFlags(kWritableAlias
)
278 // treat as an empty string, do not alias
280 fCapacity
= US_STACKBUF_SIZE
;
281 fArray
= fStackBuffer
;
282 fFlags
= kShortString
;
283 } else if(buffLength
< -1 || buffCapacity
< 0 || buffLength
> buffCapacity
) {
285 } else if(buffLength
== -1) {
286 // fLength = u_strlen(buff); but do not look beyond buffCapacity
287 const UChar
*p
= buff
, *limit
= buff
+ buffCapacity
;
288 while(p
!= limit
&& *p
!= 0) {
291 fLength
= (int32_t)(p
- buff
);
295 UnicodeString::UnicodeString(const char *src
, int32_t length
, EInvariant
)
297 fCapacity(US_STACKBUF_SIZE
),
298 fArray(fStackBuffer
),
302 // treat as an empty string
305 length
=uprv_strlen(src
);
307 if(cloneArrayIfNeeded(length
, length
, FALSE
)) {
308 u_charsToUChars(src
, getArrayStart(), length
);
316 UnicodeString::UnicodeString(const UnicodeString
& that
)
319 fCapacity(US_STACKBUF_SIZE
),
320 fArray(fStackBuffer
),
326 UnicodeString::UnicodeString(const UnicodeString
& that
,
330 fCapacity(US_STACKBUF_SIZE
),
331 fArray(fStackBuffer
),
334 setTo(that
, srcStart
);
337 UnicodeString::UnicodeString(const UnicodeString
& that
,
342 fCapacity(US_STACKBUF_SIZE
),
343 fArray(fStackBuffer
),
346 setTo(that
, srcStart
, srcLength
);
349 // Replaceable base class clone() default implementation, does not clone
351 Replaceable::clone() const {
355 // UnicodeString overrides clone() with a real implementation
357 UnicodeString::clone() const {
358 return new UnicodeString(*this);
361 //========================================
363 //========================================
366 UnicodeString::allocate(int32_t capacity
) {
367 if(capacity
<= US_STACKBUF_SIZE
) {
368 fArray
= fStackBuffer
;
369 fCapacity
= US_STACKBUF_SIZE
;
370 fFlags
= kShortString
;
372 // count bytes for the refCounter and the string capacity, and
373 // round up to a multiple of 16; then divide by 4 and allocate int32_t's
374 // to be safely aligned for the refCount
375 int32_t words
= (int32_t)(((sizeof(int32_t) + capacity
* U_SIZEOF_UCHAR
+ 15) & ~15) >> 2);
376 int32_t *array
= (int32_t*) uprv_malloc( sizeof(int32_t) * words
);
378 // set initial refCount and point behind the refCount
381 // have fArray point to the first UChar
382 fArray
= (UChar
*)array
;
383 fCapacity
= (int32_t)((words
- 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR
));
384 fFlags
= kLongString
;
395 //========================================
397 //========================================
398 UnicodeString::~UnicodeString()
404 //========================================
406 //========================================
409 UnicodeString::operator=(const UnicodeString
&src
) {
410 return copyFrom(src
);
414 UnicodeString::fastCopyFrom(const UnicodeString
&src
) {
415 return copyFrom(src
, TRUE
);
419 UnicodeString::copyFrom(const UnicodeString
&src
, UBool fastCopy
) {
420 // if assigning to ourselves, do nothing
421 if(this == 0 || this == &src
) {
425 // is the right side bogus?
426 if(&src
== 0 || src
.isBogus()) {
431 // delete the current contents
434 // we always copy the length
435 fLength
= src
.fLength
;
437 // empty string - use the stack buffer
438 fArray
= fStackBuffer
;
439 fCapacity
= US_STACKBUF_SIZE
;
440 fFlags
= kShortString
;
444 // fLength>0 and not an "open" src.getBuffer(minCapacity)
447 // short string using the stack buffer, do the same
448 fArray
= fStackBuffer
;
449 fCapacity
= US_STACKBUF_SIZE
;
450 fFlags
= kShortString
;
451 uprv_memcpy(fStackBuffer
, src
.fArray
, fLength
* U_SIZEOF_UCHAR
);
454 // src uses a refCounted string buffer, use that buffer with refCount
455 // src is const, use a cast - we don't really change it
456 ((UnicodeString
&)src
).addRef();
457 // copy all fields, share the reference-counted buffer
459 fCapacity
= src
.fCapacity
;
464 // src is a readonly alias, do the same
465 // -> maintain the readonly alias as such
467 fCapacity
= src
.fCapacity
;
471 // else if(!fastCopy) fall through to case kWritableAlias
472 // -> allocate a new buffer and copy the contents
474 // src is a writable alias; we make a copy of that instead
475 if(allocate(fLength
)) {
476 uprv_memcpy(fArray
, src
.fArray
, fLength
* U_SIZEOF_UCHAR
);
479 // if there is not enough memory, then fall through to setting to bogus
481 // if src is bogus, set ourselves to bogus
482 // do not call setToBogus() here because fArray and fFlags are not consistent here
493 //========================================
494 // Miscellaneous operations
495 //========================================
497 UnicodeString
UnicodeString::unescape() const {
498 UnicodeString result
;
499 for (int32_t i
=0; i
<length(); ) {
500 UChar32 c
= charAt(i
++);
501 if (c
== 0x005C /*'\\'*/) {
502 c
= unescapeAt(i
); // advances i
503 if (c
== (UChar32
)0xFFFFFFFF) {
504 result
.remove(); // return empty string
505 break; // invalid escape sequence
513 UChar32
UnicodeString::unescapeAt(int32_t &offset
) const {
514 return u_unescapeAt(UnicodeString_charAt
, &offset
, length(), (void*)this);
517 //========================================
518 // Read-only implementation
519 //========================================
521 UnicodeString::doCompare( int32_t start
,
523 const UChar
*srcChars
,
525 int32_t srcLength
) const
527 // compare illegal string values
528 // treat const UChar *srcChars==NULL as an empty string
533 // pin indices to legal values
534 pinIndices(start
, length
);
536 if(srcChars
== NULL
) {
537 srcStart
= srcLength
= 0;
540 // get the correct pointer
541 const UChar
*chars
= getArrayStart();
544 srcChars
+= srcStart
;
549 // get the srcLength if necessary
551 srcLength
= u_strlen(srcChars
+ srcStart
);
554 // are we comparing different lengths?
555 if(length
!= srcLength
) {
556 if(length
< srcLength
) {
560 minLength
= srcLength
;
569 * note that uprv_memcmp() returns an int but we return an int8_t;
570 * we need to take care not to truncate the result -
571 * one way to do this is to right-shift the value to
572 * move the sign bit into the lower 8 bits and making sure that this
573 * does not become 0 itself
576 if(minLength
> 0 && chars
!= srcChars
) {
580 // big-endian: byte comparison works
581 result
= uprv_memcmp(chars
, srcChars
, minLength
* sizeof(UChar
));
583 return (int8_t)(result
>> 15 | 1);
586 // little-endian: compare UChar units
588 result
= ((int32_t)*(chars
++) - (int32_t)*(srcChars
++));
590 return (int8_t)(result
>> 15 | 1);
592 } while(--minLength
> 0);
598 /* String compare in code point order - doCompare() compares in code unit order. */
600 UnicodeString::doCompareCodePointOrder(int32_t start
,
602 const UChar
*srcChars
,
604 int32_t srcLength
) const
606 // compare illegal string values
607 // treat const UChar *srcChars==NULL as an empty string
612 // pin indices to legal values
613 pinIndices(start
, length
);
615 if(srcChars
== NULL
) {
616 srcStart
= srcLength
= 0;
619 int32_t diff
= uprv_strCompare(fArray
+ start
, length
, srcChars
+ srcStart
, srcLength
, FALSE
, TRUE
);
620 /* translate the 32-bit result into an 8-bit one */
622 return (int8_t)(diff
>> 15 | 1);
629 UnicodeString::getLength() const {
634 UnicodeString::getCharAt(int32_t offset
) const {
635 return charAt(offset
);
639 UnicodeString::getChar32At(int32_t offset
) const {
640 return char32At(offset
);
644 UnicodeString::countChar32(int32_t start
, int32_t length
) const {
645 pinIndices(start
, length
);
646 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
647 return u_countChar32(fArray
+start
, length
);
651 UnicodeString::hasMoreChar32Than(int32_t start
, int32_t length
, int32_t number
) const {
652 pinIndices(start
, length
);
653 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
654 return u_strHasMoreChar32Than(fArray
+start
, length
, number
);
658 UnicodeString::moveIndex32(int32_t index
, int32_t delta
) const {
662 } else if(index
>fLength
) {
667 UTF_FWD_N(fArray
, index
, fLength
, delta
);
669 UTF_BACK_N(fArray
, 0, index
, -delta
);
676 UnicodeString::doExtract(int32_t start
,
679 int32_t dstStart
) const
681 // pin indices to legal values
682 pinIndices(start
, length
);
684 // do not copy anything if we alias dst itself
685 if(fArray
+ start
!= dst
+ dstStart
) {
686 us_arrayCopy(getArrayStart(), start
, dst
, dstStart
, length
);
691 UnicodeString::extract(UChar
*dest
, int32_t destCapacity
,
692 UErrorCode
&errorCode
) const {
693 if(U_SUCCESS(errorCode
)) {
694 if(isBogus() || destCapacity
<0 || (destCapacity
>0 && dest
==0)) {
695 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
697 if(fLength
>0 && fLength
<=destCapacity
&& fArray
!=dest
) {
698 uprv_memcpy(dest
, fArray
, fLength
*U_SIZEOF_UCHAR
);
700 return u_terminateUChars(dest
, destCapacity
, fLength
, &errorCode
);
708 UnicodeString::extract(int32_t start
,
711 int32_t targetCapacity
,
712 enum EInvariant
) const
714 // if the arguments are illegal, then do nothing
715 if(targetCapacity
< 0 || (targetCapacity
> 0 && target
== NULL
)) {
719 // pin the indices to legal values
720 pinIndices(start
, length
);
722 if(length
<= targetCapacity
) {
723 u_UCharsToChars(getArrayStart() + start
, target
, length
);
725 UErrorCode status
= U_ZERO_ERROR
;
726 return u_terminateChars(target
, targetCapacity
, length
, &status
);
730 UnicodeString::extractBetween(int32_t start
,
732 UnicodeString
& target
) const {
735 doExtract(start
, limit
- start
, target
);
739 UnicodeString::indexOf(const UChar
*srcChars
,
743 int32_t length
) const
745 if(isBogus() || srcChars
== 0 || srcStart
< 0 || srcLength
== 0) {
749 // UnicodeString does not find empty substrings
750 if(srcLength
< 0 && srcChars
[srcStart
] == 0) {
754 // get the indices within bounds
755 pinIndices(start
, length
);
757 // find the first occurrence of the substring
758 const UChar
*match
= u_strFindFirst(fArray
+ start
, length
, srcChars
+ srcStart
, srcLength
);
762 return match
- fArray
;
767 UnicodeString::doIndexOf(UChar c
,
769 int32_t length
) const
772 pinIndices(start
, length
);
774 // find the first occurrence of c
775 const UChar
*match
= u_memchr(fArray
+ start
, c
, length
);
779 return match
- fArray
;
784 UnicodeString::doIndexOf(UChar32 c
,
786 int32_t length
) const {
788 pinIndices(start
, length
);
790 // find the first occurrence of c
791 const UChar
*match
= u_memchr32(fArray
+ start
, c
, length
);
795 return match
- fArray
;
800 UnicodeString::lastIndexOf(const UChar
*srcChars
,
804 int32_t length
) const
806 if(isBogus() || srcChars
== 0 || srcStart
< 0 || srcLength
== 0) {
810 // UnicodeString does not find empty substrings
811 if(srcLength
< 0 && srcChars
[srcStart
] == 0) {
815 // get the indices within bounds
816 pinIndices(start
, length
);
818 // find the last occurrence of the substring
819 const UChar
*match
= u_strFindLast(fArray
+ start
, length
, srcChars
+ srcStart
, srcLength
);
823 return match
- fArray
;
828 UnicodeString::doLastIndexOf(UChar c
,
830 int32_t length
) const
837 pinIndices(start
, length
);
839 // find the last occurrence of c
840 const UChar
*match
= u_memrchr(fArray
+ start
, c
, length
);
844 return match
- fArray
;
849 UnicodeString::doLastIndexOf(UChar32 c
,
851 int32_t length
) const {
853 pinIndices(start
, length
);
855 // find the last occurrence of c
856 const UChar
*match
= u_memrchr32(fArray
+ start
, c
, length
);
860 return match
- fArray
;
864 //========================================
865 // Write implementation
866 //========================================
869 UnicodeString::findAndReplace(int32_t start
,
871 const UnicodeString
& oldText
,
874 const UnicodeString
& newText
,
878 if(isBogus() || oldText
.isBogus() || newText
.isBogus()) {
882 pinIndices(start
, length
);
883 oldText
.pinIndices(oldStart
, oldLength
);
884 newText
.pinIndices(newStart
, newLength
);
890 while(length
> 0 && length
>= oldLength
) {
891 int32_t pos
= indexOf(oldText
, oldStart
, oldLength
, start
, length
);
893 // no more oldText's here: done
896 // we found oldText, replace it by newText and go beyond it
897 replace(pos
, oldLength
, newText
, newStart
, newLength
);
898 length
-= pos
+ oldLength
- start
;
899 start
= pos
+ newLength
;
908 UnicodeString::setToBogus()
913 fCapacity
= fLength
= 0;
917 // turn a bogus string into an empty one
919 UnicodeString::unBogus() {
920 if(fFlags
& kIsBogus
) {
921 fArray
= fStackBuffer
;
923 fCapacity
= US_STACKBUF_SIZE
;
924 fFlags
= kShortString
;
928 // setTo() analogous to the readonly-aliasing constructor with the same signature
930 UnicodeString::setTo(UBool isTerminated
,
934 if(fFlags
& kOpenGetBuffer
) {
935 // do not modify a string that has an "open" getBuffer(minCapacity)
940 // treat as an empty string, do not alias
943 fCapacity
= US_STACKBUF_SIZE
;
944 fArray
= fStackBuffer
;
945 fFlags
= kShortString
;
949 if( textLength
< -1 ||
950 (textLength
== -1 && !isTerminated
) ||
951 (textLength
>= 0 && isTerminated
&& text
[textLength
] != 0)
959 fArray
= (UChar
*)text
;
960 if(textLength
!= -1) {
961 fLength
= textLength
;
962 fCapacity
= isTerminated
? fLength
+ 1 : fLength
;
964 // text is terminated, or else it would have failed the above test
965 fLength
= u_strlen(text
);
966 fCapacity
= fLength
+ 1;
969 fFlags
= kReadonlyAlias
;
973 // setTo() analogous to the writable-aliasing constructor with the same signature
975 UnicodeString::setTo(UChar
*buffer
,
977 int32_t buffCapacity
) {
978 if(fFlags
& kOpenGetBuffer
) {
979 // do not modify a string that has an "open" getBuffer(minCapacity)
984 // treat as an empty string, do not alias
987 fCapacity
= US_STACKBUF_SIZE
;
988 fArray
= fStackBuffer
;
989 fFlags
= kShortString
;
993 if(buffLength
< -1 || buffCapacity
< 0 || buffLength
> buffCapacity
) {
996 } else if(buffLength
== -1) {
997 // buffLength = u_strlen(buff); but do not look beyond buffCapacity
998 const UChar
*p
= buffer
, *limit
= buffer
+ buffCapacity
;
999 while(p
!= limit
&& *p
!= 0) {
1002 buffLength
= (int32_t)(p
- buffer
);
1008 fLength
= buffLength
;
1009 fCapacity
= buffCapacity
;
1010 fFlags
= kWritableAlias
;
1015 UnicodeString::setCharAt(int32_t offset
,
1018 if(cloneArrayIfNeeded() && fLength
> 0) {
1021 } else if(offset
>= fLength
) {
1022 offset
= fLength
- 1;
1031 UnicodeString::doReplace( int32_t start
,
1033 const UnicodeString
& src
,
1037 if(!src
.isBogus()) {
1038 // pin the indices to legal values
1039 src
.pinIndices(srcStart
, srcLength
);
1041 // get the characters from src
1042 // and replace the range in ourselves with them
1043 return doReplace(start
, length
, src
.getArrayStart(), srcStart
, srcLength
);
1046 return doReplace(start
, length
, 0, 0, 0);
1051 UnicodeString::doReplace(int32_t start
,
1053 const UChar
*srcChars
,
1062 srcStart
= srcLength
= 0;
1063 } else if(srcLength
< 0) {
1064 // get the srcLength if necessary
1065 srcLength
= u_strlen(srcChars
+ srcStart
);
1068 int32_t *bufferToDelete
= 0;
1070 // the following may change fArray but will not copy the current contents;
1071 // therefore we need to keep the current fArray
1072 UChar
*oldArray
= fArray
;
1073 int32_t oldLength
= fLength
;
1075 // pin the indices to legal values
1076 pinIndices(start
, length
);
1078 // calculate the size of the string after the replace
1079 int32_t newSize
= oldLength
- length
+ srcLength
;
1081 // clone our array and allocate a bigger array if needed
1082 if(!cloneArrayIfNeeded(newSize
, newSize
+ (newSize
>> 2) + kGrowSize
,
1083 FALSE
, &bufferToDelete
)
1088 // now do the replace
1090 if(fArray
!= oldArray
) {
1091 // if fArray changed, then we need to copy everything except what will change
1092 us_arrayCopy(oldArray
, 0, fArray
, 0, start
);
1093 us_arrayCopy(oldArray
, start
+ length
,
1094 fArray
, start
+ srcLength
,
1095 oldLength
- (start
+ length
));
1096 } else if(length
!= srcLength
) {
1097 // fArray did not change; copy only the portion that isn't changing, leaving a hole
1098 us_arrayCopy(oldArray
, start
+ length
,
1099 fArray
, start
+ srcLength
,
1100 oldLength
- (start
+ length
));
1103 // now fill in the hole with the new string
1104 us_arrayCopy(srcChars
, srcStart
, getArrayStart(), start
, srcLength
);
1108 // delayed delete in case srcChars == fArray when we started, and
1109 // to keep oldArray alive for the above operations
1110 if (bufferToDelete
) {
1111 uprv_free(bufferToDelete
);
1121 UnicodeString::handleReplaceBetween(int32_t start
,
1123 const UnicodeString
& text
) {
1124 replaceBetween(start
, limit
, text
);
1131 UnicodeString::copy(int32_t start
, int32_t limit
, int32_t dest
) {
1132 if (limit
<= start
) {
1133 return; // Nothing to do; avoid bogus malloc call
1135 UChar
* text
= (UChar
*) uprv_malloc( sizeof(UChar
) * (limit
- start
) );
1136 extractBetween(start
, limit
, text
, 0);
1137 insert(dest
, text
, 0, limit
- start
);
1144 * NOTE: This is for the Replaceable class. There is no rep.cpp,
1145 * so we implement this function here.
1147 UBool
Replaceable::hasMetaData() const {
1154 UBool
UnicodeString::hasMetaData() const {
1159 UnicodeString::doReverse(int32_t start
,
1162 if(fLength
<= 1 || !cloneArrayIfNeeded()) {
1166 // pin the indices to legal values
1167 pinIndices(start
, length
);
1169 UChar
*left
= getArrayStart() + start
;
1170 UChar
*right
= getArrayStart() + start
+ length
;
1172 UBool hasSupplementary
= FALSE
;
1174 while(left
< --right
) {
1175 hasSupplementary
|= (UBool
)UTF_IS_LEAD(swap
= *left
);
1176 hasSupplementary
|= (UBool
)UTF_IS_LEAD(*left
++ = *right
);
1180 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1181 if(hasSupplementary
) {
1184 left
= getArrayStart() + start
;
1185 right
= getArrayStart() + start
+ length
- 1; // -1 so that we can look at *(left+1) if left<right
1186 while(left
< right
) {
1187 if(UTF_IS_TRAIL(swap
= *left
) && UTF_IS_LEAD(swap2
= *(left
+ 1))) {
1200 UnicodeString::padLeading(int32_t targetLength
,
1203 if(fLength
>= targetLength
|| !cloneArrayIfNeeded(targetLength
)) {
1206 // move contents up by padding width
1207 int32_t start
= targetLength
- fLength
;
1208 us_arrayCopy(fArray
, 0, fArray
, start
, fLength
);
1210 // fill in padding character
1211 while(--start
>= 0) {
1212 fArray
[start
] = padChar
;
1214 fLength
= targetLength
;
1220 UnicodeString::padTrailing(int32_t targetLength
,
1223 if(fLength
>= targetLength
|| !cloneArrayIfNeeded(targetLength
)) {
1226 // fill in padding character
1227 int32_t length
= targetLength
;
1228 while(--length
>= fLength
) {
1229 fArray
[length
] = padChar
;
1231 fLength
= targetLength
;
1236 //========================================
1238 //========================================
1240 UnicodeString::doHashCode() const
1242 /* Delegate hash computation to uhash. This makes UnicodeString
1243 * hashing consistent with UChar* hashing. */
1244 int32_t hashCode
= uhash_hashUCharsN(getArrayStart(), fLength
);
1245 if (hashCode
== kInvalidHashCode
) {
1246 hashCode
= kEmptyHashCode
;
1251 //========================================
1253 //========================================
1256 UnicodeString::getBuffer(int32_t minCapacity
) {
1257 if(minCapacity
>=-1 && cloneArrayIfNeeded(minCapacity
)) {
1258 fFlags
|=kOpenGetBuffer
;
1267 UnicodeString::releaseBuffer(int32_t newLength
) {
1268 if(fFlags
&kOpenGetBuffer
&& newLength
>=-1) {
1269 // set the new fLength
1271 // the new length is the string length, capped by fCapacity
1272 const UChar
*p
=fArray
, *limit
=fArray
+fCapacity
;
1273 while(p
<limit
&& *p
!=0) {
1276 fLength
=(int32_t)(p
-fArray
);
1277 } else if(newLength
<=fCapacity
) {
1282 fFlags
&=~kOpenGetBuffer
;
1286 //========================================
1288 //========================================
1290 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity
,
1291 int32_t growCapacity
,
1293 int32_t **pBufferToDelete
,
1295 // default parameters need to be static, therefore
1296 // the defaults are -1 to have convenience defaults
1297 if(newCapacity
== -1) {
1298 newCapacity
= fCapacity
;
1301 // while a getBuffer(minCapacity) is "open",
1302 // prevent any modifications of the string by returning FALSE here
1303 // if the string is bogus, then only an assignment or similar can revive it
1304 if((fFlags
&(kOpenGetBuffer
|kIsBogus
))!=0) {
1309 * We need to make a copy of the array if
1310 * the buffer is read-only, or
1311 * the buffer is refCounted (shared), and refCount>1, or
1312 * the buffer is too small.
1313 * Return FALSE if memory could not be allocated.
1316 fFlags
& kBufferIsReadonly
||
1317 fFlags
& kRefCounted
&& refCount() > 1 ||
1318 newCapacity
> fCapacity
1321 UChar
*array
= fArray
;
1322 uint16_t flags
= fFlags
;
1324 // check growCapacity for default value and use of the stack buffer
1325 if(growCapacity
== -1) {
1326 growCapacity
= newCapacity
;
1327 } else if(newCapacity
<= US_STACKBUF_SIZE
&& growCapacity
> US_STACKBUF_SIZE
) {
1328 growCapacity
= US_STACKBUF_SIZE
;
1331 // allocate a new array
1332 if(allocate(growCapacity
) ||
1333 newCapacity
< growCapacity
&& allocate(newCapacity
)
1336 // copy the contents
1337 // do not copy more than what fits - it may be smaller than before
1338 if(fCapacity
< fLength
) {
1339 fLength
= fCapacity
;
1341 us_arrayCopy(array
, 0, fArray
, 0, fLength
);
1346 // release the old array
1347 if(flags
& kRefCounted
) {
1348 // the array is refCounted; decrement and release if 0
1349 int32_t *pRefCount
= ((int32_t *)array
- 1);
1350 if(umtx_atomic_dec(pRefCount
) == 0) {
1351 if(pBufferToDelete
== 0) {
1352 uprv_free(pRefCount
);
1354 // the caller requested to delete it himself
1355 *pBufferToDelete
= pRefCount
;
1360 // not enough memory for growCapacity and not even for the smaller newCapacity
1361 // reset the old values for setToBogus() to release the array