]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/unistr.cpp
ICU-491.11.2.tar.gz
[apple/icu.git] / icuSources / common / unistr.cpp
1 /*
2 ******************************************************************************
3 * Copyright (C) 1999-2012, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ******************************************************************************
6 *
7 * File unistr.cpp
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 09/25/98 stephen Creation.
13 * 04/20/99 stephen Overhauled per 4/16 code review.
14 * 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX
15 * 11/18/99 aliu Added handleReplaceBetween() to make inherit from
16 * Replaceable.
17 * 06/25/01 grhoten Removed the dependency on iostream
18 ******************************************************************************
19 */
20
21 #include "unicode/utypes.h"
22 #include "unicode/appendable.h"
23 #include "unicode/putil.h"
24 #include "cstring.h"
25 #include "cmemory.h"
26 #include "unicode/ustring.h"
27 #include "unicode/unistr.h"
28 #include "unicode/utf.h"
29 #include "unicode/utf16.h"
30 #include "uelement.h"
31 #include "ustr_imp.h"
32 #include "umutex.h"
33 #include "uassert.h"
34
35 #if 0
36
37 #include <iostream>
38 using namespace std;
39
40 //DEBUGGING
41 void
42 print(const UnicodeString& s,
43 const char *name)
44 {
45 UChar c;
46 cout << name << ":|";
47 for(int i = 0; i < s.length(); ++i) {
48 c = s[i];
49 if(c>= 0x007E || c < 0x0020)
50 cout << "[0x" << hex << s[i] << "]";
51 else
52 cout << (char) s[i];
53 }
54 cout << '|' << endl;
55 }
56
57 void
58 print(const UChar *s,
59 int32_t len,
60 const char *name)
61 {
62 UChar c;
63 cout << name << ":|";
64 for(int i = 0; i < len; ++i) {
65 c = s[i];
66 if(c>= 0x007E || c < 0x0020)
67 cout << "[0x" << hex << s[i] << "]";
68 else
69 cout << (char) s[i];
70 }
71 cout << '|' << endl;
72 }
73 // END DEBUGGING
74 #endif
75
76 // Local function definitions for now
77
78 // need to copy areas that may overlap
79 static
80 inline void
81 us_arrayCopy(const UChar *src, int32_t srcStart,
82 UChar *dst, int32_t dstStart, int32_t count)
83 {
84 if(count>0) {
85 uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src)));
86 }
87 }
88
89 // u_unescapeAt() callback to get a UChar from a UnicodeString
90 U_CDECL_BEGIN
91 static UChar U_CALLCONV
92 UnicodeString_charAt(int32_t offset, void *context) {
93 return ((icu::UnicodeString*) context)->charAt(offset);
94 }
95 U_CDECL_END
96
97 U_NAMESPACE_BEGIN
98
99 /* The Replaceable virtual destructor can't be defined in the header
100 due to how AIX works with multiple definitions of virtual functions.
101 */
102 Replaceable::~Replaceable() {}
103 Replaceable::Replaceable() {}
104 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
105
106 UnicodeString U_EXPORT2
107 operator+ (const UnicodeString &s1, const UnicodeString &s2) {
108 return
109 UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
110 append(s1).
111 append(s2);
112 }
113
114 //========================================
115 // Reference Counting functions, put at top of file so that optimizing compilers
116 // have a chance to automatically inline.
117 //========================================
118
119 void
120 UnicodeString::addRef()
121 { umtx_atomic_inc((int32_t *)fUnion.fFields.fArray - 1);}
122
123 int32_t
124 UnicodeString::removeRef()
125 { return umtx_atomic_dec((int32_t *)fUnion.fFields.fArray - 1);}
126
127 int32_t
128 UnicodeString::refCount() const
129 {
130 umtx_lock(NULL);
131 // Note: without the lock to force a memory barrier, we might see a very
132 // stale value on some multi-processor systems.
133 int32_t count = *((int32_t *)fUnion.fFields.fArray - 1);
134 umtx_unlock(NULL);
135 return count;
136 }
137
138 void
139 UnicodeString::releaseArray() {
140 if((fFlags & kRefCounted) && removeRef() == 0) {
141 uprv_free((int32_t *)fUnion.fFields.fArray - 1);
142 }
143 }
144
145
146
147 //========================================
148 // Constructors
149 //========================================
150 UnicodeString::UnicodeString()
151 : fShortLength(0),
152 fFlags(kShortString)
153 {}
154
155 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
156 : fShortLength(0),
157 fFlags(0)
158 {
159 if(count <= 0 || (uint32_t)c > 0x10ffff) {
160 // just allocate and do not do anything else
161 allocate(capacity);
162 } else {
163 // count > 0, allocate and fill the new string with count c's
164 int32_t unitCount = U16_LENGTH(c), length = count * unitCount;
165 if(capacity < length) {
166 capacity = length;
167 }
168 if(allocate(capacity)) {
169 UChar *array = getArrayStart();
170 int32_t i = 0;
171
172 // fill the new string with c
173 if(unitCount == 1) {
174 // fill with length UChars
175 while(i < length) {
176 array[i++] = (UChar)c;
177 }
178 } else {
179 // get the code units for c
180 UChar units[U16_MAX_LENGTH];
181 U16_APPEND_UNSAFE(units, i, c);
182
183 // now it must be i==unitCount
184 i = 0;
185
186 // for Unicode, unitCount can only be 1, 2, 3, or 4
187 // 1 is handled above
188 while(i < length) {
189 int32_t unitIdx = 0;
190 while(unitIdx < unitCount) {
191 array[i++]=units[unitIdx++];
192 }
193 }
194 }
195 }
196 setLength(length);
197 }
198 }
199
200 UnicodeString::UnicodeString(UChar ch)
201 : fShortLength(1),
202 fFlags(kShortString)
203 {
204 fUnion.fStackBuffer[0] = ch;
205 }
206
207 UnicodeString::UnicodeString(UChar32 ch)
208 : fShortLength(0),
209 fFlags(kShortString)
210 {
211 int32_t i = 0;
212 UBool isError = FALSE;
213 U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);
214 // We test isError so that the compiler does not complain that we don't.
215 // If isError then i==0 which is what we want anyway.
216 if(!isError) {
217 fShortLength = (int8_t)i;
218 }
219 }
220
221 UnicodeString::UnicodeString(const UChar *text)
222 : fShortLength(0),
223 fFlags(kShortString)
224 {
225 doReplace(0, 0, text, 0, -1);
226 }
227
228 UnicodeString::UnicodeString(const UChar *text,
229 int32_t textLength)
230 : fShortLength(0),
231 fFlags(kShortString)
232 {
233 doReplace(0, 0, text, 0, textLength);
234 }
235
236 UnicodeString::UnicodeString(UBool isTerminated,
237 const UChar *text,
238 int32_t textLength)
239 : fShortLength(0),
240 fFlags(kReadonlyAlias)
241 {
242 if(text == NULL) {
243 // treat as an empty string, do not alias
244 setToEmpty();
245 } else if(textLength < -1 ||
246 (textLength == -1 && !isTerminated) ||
247 (textLength >= 0 && isTerminated && text[textLength] != 0)
248 ) {
249 setToBogus();
250 } else {
251 if(textLength == -1) {
252 // text is terminated, or else it would have failed the above test
253 textLength = u_strlen(text);
254 }
255 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
256 }
257 }
258
259 UnicodeString::UnicodeString(UChar *buff,
260 int32_t buffLength,
261 int32_t buffCapacity)
262 : fShortLength(0),
263 fFlags(kWritableAlias)
264 {
265 if(buff == NULL) {
266 // treat as an empty string, do not alias
267 setToEmpty();
268 } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
269 setToBogus();
270 } else {
271 if(buffLength == -1) {
272 // fLength = u_strlen(buff); but do not look beyond buffCapacity
273 const UChar *p = buff, *limit = buff + buffCapacity;
274 while(p != limit && *p != 0) {
275 ++p;
276 }
277 buffLength = (int32_t)(p - buff);
278 }
279 setArray(buff, buffLength, buffCapacity);
280 }
281 }
282
283 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)
284 : fShortLength(0),
285 fFlags(kShortString)
286 {
287 if(src==NULL) {
288 // treat as an empty string
289 } else {
290 if(length<0) {
291 length=(int32_t)uprv_strlen(src);
292 }
293 if(cloneArrayIfNeeded(length, length, FALSE)) {
294 u_charsToUChars(src, getArrayStart(), length);
295 setLength(length);
296 } else {
297 setToBogus();
298 }
299 }
300 }
301
302 #if U_CHARSET_IS_UTF8
303
304 UnicodeString::UnicodeString(const char *codepageData)
305 : fShortLength(0),
306 fFlags(kShortString) {
307 if(codepageData != 0) {
308 setToUTF8(codepageData);
309 }
310 }
311
312 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength)
313 : fShortLength(0),
314 fFlags(kShortString) {
315 // if there's nothing to convert, do nothing
316 if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
317 return;
318 }
319 if(dataLength == -1) {
320 dataLength = (int32_t)uprv_strlen(codepageData);
321 }
322 setToUTF8(StringPiece(codepageData, dataLength));
323 }
324
325 // else see unistr_cnv.cpp
326 #endif
327
328 UnicodeString::UnicodeString(const UnicodeString& that)
329 : Replaceable(),
330 fShortLength(0),
331 fFlags(kShortString)
332 {
333 copyFrom(that);
334 }
335
336 UnicodeString::UnicodeString(const UnicodeString& that,
337 int32_t srcStart)
338 : Replaceable(),
339 fShortLength(0),
340 fFlags(kShortString)
341 {
342 setTo(that, srcStart);
343 }
344
345 UnicodeString::UnicodeString(const UnicodeString& that,
346 int32_t srcStart,
347 int32_t srcLength)
348 : Replaceable(),
349 fShortLength(0),
350 fFlags(kShortString)
351 {
352 setTo(that, srcStart, srcLength);
353 }
354
355 // Replaceable base class clone() default implementation, does not clone
356 Replaceable *
357 Replaceable::clone() const {
358 return NULL;
359 }
360
361 // UnicodeString overrides clone() with a real implementation
362 Replaceable *
363 UnicodeString::clone() const {
364 return new UnicodeString(*this);
365 }
366
367 //========================================
368 // array allocation
369 //========================================
370
371 UBool
372 UnicodeString::allocate(int32_t capacity) {
373 if(capacity <= US_STACKBUF_SIZE) {
374 fFlags = kShortString;
375 } else {
376 // count bytes for the refCounter and the string capacity, and
377 // round up to a multiple of 16; then divide by 4 and allocate int32_t's
378 // to be safely aligned for the refCount
379 // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer()
380 int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2);
381 int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words );
382 if(array != 0) {
383 // set initial refCount and point behind the refCount
384 *array++ = 1;
385
386 // have fArray point to the first UChar
387 fUnion.fFields.fArray = (UChar *)array;
388 fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
389 fFlags = kLongString;
390 } else {
391 fShortLength = 0;
392 fUnion.fFields.fArray = 0;
393 fUnion.fFields.fCapacity = 0;
394 fFlags = kIsBogus;
395 return FALSE;
396 }
397 }
398 return TRUE;
399 }
400
401 //========================================
402 // Destructor
403 //========================================
404 UnicodeString::~UnicodeString()
405 {
406 releaseArray();
407 }
408
409 //========================================
410 // Factory methods
411 //========================================
412
413 UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) {
414 UnicodeString result;
415 result.setToUTF8(utf8);
416 return result;
417 }
418
419 UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
420 UnicodeString result;
421 int32_t capacity;
422 // Most UTF-32 strings will be BMP-only and result in a same-length
423 // UTF-16 string. We overestimate the capacity just slightly,
424 // just in case there are a few supplementary characters.
425 if(length <= US_STACKBUF_SIZE) {
426 capacity = US_STACKBUF_SIZE;
427 } else {
428 capacity = length + (length >> 4) + 4;
429 }
430 do {
431 UChar *utf16 = result.getBuffer(capacity);
432 int32_t length16;
433 UErrorCode errorCode = U_ZERO_ERROR;
434 u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
435 utf32, length,
436 0xfffd, // Substitution character.
437 NULL, // Don't care about number of substitutions.
438 &errorCode);
439 result.releaseBuffer(length16);
440 if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
441 capacity = length16 + 1; // +1 for the terminating NUL.
442 continue;
443 } else if(U_FAILURE(errorCode)) {
444 result.setToBogus();
445 }
446 break;
447 } while(TRUE);
448 return result;
449 }
450
451 //========================================
452 // Assignment
453 //========================================
454
455 UnicodeString &
456 UnicodeString::operator=(const UnicodeString &src) {
457 return copyFrom(src);
458 }
459
460 UnicodeString &
461 UnicodeString::fastCopyFrom(const UnicodeString &src) {
462 return copyFrom(src, TRUE);
463 }
464
465 UnicodeString &
466 UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
467 // if assigning to ourselves, do nothing
468 if(this == 0 || this == &src) {
469 return *this;
470 }
471
472 // is the right side bogus?
473 if(&src == 0 || src.isBogus()) {
474 setToBogus();
475 return *this;
476 }
477
478 // delete the current contents
479 releaseArray();
480
481 if(src.isEmpty()) {
482 // empty string - use the stack buffer
483 setToEmpty();
484 return *this;
485 }
486
487 // we always copy the length
488 int32_t srcLength = src.length();
489 setLength(srcLength);
490
491 // fLength>0 and not an "open" src.getBuffer(minCapacity)
492 switch(src.fFlags) {
493 case kShortString:
494 // short string using the stack buffer, do the same
495 fFlags = kShortString;
496 uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, srcLength * U_SIZEOF_UCHAR);
497 break;
498 case kLongString:
499 // src uses a refCounted string buffer, use that buffer with refCount
500 // src is const, use a cast - we don't really change it
501 ((UnicodeString &)src).addRef();
502 // copy all fields, share the reference-counted buffer
503 fUnion.fFields.fArray = src.fUnion.fFields.fArray;
504 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
505 fFlags = src.fFlags;
506 break;
507 case kReadonlyAlias:
508 if(fastCopy) {
509 // src is a readonly alias, do the same
510 // -> maintain the readonly alias as such
511 fUnion.fFields.fArray = src.fUnion.fFields.fArray;
512 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
513 fFlags = src.fFlags;
514 break;
515 }
516 // else if(!fastCopy) fall through to case kWritableAlias
517 // -> allocate a new buffer and copy the contents
518 case kWritableAlias:
519 // src is a writable alias; we make a copy of that instead
520 if(allocate(srcLength)) {
521 uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR);
522 break;
523 }
524 // if there is not enough memory, then fall through to setting to bogus
525 default:
526 // if src is bogus, set ourselves to bogus
527 // do not call setToBogus() here because fArray and fFlags are not consistent here
528 fShortLength = 0;
529 fUnion.fFields.fArray = 0;
530 fUnion.fFields.fCapacity = 0;
531 fFlags = kIsBogus;
532 break;
533 }
534
535 return *this;
536 }
537
538 //========================================
539 // Miscellaneous operations
540 //========================================
541
542 UnicodeString UnicodeString::unescape() const {
543 UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
544 const UChar *array = getBuffer();
545 int32_t len = length();
546 int32_t prev = 0;
547 for (int32_t i=0;;) {
548 if (i == len) {
549 result.append(array, prev, len - prev);
550 break;
551 }
552 if (array[i++] == 0x5C /*'\\'*/) {
553 result.append(array, prev, (i - 1) - prev);
554 UChar32 c = unescapeAt(i); // advances i
555 if (c < 0) {
556 result.remove(); // return empty string
557 break; // invalid escape sequence
558 }
559 result.append(c);
560 prev = i;
561 }
562 }
563 return result;
564 }
565
566 UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
567 return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
568 }
569
570 //========================================
571 // Read-only implementation
572 //========================================
573 int8_t
574 UnicodeString::doCompare( int32_t start,
575 int32_t length,
576 const UChar *srcChars,
577 int32_t srcStart,
578 int32_t srcLength) const
579 {
580 // compare illegal string values
581 if(isBogus()) {
582 return -1;
583 }
584
585 // pin indices to legal values
586 pinIndices(start, length);
587
588 if(srcChars == NULL) {
589 // treat const UChar *srcChars==NULL as an empty string
590 return length == 0 ? 0 : 1;
591 }
592
593 // get the correct pointer
594 const UChar *chars = getArrayStart();
595
596 chars += start;
597 srcChars += srcStart;
598
599 int32_t minLength;
600 int8_t lengthResult;
601
602 // get the srcLength if necessary
603 if(srcLength < 0) {
604 srcLength = u_strlen(srcChars + srcStart);
605 }
606
607 // are we comparing different lengths?
608 if(length != srcLength) {
609 if(length < srcLength) {
610 minLength = length;
611 lengthResult = -1;
612 } else {
613 minLength = srcLength;
614 lengthResult = 1;
615 }
616 } else {
617 minLength = length;
618 lengthResult = 0;
619 }
620
621 /*
622 * note that uprv_memcmp() returns an int but we return an int8_t;
623 * we need to take care not to truncate the result -
624 * one way to do this is to right-shift the value to
625 * move the sign bit into the lower 8 bits and making sure that this
626 * does not become 0 itself
627 */
628
629 if(minLength > 0 && chars != srcChars) {
630 int32_t result;
631
632 # if U_IS_BIG_ENDIAN
633 // big-endian: byte comparison works
634 result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
635 if(result != 0) {
636 return (int8_t)(result >> 15 | 1);
637 }
638 # else
639 // little-endian: compare UChar units
640 do {
641 result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
642 if(result != 0) {
643 return (int8_t)(result >> 15 | 1);
644 }
645 } while(--minLength > 0);
646 # endif
647 }
648 return lengthResult;
649 }
650
651 /* String compare in code point order - doCompare() compares in code unit order. */
652 int8_t
653 UnicodeString::doCompareCodePointOrder(int32_t start,
654 int32_t length,
655 const UChar *srcChars,
656 int32_t srcStart,
657 int32_t srcLength) const
658 {
659 // compare illegal string values
660 // treat const UChar *srcChars==NULL as an empty string
661 if(isBogus()) {
662 return -1;
663 }
664
665 // pin indices to legal values
666 pinIndices(start, length);
667
668 if(srcChars == NULL) {
669 srcStart = srcLength = 0;
670 }
671
672 int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=NULL)?(srcChars + srcStart):NULL, srcLength, FALSE, TRUE);
673 /* translate the 32-bit result into an 8-bit one */
674 if(diff!=0) {
675 return (int8_t)(diff >> 15 | 1);
676 } else {
677 return 0;
678 }
679 }
680
681 int32_t
682 UnicodeString::getLength() const {
683 return length();
684 }
685
686 UChar
687 UnicodeString::getCharAt(int32_t offset) const {
688 return charAt(offset);
689 }
690
691 UChar32
692 UnicodeString::getChar32At(int32_t offset) const {
693 return char32At(offset);
694 }
695
696 UChar32
697 UnicodeString::char32At(int32_t offset) const
698 {
699 int32_t len = length();
700 if((uint32_t)offset < (uint32_t)len) {
701 const UChar *array = getArrayStart();
702 UChar32 c;
703 U16_GET(array, 0, offset, len, c);
704 return c;
705 } else {
706 return kInvalidUChar;
707 }
708 }
709
710 int32_t
711 UnicodeString::getChar32Start(int32_t offset) const {
712 if((uint32_t)offset < (uint32_t)length()) {
713 const UChar *array = getArrayStart();
714 U16_SET_CP_START(array, 0, offset);
715 return offset;
716 } else {
717 return 0;
718 }
719 }
720
721 int32_t
722 UnicodeString::getChar32Limit(int32_t offset) const {
723 int32_t len = length();
724 if((uint32_t)offset < (uint32_t)len) {
725 const UChar *array = getArrayStart();
726 U16_SET_CP_LIMIT(array, 0, offset, len);
727 return offset;
728 } else {
729 return len;
730 }
731 }
732
733 int32_t
734 UnicodeString::countChar32(int32_t start, int32_t length) const {
735 pinIndices(start, length);
736 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
737 return u_countChar32(getArrayStart()+start, length);
738 }
739
740 UBool
741 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
742 pinIndices(start, length);
743 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
744 return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
745 }
746
747 int32_t
748 UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
749 // pin index
750 int32_t len = length();
751 if(index<0) {
752 index=0;
753 } else if(index>len) {
754 index=len;
755 }
756
757 const UChar *array = getArrayStart();
758 if(delta>0) {
759 U16_FWD_N(array, index, len, delta);
760 } else {
761 U16_BACK_N(array, 0, index, -delta);
762 }
763
764 return index;
765 }
766
767 void
768 UnicodeString::doExtract(int32_t start,
769 int32_t length,
770 UChar *dst,
771 int32_t dstStart) const
772 {
773 // pin indices to legal values
774 pinIndices(start, length);
775
776 // do not copy anything if we alias dst itself
777 const UChar *array = getArrayStart();
778 if(array + start != dst + dstStart) {
779 us_arrayCopy(array, start, dst, dstStart, length);
780 }
781 }
782
783 int32_t
784 UnicodeString::extract(UChar *dest, int32_t destCapacity,
785 UErrorCode &errorCode) const {
786 int32_t len = length();
787 if(U_SUCCESS(errorCode)) {
788 if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
789 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
790 } else {
791 const UChar *array = getArrayStart();
792 if(len>0 && len<=destCapacity && array!=dest) {
793 uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR);
794 }
795 return u_terminateUChars(dest, destCapacity, len, &errorCode);
796 }
797 }
798
799 return len;
800 }
801
802 int32_t
803 UnicodeString::extract(int32_t start,
804 int32_t length,
805 char *target,
806 int32_t targetCapacity,
807 enum EInvariant) const
808 {
809 // if the arguments are illegal, then do nothing
810 if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {
811 return 0;
812 }
813
814 // pin the indices to legal values
815 pinIndices(start, length);
816
817 if(length <= targetCapacity) {
818 u_UCharsToChars(getArrayStart() + start, target, length);
819 }
820 UErrorCode status = U_ZERO_ERROR;
821 return u_terminateChars(target, targetCapacity, length, &status);
822 }
823
824 UnicodeString
825 UnicodeString::tempSubString(int32_t start, int32_t len) const {
826 pinIndices(start, len);
827 const UChar *array = getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer
828 if(array==NULL) {
829 array=fUnion.fStackBuffer; // anything not NULL because that would make an empty string
830 len=-2; // bogus result string
831 }
832 return UnicodeString(FALSE, array + start, len);
833 }
834
835 int32_t
836 UnicodeString::toUTF8(int32_t start, int32_t len,
837 char *target, int32_t capacity) const {
838 pinIndices(start, len);
839 int32_t length8;
840 UErrorCode errorCode = U_ZERO_ERROR;
841 u_strToUTF8WithSub(target, capacity, &length8,
842 getBuffer() + start, len,
843 0xFFFD, // Standard substitution character.
844 NULL, // Don't care about number of substitutions.
845 &errorCode);
846 return length8;
847 }
848
849 #if U_CHARSET_IS_UTF8
850
851 int32_t
852 UnicodeString::extract(int32_t start, int32_t len,
853 char *target, uint32_t dstSize) const {
854 // if the arguments are illegal, then do nothing
855 if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
856 return 0;
857 }
858 return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
859 }
860
861 // else see unistr_cnv.cpp
862 #endif
863
864 void
865 UnicodeString::extractBetween(int32_t start,
866 int32_t limit,
867 UnicodeString& target) const {
868 pinIndex(start);
869 pinIndex(limit);
870 doExtract(start, limit - start, target);
871 }
872
873 // When converting from UTF-16 to UTF-8, the result will have at most 3 times
874 // as many bytes as the source has UChars.
875 // The "worst cases" are writing systems like Indic, Thai and CJK with
876 // 3:1 bytes:UChars.
877 void
878 UnicodeString::toUTF8(ByteSink &sink) const {
879 int32_t length16 = length();
880 if(length16 != 0) {
881 char stackBuffer[1024];
882 int32_t capacity = (int32_t)sizeof(stackBuffer);
883 UBool utf8IsOwned = FALSE;
884 char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
885 3*length16,
886 stackBuffer, capacity,
887 &capacity);
888 int32_t length8 = 0;
889 UErrorCode errorCode = U_ZERO_ERROR;
890 u_strToUTF8WithSub(utf8, capacity, &length8,
891 getBuffer(), length16,
892 0xFFFD, // Standard substitution character.
893 NULL, // Don't care about number of substitutions.
894 &errorCode);
895 if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
896 utf8 = (char *)uprv_malloc(length8);
897 if(utf8 != NULL) {
898 utf8IsOwned = TRUE;
899 errorCode = U_ZERO_ERROR;
900 u_strToUTF8WithSub(utf8, length8, &length8,
901 getBuffer(), length16,
902 0xFFFD, // Standard substitution character.
903 NULL, // Don't care about number of substitutions.
904 &errorCode);
905 } else {
906 errorCode = U_MEMORY_ALLOCATION_ERROR;
907 }
908 }
909 if(U_SUCCESS(errorCode)) {
910 sink.Append(utf8, length8);
911 sink.Flush();
912 }
913 if(utf8IsOwned) {
914 uprv_free(utf8);
915 }
916 }
917 }
918
919 int32_t
920 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
921 int32_t length32=0;
922 if(U_SUCCESS(errorCode)) {
923 // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
924 u_strToUTF32WithSub(utf32, capacity, &length32,
925 getBuffer(), length(),
926 0xfffd, // Substitution character.
927 NULL, // Don't care about number of substitutions.
928 &errorCode);
929 }
930 return length32;
931 }
932
933 int32_t
934 UnicodeString::indexOf(const UChar *srcChars,
935 int32_t srcStart,
936 int32_t srcLength,
937 int32_t start,
938 int32_t length) const
939 {
940 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
941 return -1;
942 }
943
944 // UnicodeString does not find empty substrings
945 if(srcLength < 0 && srcChars[srcStart] == 0) {
946 return -1;
947 }
948
949 // get the indices within bounds
950 pinIndices(start, length);
951
952 // find the first occurrence of the substring
953 const UChar *array = getArrayStart();
954 const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
955 if(match == NULL) {
956 return -1;
957 } else {
958 return (int32_t)(match - array);
959 }
960 }
961
962 int32_t
963 UnicodeString::doIndexOf(UChar c,
964 int32_t start,
965 int32_t length) const
966 {
967 // pin indices
968 pinIndices(start, length);
969
970 // find the first occurrence of c
971 const UChar *array = getArrayStart();
972 const UChar *match = u_memchr(array + start, c, length);
973 if(match == NULL) {
974 return -1;
975 } else {
976 return (int32_t)(match - array);
977 }
978 }
979
980 int32_t
981 UnicodeString::doIndexOf(UChar32 c,
982 int32_t start,
983 int32_t length) const {
984 // pin indices
985 pinIndices(start, length);
986
987 // find the first occurrence of c
988 const UChar *array = getArrayStart();
989 const UChar *match = u_memchr32(array + start, c, length);
990 if(match == NULL) {
991 return -1;
992 } else {
993 return (int32_t)(match - array);
994 }
995 }
996
997 int32_t
998 UnicodeString::lastIndexOf(const UChar *srcChars,
999 int32_t srcStart,
1000 int32_t srcLength,
1001 int32_t start,
1002 int32_t length) const
1003 {
1004 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
1005 return -1;
1006 }
1007
1008 // UnicodeString does not find empty substrings
1009 if(srcLength < 0 && srcChars[srcStart] == 0) {
1010 return -1;
1011 }
1012
1013 // get the indices within bounds
1014 pinIndices(start, length);
1015
1016 // find the last occurrence of the substring
1017 const UChar *array = getArrayStart();
1018 const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
1019 if(match == NULL) {
1020 return -1;
1021 } else {
1022 return (int32_t)(match - array);
1023 }
1024 }
1025
1026 int32_t
1027 UnicodeString::doLastIndexOf(UChar c,
1028 int32_t start,
1029 int32_t length) const
1030 {
1031 if(isBogus()) {
1032 return -1;
1033 }
1034
1035 // pin indices
1036 pinIndices(start, length);
1037
1038 // find the last occurrence of c
1039 const UChar *array = getArrayStart();
1040 const UChar *match = u_memrchr(array + start, c, length);
1041 if(match == NULL) {
1042 return -1;
1043 } else {
1044 return (int32_t)(match - array);
1045 }
1046 }
1047
1048 int32_t
1049 UnicodeString::doLastIndexOf(UChar32 c,
1050 int32_t start,
1051 int32_t length) const {
1052 // pin indices
1053 pinIndices(start, length);
1054
1055 // find the last occurrence of c
1056 const UChar *array = getArrayStart();
1057 const UChar *match = u_memrchr32(array + start, c, length);
1058 if(match == NULL) {
1059 return -1;
1060 } else {
1061 return (int32_t)(match - array);
1062 }
1063 }
1064
1065 //========================================
1066 // Write implementation
1067 //========================================
1068
1069 UnicodeString&
1070 UnicodeString::findAndReplace(int32_t start,
1071 int32_t length,
1072 const UnicodeString& oldText,
1073 int32_t oldStart,
1074 int32_t oldLength,
1075 const UnicodeString& newText,
1076 int32_t newStart,
1077 int32_t newLength)
1078 {
1079 if(isBogus() || oldText.isBogus() || newText.isBogus()) {
1080 return *this;
1081 }
1082
1083 pinIndices(start, length);
1084 oldText.pinIndices(oldStart, oldLength);
1085 newText.pinIndices(newStart, newLength);
1086
1087 if(oldLength == 0) {
1088 return *this;
1089 }
1090
1091 while(length > 0 && length >= oldLength) {
1092 int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
1093 if(pos < 0) {
1094 // no more oldText's here: done
1095 break;
1096 } else {
1097 // we found oldText, replace it by newText and go beyond it
1098 replace(pos, oldLength, newText, newStart, newLength);
1099 length -= pos + oldLength - start;
1100 start = pos + newLength;
1101 }
1102 }
1103
1104 return *this;
1105 }
1106
1107
1108 void
1109 UnicodeString::setToBogus()
1110 {
1111 releaseArray();
1112
1113 fShortLength = 0;
1114 fUnion.fFields.fArray = 0;
1115 fUnion.fFields.fCapacity = 0;
1116 fFlags = kIsBogus;
1117 }
1118
1119 // turn a bogus string into an empty one
1120 void
1121 UnicodeString::unBogus() {
1122 if(fFlags & kIsBogus) {
1123 setToEmpty();
1124 }
1125 }
1126
1127 // setTo() analogous to the readonly-aliasing constructor with the same signature
1128 UnicodeString &
1129 UnicodeString::setTo(UBool isTerminated,
1130 const UChar *text,
1131 int32_t textLength)
1132 {
1133 if(fFlags & kOpenGetBuffer) {
1134 // do not modify a string that has an "open" getBuffer(minCapacity)
1135 return *this;
1136 }
1137
1138 if(text == NULL) {
1139 // treat as an empty string, do not alias
1140 releaseArray();
1141 setToEmpty();
1142 return *this;
1143 }
1144
1145 if( textLength < -1 ||
1146 (textLength == -1 && !isTerminated) ||
1147 (textLength >= 0 && isTerminated && text[textLength] != 0)
1148 ) {
1149 setToBogus();
1150 return *this;
1151 }
1152
1153 releaseArray();
1154
1155 if(textLength == -1) {
1156 // text is terminated, or else it would have failed the above test
1157 textLength = u_strlen(text);
1158 }
1159 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
1160
1161 fFlags = kReadonlyAlias;
1162 return *this;
1163 }
1164
1165 // setTo() analogous to the writable-aliasing constructor with the same signature
1166 UnicodeString &
1167 UnicodeString::setTo(UChar *buffer,
1168 int32_t buffLength,
1169 int32_t buffCapacity) {
1170 if(fFlags & kOpenGetBuffer) {
1171 // do not modify a string that has an "open" getBuffer(minCapacity)
1172 return *this;
1173 }
1174
1175 if(buffer == NULL) {
1176 // treat as an empty string, do not alias
1177 releaseArray();
1178 setToEmpty();
1179 return *this;
1180 }
1181
1182 if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
1183 setToBogus();
1184 return *this;
1185 } else if(buffLength == -1) {
1186 // buffLength = u_strlen(buff); but do not look beyond buffCapacity
1187 const UChar *p = buffer, *limit = buffer + buffCapacity;
1188 while(p != limit && *p != 0) {
1189 ++p;
1190 }
1191 buffLength = (int32_t)(p - buffer);
1192 }
1193
1194 releaseArray();
1195
1196 setArray(buffer, buffLength, buffCapacity);
1197 fFlags = kWritableAlias;
1198 return *this;
1199 }
1200
1201 UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) {
1202 unBogus();
1203 int32_t length = utf8.length();
1204 int32_t capacity;
1205 // The UTF-16 string will be at most as long as the UTF-8 string.
1206 if(length <= US_STACKBUF_SIZE) {
1207 capacity = US_STACKBUF_SIZE;
1208 } else {
1209 capacity = length + 1; // +1 for the terminating NUL.
1210 }
1211 UChar *utf16 = getBuffer(capacity);
1212 int32_t length16;
1213 UErrorCode errorCode = U_ZERO_ERROR;
1214 u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
1215 utf8.data(), length,
1216 0xfffd, // Substitution character.
1217 NULL, // Don't care about number of substitutions.
1218 &errorCode);
1219 releaseBuffer(length16);
1220 if(U_FAILURE(errorCode)) {
1221 setToBogus();
1222 }
1223 return *this;
1224 }
1225
1226 UnicodeString&
1227 UnicodeString::setCharAt(int32_t offset,
1228 UChar c)
1229 {
1230 int32_t len = length();
1231 if(cloneArrayIfNeeded() && len > 0) {
1232 if(offset < 0) {
1233 offset = 0;
1234 } else if(offset >= len) {
1235 offset = len - 1;
1236 }
1237
1238 getArrayStart()[offset] = c;
1239 }
1240 return *this;
1241 }
1242
1243 UnicodeString&
1244 UnicodeString::replace(int32_t start,
1245 int32_t _length,
1246 UChar32 srcChar) {
1247 UChar buffer[U16_MAX_LENGTH];
1248 int32_t count = 0;
1249 UBool isError = FALSE;
1250 U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
1251 // We test isError so that the compiler does not complain that we don't.
1252 // If isError then count==0 which turns the doReplace() into a no-op anyway.
1253 return isError ? *this : doReplace(start, _length, buffer, 0, count);
1254 }
1255
1256 UnicodeString&
1257 UnicodeString::append(UChar32 srcChar) {
1258 UChar buffer[U16_MAX_LENGTH];
1259 int32_t _length = 0;
1260 UBool isError = FALSE;
1261 U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
1262 // We test isError so that the compiler does not complain that we don't.
1263 // If isError then _length==0 which turns the doReplace() into a no-op anyway.
1264 return isError ? *this : doReplace(length(), 0, buffer, 0, _length);
1265 }
1266
1267 UnicodeString&
1268 UnicodeString::doReplace( int32_t start,
1269 int32_t length,
1270 const UnicodeString& src,
1271 int32_t srcStart,
1272 int32_t srcLength)
1273 {
1274 if(!src.isBogus()) {
1275 // pin the indices to legal values
1276 src.pinIndices(srcStart, srcLength);
1277
1278 // get the characters from src
1279 // and replace the range in ourselves with them
1280 return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
1281 } else {
1282 // remove the range
1283 return doReplace(start, length, 0, 0, 0);
1284 }
1285 }
1286
1287 UnicodeString&
1288 UnicodeString::doReplace(int32_t start,
1289 int32_t length,
1290 const UChar *srcChars,
1291 int32_t srcStart,
1292 int32_t srcLength)
1293 {
1294 if(!isWritable()) {
1295 return *this;
1296 }
1297
1298 int32_t oldLength = this->length();
1299
1300 // optimize (read-only alias).remove(0, start) and .remove(start, end)
1301 if((fFlags&kBufferIsReadonly) && srcLength == 0) {
1302 if(start == 0) {
1303 // remove prefix by adjusting the array pointer
1304 pinIndex(length);
1305 fUnion.fFields.fArray += length;
1306 fUnion.fFields.fCapacity -= length;
1307 setLength(oldLength - length);
1308 return *this;
1309 } else {
1310 pinIndex(start);
1311 if(length >= (oldLength - start)) {
1312 // remove suffix by reducing the length (like truncate())
1313 setLength(start);
1314 fUnion.fFields.fCapacity = start; // not NUL-terminated any more
1315 return *this;
1316 }
1317 }
1318 }
1319
1320 if(srcChars == 0) {
1321 srcStart = srcLength = 0;
1322 } else if(srcLength < 0) {
1323 // get the srcLength if necessary
1324 srcLength = u_strlen(srcChars + srcStart);
1325 }
1326
1327 // calculate the size of the string after the replace
1328 int32_t newLength;
1329
1330 // optimize append() onto a large-enough, owned string
1331 if(start >= oldLength) {
1332 if(srcLength == 0) {
1333 return *this;
1334 }
1335 newLength = oldLength + srcLength;
1336 if(newLength <= getCapacity() && isBufferWritable()) {
1337 UChar *oldArray = getArrayStart();
1338 // Do not copy characters when
1339 // UChar *buffer=str.getAppendBuffer(...);
1340 // is followed by
1341 // str.append(buffer, length);
1342 // or
1343 // str.appendString(buffer, length)
1344 // or similar.
1345 if(srcChars + srcStart != oldArray + start || start > oldLength) {
1346 us_arrayCopy(srcChars, srcStart, oldArray, oldLength, srcLength);
1347 }
1348 setLength(newLength);
1349 return *this;
1350 } else {
1351 // pin the indices to legal values
1352 start = oldLength;
1353 length = 0;
1354 }
1355 } else {
1356 // pin the indices to legal values
1357 pinIndices(start, length);
1358
1359 newLength = oldLength - length + srcLength;
1360 }
1361
1362 // the following may change fArray but will not copy the current contents;
1363 // therefore we need to keep the current fArray
1364 UChar oldStackBuffer[US_STACKBUF_SIZE];
1365 UChar *oldArray;
1366 if((fFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
1367 // copy the stack buffer contents because it will be overwritten with
1368 // fUnion.fFields values
1369 u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength);
1370 oldArray = oldStackBuffer;
1371 } else {
1372 oldArray = getArrayStart();
1373 }
1374
1375 // clone our array and allocate a bigger array if needed
1376 int32_t *bufferToDelete = 0;
1377 if(!cloneArrayIfNeeded(newLength, newLength + (newLength >> 2) + kGrowSize,
1378 FALSE, &bufferToDelete)
1379 ) {
1380 return *this;
1381 }
1382
1383 // now do the replace
1384
1385 UChar *newArray = getArrayStart();
1386 if(newArray != oldArray) {
1387 // if fArray changed, then we need to copy everything except what will change
1388 us_arrayCopy(oldArray, 0, newArray, 0, start);
1389 us_arrayCopy(oldArray, start + length,
1390 newArray, start + srcLength,
1391 oldLength - (start + length));
1392 } else if(length != srcLength) {
1393 // fArray did not change; copy only the portion that isn't changing, leaving a hole
1394 us_arrayCopy(oldArray, start + length,
1395 newArray, start + srcLength,
1396 oldLength - (start + length));
1397 }
1398
1399 // now fill in the hole with the new string
1400 us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);
1401
1402 setLength(newLength);
1403
1404 // delayed delete in case srcChars == fArray when we started, and
1405 // to keep oldArray alive for the above operations
1406 if (bufferToDelete) {
1407 uprv_free(bufferToDelete);
1408 }
1409
1410 return *this;
1411 }
1412
1413 /**
1414 * Replaceable API
1415 */
1416 void
1417 UnicodeString::handleReplaceBetween(int32_t start,
1418 int32_t limit,
1419 const UnicodeString& text) {
1420 replaceBetween(start, limit, text);
1421 }
1422
1423 /**
1424 * Replaceable API
1425 */
1426 void
1427 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
1428 if (limit <= start) {
1429 return; // Nothing to do; avoid bogus malloc call
1430 }
1431 UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
1432 // Check to make sure text is not null.
1433 if (text != NULL) {
1434 extractBetween(start, limit, text, 0);
1435 insert(dest, text, 0, limit - start);
1436 uprv_free(text);
1437 }
1438 }
1439
1440 /**
1441 * Replaceable API
1442 *
1443 * NOTE: This is for the Replaceable class. There is no rep.cpp,
1444 * so we implement this function here.
1445 */
1446 UBool Replaceable::hasMetaData() const {
1447 return TRUE;
1448 }
1449
1450 /**
1451 * Replaceable API
1452 */
1453 UBool UnicodeString::hasMetaData() const {
1454 return FALSE;
1455 }
1456
1457 UnicodeString&
1458 UnicodeString::doReverse(int32_t start, int32_t length) {
1459 if(length <= 1 || !cloneArrayIfNeeded()) {
1460 return *this;
1461 }
1462
1463 // pin the indices to legal values
1464 pinIndices(start, length);
1465 if(length <= 1) { // pinIndices() might have shrunk the length
1466 return *this;
1467 }
1468
1469 UChar *left = getArrayStart() + start;
1470 UChar *right = left + length - 1; // -1 for inclusive boundary (length>=2)
1471 UChar swap;
1472 UBool hasSupplementary = FALSE;
1473
1474 // Before the loop we know left<right because length>=2.
1475 do {
1476 hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
1477 hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
1478 *right-- = swap;
1479 } while(left < right);
1480 // Make sure to test the middle code unit of an odd-length string.
1481 // Redundant if the length is even.
1482 hasSupplementary |= (UBool)U16_IS_LEAD(*left);
1483
1484 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1485 if(hasSupplementary) {
1486 UChar swap2;
1487
1488 left = getArrayStart() + start;
1489 right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
1490 while(left < right) {
1491 if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {
1492 *left++ = swap2;
1493 *left++ = swap;
1494 } else {
1495 ++left;
1496 }
1497 }
1498 }
1499
1500 return *this;
1501 }
1502
1503 UBool
1504 UnicodeString::padLeading(int32_t targetLength,
1505 UChar padChar)
1506 {
1507 int32_t oldLength = length();
1508 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1509 return FALSE;
1510 } else {
1511 // move contents up by padding width
1512 UChar *array = getArrayStart();
1513 int32_t start = targetLength - oldLength;
1514 us_arrayCopy(array, 0, array, start, oldLength);
1515
1516 // fill in padding character
1517 while(--start >= 0) {
1518 array[start] = padChar;
1519 }
1520 setLength(targetLength);
1521 return TRUE;
1522 }
1523 }
1524
1525 UBool
1526 UnicodeString::padTrailing(int32_t targetLength,
1527 UChar padChar)
1528 {
1529 int32_t oldLength = length();
1530 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1531 return FALSE;
1532 } else {
1533 // fill in padding character
1534 UChar *array = getArrayStart();
1535 int32_t length = targetLength;
1536 while(--length >= oldLength) {
1537 array[length] = padChar;
1538 }
1539 setLength(targetLength);
1540 return TRUE;
1541 }
1542 }
1543
1544 //========================================
1545 // Hashing
1546 //========================================
1547 int32_t
1548 UnicodeString::doHashCode() const
1549 {
1550 /* Delegate hash computation to uhash. This makes UnicodeString
1551 * hashing consistent with UChar* hashing. */
1552 int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length());
1553 if (hashCode == kInvalidHashCode) {
1554 hashCode = kEmptyHashCode;
1555 }
1556 return hashCode;
1557 }
1558
1559 //========================================
1560 // External Buffer
1561 //========================================
1562
1563 UChar *
1564 UnicodeString::getBuffer(int32_t minCapacity) {
1565 if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
1566 fFlags|=kOpenGetBuffer;
1567 fShortLength=0;
1568 return getArrayStart();
1569 } else {
1570 return 0;
1571 }
1572 }
1573
1574 void
1575 UnicodeString::releaseBuffer(int32_t newLength) {
1576 if(fFlags&kOpenGetBuffer && newLength>=-1) {
1577 // set the new fLength
1578 int32_t capacity=getCapacity();
1579 if(newLength==-1) {
1580 // the new length is the string length, capped by fCapacity
1581 const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
1582 while(p<limit && *p!=0) {
1583 ++p;
1584 }
1585 newLength=(int32_t)(p-array);
1586 } else if(newLength>capacity) {
1587 newLength=capacity;
1588 }
1589 setLength(newLength);
1590 fFlags&=~kOpenGetBuffer;
1591 }
1592 }
1593
1594 //========================================
1595 // Miscellaneous
1596 //========================================
1597 UBool
1598 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
1599 int32_t growCapacity,
1600 UBool doCopyArray,
1601 int32_t **pBufferToDelete,
1602 UBool forceClone) {
1603 // default parameters need to be static, therefore
1604 // the defaults are -1 to have convenience defaults
1605 if(newCapacity == -1) {
1606 newCapacity = getCapacity();
1607 }
1608
1609 // while a getBuffer(minCapacity) is "open",
1610 // prevent any modifications of the string by returning FALSE here
1611 // if the string is bogus, then only an assignment or similar can revive it
1612 if(!isWritable()) {
1613 return FALSE;
1614 }
1615
1616 /*
1617 * We need to make a copy of the array if
1618 * the buffer is read-only, or
1619 * the buffer is refCounted (shared), and refCount>1, or
1620 * the buffer is too small.
1621 * Return FALSE if memory could not be allocated.
1622 */
1623 if(forceClone ||
1624 fFlags & kBufferIsReadonly ||
1625 (fFlags & kRefCounted && refCount() > 1) ||
1626 newCapacity > getCapacity()
1627 ) {
1628 // check growCapacity for default value and use of the stack buffer
1629 if(growCapacity < 0) {
1630 growCapacity = newCapacity;
1631 } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
1632 growCapacity = US_STACKBUF_SIZE;
1633 }
1634
1635 // save old values
1636 UChar oldStackBuffer[US_STACKBUF_SIZE];
1637 UChar *oldArray;
1638 uint8_t flags = fFlags;
1639
1640 if(flags&kUsingStackBuffer) {
1641 U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */
1642 if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
1643 // copy the stack buffer contents because it will be overwritten with
1644 // fUnion.fFields values
1645 us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength);
1646 oldArray = oldStackBuffer;
1647 } else {
1648 oldArray = 0; // no need to copy from stack buffer to itself
1649 }
1650 } else {
1651 oldArray = fUnion.fFields.fArray;
1652 U_ASSERT(oldArray!=NULL); /* when stack buffer is not used, oldArray must have a non-NULL reference */
1653 }
1654
1655 // allocate a new array
1656 if(allocate(growCapacity) ||
1657 (newCapacity < growCapacity && allocate(newCapacity))
1658 ) {
1659 if(doCopyArray && oldArray != 0) {
1660 // copy the contents
1661 // do not copy more than what fits - it may be smaller than before
1662 int32_t minLength = length();
1663 newCapacity = getCapacity();
1664 if(newCapacity < minLength) {
1665 minLength = newCapacity;
1666 setLength(minLength);
1667 }
1668 us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
1669 } else {
1670 fShortLength = 0;
1671 }
1672
1673 // release the old array
1674 if(flags & kRefCounted) {
1675 // the array is refCounted; decrement and release if 0
1676 int32_t *pRefCount = ((int32_t *)oldArray - 1);
1677 if(umtx_atomic_dec(pRefCount) == 0) {
1678 if(pBufferToDelete == 0) {
1679 uprv_free(pRefCount);
1680 } else {
1681 // the caller requested to delete it himself
1682 *pBufferToDelete = pRefCount;
1683 }
1684 }
1685 }
1686 } else {
1687 // not enough memory for growCapacity and not even for the smaller newCapacity
1688 // reset the old values for setToBogus() to release the array
1689 if(!(flags&kUsingStackBuffer)) {
1690 fUnion.fFields.fArray = oldArray;
1691 }
1692 fFlags = flags;
1693 setToBogus();
1694 return FALSE;
1695 }
1696 }
1697 return TRUE;
1698 }
1699
1700 // UnicodeStringAppendable ------------------------------------------------- ***
1701
1702 UnicodeStringAppendable::~UnicodeStringAppendable() {}
1703
1704 UBool
1705 UnicodeStringAppendable::appendCodeUnit(UChar c) {
1706 return str.doReplace(str.length(), 0, &c, 0, 1).isWritable();
1707 }
1708
1709 UBool
1710 UnicodeStringAppendable::appendCodePoint(UChar32 c) {
1711 UChar buffer[U16_MAX_LENGTH];
1712 int32_t cLength = 0;
1713 UBool isError = FALSE;
1714 U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);
1715 return !isError && str.doReplace(str.length(), 0, buffer, 0, cLength).isWritable();
1716 }
1717
1718 UBool
1719 UnicodeStringAppendable::appendString(const UChar *s, int32_t length) {
1720 return str.doReplace(str.length(), 0, s, 0, length).isWritable();
1721 }
1722
1723 UBool
1724 UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
1725 return str.cloneArrayIfNeeded(str.length() + appendCapacity);
1726 }
1727
1728 UChar *
1729 UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
1730 int32_t desiredCapacityHint,
1731 UChar *scratch, int32_t scratchCapacity,
1732 int32_t *resultCapacity) {
1733 if(minCapacity < 1 || scratchCapacity < minCapacity) {
1734 *resultCapacity = 0;
1735 return NULL;
1736 }
1737 int32_t oldLength = str.length();
1738 if(str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
1739 *resultCapacity = str.getCapacity() - oldLength;
1740 return str.getArrayStart() + oldLength;
1741 }
1742 *resultCapacity = scratchCapacity;
1743 return scratch;
1744 }
1745
1746 U_NAMESPACE_END
1747
1748 U_NAMESPACE_USE
1749
1750 U_CAPI int32_t U_EXPORT2
1751 uhash_hashUnicodeString(const UElement key) {
1752 const UnicodeString *str = (const UnicodeString*) key.pointer;
1753 return (str == NULL) ? 0 : str->hashCode();
1754 }
1755
1756 // Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
1757 // does not depend on hashtable code.
1758 U_CAPI UBool U_EXPORT2
1759 uhash_compareUnicodeString(const UElement key1, const UElement key2) {
1760 const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
1761 const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
1762 if (str1 == str2) {
1763 return TRUE;
1764 }
1765 if (str1 == NULL || str2 == NULL) {
1766 return FALSE;
1767 }
1768 return *str1 == *str2;
1769 }
1770
#ifdef U_STATIC_IMPLEMENTATION
/*
 * Never meant to be called. Its only purpose is to have the virtual
 * vector deleting destructor defined within unistr.cpp.
 * That destructor already belongs to UObject, but defining it here makes
 * sure that it is included with this object file, which keeps the static
 * library dependencies to a minimum.
 */
static void uprv_UnicodeStringDummy(void) {
    UnicodeString *pair = new UnicodeString[2];
    delete [] pair;
}
#endif