]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/unistr.cpp
ICU-511.32.tar.gz
[apple/icu.git] / icuSources / common / unistr.cpp
1 /*
2 ******************************************************************************
3 * Copyright (C) 1999-2012, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ******************************************************************************
6 *
7 * File unistr.cpp
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 09/25/98 stephen Creation.
13 * 04/20/99 stephen Overhauled per 4/16 code review.
14 * 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX
15 * 11/18/99 aliu Added handleReplaceBetween() to make inherit from
16 * Replaceable.
17 * 06/25/01 grhoten Removed the dependency on iostream
18 ******************************************************************************
19 */
20
21 #include "unicode/utypes.h"
22 #include "unicode/appendable.h"
23 #include "unicode/putil.h"
24 #include "cstring.h"
25 #include "cmemory.h"
26 #include "unicode/ustring.h"
27 #include "unicode/unistr.h"
28 #include "unicode/utf.h"
29 #include "unicode/utf16.h"
30 #include "uelement.h"
31 #include "ustr_imp.h"
32 #include "umutex.h"
33 #include "uassert.h"
34
35 #if 0
36
37 #include <iostream>
38 using namespace std;
39
40 //DEBUGGING
41 void
42 print(const UnicodeString& s,
43 const char *name)
44 {
45 UChar c;
46 cout << name << ":|";
47 for(int i = 0; i < s.length(); ++i) {
48 c = s[i];
49 if(c>= 0x007E || c < 0x0020)
50 cout << "[0x" << hex << s[i] << "]";
51 else
52 cout << (char) s[i];
53 }
54 cout << '|' << endl;
55 }
56
57 void
58 print(const UChar *s,
59 int32_t len,
60 const char *name)
61 {
62 UChar c;
63 cout << name << ":|";
64 for(int i = 0; i < len; ++i) {
65 c = s[i];
66 if(c>= 0x007E || c < 0x0020)
67 cout << "[0x" << hex << s[i] << "]";
68 else
69 cout << (char) s[i];
70 }
71 cout << '|' << endl;
72 }
73 // END DEBUGGING
74 #endif
75
76 // Local function definitions for now
77
78 // need to copy areas that may overlap
79 static
80 inline void
81 us_arrayCopy(const UChar *src, int32_t srcStart,
82 UChar *dst, int32_t dstStart, int32_t count)
83 {
84 if(count>0) {
85 uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src)));
86 }
87 }
88
89 // u_unescapeAt() callback to get a UChar from a UnicodeString
90 U_CDECL_BEGIN
91 static UChar U_CALLCONV
92 UnicodeString_charAt(int32_t offset, void *context) {
93 return ((icu::UnicodeString*) context)->charAt(offset);
94 }
95 U_CDECL_END
96
97 U_NAMESPACE_BEGIN
98
99 /* The Replaceable virtual destructor can't be defined in the header
100 due to how AIX works with multiple definitions of virtual functions.
101 */
102 Replaceable::~Replaceable() {}
103
104 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
105
106 UnicodeString U_EXPORT2
107 operator+ (const UnicodeString &s1, const UnicodeString &s2) {
108 return
109 UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
110 append(s1).
111 append(s2);
112 }
113
114 //========================================
115 // Reference Counting functions, put at top of file so that optimizing compilers
116 // have a chance to automatically inline.
117 //========================================
118
119 void
120 UnicodeString::addRef()
121 { umtx_atomic_inc((int32_t *)fUnion.fFields.fArray - 1);}
122
123 int32_t
124 UnicodeString::removeRef()
125 { return umtx_atomic_dec((int32_t *)fUnion.fFields.fArray - 1);}
126
127 int32_t
128 UnicodeString::refCount() const
129 {
130 umtx_lock(NULL);
131 // Note: without the lock to force a memory barrier, we might see a very
132 // stale value on some multi-processor systems.
133 int32_t count = *((int32_t *)fUnion.fFields.fArray - 1);
134 umtx_unlock(NULL);
135 return count;
136 }
137
138 void
139 UnicodeString::releaseArray() {
140 if((fFlags & kRefCounted) && removeRef() == 0) {
141 uprv_free((int32_t *)fUnion.fFields.fArray - 1);
142 }
143 }
144
145
146
147 //========================================
148 // Constructors
149 //========================================
150
151 // The default constructor is inline in unistr.h.
152
153 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
154 : fShortLength(0),
155 fFlags(0)
156 {
157 if(count <= 0 || (uint32_t)c > 0x10ffff) {
158 // just allocate and do not do anything else
159 allocate(capacity);
160 } else {
161 // count > 0, allocate and fill the new string with count c's
162 int32_t unitCount = U16_LENGTH(c), length = count * unitCount;
163 if(capacity < length) {
164 capacity = length;
165 }
166 if(allocate(capacity)) {
167 UChar *array = getArrayStart();
168 int32_t i = 0;
169
170 // fill the new string with c
171 if(unitCount == 1) {
172 // fill with length UChars
173 while(i < length) {
174 array[i++] = (UChar)c;
175 }
176 } else {
177 // get the code units for c
178 UChar units[U16_MAX_LENGTH];
179 U16_APPEND_UNSAFE(units, i, c);
180
181 // now it must be i==unitCount
182 i = 0;
183
184 // for Unicode, unitCount can only be 1, 2, 3, or 4
185 // 1 is handled above
186 while(i < length) {
187 int32_t unitIdx = 0;
188 while(unitIdx < unitCount) {
189 array[i++]=units[unitIdx++];
190 }
191 }
192 }
193 }
194 setLength(length);
195 }
196 }
197
198 UnicodeString::UnicodeString(UChar ch)
199 : fShortLength(1),
200 fFlags(kShortString)
201 {
202 fUnion.fStackBuffer[0] = ch;
203 }
204
205 UnicodeString::UnicodeString(UChar32 ch)
206 : fShortLength(0),
207 fFlags(kShortString)
208 {
209 int32_t i = 0;
210 UBool isError = FALSE;
211 U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);
212 // We test isError so that the compiler does not complain that we don't.
213 // If isError then i==0 which is what we want anyway.
214 if(!isError) {
215 fShortLength = (int8_t)i;
216 }
217 }
218
219 UnicodeString::UnicodeString(const UChar *text)
220 : fShortLength(0),
221 fFlags(kShortString)
222 {
223 doReplace(0, 0, text, 0, -1);
224 }
225
226 UnicodeString::UnicodeString(const UChar *text,
227 int32_t textLength)
228 : fShortLength(0),
229 fFlags(kShortString)
230 {
231 doReplace(0, 0, text, 0, textLength);
232 }
233
234 UnicodeString::UnicodeString(UBool isTerminated,
235 const UChar *text,
236 int32_t textLength)
237 : fShortLength(0),
238 fFlags(kReadonlyAlias)
239 {
240 if(text == NULL) {
241 // treat as an empty string, do not alias
242 setToEmpty();
243 } else if(textLength < -1 ||
244 (textLength == -1 && !isTerminated) ||
245 (textLength >= 0 && isTerminated && text[textLength] != 0)
246 ) {
247 setToBogus();
248 } else {
249 if(textLength == -1) {
250 // text is terminated, or else it would have failed the above test
251 textLength = u_strlen(text);
252 }
253 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
254 }
255 }
256
257 UnicodeString::UnicodeString(UChar *buff,
258 int32_t buffLength,
259 int32_t buffCapacity)
260 : fShortLength(0),
261 fFlags(kWritableAlias)
262 {
263 if(buff == NULL) {
264 // treat as an empty string, do not alias
265 setToEmpty();
266 } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
267 setToBogus();
268 } else {
269 if(buffLength == -1) {
270 // fLength = u_strlen(buff); but do not look beyond buffCapacity
271 const UChar *p = buff, *limit = buff + buffCapacity;
272 while(p != limit && *p != 0) {
273 ++p;
274 }
275 buffLength = (int32_t)(p - buff);
276 }
277 setArray(buff, buffLength, buffCapacity);
278 }
279 }
280
281 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)
282 : fShortLength(0),
283 fFlags(kShortString)
284 {
285 if(src==NULL) {
286 // treat as an empty string
287 } else {
288 if(length<0) {
289 length=(int32_t)uprv_strlen(src);
290 }
291 if(cloneArrayIfNeeded(length, length, FALSE)) {
292 u_charsToUChars(src, getArrayStart(), length);
293 setLength(length);
294 } else {
295 setToBogus();
296 }
297 }
298 }
299
300 #if U_CHARSET_IS_UTF8
301
302 UnicodeString::UnicodeString(const char *codepageData)
303 : fShortLength(0),
304 fFlags(kShortString) {
305 if(codepageData != 0) {
306 setToUTF8(codepageData);
307 }
308 }
309
310 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength)
311 : fShortLength(0),
312 fFlags(kShortString) {
313 // if there's nothing to convert, do nothing
314 if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
315 return;
316 }
317 if(dataLength == -1) {
318 dataLength = (int32_t)uprv_strlen(codepageData);
319 }
320 setToUTF8(StringPiece(codepageData, dataLength));
321 }
322
323 // else see unistr_cnv.cpp
324 #endif
325
326 UnicodeString::UnicodeString(const UnicodeString& that)
327 : Replaceable(),
328 fShortLength(0),
329 fFlags(kShortString)
330 {
331 copyFrom(that);
332 }
333
334 UnicodeString::UnicodeString(const UnicodeString& that,
335 int32_t srcStart)
336 : Replaceable(),
337 fShortLength(0),
338 fFlags(kShortString)
339 {
340 setTo(that, srcStart);
341 }
342
343 UnicodeString::UnicodeString(const UnicodeString& that,
344 int32_t srcStart,
345 int32_t srcLength)
346 : Replaceable(),
347 fShortLength(0),
348 fFlags(kShortString)
349 {
350 setTo(that, srcStart, srcLength);
351 }
352
353 // Replaceable base class clone() default implementation, does not clone
354 Replaceable *
355 Replaceable::clone() const {
356 return NULL;
357 }
358
359 // UnicodeString overrides clone() with a real implementation
360 Replaceable *
361 UnicodeString::clone() const {
362 return new UnicodeString(*this);
363 }
364
365 //========================================
366 // array allocation
367 //========================================
368
369 UBool
370 UnicodeString::allocate(int32_t capacity) {
371 if(capacity <= US_STACKBUF_SIZE) {
372 fFlags = kShortString;
373 } else {
374 // count bytes for the refCounter and the string capacity, and
375 // round up to a multiple of 16; then divide by 4 and allocate int32_t's
376 // to be safely aligned for the refCount
377 // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer()
378 int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2);
379 int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words );
380 if(array != 0) {
381 // set initial refCount and point behind the refCount
382 *array++ = 1;
383
384 // have fArray point to the first UChar
385 fUnion.fFields.fArray = (UChar *)array;
386 fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
387 fFlags = kLongString;
388 } else {
389 fShortLength = 0;
390 fUnion.fFields.fArray = 0;
391 fUnion.fFields.fCapacity = 0;
392 fFlags = kIsBogus;
393 return FALSE;
394 }
395 }
396 return TRUE;
397 }
398
399 //========================================
400 // Destructor
401 //========================================
402 UnicodeString::~UnicodeString()
403 {
404 releaseArray();
405 }
406
407 //========================================
408 // Factory methods
409 //========================================
410
411 UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) {
412 UnicodeString result;
413 result.setToUTF8(utf8);
414 return result;
415 }
416
417 UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
418 UnicodeString result;
419 int32_t capacity;
420 // Most UTF-32 strings will be BMP-only and result in a same-length
421 // UTF-16 string. We overestimate the capacity just slightly,
422 // just in case there are a few supplementary characters.
423 if(length <= US_STACKBUF_SIZE) {
424 capacity = US_STACKBUF_SIZE;
425 } else {
426 capacity = length + (length >> 4) + 4;
427 }
428 do {
429 UChar *utf16 = result.getBuffer(capacity);
430 int32_t length16;
431 UErrorCode errorCode = U_ZERO_ERROR;
432 u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
433 utf32, length,
434 0xfffd, // Substitution character.
435 NULL, // Don't care about number of substitutions.
436 &errorCode);
437 result.releaseBuffer(length16);
438 if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
439 capacity = length16 + 1; // +1 for the terminating NUL.
440 continue;
441 } else if(U_FAILURE(errorCode)) {
442 result.setToBogus();
443 }
444 break;
445 } while(TRUE);
446 return result;
447 }
448
449 //========================================
450 // Assignment
451 //========================================
452
453 UnicodeString &
454 UnicodeString::operator=(const UnicodeString &src) {
455 return copyFrom(src);
456 }
457
458 UnicodeString &
459 UnicodeString::fastCopyFrom(const UnicodeString &src) {
460 return copyFrom(src, TRUE);
461 }
462
463 UnicodeString &
464 UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
465 // if assigning to ourselves, do nothing
466 if(this == 0 || this == &src) {
467 return *this;
468 }
469
470 // is the right side bogus?
471 if(&src == 0 || src.isBogus()) {
472 setToBogus();
473 return *this;
474 }
475
476 // delete the current contents
477 releaseArray();
478
479 if(src.isEmpty()) {
480 // empty string - use the stack buffer
481 setToEmpty();
482 return *this;
483 }
484
485 // we always copy the length
486 int32_t srcLength = src.length();
487 setLength(srcLength);
488
489 // fLength>0 and not an "open" src.getBuffer(minCapacity)
490 switch(src.fFlags) {
491 case kShortString:
492 // short string using the stack buffer, do the same
493 fFlags = kShortString;
494 uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, srcLength * U_SIZEOF_UCHAR);
495 break;
496 case kLongString:
497 // src uses a refCounted string buffer, use that buffer with refCount
498 // src is const, use a cast - we don't really change it
499 ((UnicodeString &)src).addRef();
500 // copy all fields, share the reference-counted buffer
501 fUnion.fFields.fArray = src.fUnion.fFields.fArray;
502 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
503 fFlags = src.fFlags;
504 break;
505 case kReadonlyAlias:
506 if(fastCopy) {
507 // src is a readonly alias, do the same
508 // -> maintain the readonly alias as such
509 fUnion.fFields.fArray = src.fUnion.fFields.fArray;
510 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
511 fFlags = src.fFlags;
512 break;
513 }
514 // else if(!fastCopy) fall through to case kWritableAlias
515 // -> allocate a new buffer and copy the contents
516 case kWritableAlias:
517 // src is a writable alias; we make a copy of that instead
518 if(allocate(srcLength)) {
519 uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR);
520 break;
521 }
522 // if there is not enough memory, then fall through to setting to bogus
523 default:
524 // if src is bogus, set ourselves to bogus
525 // do not call setToBogus() here because fArray and fFlags are not consistent here
526 fShortLength = 0;
527 fUnion.fFields.fArray = 0;
528 fUnion.fFields.fCapacity = 0;
529 fFlags = kIsBogus;
530 break;
531 }
532
533 return *this;
534 }
535
536 //========================================
537 // Miscellaneous operations
538 //========================================
539
540 UnicodeString UnicodeString::unescape() const {
541 UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
542 const UChar *array = getBuffer();
543 int32_t len = length();
544 int32_t prev = 0;
545 for (int32_t i=0;;) {
546 if (i == len) {
547 result.append(array, prev, len - prev);
548 break;
549 }
550 if (array[i++] == 0x5C /*'\\'*/) {
551 result.append(array, prev, (i - 1) - prev);
552 UChar32 c = unescapeAt(i); // advances i
553 if (c < 0) {
554 result.remove(); // return empty string
555 break; // invalid escape sequence
556 }
557 result.append(c);
558 prev = i;
559 }
560 }
561 return result;
562 }
563
564 UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
565 return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
566 }
567
568 //========================================
569 // Read-only implementation
570 //========================================
571 UBool
572 UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {
573 // Requires: this & text not bogus and have same lengths.
574 // Byte-wise comparison works for equality regardless of endianness.
575 return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0;
576 }
577
578 int8_t
579 UnicodeString::doCompare( int32_t start,
580 int32_t length,
581 const UChar *srcChars,
582 int32_t srcStart,
583 int32_t srcLength) const
584 {
585 // compare illegal string values
586 if(isBogus()) {
587 return -1;
588 }
589
590 // pin indices to legal values
591 pinIndices(start, length);
592
593 if(srcChars == NULL) {
594 // treat const UChar *srcChars==NULL as an empty string
595 return length == 0 ? 0 : 1;
596 }
597
598 // get the correct pointer
599 const UChar *chars = getArrayStart();
600
601 chars += start;
602 srcChars += srcStart;
603
604 int32_t minLength;
605 int8_t lengthResult;
606
607 // get the srcLength if necessary
608 if(srcLength < 0) {
609 srcLength = u_strlen(srcChars + srcStart);
610 }
611
612 // are we comparing different lengths?
613 if(length != srcLength) {
614 if(length < srcLength) {
615 minLength = length;
616 lengthResult = -1;
617 } else {
618 minLength = srcLength;
619 lengthResult = 1;
620 }
621 } else {
622 minLength = length;
623 lengthResult = 0;
624 }
625
626 /*
627 * note that uprv_memcmp() returns an int but we return an int8_t;
628 * we need to take care not to truncate the result -
629 * one way to do this is to right-shift the value to
630 * move the sign bit into the lower 8 bits and making sure that this
631 * does not become 0 itself
632 */
633
634 if(minLength > 0 && chars != srcChars) {
635 int32_t result;
636
637 # if U_IS_BIG_ENDIAN
638 // big-endian: byte comparison works
639 result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
640 if(result != 0) {
641 return (int8_t)(result >> 15 | 1);
642 }
643 # else
644 // little-endian: compare UChar units
645 do {
646 result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
647 if(result != 0) {
648 return (int8_t)(result >> 15 | 1);
649 }
650 } while(--minLength > 0);
651 # endif
652 }
653 return lengthResult;
654 }
655
656 /* String compare in code point order - doCompare() compares in code unit order. */
657 int8_t
658 UnicodeString::doCompareCodePointOrder(int32_t start,
659 int32_t length,
660 const UChar *srcChars,
661 int32_t srcStart,
662 int32_t srcLength) const
663 {
664 // compare illegal string values
665 // treat const UChar *srcChars==NULL as an empty string
666 if(isBogus()) {
667 return -1;
668 }
669
670 // pin indices to legal values
671 pinIndices(start, length);
672
673 if(srcChars == NULL) {
674 srcStart = srcLength = 0;
675 }
676
677 int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=NULL)?(srcChars + srcStart):NULL, srcLength, FALSE, TRUE);
678 /* translate the 32-bit result into an 8-bit one */
679 if(diff!=0) {
680 return (int8_t)(diff >> 15 | 1);
681 } else {
682 return 0;
683 }
684 }
685
686 int32_t
687 UnicodeString::getLength() const {
688 return length();
689 }
690
691 UChar
692 UnicodeString::getCharAt(int32_t offset) const {
693 return charAt(offset);
694 }
695
696 UChar32
697 UnicodeString::getChar32At(int32_t offset) const {
698 return char32At(offset);
699 }
700
701 UChar32
702 UnicodeString::char32At(int32_t offset) const
703 {
704 int32_t len = length();
705 if((uint32_t)offset < (uint32_t)len) {
706 const UChar *array = getArrayStart();
707 UChar32 c;
708 U16_GET(array, 0, offset, len, c);
709 return c;
710 } else {
711 return kInvalidUChar;
712 }
713 }
714
715 int32_t
716 UnicodeString::getChar32Start(int32_t offset) const {
717 if((uint32_t)offset < (uint32_t)length()) {
718 const UChar *array = getArrayStart();
719 U16_SET_CP_START(array, 0, offset);
720 return offset;
721 } else {
722 return 0;
723 }
724 }
725
726 int32_t
727 UnicodeString::getChar32Limit(int32_t offset) const {
728 int32_t len = length();
729 if((uint32_t)offset < (uint32_t)len) {
730 const UChar *array = getArrayStart();
731 U16_SET_CP_LIMIT(array, 0, offset, len);
732 return offset;
733 } else {
734 return len;
735 }
736 }
737
738 int32_t
739 UnicodeString::countChar32(int32_t start, int32_t length) const {
740 pinIndices(start, length);
741 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
742 return u_countChar32(getArrayStart()+start, length);
743 }
744
745 UBool
746 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
747 pinIndices(start, length);
748 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
749 return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
750 }
751
752 int32_t
753 UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
754 // pin index
755 int32_t len = length();
756 if(index<0) {
757 index=0;
758 } else if(index>len) {
759 index=len;
760 }
761
762 const UChar *array = getArrayStart();
763 if(delta>0) {
764 U16_FWD_N(array, index, len, delta);
765 } else {
766 U16_BACK_N(array, 0, index, -delta);
767 }
768
769 return index;
770 }
771
772 void
773 UnicodeString::doExtract(int32_t start,
774 int32_t length,
775 UChar *dst,
776 int32_t dstStart) const
777 {
778 // pin indices to legal values
779 pinIndices(start, length);
780
781 // do not copy anything if we alias dst itself
782 const UChar *array = getArrayStart();
783 if(array + start != dst + dstStart) {
784 us_arrayCopy(array, start, dst, dstStart, length);
785 }
786 }
787
788 int32_t
789 UnicodeString::extract(UChar *dest, int32_t destCapacity,
790 UErrorCode &errorCode) const {
791 int32_t len = length();
792 if(U_SUCCESS(errorCode)) {
793 if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
794 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
795 } else {
796 const UChar *array = getArrayStart();
797 if(len>0 && len<=destCapacity && array!=dest) {
798 uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR);
799 }
800 return u_terminateUChars(dest, destCapacity, len, &errorCode);
801 }
802 }
803
804 return len;
805 }
806
807 int32_t
808 UnicodeString::extract(int32_t start,
809 int32_t length,
810 char *target,
811 int32_t targetCapacity,
812 enum EInvariant) const
813 {
814 // if the arguments are illegal, then do nothing
815 if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {
816 return 0;
817 }
818
819 // pin the indices to legal values
820 pinIndices(start, length);
821
822 if(length <= targetCapacity) {
823 u_UCharsToChars(getArrayStart() + start, target, length);
824 }
825 UErrorCode status = U_ZERO_ERROR;
826 return u_terminateChars(target, targetCapacity, length, &status);
827 }
828
829 UnicodeString
830 UnicodeString::tempSubString(int32_t start, int32_t len) const {
831 pinIndices(start, len);
832 const UChar *array = getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer
833 if(array==NULL) {
834 array=fUnion.fStackBuffer; // anything not NULL because that would make an empty string
835 len=-2; // bogus result string
836 }
837 return UnicodeString(FALSE, array + start, len);
838 }
839
840 int32_t
841 UnicodeString::toUTF8(int32_t start, int32_t len,
842 char *target, int32_t capacity) const {
843 pinIndices(start, len);
844 int32_t length8;
845 UErrorCode errorCode = U_ZERO_ERROR;
846 u_strToUTF8WithSub(target, capacity, &length8,
847 getBuffer() + start, len,
848 0xFFFD, // Standard substitution character.
849 NULL, // Don't care about number of substitutions.
850 &errorCode);
851 return length8;
852 }
853
854 #if U_CHARSET_IS_UTF8
855
856 int32_t
857 UnicodeString::extract(int32_t start, int32_t len,
858 char *target, uint32_t dstSize) const {
859 // if the arguments are illegal, then do nothing
860 if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
861 return 0;
862 }
863 return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
864 }
865
866 // else see unistr_cnv.cpp
867 #endif
868
869 void
870 UnicodeString::extractBetween(int32_t start,
871 int32_t limit,
872 UnicodeString& target) const {
873 pinIndex(start);
874 pinIndex(limit);
875 doExtract(start, limit - start, target);
876 }
877
878 // When converting from UTF-16 to UTF-8, the result will have at most 3 times
879 // as many bytes as the source has UChars.
880 // The "worst cases" are writing systems like Indic, Thai and CJK with
881 // 3:1 bytes:UChars.
882 void
883 UnicodeString::toUTF8(ByteSink &sink) const {
884 int32_t length16 = length();
885 if(length16 != 0) {
886 char stackBuffer[1024];
887 int32_t capacity = (int32_t)sizeof(stackBuffer);
888 UBool utf8IsOwned = FALSE;
889 char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
890 3*length16,
891 stackBuffer, capacity,
892 &capacity);
893 int32_t length8 = 0;
894 UErrorCode errorCode = U_ZERO_ERROR;
895 u_strToUTF8WithSub(utf8, capacity, &length8,
896 getBuffer(), length16,
897 0xFFFD, // Standard substitution character.
898 NULL, // Don't care about number of substitutions.
899 &errorCode);
900 if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
901 utf8 = (char *)uprv_malloc(length8);
902 if(utf8 != NULL) {
903 utf8IsOwned = TRUE;
904 errorCode = U_ZERO_ERROR;
905 u_strToUTF8WithSub(utf8, length8, &length8,
906 getBuffer(), length16,
907 0xFFFD, // Standard substitution character.
908 NULL, // Don't care about number of substitutions.
909 &errorCode);
910 } else {
911 errorCode = U_MEMORY_ALLOCATION_ERROR;
912 }
913 }
914 if(U_SUCCESS(errorCode)) {
915 sink.Append(utf8, length8);
916 sink.Flush();
917 }
918 if(utf8IsOwned) {
919 uprv_free(utf8);
920 }
921 }
922 }
923
924 int32_t
925 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
926 int32_t length32=0;
927 if(U_SUCCESS(errorCode)) {
928 // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
929 u_strToUTF32WithSub(utf32, capacity, &length32,
930 getBuffer(), length(),
931 0xfffd, // Substitution character.
932 NULL, // Don't care about number of substitutions.
933 &errorCode);
934 }
935 return length32;
936 }
937
938 int32_t
939 UnicodeString::indexOf(const UChar *srcChars,
940 int32_t srcStart,
941 int32_t srcLength,
942 int32_t start,
943 int32_t length) const
944 {
945 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
946 return -1;
947 }
948
949 // UnicodeString does not find empty substrings
950 if(srcLength < 0 && srcChars[srcStart] == 0) {
951 return -1;
952 }
953
954 // get the indices within bounds
955 pinIndices(start, length);
956
957 // find the first occurrence of the substring
958 const UChar *array = getArrayStart();
959 const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
960 if(match == NULL) {
961 return -1;
962 } else {
963 return (int32_t)(match - array);
964 }
965 }
966
967 int32_t
968 UnicodeString::doIndexOf(UChar c,
969 int32_t start,
970 int32_t length) const
971 {
972 // pin indices
973 pinIndices(start, length);
974
975 // find the first occurrence of c
976 const UChar *array = getArrayStart();
977 const UChar *match = u_memchr(array + start, c, length);
978 if(match == NULL) {
979 return -1;
980 } else {
981 return (int32_t)(match - array);
982 }
983 }
984
985 int32_t
986 UnicodeString::doIndexOf(UChar32 c,
987 int32_t start,
988 int32_t length) const {
989 // pin indices
990 pinIndices(start, length);
991
992 // find the first occurrence of c
993 const UChar *array = getArrayStart();
994 const UChar *match = u_memchr32(array + start, c, length);
995 if(match == NULL) {
996 return -1;
997 } else {
998 return (int32_t)(match - array);
999 }
1000 }
1001
1002 int32_t
1003 UnicodeString::lastIndexOf(const UChar *srcChars,
1004 int32_t srcStart,
1005 int32_t srcLength,
1006 int32_t start,
1007 int32_t length) const
1008 {
1009 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
1010 return -1;
1011 }
1012
1013 // UnicodeString does not find empty substrings
1014 if(srcLength < 0 && srcChars[srcStart] == 0) {
1015 return -1;
1016 }
1017
1018 // get the indices within bounds
1019 pinIndices(start, length);
1020
1021 // find the last occurrence of the substring
1022 const UChar *array = getArrayStart();
1023 const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
1024 if(match == NULL) {
1025 return -1;
1026 } else {
1027 return (int32_t)(match - array);
1028 }
1029 }
1030
1031 int32_t
1032 UnicodeString::doLastIndexOf(UChar c,
1033 int32_t start,
1034 int32_t length) const
1035 {
1036 if(isBogus()) {
1037 return -1;
1038 }
1039
1040 // pin indices
1041 pinIndices(start, length);
1042
1043 // find the last occurrence of c
1044 const UChar *array = getArrayStart();
1045 const UChar *match = u_memrchr(array + start, c, length);
1046 if(match == NULL) {
1047 return -1;
1048 } else {
1049 return (int32_t)(match - array);
1050 }
1051 }
1052
1053 int32_t
1054 UnicodeString::doLastIndexOf(UChar32 c,
1055 int32_t start,
1056 int32_t length) const {
1057 // pin indices
1058 pinIndices(start, length);
1059
1060 // find the last occurrence of c
1061 const UChar *array = getArrayStart();
1062 const UChar *match = u_memrchr32(array + start, c, length);
1063 if(match == NULL) {
1064 return -1;
1065 } else {
1066 return (int32_t)(match - array);
1067 }
1068 }
1069
1070 //========================================
1071 // Write implementation
1072 //========================================
1073
1074 UnicodeString&
1075 UnicodeString::findAndReplace(int32_t start,
1076 int32_t length,
1077 const UnicodeString& oldText,
1078 int32_t oldStart,
1079 int32_t oldLength,
1080 const UnicodeString& newText,
1081 int32_t newStart,
1082 int32_t newLength)
1083 {
1084 if(isBogus() || oldText.isBogus() || newText.isBogus()) {
1085 return *this;
1086 }
1087
1088 pinIndices(start, length);
1089 oldText.pinIndices(oldStart, oldLength);
1090 newText.pinIndices(newStart, newLength);
1091
1092 if(oldLength == 0) {
1093 return *this;
1094 }
1095
1096 while(length > 0 && length >= oldLength) {
1097 int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
1098 if(pos < 0) {
1099 // no more oldText's here: done
1100 break;
1101 } else {
1102 // we found oldText, replace it by newText and go beyond it
1103 replace(pos, oldLength, newText, newStart, newLength);
1104 length -= pos + oldLength - start;
1105 start = pos + newLength;
1106 }
1107 }
1108
1109 return *this;
1110 }
1111
1112
1113 void
1114 UnicodeString::setToBogus()
1115 {
1116 releaseArray();
1117
1118 fShortLength = 0;
1119 fUnion.fFields.fArray = 0;
1120 fUnion.fFields.fCapacity = 0;
1121 fFlags = kIsBogus;
1122 }
1123
1124 // turn a bogus string into an empty one
1125 void
1126 UnicodeString::unBogus() {
1127 if(fFlags & kIsBogus) {
1128 setToEmpty();
1129 }
1130 }
1131
1132 // setTo() analogous to the readonly-aliasing constructor with the same signature
1133 UnicodeString &
1134 UnicodeString::setTo(UBool isTerminated,
1135 const UChar *text,
1136 int32_t textLength)
1137 {
1138 if(fFlags & kOpenGetBuffer) {
1139 // do not modify a string that has an "open" getBuffer(minCapacity)
1140 return *this;
1141 }
1142
1143 if(text == NULL) {
1144 // treat as an empty string, do not alias
1145 releaseArray();
1146 setToEmpty();
1147 return *this;
1148 }
1149
1150 if( textLength < -1 ||
1151 (textLength == -1 && !isTerminated) ||
1152 (textLength >= 0 && isTerminated && text[textLength] != 0)
1153 ) {
1154 setToBogus();
1155 return *this;
1156 }
1157
1158 releaseArray();
1159
1160 if(textLength == -1) {
1161 // text is terminated, or else it would have failed the above test
1162 textLength = u_strlen(text);
1163 }
1164 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
1165
1166 fFlags = kReadonlyAlias;
1167 return *this;
1168 }
1169
1170 // setTo() analogous to the writable-aliasing constructor with the same signature
1171 UnicodeString &
1172 UnicodeString::setTo(UChar *buffer,
1173 int32_t buffLength,
1174 int32_t buffCapacity) {
1175 if(fFlags & kOpenGetBuffer) {
1176 // do not modify a string that has an "open" getBuffer(minCapacity)
1177 return *this;
1178 }
1179
1180 if(buffer == NULL) {
1181 // treat as an empty string, do not alias
1182 releaseArray();
1183 setToEmpty();
1184 return *this;
1185 }
1186
1187 if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
1188 setToBogus();
1189 return *this;
1190 } else if(buffLength == -1) {
1191 // buffLength = u_strlen(buff); but do not look beyond buffCapacity
1192 const UChar *p = buffer, *limit = buffer + buffCapacity;
1193 while(p != limit && *p != 0) {
1194 ++p;
1195 }
1196 buffLength = (int32_t)(p - buffer);
1197 }
1198
1199 releaseArray();
1200
1201 setArray(buffer, buffLength, buffCapacity);
1202 fFlags = kWritableAlias;
1203 return *this;
1204 }
1205
1206 UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) {
1207 unBogus();
1208 int32_t length = utf8.length();
1209 int32_t capacity;
1210 // The UTF-16 string will be at most as long as the UTF-8 string.
1211 if(length <= US_STACKBUF_SIZE) {
1212 capacity = US_STACKBUF_SIZE;
1213 } else {
1214 capacity = length + 1; // +1 for the terminating NUL.
1215 }
1216 UChar *utf16 = getBuffer(capacity);
1217 int32_t length16;
1218 UErrorCode errorCode = U_ZERO_ERROR;
1219 u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
1220 utf8.data(), length,
1221 0xfffd, // Substitution character.
1222 NULL, // Don't care about number of substitutions.
1223 &errorCode);
1224 releaseBuffer(length16);
1225 if(U_FAILURE(errorCode)) {
1226 setToBogus();
1227 }
1228 return *this;
1229 }
1230
1231 UnicodeString&
1232 UnicodeString::setCharAt(int32_t offset,
1233 UChar c)
1234 {
1235 int32_t len = length();
1236 if(cloneArrayIfNeeded() && len > 0) {
1237 if(offset < 0) {
1238 offset = 0;
1239 } else if(offset >= len) {
1240 offset = len - 1;
1241 }
1242
1243 getArrayStart()[offset] = c;
1244 }
1245 return *this;
1246 }
1247
1248 UnicodeString&
1249 UnicodeString::replace(int32_t start,
1250 int32_t _length,
1251 UChar32 srcChar) {
1252 UChar buffer[U16_MAX_LENGTH];
1253 int32_t count = 0;
1254 UBool isError = FALSE;
1255 U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
1256 // We test isError so that the compiler does not complain that we don't.
1257 // If isError (srcChar is not a valid code point) then count==0 which means
1258 // we remove the source segment rather than replacing it with srcChar.
1259 return doReplace(start, _length, buffer, 0, isError ? 0 : count);
1260 }
1261
1262 UnicodeString&
1263 UnicodeString::append(UChar32 srcChar) {
1264 UChar buffer[U16_MAX_LENGTH];
1265 int32_t _length = 0;
1266 UBool isError = FALSE;
1267 U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
1268 // We test isError so that the compiler does not complain that we don't.
1269 // If isError then _length==0 which turns the doReplace() into a no-op anyway.
1270 return isError ? *this : doReplace(length(), 0, buffer, 0, _length);
1271 }
1272
1273 UnicodeString&
1274 UnicodeString::doReplace( int32_t start,
1275 int32_t length,
1276 const UnicodeString& src,
1277 int32_t srcStart,
1278 int32_t srcLength)
1279 {
1280 if(!src.isBogus()) {
1281 // pin the indices to legal values
1282 src.pinIndices(srcStart, srcLength);
1283
1284 // get the characters from src
1285 // and replace the range in ourselves with them
1286 return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
1287 } else {
1288 // remove the range
1289 return doReplace(start, length, 0, 0, 0);
1290 }
1291 }
1292
1293 UnicodeString&
1294 UnicodeString::doReplace(int32_t start,
1295 int32_t length,
1296 const UChar *srcChars,
1297 int32_t srcStart,
1298 int32_t srcLength)
1299 {
1300 if(!isWritable()) {
1301 return *this;
1302 }
1303
1304 int32_t oldLength = this->length();
1305
1306 // optimize (read-only alias).remove(0, start) and .remove(start, end)
1307 if((fFlags&kBufferIsReadonly) && srcLength == 0) {
1308 if(start == 0) {
1309 // remove prefix by adjusting the array pointer
1310 pinIndex(length);
1311 fUnion.fFields.fArray += length;
1312 fUnion.fFields.fCapacity -= length;
1313 setLength(oldLength - length);
1314 return *this;
1315 } else {
1316 pinIndex(start);
1317 if(length >= (oldLength - start)) {
1318 // remove suffix by reducing the length (like truncate())
1319 setLength(start);
1320 fUnion.fFields.fCapacity = start; // not NUL-terminated any more
1321 return *this;
1322 }
1323 }
1324 }
1325
1326 if(srcChars == 0) {
1327 srcStart = srcLength = 0;
1328 } else if(srcLength < 0) {
1329 // get the srcLength if necessary
1330 srcLength = u_strlen(srcChars + srcStart);
1331 }
1332
1333 // calculate the size of the string after the replace
1334 int32_t newLength;
1335
1336 // optimize append() onto a large-enough, owned string
1337 if(start >= oldLength) {
1338 if(srcLength == 0) {
1339 return *this;
1340 }
1341 newLength = oldLength + srcLength;
1342 if(newLength <= getCapacity() && isBufferWritable()) {
1343 UChar *oldArray = getArrayStart();
1344 // Do not copy characters when
1345 // UChar *buffer=str.getAppendBuffer(...);
1346 // is followed by
1347 // str.append(buffer, length);
1348 // or
1349 // str.appendString(buffer, length)
1350 // or similar.
1351 if(srcChars + srcStart != oldArray + start || start > oldLength) {
1352 us_arrayCopy(srcChars, srcStart, oldArray, oldLength, srcLength);
1353 }
1354 setLength(newLength);
1355 return *this;
1356 } else {
1357 // pin the indices to legal values
1358 start = oldLength;
1359 length = 0;
1360 }
1361 } else {
1362 // pin the indices to legal values
1363 pinIndices(start, length);
1364
1365 newLength = oldLength - length + srcLength;
1366 }
1367
1368 // the following may change fArray but will not copy the current contents;
1369 // therefore we need to keep the current fArray
1370 UChar oldStackBuffer[US_STACKBUF_SIZE];
1371 UChar *oldArray;
1372 if((fFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
1373 // copy the stack buffer contents because it will be overwritten with
1374 // fUnion.fFields values
1375 u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength);
1376 oldArray = oldStackBuffer;
1377 } else {
1378 oldArray = getArrayStart();
1379 }
1380
1381 // clone our array and allocate a bigger array if needed
1382 int32_t *bufferToDelete = 0;
1383 if(!cloneArrayIfNeeded(newLength, newLength + (newLength >> 2) + kGrowSize,
1384 FALSE, &bufferToDelete)
1385 ) {
1386 return *this;
1387 }
1388
1389 // now do the replace
1390
1391 UChar *newArray = getArrayStart();
1392 if(newArray != oldArray) {
1393 // if fArray changed, then we need to copy everything except what will change
1394 us_arrayCopy(oldArray, 0, newArray, 0, start);
1395 us_arrayCopy(oldArray, start + length,
1396 newArray, start + srcLength,
1397 oldLength - (start + length));
1398 } else if(length != srcLength) {
1399 // fArray did not change; copy only the portion that isn't changing, leaving a hole
1400 us_arrayCopy(oldArray, start + length,
1401 newArray, start + srcLength,
1402 oldLength - (start + length));
1403 }
1404
1405 // now fill in the hole with the new string
1406 us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);
1407
1408 setLength(newLength);
1409
1410 // delayed delete in case srcChars == fArray when we started, and
1411 // to keep oldArray alive for the above operations
1412 if (bufferToDelete) {
1413 uprv_free(bufferToDelete);
1414 }
1415
1416 return *this;
1417 }
1418
1419 /**
1420 * Replaceable API
1421 */
1422 void
1423 UnicodeString::handleReplaceBetween(int32_t start,
1424 int32_t limit,
1425 const UnicodeString& text) {
1426 replaceBetween(start, limit, text);
1427 }
1428
1429 /**
1430 * Replaceable API
1431 */
1432 void
1433 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
1434 if (limit <= start) {
1435 return; // Nothing to do; avoid bogus malloc call
1436 }
1437 UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
1438 // Check to make sure text is not null.
1439 if (text != NULL) {
1440 extractBetween(start, limit, text, 0);
1441 insert(dest, text, 0, limit - start);
1442 uprv_free(text);
1443 }
1444 }
1445
1446 /**
1447 * Replaceable API
1448 *
1449 * NOTE: This is for the Replaceable class. There is no rep.cpp,
1450 * so we implement this function here.
1451 */
1452 UBool Replaceable::hasMetaData() const {
1453 return TRUE;
1454 }
1455
1456 /**
1457 * Replaceable API
1458 */
1459 UBool UnicodeString::hasMetaData() const {
1460 return FALSE;
1461 }
1462
1463 UnicodeString&
1464 UnicodeString::doReverse(int32_t start, int32_t length) {
1465 if(length <= 1 || !cloneArrayIfNeeded()) {
1466 return *this;
1467 }
1468
1469 // pin the indices to legal values
1470 pinIndices(start, length);
1471 if(length <= 1) { // pinIndices() might have shrunk the length
1472 return *this;
1473 }
1474
1475 UChar *left = getArrayStart() + start;
1476 UChar *right = left + length - 1; // -1 for inclusive boundary (length>=2)
1477 UChar swap;
1478 UBool hasSupplementary = FALSE;
1479
1480 // Before the loop we know left<right because length>=2.
1481 do {
1482 hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
1483 hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
1484 *right-- = swap;
1485 } while(left < right);
1486 // Make sure to test the middle code unit of an odd-length string.
1487 // Redundant if the length is even.
1488 hasSupplementary |= (UBool)U16_IS_LEAD(*left);
1489
1490 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1491 if(hasSupplementary) {
1492 UChar swap2;
1493
1494 left = getArrayStart() + start;
1495 right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
1496 while(left < right) {
1497 if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {
1498 *left++ = swap2;
1499 *left++ = swap;
1500 } else {
1501 ++left;
1502 }
1503 }
1504 }
1505
1506 return *this;
1507 }
1508
1509 UBool
1510 UnicodeString::padLeading(int32_t targetLength,
1511 UChar padChar)
1512 {
1513 int32_t oldLength = length();
1514 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1515 return FALSE;
1516 } else {
1517 // move contents up by padding width
1518 UChar *array = getArrayStart();
1519 int32_t start = targetLength - oldLength;
1520 us_arrayCopy(array, 0, array, start, oldLength);
1521
1522 // fill in padding character
1523 while(--start >= 0) {
1524 array[start] = padChar;
1525 }
1526 setLength(targetLength);
1527 return TRUE;
1528 }
1529 }
1530
1531 UBool
1532 UnicodeString::padTrailing(int32_t targetLength,
1533 UChar padChar)
1534 {
1535 int32_t oldLength = length();
1536 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1537 return FALSE;
1538 } else {
1539 // fill in padding character
1540 UChar *array = getArrayStart();
1541 int32_t length = targetLength;
1542 while(--length >= oldLength) {
1543 array[length] = padChar;
1544 }
1545 setLength(targetLength);
1546 return TRUE;
1547 }
1548 }
1549
1550 //========================================
1551 // Hashing
1552 //========================================
1553 int32_t
1554 UnicodeString::doHashCode() const
1555 {
1556 /* Delegate hash computation to uhash. This makes UnicodeString
1557 * hashing consistent with UChar* hashing. */
1558 int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length());
1559 if (hashCode == kInvalidHashCode) {
1560 hashCode = kEmptyHashCode;
1561 }
1562 return hashCode;
1563 }
1564
1565 //========================================
1566 // External Buffer
1567 //========================================
1568
1569 UChar *
1570 UnicodeString::getBuffer(int32_t minCapacity) {
1571 if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
1572 fFlags|=kOpenGetBuffer;
1573 fShortLength=0;
1574 return getArrayStart();
1575 } else {
1576 return 0;
1577 }
1578 }
1579
1580 void
1581 UnicodeString::releaseBuffer(int32_t newLength) {
1582 if(fFlags&kOpenGetBuffer && newLength>=-1) {
1583 // set the new fLength
1584 int32_t capacity=getCapacity();
1585 if(newLength==-1) {
1586 // the new length is the string length, capped by fCapacity
1587 const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
1588 while(p<limit && *p!=0) {
1589 ++p;
1590 }
1591 newLength=(int32_t)(p-array);
1592 } else if(newLength>capacity) {
1593 newLength=capacity;
1594 }
1595 setLength(newLength);
1596 fFlags&=~kOpenGetBuffer;
1597 }
1598 }
1599
1600 //========================================
1601 // Miscellaneous
1602 //========================================
1603 UBool
1604 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
1605 int32_t growCapacity,
1606 UBool doCopyArray,
1607 int32_t **pBufferToDelete,
1608 UBool forceClone) {
1609 // default parameters need to be static, therefore
1610 // the defaults are -1 to have convenience defaults
1611 if(newCapacity == -1) {
1612 newCapacity = getCapacity();
1613 }
1614
1615 // while a getBuffer(minCapacity) is "open",
1616 // prevent any modifications of the string by returning FALSE here
1617 // if the string is bogus, then only an assignment or similar can revive it
1618 if(!isWritable()) {
1619 return FALSE;
1620 }
1621
1622 /*
1623 * We need to make a copy of the array if
1624 * the buffer is read-only, or
1625 * the buffer is refCounted (shared), and refCount>1, or
1626 * the buffer is too small.
1627 * Return FALSE if memory could not be allocated.
1628 */
1629 if(forceClone ||
1630 fFlags & kBufferIsReadonly ||
1631 (fFlags & kRefCounted && refCount() > 1) ||
1632 newCapacity > getCapacity()
1633 ) {
1634 // check growCapacity for default value and use of the stack buffer
1635 if(growCapacity < 0) {
1636 growCapacity = newCapacity;
1637 } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
1638 growCapacity = US_STACKBUF_SIZE;
1639 }
1640
1641 // save old values
1642 UChar oldStackBuffer[US_STACKBUF_SIZE];
1643 UChar *oldArray;
1644 uint8_t flags = fFlags;
1645
1646 if(flags&kUsingStackBuffer) {
1647 U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */
1648 if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
1649 // copy the stack buffer contents because it will be overwritten with
1650 // fUnion.fFields values
1651 us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength);
1652 oldArray = oldStackBuffer;
1653 } else {
1654 oldArray = 0; // no need to copy from stack buffer to itself
1655 }
1656 } else {
1657 oldArray = fUnion.fFields.fArray;
1658 U_ASSERT(oldArray!=NULL); /* when stack buffer is not used, oldArray must have a non-NULL reference */
1659 }
1660
1661 // allocate a new array
1662 if(allocate(growCapacity) ||
1663 (newCapacity < growCapacity && allocate(newCapacity))
1664 ) {
1665 if(doCopyArray && oldArray != 0) {
1666 // copy the contents
1667 // do not copy more than what fits - it may be smaller than before
1668 int32_t minLength = length();
1669 newCapacity = getCapacity();
1670 if(newCapacity < minLength) {
1671 minLength = newCapacity;
1672 setLength(minLength);
1673 }
1674 us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
1675 } else {
1676 fShortLength = 0;
1677 }
1678
1679 // release the old array
1680 if(flags & kRefCounted) {
1681 // the array is refCounted; decrement and release if 0
1682 int32_t *pRefCount = ((int32_t *)oldArray - 1);
1683 if(umtx_atomic_dec(pRefCount) == 0) {
1684 if(pBufferToDelete == 0) {
1685 uprv_free(pRefCount);
1686 } else {
1687 // the caller requested to delete it himself
1688 *pBufferToDelete = pRefCount;
1689 }
1690 }
1691 }
1692 } else {
1693 // not enough memory for growCapacity and not even for the smaller newCapacity
1694 // reset the old values for setToBogus() to release the array
1695 if(!(flags&kUsingStackBuffer)) {
1696 fUnion.fFields.fArray = oldArray;
1697 }
1698 fFlags = flags;
1699 setToBogus();
1700 return FALSE;
1701 }
1702 }
1703 return TRUE;
1704 }
1705
1706 // UnicodeStringAppendable ------------------------------------------------- ***
1707
1708 UnicodeStringAppendable::~UnicodeStringAppendable() {}
1709
1710 UBool
1711 UnicodeStringAppendable::appendCodeUnit(UChar c) {
1712 return str.doReplace(str.length(), 0, &c, 0, 1).isWritable();
1713 }
1714
1715 UBool
1716 UnicodeStringAppendable::appendCodePoint(UChar32 c) {
1717 UChar buffer[U16_MAX_LENGTH];
1718 int32_t cLength = 0;
1719 UBool isError = FALSE;
1720 U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);
1721 return !isError && str.doReplace(str.length(), 0, buffer, 0, cLength).isWritable();
1722 }
1723
1724 UBool
1725 UnicodeStringAppendable::appendString(const UChar *s, int32_t length) {
1726 return str.doReplace(str.length(), 0, s, 0, length).isWritable();
1727 }
1728
1729 UBool
1730 UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
1731 return str.cloneArrayIfNeeded(str.length() + appendCapacity);
1732 }
1733
1734 UChar *
1735 UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
1736 int32_t desiredCapacityHint,
1737 UChar *scratch, int32_t scratchCapacity,
1738 int32_t *resultCapacity) {
1739 if(minCapacity < 1 || scratchCapacity < minCapacity) {
1740 *resultCapacity = 0;
1741 return NULL;
1742 }
1743 int32_t oldLength = str.length();
1744 if(str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
1745 *resultCapacity = str.getCapacity() - oldLength;
1746 return str.getArrayStart() + oldLength;
1747 }
1748 *resultCapacity = scratchCapacity;
1749 return scratch;
1750 }
1751
1752 U_NAMESPACE_END
1753
1754 U_NAMESPACE_USE
1755
1756 U_CAPI int32_t U_EXPORT2
1757 uhash_hashUnicodeString(const UElement key) {
1758 const UnicodeString *str = (const UnicodeString*) key.pointer;
1759 return (str == NULL) ? 0 : str->hashCode();
1760 }
1761
1762 // Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
1763 // does not depend on hashtable code.
1764 U_CAPI UBool U_EXPORT2
1765 uhash_compareUnicodeString(const UElement key1, const UElement key2) {
1766 const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
1767 const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
1768 if (str1 == str2) {
1769 return TRUE;
1770 }
1771 if (str1 == NULL || str2 == NULL) {
1772 return FALSE;
1773 }
1774 return *str1 == *str2;
1775 }
1776
#ifdef U_STATIC_IMPLEMENTATION
/*
This function should never be called. It exists only so that the
virtual vector deleting destructor is defined within unistr.cpp.
The vector deleting destructor is already a part of UObject,
but defining it here makes sure that it is included with this object file.
This keeps static library dependencies to a minimum.
*/
static void uprv_UnicodeStringDummy(void) {
    UnicodeString *dummyArray = new UnicodeString[2];
    delete [] dummyArray;
}
#endif