]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/unistr.cpp
ICU-531.31.tar.gz
[apple/icu.git] / icuSources / common / unistr.cpp
1 /*
2 ******************************************************************************
3 * Copyright (C) 1999-2013, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ******************************************************************************
6 *
7 * File unistr.cpp
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 09/25/98 stephen Creation.
13 * 04/20/99 stephen Overhauled per 4/16 code review.
14 * 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX
15 * 11/18/99 aliu Added handleReplaceBetween() to make inherit from
16 * Replaceable.
17 * 06/25/01 grhoten Removed the dependency on iostream
18 ******************************************************************************
19 */
20
21 #include "unicode/utypes.h"
22 #include "unicode/appendable.h"
23 #include "unicode/putil.h"
24 #include "cstring.h"
25 #include "cmemory.h"
26 #include "unicode/ustring.h"
27 #include "unicode/unistr.h"
28 #include "unicode/utf.h"
29 #include "unicode/utf16.h"
30 #include "uelement.h"
31 #include "ustr_imp.h"
32 #include "umutex.h"
33 #include "uassert.h"
34
35 #if 0
36
37 #include <iostream>
38 using namespace std;
39
40 //DEBUGGING
41 void
42 print(const UnicodeString& s,
43 const char *name)
44 {
45 UChar c;
46 cout << name << ":|";
47 for(int i = 0; i < s.length(); ++i) {
48 c = s[i];
49 if(c>= 0x007E || c < 0x0020)
50 cout << "[0x" << hex << s[i] << "]";
51 else
52 cout << (char) s[i];
53 }
54 cout << '|' << endl;
55 }
56
57 void
58 print(const UChar *s,
59 int32_t len,
60 const char *name)
61 {
62 UChar c;
63 cout << name << ":|";
64 for(int i = 0; i < len; ++i) {
65 c = s[i];
66 if(c>= 0x007E || c < 0x0020)
67 cout << "[0x" << hex << s[i] << "]";
68 else
69 cout << (char) s[i];
70 }
71 cout << '|' << endl;
72 }
73 // END DEBUGGING
74 #endif
75
76 // Local function definitions for now
77
78 // need to copy areas that may overlap
79 static
80 inline void
81 us_arrayCopy(const UChar *src, int32_t srcStart,
82 UChar *dst, int32_t dstStart, int32_t count)
83 {
84 if(count>0) {
85 uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src)));
86 }
87 }
88
89 // u_unescapeAt() callback to get a UChar from a UnicodeString
90 U_CDECL_BEGIN
91 static UChar U_CALLCONV
92 UnicodeString_charAt(int32_t offset, void *context) {
93 return ((icu::UnicodeString*) context)->charAt(offset);
94 }
95 U_CDECL_END
96
97 U_NAMESPACE_BEGIN
98
99 /* The Replaceable virtual destructor can't be defined in the header
100 due to how AIX works with multiple definitions of virtual functions.
101 */
102 Replaceable::~Replaceable() {}
103
104 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
105
106 UnicodeString U_EXPORT2
107 operator+ (const UnicodeString &s1, const UnicodeString &s2) {
108 return
109 UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
110 append(s1).
111 append(s2);
112 }
113
114 //========================================
115 // Reference Counting functions, put at top of file so that optimizing compilers
116 // have a chance to automatically inline.
117 //========================================
118
119 void
120 UnicodeString::addRef() {
121 umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
122 }
123
124 int32_t
125 UnicodeString::removeRef() {
126 return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
127 }
128
129 int32_t
130 UnicodeString::refCount() const {
131 return umtx_loadAcquire(*((u_atomic_int32_t *)fUnion.fFields.fArray - 1));
132 }
133
134 void
135 UnicodeString::releaseArray() {
136 if((fFlags & kRefCounted) && removeRef() == 0) {
137 uprv_free((int32_t *)fUnion.fFields.fArray - 1);
138 }
139 }
140
141
142
143 //========================================
144 // Constructors
145 //========================================
146
147 // The default constructor is inline in unistr.h.
148
149 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
150 : fShortLength(0),
151 fFlags(0)
152 {
153 if(count <= 0 || (uint32_t)c > 0x10ffff) {
154 // just allocate and do not do anything else
155 allocate(capacity);
156 } else {
157 // count > 0, allocate and fill the new string with count c's
158 int32_t unitCount = U16_LENGTH(c), length = count * unitCount;
159 if(capacity < length) {
160 capacity = length;
161 }
162 if(allocate(capacity)) {
163 UChar *array = getArrayStart();
164 int32_t i = 0;
165
166 // fill the new string with c
167 if(unitCount == 1) {
168 // fill with length UChars
169 while(i < length) {
170 array[i++] = (UChar)c;
171 }
172 } else {
173 // get the code units for c
174 UChar units[U16_MAX_LENGTH];
175 U16_APPEND_UNSAFE(units, i, c);
176
177 // now it must be i==unitCount
178 i = 0;
179
180 // for Unicode, unitCount can only be 1, 2, 3, or 4
181 // 1 is handled above
182 while(i < length) {
183 int32_t unitIdx = 0;
184 while(unitIdx < unitCount) {
185 array[i++]=units[unitIdx++];
186 }
187 }
188 }
189 }
190 setLength(length);
191 }
192 }
193
194 UnicodeString::UnicodeString(UChar ch)
195 : fShortLength(1),
196 fFlags(kShortString)
197 {
198 fUnion.fStackBuffer[0] = ch;
199 }
200
201 UnicodeString::UnicodeString(UChar32 ch)
202 : fShortLength(0),
203 fFlags(kShortString)
204 {
205 int32_t i = 0;
206 UBool isError = FALSE;
207 U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);
208 // We test isError so that the compiler does not complain that we don't.
209 // If isError then i==0 which is what we want anyway.
210 if(!isError) {
211 fShortLength = (int8_t)i;
212 }
213 }
214
215 UnicodeString::UnicodeString(const UChar *text)
216 : fShortLength(0),
217 fFlags(kShortString)
218 {
219 doReplace(0, 0, text, 0, -1);
220 }
221
222 UnicodeString::UnicodeString(const UChar *text,
223 int32_t textLength)
224 : fShortLength(0),
225 fFlags(kShortString)
226 {
227 doReplace(0, 0, text, 0, textLength);
228 }
229
230 UnicodeString::UnicodeString(UBool isTerminated,
231 const UChar *text,
232 int32_t textLength)
233 : fShortLength(0),
234 fFlags(kReadonlyAlias)
235 {
236 if(text == NULL) {
237 // treat as an empty string, do not alias
238 setToEmpty();
239 } else if(textLength < -1 ||
240 (textLength == -1 && !isTerminated) ||
241 (textLength >= 0 && isTerminated && text[textLength] != 0)
242 ) {
243 setToBogus();
244 } else {
245 if(textLength == -1) {
246 // text is terminated, or else it would have failed the above test
247 textLength = u_strlen(text);
248 }
249 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
250 }
251 }
252
253 UnicodeString::UnicodeString(UChar *buff,
254 int32_t buffLength,
255 int32_t buffCapacity)
256 : fShortLength(0),
257 fFlags(kWritableAlias)
258 {
259 if(buff == NULL) {
260 // treat as an empty string, do not alias
261 setToEmpty();
262 } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
263 setToBogus();
264 } else {
265 if(buffLength == -1) {
266 // fLength = u_strlen(buff); but do not look beyond buffCapacity
267 const UChar *p = buff, *limit = buff + buffCapacity;
268 while(p != limit && *p != 0) {
269 ++p;
270 }
271 buffLength = (int32_t)(p - buff);
272 }
273 setArray(buff, buffLength, buffCapacity);
274 }
275 }
276
277 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)
278 : fShortLength(0),
279 fFlags(kShortString)
280 {
281 if(src==NULL) {
282 // treat as an empty string
283 } else {
284 if(length<0) {
285 length=(int32_t)uprv_strlen(src);
286 }
287 if(cloneArrayIfNeeded(length, length, FALSE)) {
288 u_charsToUChars(src, getArrayStart(), length);
289 setLength(length);
290 } else {
291 setToBogus();
292 }
293 }
294 }
295
296 #if U_CHARSET_IS_UTF8
297
298 UnicodeString::UnicodeString(const char *codepageData)
299 : fShortLength(0),
300 fFlags(kShortString) {
301 if(codepageData != 0) {
302 setToUTF8(codepageData);
303 }
304 }
305
306 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength)
307 : fShortLength(0),
308 fFlags(kShortString) {
309 // if there's nothing to convert, do nothing
310 if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
311 return;
312 }
313 if(dataLength == -1) {
314 dataLength = (int32_t)uprv_strlen(codepageData);
315 }
316 setToUTF8(StringPiece(codepageData, dataLength));
317 }
318
319 // else see unistr_cnv.cpp
320 #endif
321
322 UnicodeString::UnicodeString(const UnicodeString& that)
323 : Replaceable(),
324 fShortLength(0),
325 fFlags(kShortString)
326 {
327 copyFrom(that);
328 }
329
330 UnicodeString::UnicodeString(const UnicodeString& that,
331 int32_t srcStart)
332 : Replaceable(),
333 fShortLength(0),
334 fFlags(kShortString)
335 {
336 setTo(that, srcStart);
337 }
338
339 UnicodeString::UnicodeString(const UnicodeString& that,
340 int32_t srcStart,
341 int32_t srcLength)
342 : Replaceable(),
343 fShortLength(0),
344 fFlags(kShortString)
345 {
346 setTo(that, srcStart, srcLength);
347 }
348
349 // Replaceable base class clone() default implementation, does not clone
350 Replaceable *
351 Replaceable::clone() const {
352 return NULL;
353 }
354
355 // UnicodeString overrides clone() with a real implementation
356 Replaceable *
357 UnicodeString::clone() const {
358 return new UnicodeString(*this);
359 }
360
361 //========================================
362 // array allocation
363 //========================================
364
365 UBool
366 UnicodeString::allocate(int32_t capacity) {
367 if(capacity <= US_STACKBUF_SIZE) {
368 fFlags = kShortString;
369 } else {
370 // count bytes for the refCounter and the string capacity, and
371 // round up to a multiple of 16; then divide by 4 and allocate int32_t's
372 // to be safely aligned for the refCount
373 // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer()
374 int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2);
375 int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words );
376 if(array != 0) {
377 // set initial refCount and point behind the refCount
378 *array++ = 1;
379
380 // have fArray point to the first UChar
381 fUnion.fFields.fArray = (UChar *)array;
382 fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
383 fFlags = kLongString;
384 } else {
385 fShortLength = 0;
386 fUnion.fFields.fArray = 0;
387 fUnion.fFields.fCapacity = 0;
388 fFlags = kIsBogus;
389 return FALSE;
390 }
391 }
392 return TRUE;
393 }
394
395 //========================================
396 // Destructor
397 //========================================
398 UnicodeString::~UnicodeString()
399 {
400 releaseArray();
401 }
402
403 //========================================
404 // Factory methods
405 //========================================
406
407 UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) {
408 UnicodeString result;
409 result.setToUTF8(utf8);
410 return result;
411 }
412
413 UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
414 UnicodeString result;
415 int32_t capacity;
416 // Most UTF-32 strings will be BMP-only and result in a same-length
417 // UTF-16 string. We overestimate the capacity just slightly,
418 // just in case there are a few supplementary characters.
419 if(length <= US_STACKBUF_SIZE) {
420 capacity = US_STACKBUF_SIZE;
421 } else {
422 capacity = length + (length >> 4) + 4;
423 }
424 do {
425 UChar *utf16 = result.getBuffer(capacity);
426 int32_t length16;
427 UErrorCode errorCode = U_ZERO_ERROR;
428 u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
429 utf32, length,
430 0xfffd, // Substitution character.
431 NULL, // Don't care about number of substitutions.
432 &errorCode);
433 result.releaseBuffer(length16);
434 if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
435 capacity = length16 + 1; // +1 for the terminating NUL.
436 continue;
437 } else if(U_FAILURE(errorCode)) {
438 result.setToBogus();
439 }
440 break;
441 } while(TRUE);
442 return result;
443 }
444
445 //========================================
446 // Assignment
447 //========================================
448
449 UnicodeString &
450 UnicodeString::operator=(const UnicodeString &src) {
451 return copyFrom(src);
452 }
453
454 UnicodeString &
455 UnicodeString::fastCopyFrom(const UnicodeString &src) {
456 return copyFrom(src, TRUE);
457 }
458
459 UnicodeString &
460 UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
461 // if assigning to ourselves, do nothing
462 if(this == 0 || this == &src) {
463 return *this;
464 }
465
466 // is the right side bogus?
467 if(&src == 0 || src.isBogus()) {
468 setToBogus();
469 return *this;
470 }
471
472 // delete the current contents
473 releaseArray();
474
475 if(src.isEmpty()) {
476 // empty string - use the stack buffer
477 setToEmpty();
478 return *this;
479 }
480
481 // we always copy the length
482 int32_t srcLength = src.length();
483 setLength(srcLength);
484
485 // fLength>0 and not an "open" src.getBuffer(minCapacity)
486 switch(src.fFlags) {
487 case kShortString:
488 // short string using the stack buffer, do the same
489 fFlags = kShortString;
490 uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, srcLength * U_SIZEOF_UCHAR);
491 break;
492 case kLongString:
493 // src uses a refCounted string buffer, use that buffer with refCount
494 // src is const, use a cast - we don't really change it
495 ((UnicodeString &)src).addRef();
496 // copy all fields, share the reference-counted buffer
497 fUnion.fFields.fArray = src.fUnion.fFields.fArray;
498 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
499 fFlags = src.fFlags;
500 break;
501 case kReadonlyAlias:
502 if(fastCopy) {
503 // src is a readonly alias, do the same
504 // -> maintain the readonly alias as such
505 fUnion.fFields.fArray = src.fUnion.fFields.fArray;
506 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
507 fFlags = src.fFlags;
508 break;
509 }
510 // else if(!fastCopy) fall through to case kWritableAlias
511 // -> allocate a new buffer and copy the contents
512 case kWritableAlias:
513 // src is a writable alias; we make a copy of that instead
514 if(allocate(srcLength)) {
515 uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR);
516 break;
517 }
518 // if there is not enough memory, then fall through to setting to bogus
519 default:
520 // if src is bogus, set ourselves to bogus
521 // do not call setToBogus() here because fArray and fFlags are not consistent here
522 fShortLength = 0;
523 fUnion.fFields.fArray = 0;
524 fUnion.fFields.fCapacity = 0;
525 fFlags = kIsBogus;
526 break;
527 }
528
529 return *this;
530 }
531
532 //========================================
533 // Miscellaneous operations
534 //========================================
535
536 UnicodeString UnicodeString::unescape() const {
537 UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
538 const UChar *array = getBuffer();
539 int32_t len = length();
540 int32_t prev = 0;
541 for (int32_t i=0;;) {
542 if (i == len) {
543 result.append(array, prev, len - prev);
544 break;
545 }
546 if (array[i++] == 0x5C /*'\\'*/) {
547 result.append(array, prev, (i - 1) - prev);
548 UChar32 c = unescapeAt(i); // advances i
549 if (c < 0) {
550 result.remove(); // return empty string
551 break; // invalid escape sequence
552 }
553 result.append(c);
554 prev = i;
555 }
556 }
557 return result;
558 }
559
560 UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
561 return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
562 }
563
564 //========================================
565 // Read-only implementation
566 //========================================
567 UBool
568 UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {
569 // Requires: this & text not bogus and have same lengths.
570 // Byte-wise comparison works for equality regardless of endianness.
571 return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0;
572 }
573
574 int8_t
575 UnicodeString::doCompare( int32_t start,
576 int32_t length,
577 const UChar *srcChars,
578 int32_t srcStart,
579 int32_t srcLength) const
580 {
581 // compare illegal string values
582 if(isBogus()) {
583 return -1;
584 }
585
586 // pin indices to legal values
587 pinIndices(start, length);
588
589 if(srcChars == NULL) {
590 // treat const UChar *srcChars==NULL as an empty string
591 return length == 0 ? 0 : 1;
592 }
593
594 // get the correct pointer
595 const UChar *chars = getArrayStart();
596
597 chars += start;
598 srcChars += srcStart;
599
600 int32_t minLength;
601 int8_t lengthResult;
602
603 // get the srcLength if necessary
604 if(srcLength < 0) {
605 srcLength = u_strlen(srcChars + srcStart);
606 }
607
608 // are we comparing different lengths?
609 if(length != srcLength) {
610 if(length < srcLength) {
611 minLength = length;
612 lengthResult = -1;
613 } else {
614 minLength = srcLength;
615 lengthResult = 1;
616 }
617 } else {
618 minLength = length;
619 lengthResult = 0;
620 }
621
622 /*
623 * note that uprv_memcmp() returns an int but we return an int8_t;
624 * we need to take care not to truncate the result -
625 * one way to do this is to right-shift the value to
626 * move the sign bit into the lower 8 bits and making sure that this
627 * does not become 0 itself
628 */
629
630 if(minLength > 0 && chars != srcChars) {
631 int32_t result;
632
633 # if U_IS_BIG_ENDIAN
634 // big-endian: byte comparison works
635 result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
636 if(result != 0) {
637 return (int8_t)(result >> 15 | 1);
638 }
639 # else
640 // little-endian: compare UChar units
641 do {
642 result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
643 if(result != 0) {
644 return (int8_t)(result >> 15 | 1);
645 }
646 } while(--minLength > 0);
647 # endif
648 }
649 return lengthResult;
650 }
651
652 /* String compare in code point order - doCompare() compares in code unit order. */
653 int8_t
654 UnicodeString::doCompareCodePointOrder(int32_t start,
655 int32_t length,
656 const UChar *srcChars,
657 int32_t srcStart,
658 int32_t srcLength) const
659 {
660 // compare illegal string values
661 // treat const UChar *srcChars==NULL as an empty string
662 if(isBogus()) {
663 return -1;
664 }
665
666 // pin indices to legal values
667 pinIndices(start, length);
668
669 if(srcChars == NULL) {
670 srcStart = srcLength = 0;
671 }
672
673 int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=NULL)?(srcChars + srcStart):NULL, srcLength, FALSE, TRUE);
674 /* translate the 32-bit result into an 8-bit one */
675 if(diff!=0) {
676 return (int8_t)(diff >> 15 | 1);
677 } else {
678 return 0;
679 }
680 }
681
682 int32_t
683 UnicodeString::getLength() const {
684 return length();
685 }
686
687 UChar
688 UnicodeString::getCharAt(int32_t offset) const {
689 return charAt(offset);
690 }
691
692 UChar32
693 UnicodeString::getChar32At(int32_t offset) const {
694 return char32At(offset);
695 }
696
697 UChar32
698 UnicodeString::char32At(int32_t offset) const
699 {
700 int32_t len = length();
701 if((uint32_t)offset < (uint32_t)len) {
702 const UChar *array = getArrayStart();
703 UChar32 c;
704 U16_GET(array, 0, offset, len, c);
705 return c;
706 } else {
707 return kInvalidUChar;
708 }
709 }
710
711 int32_t
712 UnicodeString::getChar32Start(int32_t offset) const {
713 if((uint32_t)offset < (uint32_t)length()) {
714 const UChar *array = getArrayStart();
715 U16_SET_CP_START(array, 0, offset);
716 return offset;
717 } else {
718 return 0;
719 }
720 }
721
722 int32_t
723 UnicodeString::getChar32Limit(int32_t offset) const {
724 int32_t len = length();
725 if((uint32_t)offset < (uint32_t)len) {
726 const UChar *array = getArrayStart();
727 U16_SET_CP_LIMIT(array, 0, offset, len);
728 return offset;
729 } else {
730 return len;
731 }
732 }
733
734 int32_t
735 UnicodeString::countChar32(int32_t start, int32_t length) const {
736 pinIndices(start, length);
737 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
738 return u_countChar32(getArrayStart()+start, length);
739 }
740
741 UBool
742 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
743 pinIndices(start, length);
744 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
745 return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
746 }
747
748 int32_t
749 UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
750 // pin index
751 int32_t len = length();
752 if(index<0) {
753 index=0;
754 } else if(index>len) {
755 index=len;
756 }
757
758 const UChar *array = getArrayStart();
759 if(delta>0) {
760 U16_FWD_N(array, index, len, delta);
761 } else {
762 U16_BACK_N(array, 0, index, -delta);
763 }
764
765 return index;
766 }
767
768 void
769 UnicodeString::doExtract(int32_t start,
770 int32_t length,
771 UChar *dst,
772 int32_t dstStart) const
773 {
774 // pin indices to legal values
775 pinIndices(start, length);
776
777 // do not copy anything if we alias dst itself
778 const UChar *array = getArrayStart();
779 if(array + start != dst + dstStart) {
780 us_arrayCopy(array, start, dst, dstStart, length);
781 }
782 }
783
784 int32_t
785 UnicodeString::extract(UChar *dest, int32_t destCapacity,
786 UErrorCode &errorCode) const {
787 int32_t len = length();
788 if(U_SUCCESS(errorCode)) {
789 if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
790 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
791 } else {
792 const UChar *array = getArrayStart();
793 if(len>0 && len<=destCapacity && array!=dest) {
794 uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR);
795 }
796 return u_terminateUChars(dest, destCapacity, len, &errorCode);
797 }
798 }
799
800 return len;
801 }
802
803 int32_t
804 UnicodeString::extract(int32_t start,
805 int32_t length,
806 char *target,
807 int32_t targetCapacity,
808 enum EInvariant) const
809 {
810 // if the arguments are illegal, then do nothing
811 if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {
812 return 0;
813 }
814
815 // pin the indices to legal values
816 pinIndices(start, length);
817
818 if(length <= targetCapacity) {
819 u_UCharsToChars(getArrayStart() + start, target, length);
820 }
821 UErrorCode status = U_ZERO_ERROR;
822 return u_terminateChars(target, targetCapacity, length, &status);
823 }
824
825 UnicodeString
826 UnicodeString::tempSubString(int32_t start, int32_t len) const {
827 pinIndices(start, len);
828 const UChar *array = getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer
829 if(array==NULL) {
830 array=fUnion.fStackBuffer; // anything not NULL because that would make an empty string
831 len=-2; // bogus result string
832 }
833 return UnicodeString(FALSE, array + start, len);
834 }
835
836 int32_t
837 UnicodeString::toUTF8(int32_t start, int32_t len,
838 char *target, int32_t capacity) const {
839 pinIndices(start, len);
840 int32_t length8;
841 UErrorCode errorCode = U_ZERO_ERROR;
842 u_strToUTF8WithSub(target, capacity, &length8,
843 getBuffer() + start, len,
844 0xFFFD, // Standard substitution character.
845 NULL, // Don't care about number of substitutions.
846 &errorCode);
847 return length8;
848 }
849
850 #if U_CHARSET_IS_UTF8
851
852 int32_t
853 UnicodeString::extract(int32_t start, int32_t len,
854 char *target, uint32_t dstSize) const {
855 // if the arguments are illegal, then do nothing
856 if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
857 return 0;
858 }
859 return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
860 }
861
862 // else see unistr_cnv.cpp
863 #endif
864
865 void
866 UnicodeString::extractBetween(int32_t start,
867 int32_t limit,
868 UnicodeString& target) const {
869 pinIndex(start);
870 pinIndex(limit);
871 doExtract(start, limit - start, target);
872 }
873
874 // When converting from UTF-16 to UTF-8, the result will have at most 3 times
875 // as many bytes as the source has UChars.
876 // The "worst cases" are writing systems like Indic, Thai and CJK with
877 // 3:1 bytes:UChars.
878 void
879 UnicodeString::toUTF8(ByteSink &sink) const {
880 int32_t length16 = length();
881 if(length16 != 0) {
882 char stackBuffer[1024];
883 int32_t capacity = (int32_t)sizeof(stackBuffer);
884 UBool utf8IsOwned = FALSE;
885 char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
886 3*length16,
887 stackBuffer, capacity,
888 &capacity);
889 int32_t length8 = 0;
890 UErrorCode errorCode = U_ZERO_ERROR;
891 u_strToUTF8WithSub(utf8, capacity, &length8,
892 getBuffer(), length16,
893 0xFFFD, // Standard substitution character.
894 NULL, // Don't care about number of substitutions.
895 &errorCode);
896 if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
897 utf8 = (char *)uprv_malloc(length8);
898 if(utf8 != NULL) {
899 utf8IsOwned = TRUE;
900 errorCode = U_ZERO_ERROR;
901 u_strToUTF8WithSub(utf8, length8, &length8,
902 getBuffer(), length16,
903 0xFFFD, // Standard substitution character.
904 NULL, // Don't care about number of substitutions.
905 &errorCode);
906 } else {
907 errorCode = U_MEMORY_ALLOCATION_ERROR;
908 }
909 }
910 if(U_SUCCESS(errorCode)) {
911 sink.Append(utf8, length8);
912 sink.Flush();
913 }
914 if(utf8IsOwned) {
915 uprv_free(utf8);
916 }
917 }
918 }
919
920 int32_t
921 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
922 int32_t length32=0;
923 if(U_SUCCESS(errorCode)) {
924 // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
925 u_strToUTF32WithSub(utf32, capacity, &length32,
926 getBuffer(), length(),
927 0xfffd, // Substitution character.
928 NULL, // Don't care about number of substitutions.
929 &errorCode);
930 }
931 return length32;
932 }
933
934 int32_t
935 UnicodeString::indexOf(const UChar *srcChars,
936 int32_t srcStart,
937 int32_t srcLength,
938 int32_t start,
939 int32_t length) const
940 {
941 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
942 return -1;
943 }
944
945 // UnicodeString does not find empty substrings
946 if(srcLength < 0 && srcChars[srcStart] == 0) {
947 return -1;
948 }
949
950 // get the indices within bounds
951 pinIndices(start, length);
952
953 // find the first occurrence of the substring
954 const UChar *array = getArrayStart();
955 const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
956 if(match == NULL) {
957 return -1;
958 } else {
959 return (int32_t)(match - array);
960 }
961 }
962
963 int32_t
964 UnicodeString::doIndexOf(UChar c,
965 int32_t start,
966 int32_t length) const
967 {
968 // pin indices
969 pinIndices(start, length);
970
971 // find the first occurrence of c
972 const UChar *array = getArrayStart();
973 const UChar *match = u_memchr(array + start, c, length);
974 if(match == NULL) {
975 return -1;
976 } else {
977 return (int32_t)(match - array);
978 }
979 }
980
981 int32_t
982 UnicodeString::doIndexOf(UChar32 c,
983 int32_t start,
984 int32_t length) const {
985 // pin indices
986 pinIndices(start, length);
987
988 // find the first occurrence of c
989 const UChar *array = getArrayStart();
990 const UChar *match = u_memchr32(array + start, c, length);
991 if(match == NULL) {
992 return -1;
993 } else {
994 return (int32_t)(match - array);
995 }
996 }
997
998 int32_t
999 UnicodeString::lastIndexOf(const UChar *srcChars,
1000 int32_t srcStart,
1001 int32_t srcLength,
1002 int32_t start,
1003 int32_t length) const
1004 {
1005 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
1006 return -1;
1007 }
1008
1009 // UnicodeString does not find empty substrings
1010 if(srcLength < 0 && srcChars[srcStart] == 0) {
1011 return -1;
1012 }
1013
1014 // get the indices within bounds
1015 pinIndices(start, length);
1016
1017 // find the last occurrence of the substring
1018 const UChar *array = getArrayStart();
1019 const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
1020 if(match == NULL) {
1021 return -1;
1022 } else {
1023 return (int32_t)(match - array);
1024 }
1025 }
1026
1027 int32_t
1028 UnicodeString::doLastIndexOf(UChar c,
1029 int32_t start,
1030 int32_t length) const
1031 {
1032 if(isBogus()) {
1033 return -1;
1034 }
1035
1036 // pin indices
1037 pinIndices(start, length);
1038
1039 // find the last occurrence of c
1040 const UChar *array = getArrayStart();
1041 const UChar *match = u_memrchr(array + start, c, length);
1042 if(match == NULL) {
1043 return -1;
1044 } else {
1045 return (int32_t)(match - array);
1046 }
1047 }
1048
1049 int32_t
1050 UnicodeString::doLastIndexOf(UChar32 c,
1051 int32_t start,
1052 int32_t length) const {
1053 // pin indices
1054 pinIndices(start, length);
1055
1056 // find the last occurrence of c
1057 const UChar *array = getArrayStart();
1058 const UChar *match = u_memrchr32(array + start, c, length);
1059 if(match == NULL) {
1060 return -1;
1061 } else {
1062 return (int32_t)(match - array);
1063 }
1064 }
1065
1066 //========================================
1067 // Write implementation
1068 //========================================
1069
1070 UnicodeString&
1071 UnicodeString::findAndReplace(int32_t start,
1072 int32_t length,
1073 const UnicodeString& oldText,
1074 int32_t oldStart,
1075 int32_t oldLength,
1076 const UnicodeString& newText,
1077 int32_t newStart,
1078 int32_t newLength)
1079 {
1080 if(isBogus() || oldText.isBogus() || newText.isBogus()) {
1081 return *this;
1082 }
1083
1084 pinIndices(start, length);
1085 oldText.pinIndices(oldStart, oldLength);
1086 newText.pinIndices(newStart, newLength);
1087
1088 if(oldLength == 0) {
1089 return *this;
1090 }
1091
1092 while(length > 0 && length >= oldLength) {
1093 int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
1094 if(pos < 0) {
1095 // no more oldText's here: done
1096 break;
1097 } else {
1098 // we found oldText, replace it by newText and go beyond it
1099 replace(pos, oldLength, newText, newStart, newLength);
1100 length -= pos + oldLength - start;
1101 start = pos + newLength;
1102 }
1103 }
1104
1105 return *this;
1106 }
1107
1108
1109 void
1110 UnicodeString::setToBogus()
1111 {
1112 releaseArray();
1113
1114 fShortLength = 0;
1115 fUnion.fFields.fArray = 0;
1116 fUnion.fFields.fCapacity = 0;
1117 fFlags = kIsBogus;
1118 }
1119
1120 // turn a bogus string into an empty one
1121 void
1122 UnicodeString::unBogus() {
1123 if(fFlags & kIsBogus) {
1124 setToEmpty();
1125 }
1126 }
1127
1128 const UChar *
1129 UnicodeString::getTerminatedBuffer() {
1130 if(!isWritable()) {
1131 return 0;
1132 }
1133 UChar *array = getArrayStart();
1134 int32_t len = length();
1135 if(len < getCapacity()) {
1136 if(fFlags & kBufferIsReadonly) {
1137 // If len<capacity on a read-only alias, then array[len] is
1138 // either the original NUL (if constructed with (TRUE, s, length))
1139 // or one of the original string contents characters (if later truncated),
1140 // therefore we can assume that array[len] is initialized memory.
1141 if(array[len] == 0) {
1142 return array;
1143 }
1144 } else if(((fFlags & kRefCounted) == 0 || refCount() == 1)) {
1145 // kRefCounted: Do not write the NUL if the buffer is shared.
1146 // That is mostly safe, except when the length of one copy was modified
1147 // without copy-on-write, e.g., via truncate(newLength) or remove(void).
1148 // Then the NUL would be written into the middle of another copy's string.
1149
1150 // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL.
1151 // Do not test if there is a NUL already because it might be uninitialized memory.
1152 // (That would be safe, but tools like valgrind & Purify would complain.)
1153 array[len] = 0;
1154 return array;
1155 }
1156 }
1157 if(cloneArrayIfNeeded(len+1)) {
1158 array = getArrayStart();
1159 array[len] = 0;
1160 return array;
1161 } else {
1162 return NULL;
1163 }
1164 }
1165
1166 // setTo() analogous to the readonly-aliasing constructor with the same signature
1167 UnicodeString &
1168 UnicodeString::setTo(UBool isTerminated,
1169 const UChar *text,
1170 int32_t textLength)
1171 {
1172 if(fFlags & kOpenGetBuffer) {
1173 // do not modify a string that has an "open" getBuffer(minCapacity)
1174 return *this;
1175 }
1176
1177 if(text == NULL) {
1178 // treat as an empty string, do not alias
1179 releaseArray();
1180 setToEmpty();
1181 return *this;
1182 }
1183
1184 if( textLength < -1 ||
1185 (textLength == -1 && !isTerminated) ||
1186 (textLength >= 0 && isTerminated && text[textLength] != 0)
1187 ) {
1188 setToBogus();
1189 return *this;
1190 }
1191
1192 releaseArray();
1193
1194 if(textLength == -1) {
1195 // text is terminated, or else it would have failed the above test
1196 textLength = u_strlen(text);
1197 }
1198 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
1199
1200 fFlags = kReadonlyAlias;
1201 return *this;
1202 }
1203
1204 // setTo() analogous to the writable-aliasing constructor with the same signature
1205 UnicodeString &
1206 UnicodeString::setTo(UChar *buffer,
1207 int32_t buffLength,
1208 int32_t buffCapacity) {
1209 if(fFlags & kOpenGetBuffer) {
1210 // do not modify a string that has an "open" getBuffer(minCapacity)
1211 return *this;
1212 }
1213
1214 if(buffer == NULL) {
1215 // treat as an empty string, do not alias
1216 releaseArray();
1217 setToEmpty();
1218 return *this;
1219 }
1220
1221 if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
1222 setToBogus();
1223 return *this;
1224 } else if(buffLength == -1) {
1225 // buffLength = u_strlen(buff); but do not look beyond buffCapacity
1226 const UChar *p = buffer, *limit = buffer + buffCapacity;
1227 while(p != limit && *p != 0) {
1228 ++p;
1229 }
1230 buffLength = (int32_t)(p - buffer);
1231 }
1232
1233 releaseArray();
1234
1235 setArray(buffer, buffLength, buffCapacity);
1236 fFlags = kWritableAlias;
1237 return *this;
1238 }
1239
1240 UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) {
1241 unBogus();
1242 int32_t length = utf8.length();
1243 int32_t capacity;
1244 // The UTF-16 string will be at most as long as the UTF-8 string.
1245 if(length <= US_STACKBUF_SIZE) {
1246 capacity = US_STACKBUF_SIZE;
1247 } else {
1248 capacity = length + 1; // +1 for the terminating NUL.
1249 }
1250 UChar *utf16 = getBuffer(capacity);
1251 int32_t length16;
1252 UErrorCode errorCode = U_ZERO_ERROR;
1253 u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
1254 utf8.data(), length,
1255 0xfffd, // Substitution character.
1256 NULL, // Don't care about number of substitutions.
1257 &errorCode);
1258 releaseBuffer(length16);
1259 if(U_FAILURE(errorCode)) {
1260 setToBogus();
1261 }
1262 return *this;
1263 }
1264
1265 UnicodeString&
1266 UnicodeString::setCharAt(int32_t offset,
1267 UChar c)
1268 {
1269 int32_t len = length();
1270 if(cloneArrayIfNeeded() && len > 0) {
1271 if(offset < 0) {
1272 offset = 0;
1273 } else if(offset >= len) {
1274 offset = len - 1;
1275 }
1276
1277 getArrayStart()[offset] = c;
1278 }
1279 return *this;
1280 }
1281
1282 UnicodeString&
1283 UnicodeString::replace(int32_t start,
1284 int32_t _length,
1285 UChar32 srcChar) {
1286 UChar buffer[U16_MAX_LENGTH];
1287 int32_t count = 0;
1288 UBool isError = FALSE;
1289 U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
1290 // We test isError so that the compiler does not complain that we don't.
1291 // If isError (srcChar is not a valid code point) then count==0 which means
1292 // we remove the source segment rather than replacing it with srcChar.
1293 return doReplace(start, _length, buffer, 0, isError ? 0 : count);
1294 }
1295
1296 UnicodeString&
1297 UnicodeString::append(UChar32 srcChar) {
1298 UChar buffer[U16_MAX_LENGTH];
1299 int32_t _length = 0;
1300 UBool isError = FALSE;
1301 U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
1302 // We test isError so that the compiler does not complain that we don't.
1303 // If isError then _length==0 which turns the doReplace() into a no-op anyway.
1304 return isError ? *this : doReplace(length(), 0, buffer, 0, _length);
1305 }
1306
1307 UnicodeString&
1308 UnicodeString::doReplace( int32_t start,
1309 int32_t length,
1310 const UnicodeString& src,
1311 int32_t srcStart,
1312 int32_t srcLength)
1313 {
1314 if(!src.isBogus()) {
1315 // pin the indices to legal values
1316 src.pinIndices(srcStart, srcLength);
1317
1318 // get the characters from src
1319 // and replace the range in ourselves with them
1320 return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
1321 } else {
1322 // remove the range
1323 return doReplace(start, length, 0, 0, 0);
1324 }
1325 }
1326
1327 UnicodeString&
1328 UnicodeString::doReplace(int32_t start,
1329 int32_t length,
1330 const UChar *srcChars,
1331 int32_t srcStart,
1332 int32_t srcLength)
1333 {
1334 if(!isWritable()) {
1335 return *this;
1336 }
1337
1338 int32_t oldLength = this->length();
1339
1340 // optimize (read-only alias).remove(0, start) and .remove(start, end)
1341 if((fFlags&kBufferIsReadonly) && srcLength == 0) {
1342 if(start == 0) {
1343 // remove prefix by adjusting the array pointer
1344 pinIndex(length);
1345 fUnion.fFields.fArray += length;
1346 fUnion.fFields.fCapacity -= length;
1347 setLength(oldLength - length);
1348 return *this;
1349 } else {
1350 pinIndex(start);
1351 if(length >= (oldLength - start)) {
1352 // remove suffix by reducing the length (like truncate())
1353 setLength(start);
1354 fUnion.fFields.fCapacity = start; // not NUL-terminated any more
1355 return *this;
1356 }
1357 }
1358 }
1359
1360 if(srcChars == 0) {
1361 srcStart = srcLength = 0;
1362 } else if(srcLength < 0) {
1363 // get the srcLength if necessary
1364 srcLength = u_strlen(srcChars + srcStart);
1365 }
1366
1367 // calculate the size of the string after the replace
1368 int32_t newLength;
1369
1370 // optimize append() onto a large-enough, owned string
1371 if(start >= oldLength) {
1372 if(srcLength == 0) {
1373 return *this;
1374 }
1375 newLength = oldLength + srcLength;
1376 if(newLength <= getCapacity() && isBufferWritable()) {
1377 UChar *oldArray = getArrayStart();
1378 // Do not copy characters when
1379 // UChar *buffer=str.getAppendBuffer(...);
1380 // is followed by
1381 // str.append(buffer, length);
1382 // or
1383 // str.appendString(buffer, length)
1384 // or similar.
1385 if(srcChars + srcStart != oldArray + start || start > oldLength) {
1386 us_arrayCopy(srcChars, srcStart, oldArray, oldLength, srcLength);
1387 }
1388 setLength(newLength);
1389 return *this;
1390 } else {
1391 // pin the indices to legal values
1392 start = oldLength;
1393 length = 0;
1394 }
1395 } else {
1396 // pin the indices to legal values
1397 pinIndices(start, length);
1398
1399 newLength = oldLength - length + srcLength;
1400 }
1401
1402 // the following may change fArray but will not copy the current contents;
1403 // therefore we need to keep the current fArray
1404 UChar oldStackBuffer[US_STACKBUF_SIZE];
1405 UChar *oldArray;
1406 if((fFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
1407 // copy the stack buffer contents because it will be overwritten with
1408 // fUnion.fFields values
1409 u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength);
1410 oldArray = oldStackBuffer;
1411 } else {
1412 oldArray = getArrayStart();
1413 }
1414
1415 // clone our array and allocate a bigger array if needed
1416 int32_t *bufferToDelete = 0;
1417 if(!cloneArrayIfNeeded(newLength, newLength + (newLength >> 2) + kGrowSize,
1418 FALSE, &bufferToDelete)
1419 ) {
1420 return *this;
1421 }
1422
1423 // now do the replace
1424
1425 UChar *newArray = getArrayStart();
1426 if(newArray != oldArray) {
1427 // if fArray changed, then we need to copy everything except what will change
1428 us_arrayCopy(oldArray, 0, newArray, 0, start);
1429 us_arrayCopy(oldArray, start + length,
1430 newArray, start + srcLength,
1431 oldLength - (start + length));
1432 } else if(length != srcLength) {
1433 // fArray did not change; copy only the portion that isn't changing, leaving a hole
1434 us_arrayCopy(oldArray, start + length,
1435 newArray, start + srcLength,
1436 oldLength - (start + length));
1437 }
1438
1439 // now fill in the hole with the new string
1440 us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);
1441
1442 setLength(newLength);
1443
1444 // delayed delete in case srcChars == fArray when we started, and
1445 // to keep oldArray alive for the above operations
1446 if (bufferToDelete) {
1447 uprv_free(bufferToDelete);
1448 }
1449
1450 return *this;
1451 }
1452
1453 /**
1454 * Replaceable API
1455 */
1456 void
1457 UnicodeString::handleReplaceBetween(int32_t start,
1458 int32_t limit,
1459 const UnicodeString& text) {
1460 replaceBetween(start, limit, text);
1461 }
1462
1463 /**
1464 * Replaceable API
1465 */
1466 void
1467 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
1468 if (limit <= start) {
1469 return; // Nothing to do; avoid bogus malloc call
1470 }
1471 UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
1472 // Check to make sure text is not null.
1473 if (text != NULL) {
1474 extractBetween(start, limit, text, 0);
1475 insert(dest, text, 0, limit - start);
1476 uprv_free(text);
1477 }
1478 }
1479
1480 /**
1481 * Replaceable API
1482 *
1483 * NOTE: This is for the Replaceable class. There is no rep.cpp,
1484 * so we implement this function here.
1485 */
1486 UBool Replaceable::hasMetaData() const {
1487 return TRUE;
1488 }
1489
1490 /**
1491 * Replaceable API
1492 */
1493 UBool UnicodeString::hasMetaData() const {
1494 return FALSE;
1495 }
1496
1497 UnicodeString&
1498 UnicodeString::doReverse(int32_t start, int32_t length) {
1499 if(length <= 1 || !cloneArrayIfNeeded()) {
1500 return *this;
1501 }
1502
1503 // pin the indices to legal values
1504 pinIndices(start, length);
1505 if(length <= 1) { // pinIndices() might have shrunk the length
1506 return *this;
1507 }
1508
1509 UChar *left = getArrayStart() + start;
1510 UChar *right = left + length - 1; // -1 for inclusive boundary (length>=2)
1511 UChar swap;
1512 UBool hasSupplementary = FALSE;
1513
1514 // Before the loop we know left<right because length>=2.
1515 do {
1516 hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
1517 hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
1518 *right-- = swap;
1519 } while(left < right);
1520 // Make sure to test the middle code unit of an odd-length string.
1521 // Redundant if the length is even.
1522 hasSupplementary |= (UBool)U16_IS_LEAD(*left);
1523
1524 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1525 if(hasSupplementary) {
1526 UChar swap2;
1527
1528 left = getArrayStart() + start;
1529 right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
1530 while(left < right) {
1531 if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {
1532 *left++ = swap2;
1533 *left++ = swap;
1534 } else {
1535 ++left;
1536 }
1537 }
1538 }
1539
1540 return *this;
1541 }
1542
1543 UBool
1544 UnicodeString::padLeading(int32_t targetLength,
1545 UChar padChar)
1546 {
1547 int32_t oldLength = length();
1548 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1549 return FALSE;
1550 } else {
1551 // move contents up by padding width
1552 UChar *array = getArrayStart();
1553 int32_t start = targetLength - oldLength;
1554 us_arrayCopy(array, 0, array, start, oldLength);
1555
1556 // fill in padding character
1557 while(--start >= 0) {
1558 array[start] = padChar;
1559 }
1560 setLength(targetLength);
1561 return TRUE;
1562 }
1563 }
1564
1565 UBool
1566 UnicodeString::padTrailing(int32_t targetLength,
1567 UChar padChar)
1568 {
1569 int32_t oldLength = length();
1570 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1571 return FALSE;
1572 } else {
1573 // fill in padding character
1574 UChar *array = getArrayStart();
1575 int32_t length = targetLength;
1576 while(--length >= oldLength) {
1577 array[length] = padChar;
1578 }
1579 setLength(targetLength);
1580 return TRUE;
1581 }
1582 }
1583
1584 //========================================
1585 // Hashing
1586 //========================================
1587 int32_t
1588 UnicodeString::doHashCode() const
1589 {
1590 /* Delegate hash computation to uhash. This makes UnicodeString
1591 * hashing consistent with UChar* hashing. */
1592 int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length());
1593 if (hashCode == kInvalidHashCode) {
1594 hashCode = kEmptyHashCode;
1595 }
1596 return hashCode;
1597 }
1598
1599 //========================================
1600 // External Buffer
1601 //========================================
1602
1603 UChar *
1604 UnicodeString::getBuffer(int32_t minCapacity) {
1605 if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
1606 fFlags|=kOpenGetBuffer;
1607 fShortLength=0;
1608 return getArrayStart();
1609 } else {
1610 return 0;
1611 }
1612 }
1613
1614 void
1615 UnicodeString::releaseBuffer(int32_t newLength) {
1616 if(fFlags&kOpenGetBuffer && newLength>=-1) {
1617 // set the new fLength
1618 int32_t capacity=getCapacity();
1619 if(newLength==-1) {
1620 // the new length is the string length, capped by fCapacity
1621 const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
1622 while(p<limit && *p!=0) {
1623 ++p;
1624 }
1625 newLength=(int32_t)(p-array);
1626 } else if(newLength>capacity) {
1627 newLength=capacity;
1628 }
1629 setLength(newLength);
1630 fFlags&=~kOpenGetBuffer;
1631 }
1632 }
1633
1634 //========================================
1635 // Miscellaneous
1636 //========================================
1637 UBool
1638 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
1639 int32_t growCapacity,
1640 UBool doCopyArray,
1641 int32_t **pBufferToDelete,
1642 UBool forceClone) {
1643 // default parameters need to be static, therefore
1644 // the defaults are -1 to have convenience defaults
1645 if(newCapacity == -1) {
1646 newCapacity = getCapacity();
1647 }
1648
1649 // while a getBuffer(minCapacity) is "open",
1650 // prevent any modifications of the string by returning FALSE here
1651 // if the string is bogus, then only an assignment or similar can revive it
1652 if(!isWritable()) {
1653 return FALSE;
1654 }
1655
1656 /*
1657 * We need to make a copy of the array if
1658 * the buffer is read-only, or
1659 * the buffer is refCounted (shared), and refCount>1, or
1660 * the buffer is too small.
1661 * Return FALSE if memory could not be allocated.
1662 */
1663 if(forceClone ||
1664 fFlags & kBufferIsReadonly ||
1665 (fFlags & kRefCounted && refCount() > 1) ||
1666 newCapacity > getCapacity()
1667 ) {
1668 // check growCapacity for default value and use of the stack buffer
1669 if(growCapacity < 0) {
1670 growCapacity = newCapacity;
1671 } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
1672 growCapacity = US_STACKBUF_SIZE;
1673 }
1674
1675 // save old values
1676 UChar oldStackBuffer[US_STACKBUF_SIZE];
1677 UChar *oldArray;
1678 uint8_t flags = fFlags;
1679
1680 if(flags&kUsingStackBuffer) {
1681 U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */
1682 if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
1683 // copy the stack buffer contents because it will be overwritten with
1684 // fUnion.fFields values
1685 us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength);
1686 oldArray = oldStackBuffer;
1687 } else {
1688 oldArray = 0; // no need to copy from stack buffer to itself
1689 }
1690 } else {
1691 oldArray = fUnion.fFields.fArray;
1692 U_ASSERT(oldArray!=NULL); /* when stack buffer is not used, oldArray must have a non-NULL reference */
1693 }
1694
1695 // allocate a new array
1696 if(allocate(growCapacity) ||
1697 (newCapacity < growCapacity && allocate(newCapacity))
1698 ) {
1699 if(doCopyArray && oldArray != 0) {
1700 // copy the contents
1701 // do not copy more than what fits - it may be smaller than before
1702 int32_t minLength = length();
1703 newCapacity = getCapacity();
1704 if(newCapacity < minLength) {
1705 minLength = newCapacity;
1706 setLength(minLength);
1707 }
1708 us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
1709 } else {
1710 fShortLength = 0;
1711 }
1712
1713 // release the old array
1714 if(flags & kRefCounted) {
1715 // the array is refCounted; decrement and release if 0
1716 u_atomic_int32_t *pRefCount = ((u_atomic_int32_t *)oldArray - 1);
1717 if(umtx_atomic_dec(pRefCount) == 0) {
1718 if(pBufferToDelete == 0) {
1719 // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t
1720 // is defined as volatile. (Volatile has useful non-standard behavior
1721 // with this compiler.)
1722 uprv_free((void *)pRefCount);
1723 } else {
1724 // the caller requested to delete it himself
1725 *pBufferToDelete = (int32_t *)pRefCount;
1726 }
1727 }
1728 }
1729 } else {
1730 // not enough memory for growCapacity and not even for the smaller newCapacity
1731 // reset the old values for setToBogus() to release the array
1732 if(!(flags&kUsingStackBuffer)) {
1733 fUnion.fFields.fArray = oldArray;
1734 }
1735 fFlags = flags;
1736 setToBogus();
1737 return FALSE;
1738 }
1739 }
1740 return TRUE;
1741 }
1742
1743 // UnicodeStringAppendable ------------------------------------------------- ***
1744
1745 UnicodeStringAppendable::~UnicodeStringAppendable() {}
1746
1747 UBool
1748 UnicodeStringAppendable::appendCodeUnit(UChar c) {
1749 return str.doReplace(str.length(), 0, &c, 0, 1).isWritable();
1750 }
1751
1752 UBool
1753 UnicodeStringAppendable::appendCodePoint(UChar32 c) {
1754 UChar buffer[U16_MAX_LENGTH];
1755 int32_t cLength = 0;
1756 UBool isError = FALSE;
1757 U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);
1758 return !isError && str.doReplace(str.length(), 0, buffer, 0, cLength).isWritable();
1759 }
1760
1761 UBool
1762 UnicodeStringAppendable::appendString(const UChar *s, int32_t length) {
1763 return str.doReplace(str.length(), 0, s, 0, length).isWritable();
1764 }
1765
1766 UBool
1767 UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
1768 return str.cloneArrayIfNeeded(str.length() + appendCapacity);
1769 }
1770
1771 UChar *
1772 UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
1773 int32_t desiredCapacityHint,
1774 UChar *scratch, int32_t scratchCapacity,
1775 int32_t *resultCapacity) {
1776 if(minCapacity < 1 || scratchCapacity < minCapacity) {
1777 *resultCapacity = 0;
1778 return NULL;
1779 }
1780 int32_t oldLength = str.length();
1781 if(str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
1782 *resultCapacity = str.getCapacity() - oldLength;
1783 return str.getArrayStart() + oldLength;
1784 }
1785 *resultCapacity = scratchCapacity;
1786 return scratch;
1787 }
1788
1789 U_NAMESPACE_END
1790
1791 U_NAMESPACE_USE
1792
1793 U_CAPI int32_t U_EXPORT2
1794 uhash_hashUnicodeString(const UElement key) {
1795 const UnicodeString *str = (const UnicodeString*) key.pointer;
1796 return (str == NULL) ? 0 : str->hashCode();
1797 }
1798
1799 // Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
1800 // does not depend on hashtable code.
1801 U_CAPI UBool U_EXPORT2
1802 uhash_compareUnicodeString(const UElement key1, const UElement key2) {
1803 const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
1804 const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
1805 if (str1 == str2) {
1806 return TRUE;
1807 }
1808 if (str1 == NULL || str2 == NULL) {
1809 return FALSE;
1810 }
1811 return *str1 == *str2;
1812 }
1813
1814 #ifdef U_STATIC_IMPLEMENTATION
1815 /*
1816 This should never be called. It is defined here to make sure that the
1817 virtual vector deleting destructor is defined within unistr.cpp.
1818 The vector deleting destructor is already a part of UObject,
1819 but defining it here makes sure that it is included with this object file.
1820 This makes sure that static library dependencies are kept to a minimum.
1821 */
1822 static void uprv_UnicodeStringDummy(void) {
1823 delete [] (new UnicodeString[2]);
1824 }
1825 #endif