]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/unistr.cpp
ICU-400.38.tar.gz
[apple/icu.git] / icuSources / common / unistr.cpp
1 /*
2 ******************************************************************************
3 * Copyright (C) 1999-2008, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 ******************************************************************************
6 *
7 * File unistr.cpp
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 09/25/98 stephen Creation.
13 * 04/20/99 stephen Overhauled per 4/16 code review.
14 * 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX
15 * 11/18/99 aliu Added handleReplaceBetween() to make inherit from
16 * Replaceable.
17 * 06/25/01 grhoten Removed the dependency on iostream
18 ******************************************************************************
19 */
20
21 #include "unicode/utypes.h"
22 #include "unicode/putil.h"
23 #include "cstring.h"
24 #include "cmemory.h"
25 #include "unicode/ustring.h"
26 #include "unicode/unistr.h"
27 #include "uhash.h"
28 #include "ustr_imp.h"
29 #include "umutex.h"
30
31 #if 0
32
33 #if U_IOSTREAM_SOURCE >= 199711
34 #include <iostream>
35 using namespace std;
36 #elif U_IOSTREAM_SOURCE >= 198506
37 #include <iostream.h>
38 #endif
39
40 //DEBUGGING
41 void
42 print(const UnicodeString& s,
43 const char *name)
44 {
45 UChar c;
46 cout << name << ":|";
47 for(int i = 0; i < s.length(); ++i) {
48 c = s[i];
49 if(c>= 0x007E || c < 0x0020)
50 cout << "[0x" << hex << s[i] << "]";
51 else
52 cout << (char) s[i];
53 }
54 cout << '|' << endl;
55 }
56
57 void
58 print(const UChar *s,
59 int32_t len,
60 const char *name)
61 {
62 UChar c;
63 cout << name << ":|";
64 for(int i = 0; i < len; ++i) {
65 c = s[i];
66 if(c>= 0x007E || c < 0x0020)
67 cout << "[0x" << hex << s[i] << "]";
68 else
69 cout << (char) s[i];
70 }
71 cout << '|' << endl;
72 }
73 // END DEBUGGING
74 #endif
75
76 // Local function definitions for now
77
78 // need to copy areas that may overlap
79 static
80 inline void
81 us_arrayCopy(const UChar *src, int32_t srcStart,
82 UChar *dst, int32_t dstStart, int32_t count)
83 {
84 if(count>0) {
85 uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src)));
86 }
87 }
88
89 // u_unescapeAt() callback to get a UChar from a UnicodeString
90 U_CDECL_BEGIN
91 static UChar U_CALLCONV
92 UnicodeString_charAt(int32_t offset, void *context) {
93 return ((U_NAMESPACE_QUALIFIER UnicodeString*) context)->charAt(offset);
94 }
95 U_CDECL_END
96
97 U_NAMESPACE_BEGIN
98
99 /* The Replaceable virtual destructor can't be defined in the header
100 due to how AIX works with multiple definitions of virtual functions.
101 */
102 Replaceable::~Replaceable() {}
103 Replaceable::Replaceable() {}
104 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
105
106 UnicodeString U_EXPORT2
107 operator+ (const UnicodeString &s1, const UnicodeString &s2) {
108 return
109 UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
110 append(s1).
111 append(s2);
112 }
113
114 //========================================
115 // Reference Counting functions, put at top of file so that optimizing compilers
116 // have a chance to automatically inline.
117 //========================================
118
119 void
120 UnicodeString::addRef()
121 { umtx_atomic_inc((int32_t *)fUnion.fFields.fArray - 1);}
122
123 int32_t
124 UnicodeString::removeRef()
125 { return umtx_atomic_dec((int32_t *)fUnion.fFields.fArray - 1);}
126
127 int32_t
128 UnicodeString::refCount() const
129 {
130 umtx_lock(NULL);
131 // Note: without the lock to force a memory barrier, we might see a very
132 // stale value on some multi-processor systems.
133 int32_t count = *((int32_t *)fUnion.fFields.fArray - 1);
134 umtx_unlock(NULL);
135 return count;
136 }
137
138 void
139 UnicodeString::releaseArray() {
140 if((fFlags & kRefCounted) && removeRef() == 0) {
141 uprv_free((int32_t *)fUnion.fFields.fArray - 1);
142 }
143 }
144
145
146
147 //========================================
148 // Constructors
149 //========================================
150 UnicodeString::UnicodeString()
151 : fShortLength(0),
152 fFlags(kShortString)
153 {}
154
155 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
156 : fShortLength(0),
157 fFlags(0)
158 {
159 if(count <= 0 || (uint32_t)c > 0x10ffff) {
160 // just allocate and do not do anything else
161 allocate(capacity);
162 } else {
163 // count > 0, allocate and fill the new string with count c's
164 int32_t unitCount = UTF_CHAR_LENGTH(c), length = count * unitCount;
165 if(capacity < length) {
166 capacity = length;
167 }
168 if(allocate(capacity)) {
169 UChar *array = getArrayStart();
170 int32_t i = 0;
171
172 // fill the new string with c
173 if(unitCount == 1) {
174 // fill with length UChars
175 while(i < length) {
176 array[i++] = (UChar)c;
177 }
178 } else {
179 // get the code units for c
180 UChar units[UTF_MAX_CHAR_LENGTH];
181 UTF_APPEND_CHAR_UNSAFE(units, i, c);
182
183 // now it must be i==unitCount
184 i = 0;
185
186 // for Unicode, unitCount can only be 1, 2, 3, or 4
187 // 1 is handled above
188 while(i < length) {
189 int32_t unitIdx = 0;
190 while(unitIdx < unitCount) {
191 array[i++]=units[unitIdx++];
192 }
193 }
194 }
195 }
196 setLength(length);
197 }
198 }
199
200 UnicodeString::UnicodeString(UChar ch)
201 : fShortLength(1),
202 fFlags(kShortString)
203 {
204 fUnion.fStackBuffer[0] = ch;
205 }
206
207 UnicodeString::UnicodeString(UChar32 ch)
208 : fShortLength(0),
209 fFlags(kShortString)
210 {
211 int32_t i = 0;
212 UBool isError = FALSE;
213 U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);
214 fShortLength = (int8_t)i;
215 }
216
217 UnicodeString::UnicodeString(const UChar *text)
218 : fShortLength(0),
219 fFlags(kShortString)
220 {
221 doReplace(0, 0, text, 0, -1);
222 }
223
224 UnicodeString::UnicodeString(const UChar *text,
225 int32_t textLength)
226 : fShortLength(0),
227 fFlags(kShortString)
228 {
229 doReplace(0, 0, text, 0, textLength);
230 }
231
232 UnicodeString::UnicodeString(UBool isTerminated,
233 const UChar *text,
234 int32_t textLength)
235 : fShortLength(0),
236 fFlags(kReadonlyAlias)
237 {
238 if(text == NULL) {
239 // treat as an empty string, do not alias
240 setToEmpty();
241 } else if(textLength < -1 ||
242 (textLength == -1 && !isTerminated) ||
243 (textLength >= 0 && isTerminated && text[textLength] != 0)
244 ) {
245 setToBogus();
246 } else {
247 if(textLength == -1) {
248 // text is terminated, or else it would have failed the above test
249 textLength = u_strlen(text);
250 }
251 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
252 }
253 }
254
255 UnicodeString::UnicodeString(UChar *buff,
256 int32_t buffLength,
257 int32_t buffCapacity)
258 : fShortLength(0),
259 fFlags(kWritableAlias)
260 {
261 if(buff == NULL) {
262 // treat as an empty string, do not alias
263 setToEmpty();
264 } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
265 setToBogus();
266 } else {
267 if(buffLength == -1) {
268 // fLength = u_strlen(buff); but do not look beyond buffCapacity
269 const UChar *p = buff, *limit = buff + buffCapacity;
270 while(p != limit && *p != 0) {
271 ++p;
272 }
273 buffLength = (int32_t)(p - buff);
274 }
275 setArray(buff, buffLength, buffCapacity);
276 }
277 }
278
279 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)
280 : fShortLength(0),
281 fFlags(kShortString)
282 {
283 if(src==NULL) {
284 // treat as an empty string
285 } else {
286 if(length<0) {
287 length=(int32_t)uprv_strlen(src);
288 }
289 if(cloneArrayIfNeeded(length, length, FALSE)) {
290 u_charsToUChars(src, getArrayStart(), length);
291 setLength(length);
292 } else {
293 setToBogus();
294 }
295 }
296 }
297
298 UnicodeString::UnicodeString(const UnicodeString& that)
299 : Replaceable(),
300 fShortLength(0),
301 fFlags(kShortString)
302 {
303 copyFrom(that);
304 }
305
306 UnicodeString::UnicodeString(const UnicodeString& that,
307 int32_t srcStart)
308 : Replaceable(),
309 fShortLength(0),
310 fFlags(kShortString)
311 {
312 setTo(that, srcStart);
313 }
314
315 UnicodeString::UnicodeString(const UnicodeString& that,
316 int32_t srcStart,
317 int32_t srcLength)
318 : Replaceable(),
319 fShortLength(0),
320 fFlags(kShortString)
321 {
322 setTo(that, srcStart, srcLength);
323 }
324
325 // Replaceable base class clone() default implementation, does not clone
326 Replaceable *
327 Replaceable::clone() const {
328 return NULL;
329 }
330
331 // UnicodeString overrides clone() with a real implementation
332 Replaceable *
333 UnicodeString::clone() const {
334 return new UnicodeString(*this);
335 }
336
337 //========================================
338 // array allocation
339 //========================================
340
341 UBool
342 UnicodeString::allocate(int32_t capacity) {
343 if(capacity <= US_STACKBUF_SIZE) {
344 fFlags = kShortString;
345 } else {
346 // count bytes for the refCounter and the string capacity, and
347 // round up to a multiple of 16; then divide by 4 and allocate int32_t's
348 // to be safely aligned for the refCount
349 int32_t words = (int32_t)(((sizeof(int32_t) + capacity * U_SIZEOF_UCHAR + 15) & ~15) >> 2);
350 int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words );
351 if(array != 0) {
352 // set initial refCount and point behind the refCount
353 *array++ = 1;
354
355 // have fArray point to the first UChar
356 fUnion.fFields.fArray = (UChar *)array;
357 fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
358 fFlags = kLongString;
359 } else {
360 fShortLength = 0;
361 fUnion.fFields.fArray = 0;
362 fUnion.fFields.fCapacity = 0;
363 fFlags = kIsBogus;
364 return FALSE;
365 }
366 }
367 return TRUE;
368 }
369
370 //========================================
371 // Destructor
372 //========================================
373 UnicodeString::~UnicodeString()
374 {
375 releaseArray();
376 }
377
378
379 //========================================
380 // Assignment
381 //========================================
382
383 UnicodeString &
384 UnicodeString::operator=(const UnicodeString &src) {
385 return copyFrom(src);
386 }
387
388 UnicodeString &
389 UnicodeString::fastCopyFrom(const UnicodeString &src) {
390 return copyFrom(src, TRUE);
391 }
392
393 UnicodeString &
394 UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
395 // if assigning to ourselves, do nothing
396 if(this == 0 || this == &src) {
397 return *this;
398 }
399
400 // is the right side bogus?
401 if(&src == 0 || src.isBogus()) {
402 setToBogus();
403 return *this;
404 }
405
406 // delete the current contents
407 releaseArray();
408
409 if(src.isEmpty()) {
410 // empty string - use the stack buffer
411 setToEmpty();
412 return *this;
413 }
414
415 // we always copy the length
416 int32_t srcLength = src.length();
417 setLength(srcLength);
418
419 // fLength>0 and not an "open" src.getBuffer(minCapacity)
420 switch(src.fFlags) {
421 case kShortString:
422 // short string using the stack buffer, do the same
423 fFlags = kShortString;
424 uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, fShortLength * U_SIZEOF_UCHAR);
425 break;
426 case kLongString:
427 // src uses a refCounted string buffer, use that buffer with refCount
428 // src is const, use a cast - we don't really change it
429 ((UnicodeString &)src).addRef();
430 // copy all fields, share the reference-counted buffer
431 fUnion.fFields.fArray = src.fUnion.fFields.fArray;
432 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
433 fFlags = src.fFlags;
434 break;
435 case kReadonlyAlias:
436 if(fastCopy) {
437 // src is a readonly alias, do the same
438 // -> maintain the readonly alias as such
439 fUnion.fFields.fArray = src.fUnion.fFields.fArray;
440 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
441 fFlags = src.fFlags;
442 break;
443 }
444 // else if(!fastCopy) fall through to case kWritableAlias
445 // -> allocate a new buffer and copy the contents
446 case kWritableAlias:
447 // src is a writable alias; we make a copy of that instead
448 if(allocate(srcLength)) {
449 uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR);
450 break;
451 }
452 // if there is not enough memory, then fall through to setting to bogus
453 default:
454 // if src is bogus, set ourselves to bogus
455 // do not call setToBogus() here because fArray and fFlags are not consistent here
456 fShortLength = 0;
457 fUnion.fFields.fArray = 0;
458 fUnion.fFields.fCapacity = 0;
459 fFlags = kIsBogus;
460 break;
461 }
462
463 return *this;
464 }
465
466 //========================================
467 // Miscellaneous operations
468 //========================================
469
470 UnicodeString UnicodeString::unescape() const {
471 UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
472 const UChar *array = getBuffer();
473 int32_t len = length();
474 int32_t prev = 0;
475 for (int32_t i=0;;) {
476 if (i == len) {
477 result.append(array, prev, len - prev);
478 break;
479 }
480 if (array[i++] == 0x5C /*'\\'*/) {
481 result.append(array, prev, (i - 1) - prev);
482 UChar32 c = unescapeAt(i); // advances i
483 if (c < 0) {
484 result.remove(); // return empty string
485 break; // invalid escape sequence
486 }
487 result.append(c);
488 prev = i;
489 }
490 }
491 return result;
492 }
493
494 UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
495 return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
496 }
497
498 //========================================
499 // Read-only implementation
500 //========================================
501 int8_t
502 UnicodeString::doCompare( int32_t start,
503 int32_t length,
504 const UChar *srcChars,
505 int32_t srcStart,
506 int32_t srcLength) const
507 {
508 // compare illegal string values
509 // treat const UChar *srcChars==NULL as an empty string
510 if(isBogus()) {
511 return -1;
512 }
513
514 // pin indices to legal values
515 pinIndices(start, length);
516
517 if(srcChars == NULL) {
518 srcStart = srcLength = 0;
519 }
520
521 // get the correct pointer
522 const UChar *chars = getArrayStart();
523
524 chars += start;
525 srcChars += srcStart;
526
527 int32_t minLength;
528 int8_t lengthResult;
529
530 // get the srcLength if necessary
531 if(srcLength < 0) {
532 srcLength = u_strlen(srcChars + srcStart);
533 }
534
535 // are we comparing different lengths?
536 if(length != srcLength) {
537 if(length < srcLength) {
538 minLength = length;
539 lengthResult = -1;
540 } else {
541 minLength = srcLength;
542 lengthResult = 1;
543 }
544 } else {
545 minLength = length;
546 lengthResult = 0;
547 }
548
549 /*
550 * note that uprv_memcmp() returns an int but we return an int8_t;
551 * we need to take care not to truncate the result -
552 * one way to do this is to right-shift the value to
553 * move the sign bit into the lower 8 bits and making sure that this
554 * does not become 0 itself
555 */
556
557 if(minLength > 0 && chars != srcChars) {
558 int32_t result;
559
560 # if U_IS_BIG_ENDIAN
561 // big-endian: byte comparison works
562 result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
563 if(result != 0) {
564 return (int8_t)(result >> 15 | 1);
565 }
566 # else
567 // little-endian: compare UChar units
568 do {
569 result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
570 if(result != 0) {
571 return (int8_t)(result >> 15 | 1);
572 }
573 } while(--minLength > 0);
574 # endif
575 }
576 return lengthResult;
577 }
578
579 /* String compare in code point order - doCompare() compares in code unit order. */
580 int8_t
581 UnicodeString::doCompareCodePointOrder(int32_t start,
582 int32_t length,
583 const UChar *srcChars,
584 int32_t srcStart,
585 int32_t srcLength) const
586 {
587 // compare illegal string values
588 // treat const UChar *srcChars==NULL as an empty string
589 if(isBogus()) {
590 return -1;
591 }
592
593 // pin indices to legal values
594 pinIndices(start, length);
595
596 if(srcChars == NULL) {
597 srcStart = srcLength = 0;
598 }
599
600 int32_t diff = uprv_strCompare(getArrayStart() + start, length, srcChars + srcStart, srcLength, FALSE, TRUE);
601 /* translate the 32-bit result into an 8-bit one */
602 if(diff!=0) {
603 return (int8_t)(diff >> 15 | 1);
604 } else {
605 return 0;
606 }
607 }
608
609 int32_t
610 UnicodeString::getLength() const {
611 return length();
612 }
613
614 UChar
615 UnicodeString::getCharAt(int32_t offset) const {
616 return charAt(offset);
617 }
618
619 UChar32
620 UnicodeString::getChar32At(int32_t offset) const {
621 return char32At(offset);
622 }
623
624 int32_t
625 UnicodeString::countChar32(int32_t start, int32_t length) const {
626 pinIndices(start, length);
627 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
628 return u_countChar32(getArrayStart()+start, length);
629 }
630
631 UBool
632 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
633 pinIndices(start, length);
634 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
635 return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
636 }
637
638 int32_t
639 UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
640 // pin index
641 int32_t len = length();
642 if(index<0) {
643 index=0;
644 } else if(index>len) {
645 index=len;
646 }
647
648 const UChar *array = getArrayStart();
649 if(delta>0) {
650 UTF_FWD_N(array, index, len, delta);
651 } else {
652 UTF_BACK_N(array, 0, index, -delta);
653 }
654
655 return index;
656 }
657
658 void
659 UnicodeString::doExtract(int32_t start,
660 int32_t length,
661 UChar *dst,
662 int32_t dstStart) const
663 {
664 // pin indices to legal values
665 pinIndices(start, length);
666
667 // do not copy anything if we alias dst itself
668 const UChar *array = getArrayStart();
669 if(array + start != dst + dstStart) {
670 us_arrayCopy(array, start, dst, dstStart, length);
671 }
672 }
673
674 int32_t
675 UnicodeString::extract(UChar *dest, int32_t destCapacity,
676 UErrorCode &errorCode) const {
677 int32_t len = length();
678 if(U_SUCCESS(errorCode)) {
679 if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
680 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
681 } else {
682 const UChar *array = getArrayStart();
683 if(len>0 && len<=destCapacity && array!=dest) {
684 uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR);
685 }
686 return u_terminateUChars(dest, destCapacity, len, &errorCode);
687 }
688 }
689
690 return len;
691 }
692
693 int32_t
694 UnicodeString::extract(int32_t start,
695 int32_t length,
696 char *target,
697 int32_t targetCapacity,
698 enum EInvariant) const
699 {
700 // if the arguments are illegal, then do nothing
701 if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {
702 return 0;
703 }
704
705 // pin the indices to legal values
706 pinIndices(start, length);
707
708 if(length <= targetCapacity) {
709 u_UCharsToChars(getArrayStart() + start, target, length);
710 }
711 UErrorCode status = U_ZERO_ERROR;
712 return u_terminateChars(target, targetCapacity, length, &status);
713 }
714
715 void
716 UnicodeString::extractBetween(int32_t start,
717 int32_t limit,
718 UnicodeString& target) const {
719 pinIndex(start);
720 pinIndex(limit);
721 doExtract(start, limit - start, target);
722 }
723
724 int32_t
725 UnicodeString::indexOf(const UChar *srcChars,
726 int32_t srcStart,
727 int32_t srcLength,
728 int32_t start,
729 int32_t length) const
730 {
731 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
732 return -1;
733 }
734
735 // UnicodeString does not find empty substrings
736 if(srcLength < 0 && srcChars[srcStart] == 0) {
737 return -1;
738 }
739
740 // get the indices within bounds
741 pinIndices(start, length);
742
743 // find the first occurrence of the substring
744 const UChar *array = getArrayStart();
745 const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
746 if(match == NULL) {
747 return -1;
748 } else {
749 return (int32_t)(match - array);
750 }
751 }
752
753 int32_t
754 UnicodeString::doIndexOf(UChar c,
755 int32_t start,
756 int32_t length) const
757 {
758 // pin indices
759 pinIndices(start, length);
760
761 // find the first occurrence of c
762 const UChar *array = getArrayStart();
763 const UChar *match = u_memchr(array + start, c, length);
764 if(match == NULL) {
765 return -1;
766 } else {
767 return (int32_t)(match - array);
768 }
769 }
770
771 int32_t
772 UnicodeString::doIndexOf(UChar32 c,
773 int32_t start,
774 int32_t length) const {
775 // pin indices
776 pinIndices(start, length);
777
778 // find the first occurrence of c
779 const UChar *array = getArrayStart();
780 const UChar *match = u_memchr32(array + start, c, length);
781 if(match == NULL) {
782 return -1;
783 } else {
784 return (int32_t)(match - array);
785 }
786 }
787
788 int32_t
789 UnicodeString::lastIndexOf(const UChar *srcChars,
790 int32_t srcStart,
791 int32_t srcLength,
792 int32_t start,
793 int32_t length) const
794 {
795 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
796 return -1;
797 }
798
799 // UnicodeString does not find empty substrings
800 if(srcLength < 0 && srcChars[srcStart] == 0) {
801 return -1;
802 }
803
804 // get the indices within bounds
805 pinIndices(start, length);
806
807 // find the last occurrence of the substring
808 const UChar *array = getArrayStart();
809 const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
810 if(match == NULL) {
811 return -1;
812 } else {
813 return (int32_t)(match - array);
814 }
815 }
816
817 int32_t
818 UnicodeString::doLastIndexOf(UChar c,
819 int32_t start,
820 int32_t length) const
821 {
822 if(isBogus()) {
823 return -1;
824 }
825
826 // pin indices
827 pinIndices(start, length);
828
829 // find the last occurrence of c
830 const UChar *array = getArrayStart();
831 const UChar *match = u_memrchr(array + start, c, length);
832 if(match == NULL) {
833 return -1;
834 } else {
835 return (int32_t)(match - array);
836 }
837 }
838
839 int32_t
840 UnicodeString::doLastIndexOf(UChar32 c,
841 int32_t start,
842 int32_t length) const {
843 // pin indices
844 pinIndices(start, length);
845
846 // find the last occurrence of c
847 const UChar *array = getArrayStart();
848 const UChar *match = u_memrchr32(array + start, c, length);
849 if(match == NULL) {
850 return -1;
851 } else {
852 return (int32_t)(match - array);
853 }
854 }
855
856 //========================================
857 // Write implementation
858 //========================================
859
860 UnicodeString&
861 UnicodeString::findAndReplace(int32_t start,
862 int32_t length,
863 const UnicodeString& oldText,
864 int32_t oldStart,
865 int32_t oldLength,
866 const UnicodeString& newText,
867 int32_t newStart,
868 int32_t newLength)
869 {
870 if(isBogus() || oldText.isBogus() || newText.isBogus()) {
871 return *this;
872 }
873
874 pinIndices(start, length);
875 oldText.pinIndices(oldStart, oldLength);
876 newText.pinIndices(newStart, newLength);
877
878 if(oldLength == 0) {
879 return *this;
880 }
881
882 while(length > 0 && length >= oldLength) {
883 int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
884 if(pos < 0) {
885 // no more oldText's here: done
886 break;
887 } else {
888 // we found oldText, replace it by newText and go beyond it
889 replace(pos, oldLength, newText, newStart, newLength);
890 length -= pos + oldLength - start;
891 start = pos + newLength;
892 }
893 }
894
895 return *this;
896 }
897
898
899 void
900 UnicodeString::setToBogus()
901 {
902 releaseArray();
903
904 fShortLength = 0;
905 fUnion.fFields.fArray = 0;
906 fUnion.fFields.fCapacity = 0;
907 fFlags = kIsBogus;
908 }
909
910 // turn a bogus string into an empty one
911 void
912 UnicodeString::unBogus() {
913 if(fFlags & kIsBogus) {
914 setToEmpty();
915 }
916 }
917
918 // setTo() analogous to the readonly-aliasing constructor with the same signature
919 UnicodeString &
920 UnicodeString::setTo(UBool isTerminated,
921 const UChar *text,
922 int32_t textLength)
923 {
924 if(fFlags & kOpenGetBuffer) {
925 // do not modify a string that has an "open" getBuffer(minCapacity)
926 return *this;
927 }
928
929 if(text == NULL) {
930 // treat as an empty string, do not alias
931 releaseArray();
932 setToEmpty();
933 return *this;
934 }
935
936 if( textLength < -1 ||
937 (textLength == -1 && !isTerminated) ||
938 (textLength >= 0 && isTerminated && text[textLength] != 0)
939 ) {
940 setToBogus();
941 return *this;
942 }
943
944 releaseArray();
945
946 if(textLength == -1) {
947 // text is terminated, or else it would have failed the above test
948 textLength = u_strlen(text);
949 }
950 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
951
952 fFlags = kReadonlyAlias;
953 return *this;
954 }
955
956 // setTo() analogous to the writable-aliasing constructor with the same signature
957 UnicodeString &
958 UnicodeString::setTo(UChar *buffer,
959 int32_t buffLength,
960 int32_t buffCapacity) {
961 if(fFlags & kOpenGetBuffer) {
962 // do not modify a string that has an "open" getBuffer(minCapacity)
963 return *this;
964 }
965
966 if(buffer == NULL) {
967 // treat as an empty string, do not alias
968 releaseArray();
969 setToEmpty();
970 return *this;
971 }
972
973 if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
974 setToBogus();
975 return *this;
976 } else if(buffLength == -1) {
977 // buffLength = u_strlen(buff); but do not look beyond buffCapacity
978 const UChar *p = buffer, *limit = buffer + buffCapacity;
979 while(p != limit && *p != 0) {
980 ++p;
981 }
982 buffLength = (int32_t)(p - buffer);
983 }
984
985 releaseArray();
986
987 setArray(buffer, buffLength, buffCapacity);
988 fFlags = kWritableAlias;
989 return *this;
990 }
991
992 UnicodeString&
993 UnicodeString::setCharAt(int32_t offset,
994 UChar c)
995 {
996 int32_t len = length();
997 if(cloneArrayIfNeeded() && len > 0) {
998 if(offset < 0) {
999 offset = 0;
1000 } else if(offset >= len) {
1001 offset = len - 1;
1002 }
1003
1004 getArrayStart()[offset] = c;
1005 }
1006 return *this;
1007 }
1008
1009 UnicodeString&
1010 UnicodeString::doReplace( int32_t start,
1011 int32_t length,
1012 const UnicodeString& src,
1013 int32_t srcStart,
1014 int32_t srcLength)
1015 {
1016 if(!src.isBogus()) {
1017 // pin the indices to legal values
1018 src.pinIndices(srcStart, srcLength);
1019
1020 // get the characters from src
1021 // and replace the range in ourselves with them
1022 return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
1023 } else {
1024 // remove the range
1025 return doReplace(start, length, 0, 0, 0);
1026 }
1027 }
1028
1029 UnicodeString&
1030 UnicodeString::doReplace(int32_t start,
1031 int32_t length,
1032 const UChar *srcChars,
1033 int32_t srcStart,
1034 int32_t srcLength)
1035 {
1036 if(!isWritable()) {
1037 return *this;
1038 }
1039
1040 if(srcChars == 0) {
1041 srcStart = srcLength = 0;
1042 } else if(srcLength < 0) {
1043 // get the srcLength if necessary
1044 srcLength = u_strlen(srcChars + srcStart);
1045 }
1046
1047 int32_t oldLength = this->length();
1048
1049 // calculate the size of the string after the replace
1050 int32_t newSize;
1051
1052 // optimize append() onto a large-enough, owned string
1053 if(start >= oldLength) {
1054 newSize = oldLength + srcLength;
1055 if(newSize <= getCapacity() && isBufferWritable()) {
1056 us_arrayCopy(srcChars, srcStart, getArrayStart(), oldLength, srcLength);
1057 setLength(newSize);
1058 return *this;
1059 } else {
1060 // pin the indices to legal values
1061 start = oldLength;
1062 length = 0;
1063 }
1064 } else {
1065 // pin the indices to legal values
1066 pinIndices(start, length);
1067
1068 newSize = oldLength - length + srcLength;
1069 }
1070
1071 // the following may change fArray but will not copy the current contents;
1072 // therefore we need to keep the current fArray
1073 UChar oldStackBuffer[US_STACKBUF_SIZE];
1074 UChar *oldArray;
1075 if((fFlags&kUsingStackBuffer) && (newSize > US_STACKBUF_SIZE)) {
1076 // copy the stack buffer contents because it will be overwritten with
1077 // fUnion.fFields values
1078 u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength);
1079 oldArray = oldStackBuffer;
1080 } else {
1081 oldArray = getArrayStart();
1082 }
1083
1084 // clone our array and allocate a bigger array if needed
1085 int32_t *bufferToDelete = 0;
1086 if(!cloneArrayIfNeeded(newSize, newSize + (newSize >> 2) + kGrowSize,
1087 FALSE, &bufferToDelete)
1088 ) {
1089 return *this;
1090 }
1091
1092 // now do the replace
1093
1094 UChar *newArray = getArrayStart();
1095 if(newArray != oldArray) {
1096 // if fArray changed, then we need to copy everything except what will change
1097 us_arrayCopy(oldArray, 0, newArray, 0, start);
1098 us_arrayCopy(oldArray, start + length,
1099 newArray, start + srcLength,
1100 oldLength - (start + length));
1101 } else if(length != srcLength) {
1102 // fArray did not change; copy only the portion that isn't changing, leaving a hole
1103 us_arrayCopy(oldArray, start + length,
1104 newArray, start + srcLength,
1105 oldLength - (start + length));
1106 }
1107
1108 // now fill in the hole with the new string
1109 us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);
1110
1111 setLength(newSize);
1112
1113 // delayed delete in case srcChars == fArray when we started, and
1114 // to keep oldArray alive for the above operations
1115 if (bufferToDelete) {
1116 uprv_free(bufferToDelete);
1117 }
1118
1119 return *this;
1120 }
1121
1122 /**
1123 * Replaceable API
1124 */
1125 void
1126 UnicodeString::handleReplaceBetween(int32_t start,
1127 int32_t limit,
1128 const UnicodeString& text) {
1129 replaceBetween(start, limit, text);
1130 }
1131
1132 /**
1133 * Replaceable API
1134 */
1135 void
1136 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
1137 if (limit <= start) {
1138 return; // Nothing to do; avoid bogus malloc call
1139 }
1140 UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
1141 // Check to make sure text is not null.
1142 if (text != NULL) {
1143 extractBetween(start, limit, text, 0);
1144 insert(dest, text, 0, limit - start);
1145 uprv_free(text);
1146 }
1147 }
1148
1149 /**
1150 * Replaceable API
1151 *
1152 * NOTE: This is for the Replaceable class. There is no rep.cpp,
1153 * so we implement this function here.
1154 */
1155 UBool Replaceable::hasMetaData() const {
1156 return TRUE;
1157 }
1158
1159 /**
1160 * Replaceable API
1161 */
1162 UBool UnicodeString::hasMetaData() const {
1163 return FALSE;
1164 }
1165
1166 UnicodeString&
1167 UnicodeString::doReverse(int32_t start,
1168 int32_t length)
1169 {
1170 if(this->length() <= 1 || !cloneArrayIfNeeded()) {
1171 return *this;
1172 }
1173
1174 // pin the indices to legal values
1175 pinIndices(start, length);
1176
1177 UChar *left = getArrayStart() + start;
1178 UChar *right = left + length;
1179 UChar swap;
1180 UBool hasSupplementary = FALSE;
1181
1182 while(left < --right) {
1183 hasSupplementary |= (UBool)UTF_IS_LEAD(swap = *left);
1184 hasSupplementary |= (UBool)UTF_IS_LEAD(*left++ = *right);
1185 *right = swap;
1186 }
1187
1188 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1189 if(hasSupplementary) {
1190 UChar swap2;
1191
1192 left = getArrayStart() + start;
1193 right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
1194 while(left < right) {
1195 if(UTF_IS_TRAIL(swap = *left) && UTF_IS_LEAD(swap2 = *(left + 1))) {
1196 *left++ = swap2;
1197 *left++ = swap;
1198 } else {
1199 ++left;
1200 }
1201 }
1202 }
1203
1204 return *this;
1205 }
1206
1207 UBool
1208 UnicodeString::padLeading(int32_t targetLength,
1209 UChar padChar)
1210 {
1211 int32_t oldLength = length();
1212 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1213 return FALSE;
1214 } else {
1215 // move contents up by padding width
1216 UChar *array = getArrayStart();
1217 int32_t start = targetLength - oldLength;
1218 us_arrayCopy(array, 0, array, start, oldLength);
1219
1220 // fill in padding character
1221 while(--start >= 0) {
1222 array[start] = padChar;
1223 }
1224 setLength(targetLength);
1225 return TRUE;
1226 }
1227 }
1228
1229 UBool
1230 UnicodeString::padTrailing(int32_t targetLength,
1231 UChar padChar)
1232 {
1233 int32_t oldLength = length();
1234 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1235 return FALSE;
1236 } else {
1237 // fill in padding character
1238 UChar *array = getArrayStart();
1239 int32_t length = targetLength;
1240 while(--length >= oldLength) {
1241 array[length] = padChar;
1242 }
1243 setLength(targetLength);
1244 return TRUE;
1245 }
1246 }
1247
1248 //========================================
1249 // Hashing
1250 //========================================
1251 int32_t
1252 UnicodeString::doHashCode() const
1253 {
1254 /* Delegate hash computation to uhash. This makes UnicodeString
1255 * hashing consistent with UChar* hashing. */
1256 int32_t hashCode = uhash_hashUCharsN(getArrayStart(), length());
1257 if (hashCode == kInvalidHashCode) {
1258 hashCode = kEmptyHashCode;
1259 }
1260 return hashCode;
1261 }
1262
1263 //========================================
1264 // External Buffer
1265 //========================================
1266
1267 UChar *
1268 UnicodeString::getBuffer(int32_t minCapacity) {
1269 if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
1270 fFlags|=kOpenGetBuffer;
1271 fShortLength=0;
1272 return getArrayStart();
1273 } else {
1274 return 0;
1275 }
1276 }
1277
1278 void
1279 UnicodeString::releaseBuffer(int32_t newLength) {
1280 if(fFlags&kOpenGetBuffer && newLength>=-1) {
1281 // set the new fLength
1282 int32_t capacity=getCapacity();
1283 if(newLength==-1) {
1284 // the new length is the string length, capped by fCapacity
1285 const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
1286 while(p<limit && *p!=0) {
1287 ++p;
1288 }
1289 newLength=(int32_t)(p-array);
1290 } else if(newLength>capacity) {
1291 newLength=capacity;
1292 }
1293 setLength(newLength);
1294 fFlags&=~kOpenGetBuffer;
1295 }
1296 }
1297
1298 //========================================
1299 // Miscellaneous
1300 //========================================
1301 UBool
1302 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
1303 int32_t growCapacity,
1304 UBool doCopyArray,
1305 int32_t **pBufferToDelete,
1306 UBool forceClone) {
1307 // default parameters need to be static, therefore
1308 // the defaults are -1 to have convenience defaults
1309 if(newCapacity == -1) {
1310 newCapacity = getCapacity();
1311 }
1312
1313 // while a getBuffer(minCapacity) is "open",
1314 // prevent any modifications of the string by returning FALSE here
1315 // if the string is bogus, then only an assignment or similar can revive it
1316 if(!isWritable()) {
1317 return FALSE;
1318 }
1319
1320 /*
1321 * We need to make a copy of the array if
1322 * the buffer is read-only, or
1323 * the buffer is refCounted (shared), and refCount>1, or
1324 * the buffer is too small.
1325 * Return FALSE if memory could not be allocated.
1326 */
1327 if(forceClone ||
1328 fFlags & kBufferIsReadonly ||
1329 fFlags & kRefCounted && refCount() > 1 ||
1330 newCapacity > getCapacity()
1331 ) {
1332 // check growCapacity for default value and use of the stack buffer
1333 if(growCapacity == -1) {
1334 growCapacity = newCapacity;
1335 } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
1336 growCapacity = US_STACKBUF_SIZE;
1337 }
1338
1339 // save old values
1340 UChar oldStackBuffer[US_STACKBUF_SIZE];
1341 UChar *oldArray;
1342 uint8_t flags = fFlags;
1343
1344 if(flags&kUsingStackBuffer) {
1345 if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
1346 // copy the stack buffer contents because it will be overwritten with
1347 // fUnion.fFields values
1348 us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength);
1349 oldArray = oldStackBuffer;
1350 } else {
1351 oldArray = 0; // no need to copy from stack buffer to itself
1352 }
1353 } else {
1354 oldArray = fUnion.fFields.fArray;
1355 }
1356
1357 // allocate a new array
1358 if(allocate(growCapacity) ||
1359 newCapacity < growCapacity && allocate(newCapacity)
1360 ) {
1361 if(doCopyArray && oldArray != 0) {
1362 // copy the contents
1363 // do not copy more than what fits - it may be smaller than before
1364 int32_t minLength = length();
1365 newCapacity = getCapacity();
1366 if(newCapacity < minLength) {
1367 minLength = newCapacity;
1368 setLength(minLength);
1369 }
1370 us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
1371 } else {
1372 fShortLength = 0;
1373 }
1374
1375 // release the old array
1376 if(flags & kRefCounted) {
1377 // the array is refCounted; decrement and release if 0
1378 int32_t *pRefCount = ((int32_t *)oldArray - 1);
1379 if(umtx_atomic_dec(pRefCount) == 0) {
1380 if(pBufferToDelete == 0) {
1381 uprv_free(pRefCount);
1382 } else {
1383 // the caller requested to delete it himself
1384 *pBufferToDelete = pRefCount;
1385 }
1386 }
1387 }
1388 } else {
1389 // not enough memory for growCapacity and not even for the smaller newCapacity
1390 // reset the old values for setToBogus() to release the array
1391 if(!(flags&kUsingStackBuffer)) {
1392 fUnion.fFields.fArray = oldArray;
1393 }
1394 fFlags = flags;
1395 setToBogus();
1396 return FALSE;
1397 }
1398 }
1399 return TRUE;
1400 }
1401 U_NAMESPACE_END
1402
1403 #ifdef U_STATIC_IMPLEMENTATION
1404 /*
1405 This should never be called. It is defined here to make sure that the
1406 virtual vector deleting destructor is defined within unistr.cpp.
1407 The vector deleting destructor is already a part of UObject,
1408 but defining it here makes sure that it is included with this object file.
1409 This makes sure that static library dependencies are kept to a minimum.
1410 */
1411 static void uprv_UnicodeStringDummy(void) {
1412 U_NAMESPACE_USE
1413 delete [] (new UnicodeString[2]);
1414 }
1415 #endif
1416