]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/unistr.cpp
ICU-8.11.1.tar.gz
[apple/icu.git] / icuSources / common / unistr.cpp
CommitLineData
b75a7d8f
A
1/*
2******************************************************************************
73c04bcf 3* Copyright (C) 1999-2005, International Business Machines Corporation and *
b75a7d8f
A
4* others. All Rights Reserved. *
5******************************************************************************
6*
7* File unistr.cpp
8*
9* Modification History:
10*
11* Date Name Description
12* 09/25/98 stephen Creation.
13* 04/20/99 stephen Overhauled per 4/16 code review.
14* 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX
15* 11/18/99 aliu Added handleReplaceBetween() to make inherit from
16* Replaceable.
17* 06/25/01 grhoten Removed the dependency on iostream
18******************************************************************************
19*/
20
21#include "unicode/utypes.h"
22#include "unicode/putil.h"
b75a7d8f
A
23#include "cstring.h"
24#include "cmemory.h"
25#include "unicode/ustring.h"
26#include "unicode/unistr.h"
b75a7d8f
A
27#include "uhash.h"
28#include "ustr_imp.h"
b75a7d8f
A
29#include "umutex.h"
30
31#if 0
32
33#if U_IOSTREAM_SOURCE >= 199711
34#include <iostream>
35using namespace std;
36#elif U_IOSTREAM_SOURCE >= 198506
37#include <iostream.h>
38#endif
39
40//DEBUGGING
41void
42print(const UnicodeString& s,
43 const char *name)
44{
45 UChar c;
46 cout << name << ":|";
47 for(int i = 0; i < s.length(); ++i) {
48 c = s[i];
49 if(c>= 0x007E || c < 0x0020)
50 cout << "[0x" << hex << s[i] << "]";
51 else
52 cout << (char) s[i];
53 }
54 cout << '|' << endl;
55}
56
57void
58print(const UChar *s,
59 int32_t len,
60 const char *name)
61{
62 UChar c;
63 cout << name << ":|";
64 for(int i = 0; i < len; ++i) {
65 c = s[i];
66 if(c>= 0x007E || c < 0x0020)
67 cout << "[0x" << hex << s[i] << "]";
68 else
69 cout << (char) s[i];
70 }
71 cout << '|' << endl;
72}
73// END DEBUGGING
74#endif
75
76// Local function definitions for now
77
78// need to copy areas that may overlap
79static
80inline void
81us_arrayCopy(const UChar *src, int32_t srcStart,
82 UChar *dst, int32_t dstStart, int32_t count)
83{
84 if(count>0) {
85 uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src)));
86 }
87}
88
89// u_unescapeAt() callback to get a UChar from a UnicodeString
90U_CDECL_BEGIN
91static UChar U_CALLCONV
92UnicodeString_charAt(int32_t offset, void *context) {
93 return ((UnicodeString*) context)->charAt(offset);
94}
95U_CDECL_END
96
97U_NAMESPACE_BEGIN
98
374ca955
A
99/* The Replaceable virtual destructor can't be defined in the header
100 due to how AIX works with multiple definitions of virtual functions.
101*/
102Replaceable::~Replaceable() {}
103Replaceable::Replaceable() {}
104UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
105
106UnicodeString U_EXPORT2
107operator+ (const UnicodeString &s1, const UnicodeString &s2) {
108 return
109 UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
110 append(s1).
111 append(s2);
112}
b75a7d8f
A
113
114//========================================
115// Reference Counting functions, put at top of file so that optimizing compilers
116// have a chance to automatically inline.
117//========================================
118
119void
120UnicodeString::addRef()
121{ umtx_atomic_inc((int32_t *)fArray - 1);}
122
123int32_t
124UnicodeString::removeRef()
125{ return umtx_atomic_dec((int32_t *)fArray - 1);}
126
127int32_t
128UnicodeString::refCount() const
129{
130 umtx_lock(NULL);
131 // Note: without the lock to force a memory barrier, we might see a very
132 // stale value on some multi-processor systems.
133 int32_t count = *((int32_t *)fArray - 1);
134 umtx_unlock(NULL);
135 return count;
136 }
137
138void
139UnicodeString::releaseArray() {
140 if((fFlags & kRefCounted) && removeRef() == 0) {
141 uprv_free((int32_t *)fArray - 1);
142 }
143}
144
145
146
147//========================================
148// Constructors
149//========================================
150UnicodeString::UnicodeString()
151 : fLength(0),
152 fCapacity(US_STACKBUF_SIZE),
153 fArray(fStackBuffer),
154 fFlags(kShortString)
155{}
156
157UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
158 : fLength(0),
159 fCapacity(US_STACKBUF_SIZE),
160 fArray(0),
161 fFlags(0)
162{
163 if(count <= 0 || (uint32_t)c > 0x10ffff) {
164 // just allocate and do not do anything else
165 allocate(capacity);
166 } else {
167 // count > 0, allocate and fill the new string with count c's
168 int32_t unitCount = UTF_CHAR_LENGTH(c), length = count * unitCount;
169 if(capacity < length) {
170 capacity = length;
171 }
172 if(allocate(capacity)) {
173 int32_t i = 0;
174
175 // fill the new string with c
176 if(unitCount == 1) {
177 // fill with length UChars
178 while(i < length) {
179 fArray[i++] = (UChar)c;
180 }
181 } else {
182 // get the code units for c
183 UChar units[UTF_MAX_CHAR_LENGTH];
184 UTF_APPEND_CHAR_UNSAFE(units, i, c);
185
186 // now it must be i==unitCount
187 i = 0;
188
189 // for Unicode, unitCount can only be 1, 2, 3, or 4
190 // 1 is handled above
191 while(i < length) {
192 int32_t unitIdx = 0;
193 while(unitIdx < unitCount) {
194 fArray[i++]=units[unitIdx++];
195 }
196 }
197 }
198 }
199 fLength = length;
200 }
201}
202
203UnicodeString::UnicodeString(UChar ch)
204 : fLength(1),
205 fCapacity(US_STACKBUF_SIZE),
206 fArray(fStackBuffer),
207 fFlags(kShortString)
208{
209 fStackBuffer[0] = ch;
210}
211
212UnicodeString::UnicodeString(UChar32 ch)
213 : fLength(1),
214 fCapacity(US_STACKBUF_SIZE),
215 fArray(fStackBuffer),
216 fFlags(kShortString)
217{
218 int32_t i = 0;
219 UBool isError = FALSE;
220 U16_APPEND(fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);
221 fLength = i;
222}
223
224UnicodeString::UnicodeString(const UChar *text)
225 : fLength(0),
226 fCapacity(US_STACKBUF_SIZE),
227 fArray(fStackBuffer),
228 fFlags(kShortString)
229{
230 doReplace(0, 0, text, 0, -1);
231}
232
233UnicodeString::UnicodeString(const UChar *text,
234 int32_t textLength)
235 : fLength(0),
236 fCapacity(US_STACKBUF_SIZE),
237 fArray(fStackBuffer),
238 fFlags(kShortString)
239{
240 doReplace(0, 0, text, 0, textLength);
241}
242
243UnicodeString::UnicodeString(UBool isTerminated,
244 const UChar *text,
245 int32_t textLength)
246 : fLength(textLength),
247 fCapacity(isTerminated ? textLength + 1 : textLength),
248 fArray((UChar *)text),
249 fFlags(kReadonlyAlias)
250{
251 if(text == NULL) {
252 // treat as an empty string, do not alias
253 fLength = 0;
254 fCapacity = US_STACKBUF_SIZE;
255 fArray = fStackBuffer;
256 fFlags = kShortString;
257 } else if(textLength < -1 ||
258 (textLength == -1 && !isTerminated) ||
259 (textLength >= 0 && isTerminated && text[textLength] != 0)
260 ) {
261 setToBogus();
262 } else if(textLength == -1) {
263 // text is terminated, or else it would have failed the above test
264 fLength = u_strlen(text);
265 fCapacity = fLength + 1;
266 }
267}
268
269UnicodeString::UnicodeString(UChar *buff,
270 int32_t buffLength,
271 int32_t buffCapacity)
272 : fLength(buffLength),
273 fCapacity(buffCapacity),
274 fArray(buff),
275 fFlags(kWritableAlias)
276{
277 if(buff == NULL) {
278 // treat as an empty string, do not alias
279 fLength = 0;
280 fCapacity = US_STACKBUF_SIZE;
281 fArray = fStackBuffer;
282 fFlags = kShortString;
374ca955 283 } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
b75a7d8f
A
284 setToBogus();
285 } else if(buffLength == -1) {
286 // fLength = u_strlen(buff); but do not look beyond buffCapacity
287 const UChar *p = buff, *limit = buff + buffCapacity;
288 while(p != limit && *p != 0) {
289 ++p;
290 }
291 fLength = (int32_t)(p - buff);
292 }
293}
294
374ca955 295UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)
b75a7d8f
A
296 : fLength(0),
297 fCapacity(US_STACKBUF_SIZE),
298 fArray(fStackBuffer),
299 fFlags(kShortString)
300{
374ca955
A
301 if(src==NULL) {
302 // treat as an empty string
303 } else {
304 if(length<0) {
73c04bcf 305 length=(int32_t)uprv_strlen(src);
b75a7d8f 306 }
374ca955
A
307 if(cloneArrayIfNeeded(length, length, FALSE)) {
308 u_charsToUChars(src, getArrayStart(), length);
309 fLength = length;
310 } else {
b75a7d8f
A
311 setToBogus();
312 }
313 }
314}
315
316UnicodeString::UnicodeString(const UnicodeString& that)
317 : Replaceable(),
318 fLength(0),
319 fCapacity(US_STACKBUF_SIZE),
320 fArray(fStackBuffer),
321 fFlags(kShortString)
322{
323 copyFrom(that);
324}
325
326UnicodeString::UnicodeString(const UnicodeString& that,
327 int32_t srcStart)
328 : Replaceable(),
329 fLength(0),
330 fCapacity(US_STACKBUF_SIZE),
331 fArray(fStackBuffer),
332 fFlags(kShortString)
333{
334 setTo(that, srcStart);
335}
336
337UnicodeString::UnicodeString(const UnicodeString& that,
338 int32_t srcStart,
339 int32_t srcLength)
340 : Replaceable(),
341 fLength(0),
342 fCapacity(US_STACKBUF_SIZE),
343 fArray(fStackBuffer),
344 fFlags(kShortString)
345{
346 setTo(that, srcStart, srcLength);
347}
348
349// Replaceable base class clone() default implementation, does not clone
350Replaceable *
351Replaceable::clone() const {
352 return NULL;
353}
354
355// UnicodeString overrides clone() with a real implementation
356Replaceable *
357UnicodeString::clone() const {
358 return new UnicodeString(*this);
359}
360
361//========================================
362// array allocation
363//========================================
364
365UBool
366UnicodeString::allocate(int32_t capacity) {
367 if(capacity <= US_STACKBUF_SIZE) {
368 fArray = fStackBuffer;
369 fCapacity = US_STACKBUF_SIZE;
370 fFlags = kShortString;
371 } else {
372 // count bytes for the refCounter and the string capacity, and
373 // round up to a multiple of 16; then divide by 4 and allocate int32_t's
374 // to be safely aligned for the refCount
375 int32_t words = (int32_t)(((sizeof(int32_t) + capacity * U_SIZEOF_UCHAR + 15) & ~15) >> 2);
376 int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words );
377 if(array != 0) {
378 // set initial refCount and point behind the refCount
379 *array++ = 1;
380
381 // have fArray point to the first UChar
382 fArray = (UChar *)array;
383 fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
384 fFlags = kLongString;
385 } else {
386 fLength = 0;
387 fCapacity = 0;
388 fFlags = kIsBogus;
389 return FALSE;
390 }
391 }
392 return TRUE;
393}
394
395//========================================
396// Destructor
397//========================================
398UnicodeString::~UnicodeString()
399{
400 releaseArray();
401}
402
403
404//========================================
405// Assignment
406//========================================
407
408UnicodeString &
409UnicodeString::operator=(const UnicodeString &src) {
410 return copyFrom(src);
411}
412
413UnicodeString &
414UnicodeString::fastCopyFrom(const UnicodeString &src) {
415 return copyFrom(src, TRUE);
416}
417
418UnicodeString &
419UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
420 // if assigning to ourselves, do nothing
421 if(this == 0 || this == &src) {
422 return *this;
423 }
424
425 // is the right side bogus?
426 if(&src == 0 || src.isBogus()) {
427 setToBogus();
428 return *this;
429 }
430
431 // delete the current contents
432 releaseArray();
433
434 // we always copy the length
435 fLength = src.fLength;
436 if(fLength == 0) {
437 // empty string - use the stack buffer
438 fArray = fStackBuffer;
439 fCapacity = US_STACKBUF_SIZE;
440 fFlags = kShortString;
441 return *this;
442 }
443
444 // fLength>0 and not an "open" src.getBuffer(minCapacity)
445 switch(src.fFlags) {
446 case kShortString:
447 // short string using the stack buffer, do the same
448 fArray = fStackBuffer;
449 fCapacity = US_STACKBUF_SIZE;
450 fFlags = kShortString;
451 uprv_memcpy(fStackBuffer, src.fArray, fLength * U_SIZEOF_UCHAR);
452 break;
453 case kLongString:
454 // src uses a refCounted string buffer, use that buffer with refCount
455 // src is const, use a cast - we don't really change it
456 ((UnicodeString &)src).addRef();
457 // copy all fields, share the reference-counted buffer
458 fArray = src.fArray;
459 fCapacity = src.fCapacity;
460 fFlags = src.fFlags;
461 break;
462 case kReadonlyAlias:
463 if(fastCopy) {
464 // src is a readonly alias, do the same
465 // -> maintain the readonly alias as such
466 fArray = src.fArray;
467 fCapacity = src.fCapacity;
468 fFlags = src.fFlags;
469 break;
470 }
471 // else if(!fastCopy) fall through to case kWritableAlias
472 // -> allocate a new buffer and copy the contents
473 case kWritableAlias:
474 // src is a writable alias; we make a copy of that instead
475 if(allocate(fLength)) {
476 uprv_memcpy(fArray, src.fArray, fLength * U_SIZEOF_UCHAR);
477 break;
478 }
479 // if there is not enough memory, then fall through to setting to bogus
480 default:
481 // if src is bogus, set ourselves to bogus
482 // do not call setToBogus() here because fArray and fFlags are not consistent here
483 fArray = 0;
484 fLength = 0;
485 fCapacity = 0;
486 fFlags = kIsBogus;
487 break;
488 }
489
490 return *this;
491}
492
493//========================================
494// Miscellaneous operations
495//========================================
496
497UnicodeString UnicodeString::unescape() const {
498 UnicodeString result;
499 for (int32_t i=0; i<length(); ) {
500 UChar32 c = charAt(i++);
501 if (c == 0x005C /*'\\'*/) {
502 c = unescapeAt(i); // advances i
503 if (c == (UChar32)0xFFFFFFFF) {
504 result.remove(); // return empty string
505 break; // invalid escape sequence
506 }
507 }
508 result.append(c);
509 }
510 return result;
511}
512
513UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
514 return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
515}
516
517//========================================
518// Read-only implementation
519//========================================
520int8_t
521UnicodeString::doCompare( int32_t start,
522 int32_t length,
523 const UChar *srcChars,
524 int32_t srcStart,
525 int32_t srcLength) const
526{
527 // compare illegal string values
528 // treat const UChar *srcChars==NULL as an empty string
529 if(isBogus()) {
530 return -1;
531 }
532
533 // pin indices to legal values
534 pinIndices(start, length);
535
536 if(srcChars == NULL) {
537 srcStart = srcLength = 0;
538 }
539
540 // get the correct pointer
541 const UChar *chars = getArrayStart();
542
543 chars += start;
544 srcChars += srcStart;
545
546 int32_t minLength;
547 int8_t lengthResult;
548
549 // get the srcLength if necessary
550 if(srcLength < 0) {
551 srcLength = u_strlen(srcChars + srcStart);
552 }
553
554 // are we comparing different lengths?
555 if(length != srcLength) {
556 if(length < srcLength) {
557 minLength = length;
558 lengthResult = -1;
559 } else {
560 minLength = srcLength;
561 lengthResult = 1;
562 }
563 } else {
564 minLength = length;
565 lengthResult = 0;
566 }
567
568 /*
569 * note that uprv_memcmp() returns an int but we return an int8_t;
570 * we need to take care not to truncate the result -
571 * one way to do this is to right-shift the value to
572 * move the sign bit into the lower 8 bits and making sure that this
573 * does not become 0 itself
574 */
575
576 if(minLength > 0 && chars != srcChars) {
577 int32_t result;
578
579# if U_IS_BIG_ENDIAN
580 // big-endian: byte comparison works
581 result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
582 if(result != 0) {
583 return (int8_t)(result >> 15 | 1);
584 }
585# else
586 // little-endian: compare UChar units
587 do {
588 result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
589 if(result != 0) {
590 return (int8_t)(result >> 15 | 1);
591 }
592 } while(--minLength > 0);
593# endif
594 }
595 return lengthResult;
596}
597
598/* String compare in code point order - doCompare() compares in code unit order. */
599int8_t
600UnicodeString::doCompareCodePointOrder(int32_t start,
601 int32_t length,
602 const UChar *srcChars,
603 int32_t srcStart,
604 int32_t srcLength) const
605{
606 // compare illegal string values
607 // treat const UChar *srcChars==NULL as an empty string
608 if(isBogus()) {
609 return -1;
610 }
611
612 // pin indices to legal values
613 pinIndices(start, length);
614
615 if(srcChars == NULL) {
616 srcStart = srcLength = 0;
617 }
618
619 int32_t diff = uprv_strCompare(fArray + start, length, srcChars + srcStart, srcLength, FALSE, TRUE);
620 /* translate the 32-bit result into an 8-bit one */
621 if(diff!=0) {
622 return (int8_t)(diff >> 15 | 1);
623 } else {
624 return 0;
625 }
626}
627
b75a7d8f
A
628int32_t
629UnicodeString::getLength() const {
630 return length();
631}
632
633UChar
634UnicodeString::getCharAt(int32_t offset) const {
635 return charAt(offset);
636}
637
638UChar32
639UnicodeString::getChar32At(int32_t offset) const {
640 return char32At(offset);
641}
642
643int32_t
644UnicodeString::countChar32(int32_t start, int32_t length) const {
645 pinIndices(start, length);
646 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
647 return u_countChar32(fArray+start, length);
648}
649
650UBool
651UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
652 pinIndices(start, length);
653 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
654 return u_strHasMoreChar32Than(fArray+start, length, number);
655}
656
657int32_t
658UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
659 // pin index
660 if(index<0) {
661 index=0;
662 } else if(index>fLength) {
663 index=fLength;
664 }
665
666 if(delta>0) {
667 UTF_FWD_N(fArray, index, fLength, delta);
668 } else {
669 UTF_BACK_N(fArray, 0, index, -delta);
670 }
671
672 return index;
673}
674
675void
676UnicodeString::doExtract(int32_t start,
677 int32_t length,
678 UChar *dst,
679 int32_t dstStart) const
680{
681 // pin indices to legal values
682 pinIndices(start, length);
683
684 // do not copy anything if we alias dst itself
685 if(fArray + start != dst + dstStart) {
686 us_arrayCopy(getArrayStart(), start, dst, dstStart, length);
687 }
688}
689
690int32_t
691UnicodeString::extract(UChar *dest, int32_t destCapacity,
692 UErrorCode &errorCode) const {
693 if(U_SUCCESS(errorCode)) {
694 if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
695 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
696 } else {
697 if(fLength>0 && fLength<=destCapacity && fArray!=dest) {
698 uprv_memcpy(dest, fArray, fLength*U_SIZEOF_UCHAR);
699 }
700 return u_terminateUChars(dest, destCapacity, fLength, &errorCode);
701 }
702 }
703
704 return fLength;
705}
706
374ca955
A
707int32_t
708UnicodeString::extract(int32_t start,
709 int32_t length,
710 char *target,
711 int32_t targetCapacity,
712 enum EInvariant) const
713{
714 // if the arguments are illegal, then do nothing
715 if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {
716 return 0;
717 }
718
719 // pin the indices to legal values
720 pinIndices(start, length);
721
722 if(length <= targetCapacity) {
723 u_UCharsToChars(getArrayStart() + start, target, length);
724 }
725 UErrorCode status = U_ZERO_ERROR;
726 return u_terminateChars(target, targetCapacity, length, &status);
727}
728
729void
730UnicodeString::extractBetween(int32_t start,
731 int32_t limit,
732 UnicodeString& target) const {
733 pinIndex(start);
734 pinIndex(limit);
735 doExtract(start, limit - start, target);
736}
737
b75a7d8f
A
738int32_t
739UnicodeString::indexOf(const UChar *srcChars,
740 int32_t srcStart,
741 int32_t srcLength,
742 int32_t start,
743 int32_t length) const
744{
745 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
746 return -1;
747 }
748
749 // UnicodeString does not find empty substrings
750 if(srcLength < 0 && srcChars[srcStart] == 0) {
751 return -1;
752 }
753
754 // get the indices within bounds
755 pinIndices(start, length);
756
757 // find the first occurrence of the substring
758 const UChar *match = u_strFindFirst(fArray + start, length, srcChars + srcStart, srcLength);
759 if(match == NULL) {
760 return -1;
761 } else {
73c04bcf 762 return (int32_t)(match - fArray);
b75a7d8f
A
763 }
764}
765
766int32_t
767UnicodeString::doIndexOf(UChar c,
768 int32_t start,
769 int32_t length) const
770{
771 // pin indices
772 pinIndices(start, length);
773
774 // find the first occurrence of c
775 const UChar *match = u_memchr(fArray + start, c, length);
776 if(match == NULL) {
777 return -1;
778 } else {
73c04bcf 779 return (int32_t)(match - fArray);
b75a7d8f
A
780 }
781}
782
783int32_t
784UnicodeString::doIndexOf(UChar32 c,
785 int32_t start,
786 int32_t length) const {
787 // pin indices
788 pinIndices(start, length);
789
790 // find the first occurrence of c
791 const UChar *match = u_memchr32(fArray + start, c, length);
792 if(match == NULL) {
793 return -1;
794 } else {
73c04bcf 795 return (int32_t)(match - fArray);
b75a7d8f
A
796 }
797}
798
799int32_t
800UnicodeString::lastIndexOf(const UChar *srcChars,
801 int32_t srcStart,
802 int32_t srcLength,
803 int32_t start,
804 int32_t length) const
805{
806 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
807 return -1;
808 }
809
810 // UnicodeString does not find empty substrings
811 if(srcLength < 0 && srcChars[srcStart] == 0) {
812 return -1;
813 }
814
815 // get the indices within bounds
816 pinIndices(start, length);
817
818 // find the last occurrence of the substring
819 const UChar *match = u_strFindLast(fArray + start, length, srcChars + srcStart, srcLength);
820 if(match == NULL) {
821 return -1;
822 } else {
73c04bcf 823 return (int32_t)(match - fArray);
b75a7d8f
A
824 }
825}
826
827int32_t
828UnicodeString::doLastIndexOf(UChar c,
829 int32_t start,
830 int32_t length) const
831{
832 if(isBogus()) {
833 return -1;
834 }
835
836 // pin indices
837 pinIndices(start, length);
838
839 // find the last occurrence of c
840 const UChar *match = u_memrchr(fArray + start, c, length);
841 if(match == NULL) {
842 return -1;
843 } else {
73c04bcf 844 return (int32_t)(match - fArray);
b75a7d8f
A
845 }
846}
847
848int32_t
849UnicodeString::doLastIndexOf(UChar32 c,
850 int32_t start,
851 int32_t length) const {
852 // pin indices
853 pinIndices(start, length);
854
855 // find the last occurrence of c
856 const UChar *match = u_memrchr32(fArray + start, c, length);
857 if(match == NULL) {
858 return -1;
859 } else {
73c04bcf 860 return (int32_t)(match - fArray);
b75a7d8f
A
861 }
862}
863
864//========================================
865// Write implementation
866//========================================
867
868UnicodeString&
869UnicodeString::findAndReplace(int32_t start,
870 int32_t length,
871 const UnicodeString& oldText,
872 int32_t oldStart,
873 int32_t oldLength,
874 const UnicodeString& newText,
875 int32_t newStart,
876 int32_t newLength)
877{
878 if(isBogus() || oldText.isBogus() || newText.isBogus()) {
879 return *this;
880 }
881
882 pinIndices(start, length);
883 oldText.pinIndices(oldStart, oldLength);
884 newText.pinIndices(newStart, newLength);
885
886 if(oldLength == 0) {
887 return *this;
888 }
889
890 while(length > 0 && length >= oldLength) {
891 int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
892 if(pos < 0) {
893 // no more oldText's here: done
894 break;
895 } else {
896 // we found oldText, replace it by newText and go beyond it
897 replace(pos, oldLength, newText, newStart, newLength);
898 length -= pos + oldLength - start;
899 start = pos + newLength;
900 }
901 }
902
903 return *this;
904}
905
906
907void
908UnicodeString::setToBogus()
909{
910 releaseArray();
911
912 fArray = 0;
913 fCapacity = fLength = 0;
914 fFlags = kIsBogus;
915}
916
917// turn a bogus string into an empty one
918void
919UnicodeString::unBogus() {
920 if(fFlags & kIsBogus) {
921 fArray = fStackBuffer;
922 fLength = 0;
923 fCapacity = US_STACKBUF_SIZE;
924 fFlags = kShortString;
925 }
926}
927
928// setTo() analogous to the readonly-aliasing constructor with the same signature
929UnicodeString &
930UnicodeString::setTo(UBool isTerminated,
931 const UChar *text,
932 int32_t textLength)
933{
934 if(fFlags & kOpenGetBuffer) {
935 // do not modify a string that has an "open" getBuffer(minCapacity)
936 return *this;
937 }
938
939 if(text == NULL) {
940 // treat as an empty string, do not alias
941 releaseArray();
942 fLength = 0;
943 fCapacity = US_STACKBUF_SIZE;
944 fArray = fStackBuffer;
945 fFlags = kShortString;
946 return *this;
947 }
948
949 if( textLength < -1 ||
950 (textLength == -1 && !isTerminated) ||
951 (textLength >= 0 && isTerminated && text[textLength] != 0)
952 ) {
953 setToBogus();
954 return *this;
955 }
956
957 releaseArray();
958
959 fArray = (UChar *)text;
960 if(textLength != -1) {
961 fLength = textLength;
962 fCapacity = isTerminated ? fLength + 1 : fLength;
963 } else {
964 // text is terminated, or else it would have failed the above test
965 fLength = u_strlen(text);
966 fCapacity = fLength + 1;
967 }
968
969 fFlags = kReadonlyAlias;
970 return *this;
971}
972
973// setTo() analogous to the writable-aliasing constructor with the same signature
974UnicodeString &
975UnicodeString::setTo(UChar *buffer,
976 int32_t buffLength,
977 int32_t buffCapacity) {
978 if(fFlags & kOpenGetBuffer) {
979 // do not modify a string that has an "open" getBuffer(minCapacity)
980 return *this;
981 }
982
983 if(buffer == NULL) {
984 // treat as an empty string, do not alias
985 releaseArray();
986 fLength = 0;
987 fCapacity = US_STACKBUF_SIZE;
988 fArray = fStackBuffer;
989 fFlags = kShortString;
990 return *this;
991 }
992
374ca955 993 if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
b75a7d8f
A
994 setToBogus();
995 return *this;
374ca955
A
996 } else if(buffLength == -1) {
997 // buffLength = u_strlen(buff); but do not look beyond buffCapacity
998 const UChar *p = buffer, *limit = buffer + buffCapacity;
999 while(p != limit && *p != 0) {
1000 ++p;
1001 }
1002 buffLength = (int32_t)(p - buffer);
b75a7d8f
A
1003 }
1004
1005 releaseArray();
1006
1007 fArray = buffer;
1008 fLength = buffLength;
1009 fCapacity = buffCapacity;
1010 fFlags = kWritableAlias;
1011 return *this;
1012}
1013
1014UnicodeString&
1015UnicodeString::setCharAt(int32_t offset,
1016 UChar c)
1017{
1018 if(cloneArrayIfNeeded() && fLength > 0) {
1019 if(offset < 0) {
1020 offset = 0;
1021 } else if(offset >= fLength) {
1022 offset = fLength - 1;
1023 }
1024
1025 fArray[offset] = c;
1026 }
1027 return *this;
1028}
1029
b75a7d8f
A
1030UnicodeString&
1031UnicodeString::doReplace( int32_t start,
1032 int32_t length,
1033 const UnicodeString& src,
1034 int32_t srcStart,
1035 int32_t srcLength)
1036{
1037 if(!src.isBogus()) {
1038 // pin the indices to legal values
1039 src.pinIndices(srcStart, srcLength);
1040
1041 // get the characters from src
1042 // and replace the range in ourselves with them
1043 return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
1044 } else {
1045 // remove the range
1046 return doReplace(start, length, 0, 0, 0);
1047 }
1048}
1049
1050UnicodeString&
1051UnicodeString::doReplace(int32_t start,
1052 int32_t length,
1053 const UChar *srcChars,
1054 int32_t srcStart,
1055 int32_t srcLength)
1056{
1057 if(isBogus()) {
1058 return *this;
1059 }
1060
1061 if(srcChars == 0) {
1062 srcStart = srcLength = 0;
1063 } else if(srcLength < 0) {
1064 // get the srcLength if necessary
1065 srcLength = u_strlen(srcChars + srcStart);
1066 }
1067
1068 int32_t *bufferToDelete = 0;
1069
1070 // the following may change fArray but will not copy the current contents;
1071 // therefore we need to keep the current fArray
1072 UChar *oldArray = fArray;
1073 int32_t oldLength = fLength;
1074
1075 // pin the indices to legal values
1076 pinIndices(start, length);
1077
1078 // calculate the size of the string after the replace
1079 int32_t newSize = oldLength - length + srcLength;
1080
1081 // clone our array and allocate a bigger array if needed
1082 if(!cloneArrayIfNeeded(newSize, newSize + (newSize >> 2) + kGrowSize,
1083 FALSE, &bufferToDelete)
1084 ) {
1085 return *this;
1086 }
1087
1088 // now do the replace
1089
1090 if(fArray != oldArray) {
1091 // if fArray changed, then we need to copy everything except what will change
1092 us_arrayCopy(oldArray, 0, fArray, 0, start);
1093 us_arrayCopy(oldArray, start + length,
1094 fArray, start + srcLength,
1095 oldLength - (start + length));
1096 } else if(length != srcLength) {
1097 // fArray did not change; copy only the portion that isn't changing, leaving a hole
1098 us_arrayCopy(oldArray, start + length,
1099 fArray, start + srcLength,
1100 oldLength - (start + length));
1101 }
1102
1103 // now fill in the hole with the new string
1104 us_arrayCopy(srcChars, srcStart, getArrayStart(), start, srcLength);
1105
1106 fLength = newSize;
1107
1108 // delayed delete in case srcChars == fArray when we started, and
1109 // to keep oldArray alive for the above operations
1110 if (bufferToDelete) {
1111 uprv_free(bufferToDelete);
1112 }
1113
1114 return *this;
1115}
1116
1117/**
1118 * Replaceable API
1119 */
1120void
1121UnicodeString::handleReplaceBetween(int32_t start,
1122 int32_t limit,
1123 const UnicodeString& text) {
1124 replaceBetween(start, limit, text);
1125}
1126
1127/**
1128 * Replaceable API
1129 */
1130void
1131UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
1132 if (limit <= start) {
1133 return; // Nothing to do; avoid bogus malloc call
1134 }
1135 UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
1136 extractBetween(start, limit, text, 0);
1137 insert(dest, text, 0, limit - start);
1138 uprv_free(text);
1139}
1140
1141/**
1142 * Replaceable API
1143 *
1144 * NOTE: This is for the Replaceable class. There is no rep.cpp,
1145 * so we implement this function here.
1146 */
1147UBool Replaceable::hasMetaData() const {
1148 return TRUE;
1149}
1150
1151/**
1152 * Replaceable API
1153 */
1154UBool UnicodeString::hasMetaData() const {
1155 return FALSE;
1156}
1157
1158UnicodeString&
1159UnicodeString::doReverse(int32_t start,
1160 int32_t length)
1161{
1162 if(fLength <= 1 || !cloneArrayIfNeeded()) {
1163 return *this;
1164 }
1165
1166 // pin the indices to legal values
1167 pinIndices(start, length);
1168
1169 UChar *left = getArrayStart() + start;
1170 UChar *right = getArrayStart() + start + length;
1171 UChar swap;
1172 UBool hasSupplementary = FALSE;
1173
1174 while(left < --right) {
1175 hasSupplementary |= (UBool)UTF_IS_LEAD(swap = *left);
1176 hasSupplementary |= (UBool)UTF_IS_LEAD(*left++ = *right);
1177 *right = swap;
1178 }
1179
1180 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1181 if(hasSupplementary) {
1182 UChar swap2;
1183
1184 left = getArrayStart() + start;
1185 right = getArrayStart() + start + length - 1; // -1 so that we can look at *(left+1) if left<right
1186 while(left < right) {
1187 if(UTF_IS_TRAIL(swap = *left) && UTF_IS_LEAD(swap2 = *(left + 1))) {
1188 *left++ = swap2;
1189 *left++ = swap;
1190 } else {
1191 ++left;
1192 }
1193 }
1194 }
1195
1196 return *this;
1197}
1198
1199UBool
1200UnicodeString::padLeading(int32_t targetLength,
1201 UChar padChar)
1202{
1203 if(fLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1204 return FALSE;
1205 } else {
1206 // move contents up by padding width
1207 int32_t start = targetLength - fLength;
1208 us_arrayCopy(fArray, 0, fArray, start, fLength);
1209
1210 // fill in padding character
1211 while(--start >= 0) {
1212 fArray[start] = padChar;
1213 }
1214 fLength = targetLength;
1215 return TRUE;
1216 }
1217}
1218
1219UBool
1220UnicodeString::padTrailing(int32_t targetLength,
1221 UChar padChar)
1222{
1223 if(fLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1224 return FALSE;
1225 } else {
1226 // fill in padding character
1227 int32_t length = targetLength;
1228 while(--length >= fLength) {
1229 fArray[length] = padChar;
1230 }
1231 fLength = targetLength;
1232 return TRUE;
1233 }
1234}
1235
b75a7d8f
A
1236//========================================
1237// Hashing
1238//========================================
1239int32_t
1240UnicodeString::doHashCode() const
1241{
1242 /* Delegate hash computation to uhash. This makes UnicodeString
1243 * hashing consistent with UChar* hashing. */
1244 int32_t hashCode = uhash_hashUCharsN(getArrayStart(), fLength);
1245 if (hashCode == kInvalidHashCode) {
1246 hashCode = kEmptyHashCode;
1247 }
1248 return hashCode;
1249}
1250
b75a7d8f
A
1251//========================================
1252// External Buffer
1253//========================================
1254
1255UChar *
1256UnicodeString::getBuffer(int32_t minCapacity) {
1257 if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
1258 fFlags|=kOpenGetBuffer;
1259 fLength=0;
1260 return fArray;
1261 } else {
1262 return 0;
1263 }
1264}
1265
1266void
1267UnicodeString::releaseBuffer(int32_t newLength) {
1268 if(fFlags&kOpenGetBuffer && newLength>=-1) {
1269 // set the new fLength
1270 if(newLength==-1) {
1271 // the new length is the string length, capped by fCapacity
1272 const UChar *p=fArray, *limit=fArray+fCapacity;
1273 while(p<limit && *p!=0) {
1274 ++p;
1275 }
1276 fLength=(int32_t)(p-fArray);
1277 } else if(newLength<=fCapacity) {
1278 fLength=newLength;
1279 } else {
1280 fLength=fCapacity;
1281 }
1282 fFlags&=~kOpenGetBuffer;
1283 }
1284}
1285
1286//========================================
1287// Miscellaneous
1288//========================================
1289UBool
1290UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
1291 int32_t growCapacity,
1292 UBool doCopyArray,
1293 int32_t **pBufferToDelete,
1294 UBool forceClone) {
1295 // default parameters need to be static, therefore
1296 // the defaults are -1 to have convenience defaults
1297 if(newCapacity == -1) {
1298 newCapacity = fCapacity;
1299 }
1300
1301 // while a getBuffer(minCapacity) is "open",
1302 // prevent any modifications of the string by returning FALSE here
1303 // if the string is bogus, then only an assignment or similar can revive it
1304 if((fFlags&(kOpenGetBuffer|kIsBogus))!=0) {
1305 return FALSE;
1306 }
1307
1308 /*
1309 * We need to make a copy of the array if
1310 * the buffer is read-only, or
1311 * the buffer is refCounted (shared), and refCount>1, or
1312 * the buffer is too small.
1313 * Return FALSE if memory could not be allocated.
1314 */
1315 if(forceClone ||
1316 fFlags & kBufferIsReadonly ||
1317 fFlags & kRefCounted && refCount() > 1 ||
1318 newCapacity > fCapacity
1319 ) {
1320 // save old values
1321 UChar *array = fArray;
1322 uint16_t flags = fFlags;
1323
1324 // check growCapacity for default value and use of the stack buffer
1325 if(growCapacity == -1) {
1326 growCapacity = newCapacity;
1327 } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
1328 growCapacity = US_STACKBUF_SIZE;
1329 }
1330
1331 // allocate a new array
1332 if(allocate(growCapacity) ||
1333 newCapacity < growCapacity && allocate(newCapacity)
1334 ) {
1335 if(doCopyArray) {
1336 // copy the contents
1337 // do not copy more than what fits - it may be smaller than before
1338 if(fCapacity < fLength) {
1339 fLength = fCapacity;
1340 }
1341 us_arrayCopy(array, 0, fArray, 0, fLength);
1342 } else {
1343 fLength = 0;
1344 }
1345
1346 // release the old array
1347 if(flags & kRefCounted) {
1348 // the array is refCounted; decrement and release if 0
1349 int32_t *pRefCount = ((int32_t *)array - 1);
1350 if(umtx_atomic_dec(pRefCount) == 0) {
1351 if(pBufferToDelete == 0) {
1352 uprv_free(pRefCount);
1353 } else {
1354 // the caller requested to delete it himself
1355 *pBufferToDelete = pRefCount;
1356 }
1357 }
1358 }
1359 } else {
1360 // not enough memory for growCapacity and not even for the smaller newCapacity
1361 // reset the old values for setToBogus() to release the array
1362 fArray = array;
1363 fFlags = flags;
1364 setToBogus();
1365 return FALSE;
1366 }
1367 }
1368 return TRUE;
1369}
1370U_NAMESPACE_END
73c04bcf
A
1371
1372#ifdef U_STATIC_IMPLEMENTATION
1373/*
1374This should never be called. It is defined here to make sure that the
1375virtual vector deleting destructor is defined within unistr.cpp.
1376The vector deleting destructor is already a part of UObject,
1377but defining it here makes sure that it is included with this object file.
1378This makes sure that static library dependencies are kept to a minimum.
1379*/
1380static void uprv_UnicodeStringDummy(void) {
1381 U_NAMESPACE_USE
1382 delete [] (new UnicodeString[2]);
1383}
1384#endif
1385