]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/unistr.cpp
ICU-3.13.tar.gz
[apple/icu.git] / icuSources / common / unistr.cpp
CommitLineData
b75a7d8f
A
1/*
2******************************************************************************
3* Copyright (C) 1999-2003, International Business Machines Corporation and *
4* others. All Rights Reserved. *
5******************************************************************************
6*
7* File unistr.cpp
8*
9* Modification History:
10*
11* Date Name Description
12* 09/25/98 stephen Creation.
13* 04/20/99 stephen Overhauled per 4/16 code review.
14* 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX
15* 11/18/99 aliu Added handleReplaceBetween() to make inherit from
16* Replaceable.
17* 06/25/01 grhoten Removed the dependency on iostream
18******************************************************************************
19*/
20
21#include "unicode/utypes.h"
22#include "unicode/putil.h"
23#include "unicode/locid.h"
24#include "cstring.h"
25#include "cmemory.h"
26#include "unicode/ustring.h"
27#include "unicode/unistr.h"
28#include "unicode/uchar.h"
29#include "unicode/ucnv.h"
30#include "unicode/ubrk.h"
31#include "uhash.h"
32#include "ustr_imp.h"
33#include "unormimp.h"
34#include "umutex.h"
35
36#if 0
37
38#if U_IOSTREAM_SOURCE >= 199711
39#include <iostream>
40using namespace std;
41#elif U_IOSTREAM_SOURCE >= 198506
42#include <iostream.h>
43#endif
44
45//DEBUGGING
46void
47print(const UnicodeString& s,
48 const char *name)
49{
50 UChar c;
51 cout << name << ":|";
52 for(int i = 0; i < s.length(); ++i) {
53 c = s[i];
54 if(c>= 0x007E || c < 0x0020)
55 cout << "[0x" << hex << s[i] << "]";
56 else
57 cout << (char) s[i];
58 }
59 cout << '|' << endl;
60}
61
62void
63print(const UChar *s,
64 int32_t len,
65 const char *name)
66{
67 UChar c;
68 cout << name << ":|";
69 for(int i = 0; i < len; ++i) {
70 c = s[i];
71 if(c>= 0x007E || c < 0x0020)
72 cout << "[0x" << hex << s[i] << "]";
73 else
74 cout << (char) s[i];
75 }
76 cout << '|' << endl;
77}
78// END DEBUGGING
79#endif
80
81// Local function definitions for now
82
83// need to copy areas that may overlap
84static
85inline void
86us_arrayCopy(const UChar *src, int32_t srcStart,
87 UChar *dst, int32_t dstStart, int32_t count)
88{
89 if(count>0) {
90 uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src)));
91 }
92}
93
94// u_unescapeAt() callback to get a UChar from a UnicodeString
95U_CDECL_BEGIN
96static UChar U_CALLCONV
97UnicodeString_charAt(int32_t offset, void *context) {
98 return ((UnicodeString*) context)->charAt(offset);
99}
100U_CDECL_END
101
102U_NAMESPACE_BEGIN
103
104const char UnicodeString::fgClassID=0;
105
106//========================================
107// Reference Counting functions, put at top of file so that optimizing compilers
108// have a chance to automatically inline.
109//========================================
110
111void
112UnicodeString::addRef()
113{ umtx_atomic_inc((int32_t *)fArray - 1);}
114
115int32_t
116UnicodeString::removeRef()
117{ return umtx_atomic_dec((int32_t *)fArray - 1);}
118
119int32_t
120UnicodeString::refCount() const
121{
122 umtx_lock(NULL);
123 // Note: without the lock to force a memory barrier, we might see a very
124 // stale value on some multi-processor systems.
125 int32_t count = *((int32_t *)fArray - 1);
126 umtx_unlock(NULL);
127 return count;
128 }
129
130void
131UnicodeString::releaseArray() {
132 if((fFlags & kRefCounted) && removeRef() == 0) {
133 uprv_free((int32_t *)fArray - 1);
134 }
135}
136
137
138
139//========================================
140// Constructors
141//========================================
142UnicodeString::UnicodeString()
143 : fLength(0),
144 fCapacity(US_STACKBUF_SIZE),
145 fArray(fStackBuffer),
146 fFlags(kShortString)
147{}
148
149UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
150 : fLength(0),
151 fCapacity(US_STACKBUF_SIZE),
152 fArray(0),
153 fFlags(0)
154{
155 if(count <= 0 || (uint32_t)c > 0x10ffff) {
156 // just allocate and do not do anything else
157 allocate(capacity);
158 } else {
159 // count > 0, allocate and fill the new string with count c's
160 int32_t unitCount = UTF_CHAR_LENGTH(c), length = count * unitCount;
161 if(capacity < length) {
162 capacity = length;
163 }
164 if(allocate(capacity)) {
165 int32_t i = 0;
166
167 // fill the new string with c
168 if(unitCount == 1) {
169 // fill with length UChars
170 while(i < length) {
171 fArray[i++] = (UChar)c;
172 }
173 } else {
174 // get the code units for c
175 UChar units[UTF_MAX_CHAR_LENGTH];
176 UTF_APPEND_CHAR_UNSAFE(units, i, c);
177
178 // now it must be i==unitCount
179 i = 0;
180
181 // for Unicode, unitCount can only be 1, 2, 3, or 4
182 // 1 is handled above
183 while(i < length) {
184 int32_t unitIdx = 0;
185 while(unitIdx < unitCount) {
186 fArray[i++]=units[unitIdx++];
187 }
188 }
189 }
190 }
191 fLength = length;
192 }
193}
194
195UnicodeString::UnicodeString(UChar ch)
196 : fLength(1),
197 fCapacity(US_STACKBUF_SIZE),
198 fArray(fStackBuffer),
199 fFlags(kShortString)
200{
201 fStackBuffer[0] = ch;
202}
203
204UnicodeString::UnicodeString(UChar32 ch)
205 : fLength(1),
206 fCapacity(US_STACKBUF_SIZE),
207 fArray(fStackBuffer),
208 fFlags(kShortString)
209{
210 int32_t i = 0;
211 UBool isError = FALSE;
212 U16_APPEND(fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);
213 fLength = i;
214}
215
216UnicodeString::UnicodeString(const UChar *text)
217 : fLength(0),
218 fCapacity(US_STACKBUF_SIZE),
219 fArray(fStackBuffer),
220 fFlags(kShortString)
221{
222 doReplace(0, 0, text, 0, -1);
223}
224
225UnicodeString::UnicodeString(const UChar *text,
226 int32_t textLength)
227 : fLength(0),
228 fCapacity(US_STACKBUF_SIZE),
229 fArray(fStackBuffer),
230 fFlags(kShortString)
231{
232 doReplace(0, 0, text, 0, textLength);
233}
234
235UnicodeString::UnicodeString(UBool isTerminated,
236 const UChar *text,
237 int32_t textLength)
238 : fLength(textLength),
239 fCapacity(isTerminated ? textLength + 1 : textLength),
240 fArray((UChar *)text),
241 fFlags(kReadonlyAlias)
242{
243 if(text == NULL) {
244 // treat as an empty string, do not alias
245 fLength = 0;
246 fCapacity = US_STACKBUF_SIZE;
247 fArray = fStackBuffer;
248 fFlags = kShortString;
249 } else if(textLength < -1 ||
250 (textLength == -1 && !isTerminated) ||
251 (textLength >= 0 && isTerminated && text[textLength] != 0)
252 ) {
253 setToBogus();
254 } else if(textLength == -1) {
255 // text is terminated, or else it would have failed the above test
256 fLength = u_strlen(text);
257 fCapacity = fLength + 1;
258 }
259}
260
261UnicodeString::UnicodeString(UChar *buff,
262 int32_t buffLength,
263 int32_t buffCapacity)
264 : fLength(buffLength),
265 fCapacity(buffCapacity),
266 fArray(buff),
267 fFlags(kWritableAlias)
268{
269 if(buff == NULL) {
270 // treat as an empty string, do not alias
271 fLength = 0;
272 fCapacity = US_STACKBUF_SIZE;
273 fArray = fStackBuffer;
274 fFlags = kShortString;
275 } else if(buffLength < -1 || buffLength > buffCapacity) {
276 setToBogus();
277 } else if(buffLength == -1) {
278 // fLength = u_strlen(buff); but do not look beyond buffCapacity
279 const UChar *p = buff, *limit = buff + buffCapacity;
280 while(p != limit && *p != 0) {
281 ++p;
282 }
283 fLength = (int32_t)(p - buff);
284 }
285}
286
287UnicodeString::UnicodeString(const char *codepageData,
288 const char *codepage)
289 : fLength(0),
290 fCapacity(US_STACKBUF_SIZE),
291 fArray(fStackBuffer),
292 fFlags(kShortString)
293{
294 if(codepageData != 0) {
295 doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), codepage);
296 }
297}
298
299
300UnicodeString::UnicodeString(const char *codepageData,
301 int32_t dataLength,
302 const char *codepage)
303 : fLength(0),
304 fCapacity(US_STACKBUF_SIZE),
305 fArray(fStackBuffer),
306 fFlags(kShortString)
307{
308 if(codepageData != 0) {
309 doCodepageCreate(codepageData, dataLength, codepage);
310 }
311}
312
313UnicodeString::UnicodeString(const char *src, int32_t srcLength,
314 UConverter *cnv,
315 UErrorCode &errorCode)
316 : fLength(0),
317 fCapacity(US_STACKBUF_SIZE),
318 fArray(fStackBuffer),
319 fFlags(kShortString)
320{
321 if(U_SUCCESS(errorCode)) {
322 // check arguments
323 if(src==NULL) {
324 // treat as an empty string, do nothing more
325 } else if(srcLength<-1) {
326 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
327 } else {
328 // get input length
329 if(srcLength==-1) {
330 srcLength=(int32_t)uprv_strlen(src);
331 }
332 if(srcLength>0) {
333 if(cnv!=0) {
334 // use the provided converter
335 ucnv_resetToUnicode(cnv);
336 doCodepageCreate(src, srcLength, cnv, errorCode);
337 } else {
338 // use the default converter
339 cnv=u_getDefaultConverter(&errorCode);
340 doCodepageCreate(src, srcLength, cnv, errorCode);
341 u_releaseDefaultConverter(cnv);
342 }
343 }
344 }
345
346 if(U_FAILURE(errorCode)) {
347 setToBogus();
348 }
349 }
350}
351
352UnicodeString::UnicodeString(const UnicodeString& that)
353 : Replaceable(),
354 fLength(0),
355 fCapacity(US_STACKBUF_SIZE),
356 fArray(fStackBuffer),
357 fFlags(kShortString)
358{
359 copyFrom(that);
360}
361
362UnicodeString::UnicodeString(const UnicodeString& that,
363 int32_t srcStart)
364 : Replaceable(),
365 fLength(0),
366 fCapacity(US_STACKBUF_SIZE),
367 fArray(fStackBuffer),
368 fFlags(kShortString)
369{
370 setTo(that, srcStart);
371}
372
373UnicodeString::UnicodeString(const UnicodeString& that,
374 int32_t srcStart,
375 int32_t srcLength)
376 : Replaceable(),
377 fLength(0),
378 fCapacity(US_STACKBUF_SIZE),
379 fArray(fStackBuffer),
380 fFlags(kShortString)
381{
382 setTo(that, srcStart, srcLength);
383}
384
385// Replaceable base class clone() default implementation, does not clone
386Replaceable *
387Replaceable::clone() const {
388 return NULL;
389}
390
391// UnicodeString overrides clone() with a real implementation
392Replaceable *
393UnicodeString::clone() const {
394 return new UnicodeString(*this);
395}
396
397//========================================
398// array allocation
399//========================================
400
401UBool
402UnicodeString::allocate(int32_t capacity) {
403 if(capacity <= US_STACKBUF_SIZE) {
404 fArray = fStackBuffer;
405 fCapacity = US_STACKBUF_SIZE;
406 fFlags = kShortString;
407 } else {
408 // count bytes for the refCounter and the string capacity, and
409 // round up to a multiple of 16; then divide by 4 and allocate int32_t's
410 // to be safely aligned for the refCount
411 int32_t words = (int32_t)(((sizeof(int32_t) + capacity * U_SIZEOF_UCHAR + 15) & ~15) >> 2);
412 int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words );
413 if(array != 0) {
414 // set initial refCount and point behind the refCount
415 *array++ = 1;
416
417 // have fArray point to the first UChar
418 fArray = (UChar *)array;
419 fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
420 fFlags = kLongString;
421 } else {
422 fLength = 0;
423 fCapacity = 0;
424 fFlags = kIsBogus;
425 return FALSE;
426 }
427 }
428 return TRUE;
429}
430
431//========================================
432// Destructor
433//========================================
434UnicodeString::~UnicodeString()
435{
436 releaseArray();
437}
438
439
440//========================================
441// Assignment
442//========================================
443
444UnicodeString &
445UnicodeString::operator=(const UnicodeString &src) {
446 return copyFrom(src);
447}
448
449UnicodeString &
450UnicodeString::fastCopyFrom(const UnicodeString &src) {
451 return copyFrom(src, TRUE);
452}
453
454UnicodeString &
455UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
456 // if assigning to ourselves, do nothing
457 if(this == 0 || this == &src) {
458 return *this;
459 }
460
461 // is the right side bogus?
462 if(&src == 0 || src.isBogus()) {
463 setToBogus();
464 return *this;
465 }
466
467 // delete the current contents
468 releaseArray();
469
470 // we always copy the length
471 fLength = src.fLength;
472 if(fLength == 0) {
473 // empty string - use the stack buffer
474 fArray = fStackBuffer;
475 fCapacity = US_STACKBUF_SIZE;
476 fFlags = kShortString;
477 return *this;
478 }
479
480 // fLength>0 and not an "open" src.getBuffer(minCapacity)
481 switch(src.fFlags) {
482 case kShortString:
483 // short string using the stack buffer, do the same
484 fArray = fStackBuffer;
485 fCapacity = US_STACKBUF_SIZE;
486 fFlags = kShortString;
487 uprv_memcpy(fStackBuffer, src.fArray, fLength * U_SIZEOF_UCHAR);
488 break;
489 case kLongString:
490 // src uses a refCounted string buffer, use that buffer with refCount
491 // src is const, use a cast - we don't really change it
492 ((UnicodeString &)src).addRef();
493 // copy all fields, share the reference-counted buffer
494 fArray = src.fArray;
495 fCapacity = src.fCapacity;
496 fFlags = src.fFlags;
497 break;
498 case kReadonlyAlias:
499 if(fastCopy) {
500 // src is a readonly alias, do the same
501 // -> maintain the readonly alias as such
502 fArray = src.fArray;
503 fCapacity = src.fCapacity;
504 fFlags = src.fFlags;
505 break;
506 }
507 // else if(!fastCopy) fall through to case kWritableAlias
508 // -> allocate a new buffer and copy the contents
509 case kWritableAlias:
510 // src is a writable alias; we make a copy of that instead
511 if(allocate(fLength)) {
512 uprv_memcpy(fArray, src.fArray, fLength * U_SIZEOF_UCHAR);
513 break;
514 }
515 // if there is not enough memory, then fall through to setting to bogus
516 default:
517 // if src is bogus, set ourselves to bogus
518 // do not call setToBogus() here because fArray and fFlags are not consistent here
519 fArray = 0;
520 fLength = 0;
521 fCapacity = 0;
522 fFlags = kIsBogus;
523 break;
524 }
525
526 return *this;
527}
528
529//========================================
530// Miscellaneous operations
531//========================================
532
533UnicodeString UnicodeString::unescape() const {
534 UnicodeString result;
535 for (int32_t i=0; i<length(); ) {
536 UChar32 c = charAt(i++);
537 if (c == 0x005C /*'\\'*/) {
538 c = unescapeAt(i); // advances i
539 if (c == (UChar32)0xFFFFFFFF) {
540 result.remove(); // return empty string
541 break; // invalid escape sequence
542 }
543 }
544 result.append(c);
545 }
546 return result;
547}
548
549UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
550 return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
551}
552
553//========================================
554// Read-only implementation
555//========================================
556int8_t
557UnicodeString::doCompare( int32_t start,
558 int32_t length,
559 const UChar *srcChars,
560 int32_t srcStart,
561 int32_t srcLength) const
562{
563 // compare illegal string values
564 // treat const UChar *srcChars==NULL as an empty string
565 if(isBogus()) {
566 return -1;
567 }
568
569 // pin indices to legal values
570 pinIndices(start, length);
571
572 if(srcChars == NULL) {
573 srcStart = srcLength = 0;
574 }
575
576 // get the correct pointer
577 const UChar *chars = getArrayStart();
578
579 chars += start;
580 srcChars += srcStart;
581
582 int32_t minLength;
583 int8_t lengthResult;
584
585 // get the srcLength if necessary
586 if(srcLength < 0) {
587 srcLength = u_strlen(srcChars + srcStart);
588 }
589
590 // are we comparing different lengths?
591 if(length != srcLength) {
592 if(length < srcLength) {
593 minLength = length;
594 lengthResult = -1;
595 } else {
596 minLength = srcLength;
597 lengthResult = 1;
598 }
599 } else {
600 minLength = length;
601 lengthResult = 0;
602 }
603
604 /*
605 * note that uprv_memcmp() returns an int but we return an int8_t;
606 * we need to take care not to truncate the result -
607 * one way to do this is to right-shift the value to
608 * move the sign bit into the lower 8 bits and making sure that this
609 * does not become 0 itself
610 */
611
612 if(minLength > 0 && chars != srcChars) {
613 int32_t result;
614
615# if U_IS_BIG_ENDIAN
616 // big-endian: byte comparison works
617 result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
618 if(result != 0) {
619 return (int8_t)(result >> 15 | 1);
620 }
621# else
622 // little-endian: compare UChar units
623 do {
624 result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
625 if(result != 0) {
626 return (int8_t)(result >> 15 | 1);
627 }
628 } while(--minLength > 0);
629# endif
630 }
631 return lengthResult;
632}
633
634/* String compare in code point order - doCompare() compares in code unit order. */
635int8_t
636UnicodeString::doCompareCodePointOrder(int32_t start,
637 int32_t length,
638 const UChar *srcChars,
639 int32_t srcStart,
640 int32_t srcLength) const
641{
642 // compare illegal string values
643 // treat const UChar *srcChars==NULL as an empty string
644 if(isBogus()) {
645 return -1;
646 }
647
648 // pin indices to legal values
649 pinIndices(start, length);
650
651 if(srcChars == NULL) {
652 srcStart = srcLength = 0;
653 }
654
655 int32_t diff = uprv_strCompare(fArray + start, length, srcChars + srcStart, srcLength, FALSE, TRUE);
656 /* translate the 32-bit result into an 8-bit one */
657 if(diff!=0) {
658 return (int8_t)(diff >> 15 | 1);
659 } else {
660 return 0;
661 }
662}
663
664int8_t
665UnicodeString::doCaseCompare(int32_t start,
666 int32_t length,
667 const UChar *srcChars,
668 int32_t srcStart,
669 int32_t srcLength,
670 uint32_t options) const
671{
672 // compare illegal string values
673 // treat const UChar *srcChars==NULL as an empty string
674 if(isBogus()) {
675 return -1;
676 }
677
678 // pin indices to legal values
679 pinIndices(start, length);
680
681 if(srcChars == NULL) {
682 srcStart = srcLength = 0;
683 }
684
685 // get the correct pointer
686 const UChar *chars = getArrayStart();
687
688 chars += start;
689 srcChars += srcStart;
690
691 if(chars != srcChars) {
692 UErrorCode errorCode=U_ZERO_ERROR;
693 int32_t result=unorm_cmpEquivFold(chars, length, srcChars, srcLength,
694 options|U_COMPARE_IGNORE_CASE, &errorCode);
695 if(result!=0) {
696 return (int8_t)(result >> 24 | 1);
697 }
698 } else {
699 // get the srcLength if necessary
700 if(srcLength < 0) {
701 srcLength = u_strlen(srcChars + srcStart);
702 }
703 if(length != srcLength) {
704 return (int8_t)((length - srcLength) >> 24 | 1);
705 }
706 }
707 return 0;
708}
709
710int32_t
711UnicodeString::getLength() const {
712 return length();
713}
714
715UChar
716UnicodeString::getCharAt(int32_t offset) const {
717 return charAt(offset);
718}
719
720UChar32
721UnicodeString::getChar32At(int32_t offset) const {
722 return char32At(offset);
723}
724
725int32_t
726UnicodeString::countChar32(int32_t start, int32_t length) const {
727 pinIndices(start, length);
728 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
729 return u_countChar32(fArray+start, length);
730}
731
732UBool
733UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
734 pinIndices(start, length);
735 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
736 return u_strHasMoreChar32Than(fArray+start, length, number);
737}
738
739int32_t
740UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
741 // pin index
742 if(index<0) {
743 index=0;
744 } else if(index>fLength) {
745 index=fLength;
746 }
747
748 if(delta>0) {
749 UTF_FWD_N(fArray, index, fLength, delta);
750 } else {
751 UTF_BACK_N(fArray, 0, index, -delta);
752 }
753
754 return index;
755}
756
757void
758UnicodeString::doExtract(int32_t start,
759 int32_t length,
760 UChar *dst,
761 int32_t dstStart) const
762{
763 // pin indices to legal values
764 pinIndices(start, length);
765
766 // do not copy anything if we alias dst itself
767 if(fArray + start != dst + dstStart) {
768 us_arrayCopy(getArrayStart(), start, dst, dstStart, length);
769 }
770}
771
772int32_t
773UnicodeString::extract(UChar *dest, int32_t destCapacity,
774 UErrorCode &errorCode) const {
775 if(U_SUCCESS(errorCode)) {
776 if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
777 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
778 } else {
779 if(fLength>0 && fLength<=destCapacity && fArray!=dest) {
780 uprv_memcpy(dest, fArray, fLength*U_SIZEOF_UCHAR);
781 }
782 return u_terminateUChars(dest, destCapacity, fLength, &errorCode);
783 }
784 }
785
786 return fLength;
787}
788
789int32_t
790UnicodeString::indexOf(const UChar *srcChars,
791 int32_t srcStart,
792 int32_t srcLength,
793 int32_t start,
794 int32_t length) const
795{
796 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
797 return -1;
798 }
799
800 // UnicodeString does not find empty substrings
801 if(srcLength < 0 && srcChars[srcStart] == 0) {
802 return -1;
803 }
804
805 // get the indices within bounds
806 pinIndices(start, length);
807
808 // find the first occurrence of the substring
809 const UChar *match = u_strFindFirst(fArray + start, length, srcChars + srcStart, srcLength);
810 if(match == NULL) {
811 return -1;
812 } else {
813 return match - fArray;
814 }
815}
816
817int32_t
818UnicodeString::doIndexOf(UChar c,
819 int32_t start,
820 int32_t length) const
821{
822 // pin indices
823 pinIndices(start, length);
824
825 // find the first occurrence of c
826 const UChar *match = u_memchr(fArray + start, c, length);
827 if(match == NULL) {
828 return -1;
829 } else {
830 return match - fArray;
831 }
832}
833
834int32_t
835UnicodeString::doIndexOf(UChar32 c,
836 int32_t start,
837 int32_t length) const {
838 // pin indices
839 pinIndices(start, length);
840
841 // find the first occurrence of c
842 const UChar *match = u_memchr32(fArray + start, c, length);
843 if(match == NULL) {
844 return -1;
845 } else {
846 return match - fArray;
847 }
848}
849
850int32_t
851UnicodeString::lastIndexOf(const UChar *srcChars,
852 int32_t srcStart,
853 int32_t srcLength,
854 int32_t start,
855 int32_t length) const
856{
857 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
858 return -1;
859 }
860
861 // UnicodeString does not find empty substrings
862 if(srcLength < 0 && srcChars[srcStart] == 0) {
863 return -1;
864 }
865
866 // get the indices within bounds
867 pinIndices(start, length);
868
869 // find the last occurrence of the substring
870 const UChar *match = u_strFindLast(fArray + start, length, srcChars + srcStart, srcLength);
871 if(match == NULL) {
872 return -1;
873 } else {
874 return match - fArray;
875 }
876}
877
878int32_t
879UnicodeString::doLastIndexOf(UChar c,
880 int32_t start,
881 int32_t length) const
882{
883 if(isBogus()) {
884 return -1;
885 }
886
887 // pin indices
888 pinIndices(start, length);
889
890 // find the last occurrence of c
891 const UChar *match = u_memrchr(fArray + start, c, length);
892 if(match == NULL) {
893 return -1;
894 } else {
895 return match - fArray;
896 }
897}
898
899int32_t
900UnicodeString::doLastIndexOf(UChar32 c,
901 int32_t start,
902 int32_t length) const {
903 // pin indices
904 pinIndices(start, length);
905
906 // find the last occurrence of c
907 const UChar *match = u_memrchr32(fArray + start, c, length);
908 if(match == NULL) {
909 return -1;
910 } else {
911 return match - fArray;
912 }
913}
914
915//========================================
916// Write implementation
917//========================================
918
919UnicodeString&
920UnicodeString::findAndReplace(int32_t start,
921 int32_t length,
922 const UnicodeString& oldText,
923 int32_t oldStart,
924 int32_t oldLength,
925 const UnicodeString& newText,
926 int32_t newStart,
927 int32_t newLength)
928{
929 if(isBogus() || oldText.isBogus() || newText.isBogus()) {
930 return *this;
931 }
932
933 pinIndices(start, length);
934 oldText.pinIndices(oldStart, oldLength);
935 newText.pinIndices(newStart, newLength);
936
937 if(oldLength == 0) {
938 return *this;
939 }
940
941 while(length > 0 && length >= oldLength) {
942 int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
943 if(pos < 0) {
944 // no more oldText's here: done
945 break;
946 } else {
947 // we found oldText, replace it by newText and go beyond it
948 replace(pos, oldLength, newText, newStart, newLength);
949 length -= pos + oldLength - start;
950 start = pos + newLength;
951 }
952 }
953
954 return *this;
955}
956
957
958void
959UnicodeString::setToBogus()
960{
961 releaseArray();
962
963 fArray = 0;
964 fCapacity = fLength = 0;
965 fFlags = kIsBogus;
966}
967
968// turn a bogus string into an empty one
969void
970UnicodeString::unBogus() {
971 if(fFlags & kIsBogus) {
972 fArray = fStackBuffer;
973 fLength = 0;
974 fCapacity = US_STACKBUF_SIZE;
975 fFlags = kShortString;
976 }
977}
978
979// setTo() analogous to the readonly-aliasing constructor with the same signature
980UnicodeString &
981UnicodeString::setTo(UBool isTerminated,
982 const UChar *text,
983 int32_t textLength)
984{
985 if(fFlags & kOpenGetBuffer) {
986 // do not modify a string that has an "open" getBuffer(minCapacity)
987 return *this;
988 }
989
990 if(text == NULL) {
991 // treat as an empty string, do not alias
992 releaseArray();
993 fLength = 0;
994 fCapacity = US_STACKBUF_SIZE;
995 fArray = fStackBuffer;
996 fFlags = kShortString;
997 return *this;
998 }
999
1000 if( textLength < -1 ||
1001 (textLength == -1 && !isTerminated) ||
1002 (textLength >= 0 && isTerminated && text[textLength] != 0)
1003 ) {
1004 setToBogus();
1005 return *this;
1006 }
1007
1008 releaseArray();
1009
1010 fArray = (UChar *)text;
1011 if(textLength != -1) {
1012 fLength = textLength;
1013 fCapacity = isTerminated ? fLength + 1 : fLength;
1014 } else {
1015 // text is terminated, or else it would have failed the above test
1016 fLength = u_strlen(text);
1017 fCapacity = fLength + 1;
1018 }
1019
1020 fFlags = kReadonlyAlias;
1021 return *this;
1022}
1023
1024// setTo() analogous to the writable-aliasing constructor with the same signature
1025UnicodeString &
1026UnicodeString::setTo(UChar *buffer,
1027 int32_t buffLength,
1028 int32_t buffCapacity) {
1029 if(fFlags & kOpenGetBuffer) {
1030 // do not modify a string that has an "open" getBuffer(minCapacity)
1031 return *this;
1032 }
1033
1034 if(buffer == NULL) {
1035 // treat as an empty string, do not alias
1036 releaseArray();
1037 fLength = 0;
1038 fCapacity = US_STACKBUF_SIZE;
1039 fArray = fStackBuffer;
1040 fFlags = kShortString;
1041 return *this;
1042 }
1043
1044 if(buffLength < 0 || buffLength > buffCapacity) {
1045 setToBogus();
1046 return *this;
1047 }
1048
1049 releaseArray();
1050
1051 fArray = buffer;
1052 fLength = buffLength;
1053 fCapacity = buffCapacity;
1054 fFlags = kWritableAlias;
1055 return *this;
1056}
1057
1058UnicodeString&
1059UnicodeString::setCharAt(int32_t offset,
1060 UChar c)
1061{
1062 if(cloneArrayIfNeeded() && fLength > 0) {
1063 if(offset < 0) {
1064 offset = 0;
1065 } else if(offset >= fLength) {
1066 offset = fLength - 1;
1067 }
1068
1069 fArray[offset] = c;
1070 }
1071 return *this;
1072}
1073
/*
 * Argument checking and buffer handling for string case mapping are
 * implemented once, in caseMap(); these constants select the mapping
 * that it performs.
 */
enum {
    TO_LOWER = 0,
    TO_UPPER = 1,
    TO_TITLE = 2,
    FOLD_CASE = 3
};
1084
1085UnicodeString &
1086UnicodeString::toLower() {
1087 return caseMap(0, Locale::getDefault(), 0, TO_LOWER);
1088}
1089
1090UnicodeString &
1091UnicodeString::toLower(const Locale &locale) {
1092 return caseMap(0, locale, 0, TO_LOWER);
1093}
1094
1095UnicodeString &
1096UnicodeString::toUpper() {
1097 return caseMap(0, Locale::getDefault(), 0, TO_UPPER);
1098}
1099
1100UnicodeString &
1101UnicodeString::toUpper(const Locale &locale) {
1102 return caseMap(0, locale, 0, TO_UPPER);
1103}
1104
1105#if !UCONFIG_NO_BREAK_ITERATION
1106
1107UnicodeString &
1108UnicodeString::toTitle(BreakIterator *titleIter) {
1109 return caseMap(titleIter, Locale::getDefault(), 0, TO_TITLE);
1110}
1111
1112UnicodeString &
1113UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) {
1114 return caseMap(titleIter, locale, 0, TO_TITLE);
1115}
1116
1117#endif
1118
1119UnicodeString &
1120UnicodeString::foldCase(uint32_t options) {
1121 return caseMap(0, Locale::getDefault(), options, FOLD_CASE);
1122}
1123
1124UnicodeString &
1125UnicodeString::caseMap(BreakIterator *titleIter,
1126 const Locale& locale,
1127 uint32_t options,
1128 int32_t toWhichCase) {
1129 if(fLength <= 0) {
1130 // nothing to do
1131 return *this;
1132 }
1133
1134 // We need to allocate a new buffer for the internal string case mapping function.
1135 // This is very similar to how doReplace() below keeps the old array pointer
1136 // and deletes the old array itself after it is done.
1137 // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
1138 UChar *oldArray = fArray;
1139 int32_t oldLength = fLength;
1140 int32_t *bufferToDelete = 0;
1141
1142 // Make sure that if the string is in fStackBuffer we do not overwrite it!
1143 int32_t capacity;
1144 if(fLength <= US_STACKBUF_SIZE) {
1145 if(fArray == fStackBuffer) {
1146 capacity = 2 * US_STACKBUF_SIZE; // make sure that cloneArrayIfNeeded() allocates a new buffer
1147 } else {
1148 capacity = US_STACKBUF_SIZE;
1149 }
1150 } else {
1151 capacity = fLength + 20;
1152 }
1153 if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) {
1154 return *this;
1155 }
1156
1157 UErrorCode errorCode;
1158
1159#if !UCONFIG_NO_BREAK_ITERATION
1160 // set up the titlecasing break iterator
1161 UBreakIterator *cTitleIter = 0;
1162
1163 if(toWhichCase == TO_TITLE) {
1164 if(titleIter != 0) {
1165 cTitleIter = (UBreakIterator *)titleIter;
1166 } else {
1167 errorCode = U_ZERO_ERROR;
1168 cTitleIter = ubrk_open(UBRK_WORD, locale.getName(),
1169 oldArray, oldLength,
1170 &errorCode);
1171 if(U_FAILURE(errorCode)) {
1172 uprv_free(bufferToDelete);
1173 setToBogus();
1174 return *this;
1175 }
1176 }
1177 }
1178#endif
1179
1180 // Case-map, and if the result is too long, then reallocate and repeat.
1181 do {
1182 errorCode = U_ZERO_ERROR;
1183 if(toWhichCase==TO_LOWER) {
1184 fLength = u_internalStrToLower(fArray, fCapacity,
1185 oldArray, oldLength,
1186 0, oldLength,
1187 locale.getName(),
1188 &errorCode);
1189 } else if(toWhichCase==TO_UPPER) {
1190 fLength = u_internalStrToUpper(fArray, fCapacity,
1191 oldArray, oldLength,
1192 locale.getName(),
1193 &errorCode);
1194#if !UCONFIG_NO_BREAK_ITERATION
1195 } else if(toWhichCase==TO_TITLE) {
1196 fLength = u_internalStrToTitle(fArray, fCapacity,
1197 oldArray, oldLength,
1198 cTitleIter, locale.getName(),
1199 &errorCode);
1200#endif
1201 } else {
1202 fLength = u_internalStrFoldCase(fArray, fCapacity,
1203 oldArray, oldLength,
1204 options,
1205 &errorCode);
1206 }
1207 } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(fLength, fLength, FALSE));
1208
1209#if !UCONFIG_NO_BREAK_ITERATION
1210 if(cTitleIter != 0 && titleIter == 0) {
1211 ubrk_close(cTitleIter);
1212 }
1213#endif
1214
1215 if (bufferToDelete) {
1216 uprv_free(bufferToDelete);
1217 }
1218 if(U_FAILURE(errorCode)) {
1219 setToBogus();
1220 }
1221 return *this;
1222}
1223
1224UnicodeString&
1225UnicodeString::doReplace( int32_t start,
1226 int32_t length,
1227 const UnicodeString& src,
1228 int32_t srcStart,
1229 int32_t srcLength)
1230{
1231 if(!src.isBogus()) {
1232 // pin the indices to legal values
1233 src.pinIndices(srcStart, srcLength);
1234
1235 // get the characters from src
1236 // and replace the range in ourselves with them
1237 return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
1238 } else {
1239 // remove the range
1240 return doReplace(start, length, 0, 0, 0);
1241 }
1242}
1243
1244UnicodeString&
1245UnicodeString::doReplace(int32_t start,
1246 int32_t length,
1247 const UChar *srcChars,
1248 int32_t srcStart,
1249 int32_t srcLength)
1250{
1251 if(isBogus()) {
1252 return *this;
1253 }
1254
1255 if(srcChars == 0) {
1256 srcStart = srcLength = 0;
1257 } else if(srcLength < 0) {
1258 // get the srcLength if necessary
1259 srcLength = u_strlen(srcChars + srcStart);
1260 }
1261
1262 int32_t *bufferToDelete = 0;
1263
1264 // the following may change fArray but will not copy the current contents;
1265 // therefore we need to keep the current fArray
1266 UChar *oldArray = fArray;
1267 int32_t oldLength = fLength;
1268
1269 // pin the indices to legal values
1270 pinIndices(start, length);
1271
1272 // calculate the size of the string after the replace
1273 int32_t newSize = oldLength - length + srcLength;
1274
1275 // clone our array and allocate a bigger array if needed
1276 if(!cloneArrayIfNeeded(newSize, newSize + (newSize >> 2) + kGrowSize,
1277 FALSE, &bufferToDelete)
1278 ) {
1279 return *this;
1280 }
1281
1282 // now do the replace
1283
1284 if(fArray != oldArray) {
1285 // if fArray changed, then we need to copy everything except what will change
1286 us_arrayCopy(oldArray, 0, fArray, 0, start);
1287 us_arrayCopy(oldArray, start + length,
1288 fArray, start + srcLength,
1289 oldLength - (start + length));
1290 } else if(length != srcLength) {
1291 // fArray did not change; copy only the portion that isn't changing, leaving a hole
1292 us_arrayCopy(oldArray, start + length,
1293 fArray, start + srcLength,
1294 oldLength - (start + length));
1295 }
1296
1297 // now fill in the hole with the new string
1298 us_arrayCopy(srcChars, srcStart, getArrayStart(), start, srcLength);
1299
1300 fLength = newSize;
1301
1302 // delayed delete in case srcChars == fArray when we started, and
1303 // to keep oldArray alive for the above operations
1304 if (bufferToDelete) {
1305 uprv_free(bufferToDelete);
1306 }
1307
1308 return *this;
1309}
1310
1311/**
1312 * Replaceable API
1313 */
1314void
1315UnicodeString::handleReplaceBetween(int32_t start,
1316 int32_t limit,
1317 const UnicodeString& text) {
1318 replaceBetween(start, limit, text);
1319}
1320
1321/**
1322 * Replaceable API
1323 */
1324void
1325UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
1326 if (limit <= start) {
1327 return; // Nothing to do; avoid bogus malloc call
1328 }
1329 UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
1330 extractBetween(start, limit, text, 0);
1331 insert(dest, text, 0, limit - start);
1332 uprv_free(text);
1333}
1334
1335/**
1336 * Replaceable API
1337 *
1338 * NOTE: This is for the Replaceable class. There is no rep.cpp,
1339 * so we implement this function here.
1340 */
1341UBool Replaceable::hasMetaData() const {
1342 return TRUE;
1343}
1344
1345/**
1346 * Replaceable API
1347 */
1348UBool UnicodeString::hasMetaData() const {
1349 return FALSE;
1350}
1351
1352UnicodeString&
1353UnicodeString::doReverse(int32_t start,
1354 int32_t length)
1355{
1356 if(fLength <= 1 || !cloneArrayIfNeeded()) {
1357 return *this;
1358 }
1359
1360 // pin the indices to legal values
1361 pinIndices(start, length);
1362
1363 UChar *left = getArrayStart() + start;
1364 UChar *right = getArrayStart() + start + length;
1365 UChar swap;
1366 UBool hasSupplementary = FALSE;
1367
1368 while(left < --right) {
1369 hasSupplementary |= (UBool)UTF_IS_LEAD(swap = *left);
1370 hasSupplementary |= (UBool)UTF_IS_LEAD(*left++ = *right);
1371 *right = swap;
1372 }
1373
1374 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1375 if(hasSupplementary) {
1376 UChar swap2;
1377
1378 left = getArrayStart() + start;
1379 right = getArrayStart() + start + length - 1; // -1 so that we can look at *(left+1) if left<right
1380 while(left < right) {
1381 if(UTF_IS_TRAIL(swap = *left) && UTF_IS_LEAD(swap2 = *(left + 1))) {
1382 *left++ = swap2;
1383 *left++ = swap;
1384 } else {
1385 ++left;
1386 }
1387 }
1388 }
1389
1390 return *this;
1391}
1392
1393UBool
1394UnicodeString::padLeading(int32_t targetLength,
1395 UChar padChar)
1396{
1397 if(fLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1398 return FALSE;
1399 } else {
1400 // move contents up by padding width
1401 int32_t start = targetLength - fLength;
1402 us_arrayCopy(fArray, 0, fArray, start, fLength);
1403
1404 // fill in padding character
1405 while(--start >= 0) {
1406 fArray[start] = padChar;
1407 }
1408 fLength = targetLength;
1409 return TRUE;
1410 }
1411}
1412
1413UBool
1414UnicodeString::padTrailing(int32_t targetLength,
1415 UChar padChar)
1416{
1417 if(fLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1418 return FALSE;
1419 } else {
1420 // fill in padding character
1421 int32_t length = targetLength;
1422 while(--length >= fLength) {
1423 fArray[length] = padChar;
1424 }
1425 fLength = targetLength;
1426 return TRUE;
1427 }
1428}
1429
1430UnicodeString&
1431UnicodeString::trim()
1432{
1433 if(isBogus()) {
1434 return *this;
1435 }
1436
1437 UChar32 c;
1438 int32_t i = fLength, length;
1439
1440 // first cut off trailing white space
1441 for(;;) {
1442 length = i;
1443 if(i <= 0) {
1444 break;
1445 }
1446 UTF_PREV_CHAR(fArray, 0, i, c);
1447 if(!(c == 0x20 || u_isWhitespace(c))) {
1448 break;
1449 }
1450 }
1451 if(length < fLength) {
1452 fLength = length;
1453 }
1454
1455 // find leading white space
1456 int32_t start;
1457 i = 0;
1458 for(;;) {
1459 start = i;
1460 if(i >= length) {
1461 break;
1462 }
1463 UTF_NEXT_CHAR(fArray, i, length, c);
1464 if(!(c == 0x20 || u_isWhitespace(c))) {
1465 break;
1466 }
1467 }
1468
1469 // move string forward over leading white space
1470 if(start > 0) {
1471 doReplace(0, start, 0, 0, 0);
1472 }
1473
1474 return *this;
1475}
1476
1477//========================================
1478// Hashing
1479//========================================
1480int32_t
1481UnicodeString::doHashCode() const
1482{
1483 /* Delegate hash computation to uhash. This makes UnicodeString
1484 * hashing consistent with UChar* hashing. */
1485 int32_t hashCode = uhash_hashUCharsN(getArrayStart(), fLength);
1486 if (hashCode == kInvalidHashCode) {
1487 hashCode = kEmptyHashCode;
1488 }
1489 return hashCode;
1490}
1491
1492//========================================
1493// Codeset conversion
1494//========================================
1495int32_t
1496UnicodeString::extract(int32_t start,
1497 int32_t length,
1498 char *target,
1499 uint32_t dstSize,
1500 const char *codepage) const
1501{
1502 // if the arguments are illegal, then do nothing
1503 if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
1504 return 0;
1505 }
1506
1507 // pin the indices to legal values
1508 pinIndices(start, length);
1509
1510 // create the converter
1511 UConverter *converter;
1512 UErrorCode status = U_ZERO_ERROR;
1513
1514 // just write the NUL if the string length is 0
1515 if(length == 0) {
1516 if(dstSize >= 0x80000000) {
1517 // careful: dstSize is unsigned! (0xffffffff means "unlimited")
1518 // make sure that the NUL-termination works (takes int32_t)
1519 dstSize=0x7fffffff;
1520 }
1521 return u_terminateChars(target, dstSize, 0, &status);
1522 }
1523
1524 // if the codepage is the default, use our cache
1525 // if it is an empty string, then use the "invariant character" conversion
1526 if (codepage == 0) {
1527 converter = u_getDefaultConverter(&status);
1528 } else if (*codepage == 0) {
1529 // use the "invariant characters" conversion
1530 int32_t destLength;
1531 // careful: dstSize is unsigned! (0xffffffff means "unlimited")
1532 if(dstSize >= 0x80000000) {
1533 destLength = length;
1534 // make sure that the NUL-termination works (takes int32_t)
1535 dstSize=0x7fffffff;
1536 } else if(length <= (int32_t)dstSize) {
1537 destLength = length;
1538 } else {
1539 destLength = (int32_t)dstSize;
1540 }
1541 u_UCharsToChars(getArrayStart() + start, target, destLength);
1542 return u_terminateChars(target, (int32_t)dstSize, length, &status);
1543 } else {
1544 converter = ucnv_open(codepage, &status);
1545 }
1546
1547 length = doExtract(start, length, target, (int32_t)dstSize, converter, status);
1548
1549 // close the converter
1550 if (codepage == 0) {
1551 u_releaseDefaultConverter(converter);
1552 } else {
1553 ucnv_close(converter);
1554 }
1555
1556 return length;
1557}
1558
1559int32_t
1560UnicodeString::extract(char *dest, int32_t destCapacity,
1561 UConverter *cnv,
1562 UErrorCode &errorCode) const {
1563 if(U_FAILURE(errorCode)) {
1564 return 0;
1565 }
1566
1567 if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
1568 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
1569 return 0;
1570 }
1571
1572 // nothing to do?
1573 if(fLength<=0) {
1574 return u_terminateChars(dest, destCapacity, 0, &errorCode);
1575 }
1576
1577 // get the converter
1578 UBool isDefaultConverter;
1579 if(cnv==0) {
1580 isDefaultConverter=TRUE;
1581 cnv=u_getDefaultConverter(&errorCode);
1582 if(U_FAILURE(errorCode)) {
1583 return 0;
1584 }
1585 } else {
1586 isDefaultConverter=FALSE;
1587 ucnv_resetFromUnicode(cnv);
1588 }
1589
1590 // convert
1591 int32_t length=doExtract(0, fLength, dest, destCapacity, cnv, errorCode);
1592
1593 // release the converter
1594 if(isDefaultConverter) {
1595 u_releaseDefaultConverter(cnv);
1596 }
1597
1598 return length;
1599}
1600
1601void
1602UnicodeString::extractBetween(int32_t start,
1603 int32_t limit,
1604 UnicodeString& target) const
1605{ doExtract(start, limit - start, target); }
1606
1607int32_t
1608UnicodeString::doExtract(int32_t start, int32_t length,
1609 char *dest, int32_t destCapacity,
1610 UConverter *cnv,
1611 UErrorCode &errorCode) const {
1612 if(U_FAILURE(errorCode)) {
1613 if(destCapacity!=0) {
1614 *dest=0;
1615 }
1616 return 0;
1617 }
1618
1619 const UChar *src=fArray+start, *srcLimit=src+length;
1620 char *originalDest=dest;
1621 const char *destLimit;
1622
1623 if(destCapacity==0) {
1624 destLimit=dest=0;
1625 } else if(destCapacity==-1) {
1626 // Pin the limit to U_MAX_PTR if the "magic" destCapacity is used.
1627 destLimit=(char*)U_MAX_PTR(dest);
1628 // for NUL-termination, translate into highest int32_t
1629 destCapacity=0x7fffffff;
1630 } else {
1631 destLimit=dest+destCapacity;
1632 }
1633
1634 // perform the conversion
1635 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);
1636 length=(int32_t)(dest-originalDest);
1637
1638 // if an overflow occurs, then get the preflighting length
1639 if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
1640 char buffer[1024];
1641
1642 destLimit=buffer+sizeof(buffer);
1643 do {
1644 dest=buffer;
1645 errorCode=U_ZERO_ERROR;
1646 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode);
1647 length+=(int32_t)(dest-buffer);
1648 } while(errorCode==U_BUFFER_OVERFLOW_ERROR);
1649 }
1650
1651 return u_terminateChars(originalDest, destCapacity, length, &errorCode);
1652}
1653
1654void
1655UnicodeString::doCodepageCreate(const char *codepageData,
1656 int32_t dataLength,
1657 const char *codepage)
1658{
1659 // if there's nothing to convert, do nothing
1660 if(codepageData == 0 || dataLength <= 0) {
1661 return;
1662 }
1663
1664 UErrorCode status = U_ZERO_ERROR;
1665
1666 // create the converter
1667 // if the codepage is the default, use our cache
1668 // if it is an empty string, then use the "invariant character" conversion
1669 UConverter *converter = (codepage == 0 ?
1670 u_getDefaultConverter(&status) :
1671 *codepage == 0 ?
1672 0 :
1673 ucnv_open(codepage, &status));
1674
1675 // if we failed, set the appropriate flags and return
1676 if(U_FAILURE(status)) {
1677 setToBogus();
1678 return;
1679 }
1680
1681 // perform the conversion
1682 if(converter == 0) {
1683 // use the "invariant characters" conversion
1684 if(cloneArrayIfNeeded(dataLength, dataLength, FALSE)) {
1685 u_charsToUChars(codepageData, getArrayStart(), dataLength);
1686 fLength = dataLength;
1687 } else {
1688 setToBogus();
1689 }
1690 return;
1691 }
1692
1693 // convert using the real converter
1694 doCodepageCreate(codepageData, dataLength, converter, status);
1695 if(U_FAILURE(status)) {
1696 setToBogus();
1697 }
1698
1699 // close the converter
1700 if(codepage == 0) {
1701 u_releaseDefaultConverter(converter);
1702 } else {
1703 ucnv_close(converter);
1704 }
1705}
1706
1707void
1708UnicodeString::doCodepageCreate(const char *codepageData,
1709 int32_t dataLength,
1710 UConverter *converter,
1711 UErrorCode &status) {
1712 if(U_FAILURE(status)) {
1713 return;
1714 }
1715
1716 // set up the conversion parameters
1717 const char *mySource = codepageData;
1718 const char *mySourceEnd = mySource + dataLength;
1719 UChar *myTarget;
1720
1721 // estimate the size needed:
1722 // 1.25 UChar's per source byte should cover most cases
1723 int32_t arraySize = dataLength + (dataLength >> 2);
1724
1725 // we do not care about the current contents
1726 UBool doCopyArray = FALSE;
1727 for(;;) {
1728 if(!cloneArrayIfNeeded(arraySize, arraySize, doCopyArray)) {
1729 setToBogus();
1730 break;
1731 }
1732
1733 // perform the conversion
1734 myTarget = fArray + fLength;
1735 ucnv_toUnicode(converter, &myTarget, fArray + fCapacity,
1736 &mySource, mySourceEnd, 0, TRUE, &status);
1737
1738 // update the conversion parameters
1739 fLength = (int32_t)(myTarget - fArray);
1740
1741 // allocate more space and copy data, if needed
1742 if(status == U_BUFFER_OVERFLOW_ERROR) {
1743 // reset the error code
1744 status = U_ZERO_ERROR;
1745
1746 // keep the previous conversion results
1747 doCopyArray = TRUE;
1748
1749 // estimate the new size needed, larger than before
1750 // try 2 UChar's per remaining source byte
1751 arraySize = (int32_t)(fLength + 2 * (mySourceEnd - mySource));
1752 } else {
1753 break;
1754 }
1755 }
1756}
1757
1758//========================================
1759// External Buffer
1760//========================================
1761
1762UChar *
1763UnicodeString::getBuffer(int32_t minCapacity) {
1764 if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
1765 fFlags|=kOpenGetBuffer;
1766 fLength=0;
1767 return fArray;
1768 } else {
1769 return 0;
1770 }
1771}
1772
1773void
1774UnicodeString::releaseBuffer(int32_t newLength) {
1775 if(fFlags&kOpenGetBuffer && newLength>=-1) {
1776 // set the new fLength
1777 if(newLength==-1) {
1778 // the new length is the string length, capped by fCapacity
1779 const UChar *p=fArray, *limit=fArray+fCapacity;
1780 while(p<limit && *p!=0) {
1781 ++p;
1782 }
1783 fLength=(int32_t)(p-fArray);
1784 } else if(newLength<=fCapacity) {
1785 fLength=newLength;
1786 } else {
1787 fLength=fCapacity;
1788 }
1789 fFlags&=~kOpenGetBuffer;
1790 }
1791}
1792
1793//========================================
1794// Miscellaneous
1795//========================================
1796UBool
1797UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
1798 int32_t growCapacity,
1799 UBool doCopyArray,
1800 int32_t **pBufferToDelete,
1801 UBool forceClone) {
1802 // default parameters need to be static, therefore
1803 // the defaults are -1 to have convenience defaults
1804 if(newCapacity == -1) {
1805 newCapacity = fCapacity;
1806 }
1807
1808 // while a getBuffer(minCapacity) is "open",
1809 // prevent any modifications of the string by returning FALSE here
1810 // if the string is bogus, then only an assignment or similar can revive it
1811 if((fFlags&(kOpenGetBuffer|kIsBogus))!=0) {
1812 return FALSE;
1813 }
1814
1815 /*
1816 * We need to make a copy of the array if
1817 * the buffer is read-only, or
1818 * the buffer is refCounted (shared), and refCount>1, or
1819 * the buffer is too small.
1820 * Return FALSE if memory could not be allocated.
1821 */
1822 if(forceClone ||
1823 fFlags & kBufferIsReadonly ||
1824 fFlags & kRefCounted && refCount() > 1 ||
1825 newCapacity > fCapacity
1826 ) {
1827 // save old values
1828 UChar *array = fArray;
1829 uint16_t flags = fFlags;
1830
1831 // check growCapacity for default value and use of the stack buffer
1832 if(growCapacity == -1) {
1833 growCapacity = newCapacity;
1834 } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
1835 growCapacity = US_STACKBUF_SIZE;
1836 }
1837
1838 // allocate a new array
1839 if(allocate(growCapacity) ||
1840 newCapacity < growCapacity && allocate(newCapacity)
1841 ) {
1842 if(doCopyArray) {
1843 // copy the contents
1844 // do not copy more than what fits - it may be smaller than before
1845 if(fCapacity < fLength) {
1846 fLength = fCapacity;
1847 }
1848 us_arrayCopy(array, 0, fArray, 0, fLength);
1849 } else {
1850 fLength = 0;
1851 }
1852
1853 // release the old array
1854 if(flags & kRefCounted) {
1855 // the array is refCounted; decrement and release if 0
1856 int32_t *pRefCount = ((int32_t *)array - 1);
1857 if(umtx_atomic_dec(pRefCount) == 0) {
1858 if(pBufferToDelete == 0) {
1859 uprv_free(pRefCount);
1860 } else {
1861 // the caller requested to delete it himself
1862 *pBufferToDelete = pRefCount;
1863 }
1864 }
1865 }
1866 } else {
1867 // not enough memory for growCapacity and not even for the smaller newCapacity
1868 // reset the old values for setToBogus() to release the array
1869 fArray = array;
1870 fFlags = flags;
1871 setToBogus();
1872 return FALSE;
1873 }
1874 }
1875 return TRUE;
1876}
1877U_NAMESPACE_END