X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b75a7d8f3b4adbae880cab104ce2c6a50eee4db2..48b980fed3435926e0b3a8d72ecb58be703a1c7a:/icuSources/common/unistr.cpp diff --git a/icuSources/common/unistr.cpp b/icuSources/common/unistr.cpp index 5fc4d446..52fddf32 100644 --- a/icuSources/common/unistr.cpp +++ b/icuSources/common/unistr.cpp @@ -1,6 +1,6 @@ /* ****************************************************************************** -* Copyright (C) 1999-2003, International Business Machines Corporation and * +* Copyright (C) 1999-2008, International Business Machines Corporation and * * others. All Rights Reserved. * ****************************************************************************** * @@ -20,17 +20,12 @@ #include "unicode/utypes.h" #include "unicode/putil.h" -#include "unicode/locid.h" #include "cstring.h" #include "cmemory.h" #include "unicode/ustring.h" #include "unicode/unistr.h" -#include "unicode/uchar.h" -#include "unicode/ucnv.h" -#include "unicode/ubrk.h" #include "uhash.h" #include "ustr_imp.h" -#include "unormimp.h" #include "umutex.h" #if 0 @@ -95,13 +90,26 @@ us_arrayCopy(const UChar *src, int32_t srcStart, U_CDECL_BEGIN static UChar U_CALLCONV UnicodeString_charAt(int32_t offset, void *context) { - return ((UnicodeString*) context)->charAt(offset); + return ((U_NAMESPACE_QUALIFIER UnicodeString*) context)->charAt(offset); } U_CDECL_END U_NAMESPACE_BEGIN -const char UnicodeString::fgClassID=0; +/* The Replaceable virtual destructor can't be defined in the header + due to how AIX works with multiple definitions of virtual functions. +*/ +Replaceable::~Replaceable() {} +Replaceable::Replaceable() {} +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString) + +UnicodeString U_EXPORT2 +operator+ (const UnicodeString &s1, const UnicodeString &s2) { + return + UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0). + append(s1). + append(s2); +} //======================================== // Reference Counting functions, put at top of file so that optimizing compilers @@ -110,11 +118,11 @@ const char UnicodeString::fgClassID=0; void UnicodeString::addRef() -{ umtx_atomic_inc((int32_t *)fArray - 1);} +{ umtx_atomic_inc((int32_t *)fUnion.fFields.fArray - 1);} int32_t UnicodeString::removeRef() -{ return umtx_atomic_dec((int32_t *)fArray - 1);} +{ return umtx_atomic_dec((int32_t *)fUnion.fFields.fArray - 1);} int32_t UnicodeString::refCount() const @@ -122,7 +130,7 @@ UnicodeString::refCount() const umtx_lock(NULL); // Note: without the lock to force a memory barrier, we might see a very // stale value on some multi-processor systems. - int32_t count = *((int32_t *)fArray - 1); + int32_t count = *((int32_t *)fUnion.fFields.fArray - 1); umtx_unlock(NULL); return count; } @@ -130,7 +138,7 @@ UnicodeString::refCount() const void UnicodeString::releaseArray() { if((fFlags & kRefCounted) && removeRef() == 0) { - uprv_free((int32_t *)fArray - 1); + uprv_free((int32_t *)fUnion.fFields.fArray - 1); } } @@ -140,16 +148,12 @@ UnicodeString::releaseArray() { // Constructors //======================================== UnicodeString::UnicodeString() - : fLength(0), - fCapacity(US_STACKBUF_SIZE), - fArray(fStackBuffer), + : fShortLength(0), fFlags(kShortString) {} UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) - : fLength(0), - fCapacity(US_STACKBUF_SIZE), - fArray(0), + : fShortLength(0), fFlags(0) { if(count <= 0 || (uint32_t)c > 0x10ffff) { @@ -162,13 +166,14 @@ UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) capacity = length; } if(allocate(capacity)) { + UChar *array = getArrayStart(); int32_t i = 0; // fill the new string with c if(unitCount == 1) { // fill with length UChars while(i < length) { - fArray[i++] = (UChar)c; + array[i++] = (UChar)c; } } else { // get the code units for c @@ -183,40 +188,34 @@ UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) while(i < length) { int32_t unitIdx = 0; while(unitIdx < unitCount) { - fArray[i++]=units[unitIdx++]; + array[i++]=units[unitIdx++]; } } } } - fLength = length; + setLength(length); } } UnicodeString::UnicodeString(UChar ch) - : fLength(1), - fCapacity(US_STACKBUF_SIZE), - fArray(fStackBuffer), + : fShortLength(1), fFlags(kShortString) { - fStackBuffer[0] = ch; + fUnion.fStackBuffer[0] = ch; } UnicodeString::UnicodeString(UChar32 ch) - : fLength(1), - fCapacity(US_STACKBUF_SIZE), - fArray(fStackBuffer), + : fShortLength(0), fFlags(kShortString) { int32_t i = 0; UBool isError = FALSE; - U16_APPEND(fStackBuffer, i, US_STACKBUF_SIZE, ch, isError); - fLength = i; + U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError); + fShortLength = (int8_t)i; } UnicodeString::UnicodeString(const UChar *text) - : fLength(0), - fCapacity(US_STACKBUF_SIZE), - fArray(fStackBuffer), + : fShortLength(0), fFlags(kShortString) { doReplace(0, 0, text, 0, -1); @@ -224,9 +223,7 @@ UnicodeString::UnicodeString(const UChar *text) UnicodeString::UnicodeString(const UChar *text, int32_t textLength) - : fLength(0), - fCapacity(US_STACKBUF_SIZE), - fArray(fStackBuffer), + : fShortLength(0), fFlags(kShortString) { doReplace(0, 0, text, 0, textLength); @@ -235,115 +232,64 @@ UnicodeString::UnicodeString(const UChar *text, UnicodeString::UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength) - : fLength(textLength), - fCapacity(isTerminated ? textLength + 1 : textLength), - fArray((UChar *)text), + : fShortLength(0), fFlags(kReadonlyAlias) { if(text == NULL) { // treat as an empty string, do not alias - fLength = 0; - fCapacity = US_STACKBUF_SIZE; - fArray = fStackBuffer; - fFlags = kShortString; + setToEmpty(); } else if(textLength < -1 || (textLength == -1 && !isTerminated) || (textLength >= 0 && isTerminated && text[textLength] != 0) ) { setToBogus(); - } else if(textLength == -1) { - // text is terminated, or else it would have failed the above test - fLength = u_strlen(text); - fCapacity = fLength + 1; + } else { + if(textLength == -1) { + // text is terminated, or else it would have failed the above test + textLength = u_strlen(text); + } + setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength); } } UnicodeString::UnicodeString(UChar *buff, int32_t buffLength, int32_t buffCapacity) - : fLength(buffLength), - fCapacity(buffCapacity), - fArray(buff), + : fShortLength(0), fFlags(kWritableAlias) { if(buff == NULL) { // treat as an empty string, do not alias - fLength = 0; - fCapacity = US_STACKBUF_SIZE; - fArray = fStackBuffer; - fFlags = kShortString; - } else if(buffLength < -1 || buffLength > buffCapacity) { + setToEmpty(); + } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) { setToBogus(); - } else if(buffLength == -1) { - // fLength = u_strlen(buff); but do not look beyond buffCapacity - const UChar *p = buff, *limit = buff + buffCapacity; - while(p != limit && *p != 0) { - ++p; + } else { + if(buffLength == -1) { + // fLength = u_strlen(buff); but do not look beyond buffCapacity + const UChar *p = buff, *limit = buff + buffCapacity; + while(p != limit && *p != 0) { + ++p; + } + buffLength = (int32_t)(p - buff); } - fLength = (int32_t)(p - buff); - } -} - -UnicodeString::UnicodeString(const char *codepageData, - const char *codepage) - : fLength(0), - fCapacity(US_STACKBUF_SIZE), - fArray(fStackBuffer), - fFlags(kShortString) -{ - if(codepageData != 0) { - doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), codepage); + setArray(buff, buffLength, buffCapacity); } } - -UnicodeString::UnicodeString(const char *codepageData, - int32_t dataLength, - const char *codepage) - : fLength(0), - fCapacity(US_STACKBUF_SIZE), - fArray(fStackBuffer), +UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) + : fShortLength(0), fFlags(kShortString) { - if(codepageData != 0) { - doCodepageCreate(codepageData, dataLength, codepage); - } -} - -UnicodeString::UnicodeString(const char *src, int32_t srcLength, - UConverter *cnv, - UErrorCode &errorCode) - : fLength(0), - fCapacity(US_STACKBUF_SIZE), - fArray(fStackBuffer), - fFlags(kShortString) -{ - if(U_SUCCESS(errorCode)) { - // check arguments - if(src==NULL) { - // treat as an empty string, do nothing more - } else if(srcLength<-1) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - } else { - // get input length - if(srcLength==-1) { - srcLength=(int32_t)uprv_strlen(src); - } - if(srcLength>0) { - if(cnv!=0) { - // use the provided converter - ucnv_resetToUnicode(cnv); - doCodepageCreate(src, srcLength, cnv, errorCode); - } else { - // use the default converter - cnv=u_getDefaultConverter(&errorCode); - doCodepageCreate(src, srcLength, cnv, errorCode); - u_releaseDefaultConverter(cnv); - } - } + if(src==NULL) { + // treat as an empty string + } else { + if(length<0) { + length=(int32_t)uprv_strlen(src); } - - if(U_FAILURE(errorCode)) { + if(cloneArrayIfNeeded(length, length, FALSE)) { + u_charsToUChars(src, getArrayStart(), length); + setLength(length); + } else { setToBogus(); } } @@ -351,9 +297,7 @@ UnicodeString::UnicodeString(const char *src, int32_t srcLength, UnicodeString::UnicodeString(const UnicodeString& that) : Replaceable(), - fLength(0), - fCapacity(US_STACKBUF_SIZE), - fArray(fStackBuffer), + fShortLength(0), fFlags(kShortString) { copyFrom(that); @@ -362,9 +306,7 @@ UnicodeString::UnicodeString(const UnicodeString& that) UnicodeString::UnicodeString(const UnicodeString& that, int32_t srcStart) : Replaceable(), - fLength(0), - fCapacity(US_STACKBUF_SIZE), - fArray(fStackBuffer), + fShortLength(0), fFlags(kShortString) { setTo(that, srcStart); @@ -374,9 +316,7 @@ UnicodeString::UnicodeString(const UnicodeString& that, int32_t srcStart, int32_t srcLength) : Replaceable(), - fLength(0), - fCapacity(US_STACKBUF_SIZE), - fArray(fStackBuffer), + fShortLength(0), fFlags(kShortString) { setTo(that, srcStart, srcLength); @@ -401,8 +341,6 @@ UnicodeString::clone() const { UBool UnicodeString::allocate(int32_t capacity) { if(capacity <= US_STACKBUF_SIZE) { - fArray = fStackBuffer; - fCapacity = US_STACKBUF_SIZE; fFlags = kShortString; } else { // count bytes for the refCounter and the string capacity, and @@ -415,12 +353,13 @@ UnicodeString::allocate(int32_t capacity) { *array++ = 1; // have fArray point to the first UChar - fArray = (UChar *)array; - fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR)); + fUnion.fFields.fArray = (UChar *)array; + fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR)); fFlags = kLongString; } else { - fLength = 0; - fCapacity = 0; + fShortLength = 0; + fUnion.fFields.fArray = 0; + fUnion.fFields.fCapacity = 0; fFlags = kIsBogus; return FALSE; } @@ -467,40 +406,38 @@ UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) { // delete the current contents releaseArray(); - // we always copy the length - fLength = src.fLength; - if(fLength == 0) { + if(src.isEmpty()) { // empty string - use the stack buffer - fArray = fStackBuffer; - fCapacity = US_STACKBUF_SIZE; - fFlags = kShortString; + setToEmpty(); return *this; } + // we always copy the length + int32_t srcLength = src.length(); + setLength(srcLength); + // fLength>0 and not an "open" src.getBuffer(minCapacity) switch(src.fFlags) { case kShortString: // short string using the stack buffer, do the same - fArray = fStackBuffer; - fCapacity = US_STACKBUF_SIZE; fFlags = kShortString; - uprv_memcpy(fStackBuffer, src.fArray, fLength * U_SIZEOF_UCHAR); + uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, fShortLength * U_SIZEOF_UCHAR); break; case kLongString: // src uses a refCounted string buffer, use that buffer with refCount // src is const, use a cast - we don't really change it ((UnicodeString &)src).addRef(); // copy all fields, share the reference-counted buffer - fArray = src.fArray; - fCapacity = src.fCapacity; + fUnion.fFields.fArray = src.fUnion.fFields.fArray; + fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity; fFlags = src.fFlags; break; case kReadonlyAlias: if(fastCopy) { // src is a readonly alias, do the same // -> maintain the readonly alias as such - fArray = src.fArray; - fCapacity = src.fCapacity; + fUnion.fFields.fArray = src.fUnion.fFields.fArray; + fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity; fFlags = src.fFlags; break; } @@ -508,17 +445,17 @@ UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) { // -> allocate a new buffer and copy the contents case kWritableAlias: // src is a writable alias; we make a copy of that instead - if(allocate(fLength)) { - uprv_memcpy(fArray, src.fArray, fLength * U_SIZEOF_UCHAR); + if(allocate(srcLength)) { + uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR); break; } // if there is not enough memory, then fall through to setting to bogus default: // if src is bogus, set ourselves to bogus // do not call setToBogus() here because fArray and fFlags are not consistent here - fArray = 0; - fLength = 0; - fCapacity = 0; + fShortLength = 0; + fUnion.fFields.fArray = 0; + fUnion.fFields.fCapacity = 0; fFlags = kIsBogus; break; } @@ -531,17 +468,25 @@ UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) { //======================================== UnicodeString UnicodeString::unescape() const { - UnicodeString result; - for (int32_t i=0; i> 15 | 1); @@ -661,52 +606,6 @@ UnicodeString::doCompareCodePointOrder(int32_t start, } } -int8_t -UnicodeString::doCaseCompare(int32_t start, - int32_t length, - const UChar *srcChars, - int32_t srcStart, - int32_t srcLength, - uint32_t options) const -{ - // compare illegal string values - // treat const UChar *srcChars==NULL as an empty string - if(isBogus()) { - return -1; - } - - // pin indices to legal values - pinIndices(start, length); - - if(srcChars == NULL) { - srcStart = srcLength = 0; - } - - // get the correct pointer - const UChar *chars = getArrayStart(); - - chars += start; - srcChars += srcStart; - - if(chars != srcChars) { - UErrorCode errorCode=U_ZERO_ERROR; - int32_t result=unorm_cmpEquivFold(chars, length, srcChars, srcLength, - options|U_COMPARE_IGNORE_CASE, &errorCode); - if(result!=0) { - return (int8_t)(result >> 24 | 1); - } - } else { - // get the srcLength if necessary - if(srcLength < 0) { - srcLength = u_strlen(srcChars + srcStart); - } - if(length != srcLength) { - return (int8_t)((length - srcLength) >> 24 | 1); - } - } - return 0; -} - int32_t UnicodeString::getLength() const { return length(); @@ -726,29 +625,31 @@ int32_t UnicodeString::countChar32(int32_t start, int32_t length) const { pinIndices(start, length); // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL - return u_countChar32(fArray+start, length); + return u_countChar32(getArrayStart()+start, length); } UBool UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const { pinIndices(start, length); // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL - return u_strHasMoreChar32Than(fArray+start, length, number); + return u_strHasMoreChar32Than(getArrayStart()+start, length, number); } int32_t UnicodeString::moveIndex32(int32_t index, int32_t delta) const { // pin index + int32_t len = length(); if(index<0) { index=0; - } else if(index>fLength) { - index=fLength; + } else if(index>len) { + index=len; } + const UChar *array = getArrayStart(); if(delta>0) { - UTF_FWD_N(fArray, index, fLength, delta); + UTF_FWD_N(array, index, len, delta); } else { - UTF_BACK_N(fArray, 0, index, -delta); + UTF_BACK_N(array, 0, index, -delta); } return index; @@ -764,26 +665,60 @@ UnicodeString::doExtract(int32_t start, pinIndices(start, length); // do not copy anything if we alias dst itself - if(fArray + start != dst + dstStart) { - us_arrayCopy(getArrayStart(), start, dst, dstStart, length); + const UChar *array = getArrayStart(); + if(array + start != dst + dstStart) { + us_arrayCopy(array, start, dst, dstStart, length); } } int32_t UnicodeString::extract(UChar *dest, int32_t destCapacity, UErrorCode &errorCode) const { + int32_t len = length(); if(U_SUCCESS(errorCode)) { if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) { errorCode=U_ILLEGAL_ARGUMENT_ERROR; } else { - if(fLength>0 && fLength<=destCapacity && fArray!=dest) { - uprv_memcpy(dest, fArray, fLength*U_SIZEOF_UCHAR); + const UChar *array = getArrayStart(); + if(len>0 && len<=destCapacity && array!=dest) { + uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR); } - return u_terminateUChars(dest, destCapacity, fLength, &errorCode); + return u_terminateUChars(dest, destCapacity, len, &errorCode); } } - return fLength; + return len; +} + +int32_t +UnicodeString::extract(int32_t start, + int32_t length, + char *target, + int32_t targetCapacity, + enum EInvariant) const +{ + // if the arguments are illegal, then do nothing + if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) { + return 0; + } + + // pin the indices to legal values + pinIndices(start, length); + + if(length <= targetCapacity) { + u_UCharsToChars(getArrayStart() + start, target, length); + } + UErrorCode status = U_ZERO_ERROR; + return u_terminateChars(target, targetCapacity, length, &status); +} + +void +UnicodeString::extractBetween(int32_t start, + int32_t limit, + UnicodeString& target) const { + pinIndex(start); + pinIndex(limit); + doExtract(start, limit - start, target); } int32_t @@ -806,11 +741,12 @@ UnicodeString::indexOf(const UChar *srcChars, pinIndices(start, length); // find the first occurrence of the substring - const UChar *match = u_strFindFirst(fArray + start, length, srcChars + srcStart, srcLength); + const UChar *array = getArrayStart(); + const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength); if(match == NULL) { return -1; } else { - return match - fArray; + return (int32_t)(match - array); } } @@ -823,11 +759,12 @@ UnicodeString::doIndexOf(UChar c, pinIndices(start, length); // find the first occurrence of c - const UChar *match = u_memchr(fArray + start, c, length); + const UChar *array = getArrayStart(); + const UChar *match = u_memchr(array + start, c, length); if(match == NULL) { return -1; } else { - return match - fArray; + return (int32_t)(match - array); } } @@ -839,11 +776,12 @@ UnicodeString::doIndexOf(UChar32 c, pinIndices(start, length); // find the first occurrence of c - const UChar *match = u_memchr32(fArray + start, c, length); + const UChar *array = getArrayStart(); + const UChar *match = u_memchr32(array + start, c, length); if(match == NULL) { return -1; } else { - return match - fArray; + return (int32_t)(match - array); } } @@ -867,11 +805,12 @@ UnicodeString::lastIndexOf(const UChar *srcChars, pinIndices(start, length); // find the last occurrence of the substring - const UChar *match = u_strFindLast(fArray + start, length, srcChars + srcStart, srcLength); + const UChar *array = getArrayStart(); + const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength); if(match == NULL) { return -1; } else { - return match - fArray; + return (int32_t)(match - array); } } @@ -888,11 +827,12 @@ UnicodeString::doLastIndexOf(UChar c, pinIndices(start, length); // find the last occurrence of c - const UChar *match = u_memrchr(fArray + start, c, length); + const UChar *array = getArrayStart(); + const UChar *match = u_memrchr(array + start, c, length); if(match == NULL) { return -1; } else { - return match - fArray; + return (int32_t)(match - array); } } @@ -904,11 +844,12 @@ UnicodeString::doLastIndexOf(UChar32 c, pinIndices(start, length); // find the last occurrence of c - const UChar *match = u_memrchr32(fArray + start, c, length); + const UChar *array = getArrayStart(); + const UChar *match = u_memrchr32(array + start, c, length); if(match == NULL) { return -1; } else { - return match - fArray; + return (int32_t)(match - array); } } @@ -960,8 +901,9 @@ UnicodeString::setToBogus() { releaseArray(); - fArray = 0; - fCapacity = fLength = 0; + fShortLength = 0; + fUnion.fFields.fArray = 0; + fUnion.fFields.fCapacity = 0; fFlags = kIsBogus; } @@ -969,10 +911,7 @@ UnicodeString::setToBogus() void UnicodeString::unBogus() { if(fFlags & kIsBogus) { - fArray = fStackBuffer; - fLength = 0; - fCapacity = US_STACKBUF_SIZE; - fFlags = kShortString; + setToEmpty(); } } @@ -990,10 +929,7 @@ UnicodeString::setTo(UBool isTerminated, if(text == NULL) { // treat as an empty string, do not alias releaseArray(); - fLength = 0; - fCapacity = US_STACKBUF_SIZE; - fArray = fStackBuffer; - fFlags = kShortString; + setToEmpty(); return *this; } @@ -1007,15 +943,11 @@ UnicodeString::setTo(UBool isTerminated, releaseArray(); - fArray = (UChar *)text; - if(textLength != -1) { - fLength = textLength; - fCapacity = isTerminated ? fLength + 1 : fLength; - } else { + if(textLength == -1) { // text is terminated, or else it would have failed the above test - fLength = u_strlen(text); - fCapacity = fLength + 1; + textLength = u_strlen(text); } + setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength); fFlags = kReadonlyAlias; return *this; @@ -1034,23 +966,25 @@ UnicodeString::setTo(UChar *buffer, if(buffer == NULL) { // treat as an empty string, do not alias releaseArray(); - fLength = 0; - fCapacity = US_STACKBUF_SIZE; - fArray = fStackBuffer; - fFlags = kShortString; + setToEmpty(); return *this; } - if(buffLength < 0 || buffLength > buffCapacity) { + if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) { setToBogus(); return *this; + } else if(buffLength == -1) { + // buffLength = u_strlen(buff); but do not look beyond buffCapacity + const UChar *p = buffer, *limit = buffer + buffCapacity; + while(p != limit && *p != 0) { + ++p; + } + buffLength = (int32_t)(p - buffer); } releaseArray(); - fArray = buffer; - fLength = buffLength; - fCapacity = buffCapacity; + setArray(buffer, buffLength, buffCapacity); fFlags = kWritableAlias; return *this; } @@ -1059,164 +993,15 @@ UnicodeString& UnicodeString::setCharAt(int32_t offset, UChar c) { - if(cloneArrayIfNeeded() && fLength > 0) { + int32_t len = length(); + if(cloneArrayIfNeeded() && len > 0) { if(offset < 0) { offset = 0; - } else if(offset >= fLength) { - offset = fLength - 1; + } else if(offset >= len) { + offset = len - 1; } - fArray[offset] = c; - } - return *this; -} - -/* - * Implement argument checking and buffer handling - * for string case mapping as a common function. - */ -enum { - TO_LOWER, - TO_UPPER, - TO_TITLE, - FOLD_CASE -}; - -UnicodeString & -UnicodeString::toLower() { - return caseMap(0, Locale::getDefault(), 0, TO_LOWER); -} - -UnicodeString & -UnicodeString::toLower(const Locale &locale) { - return caseMap(0, locale, 0, TO_LOWER); -} - -UnicodeString & -UnicodeString::toUpper() { - return caseMap(0, Locale::getDefault(), 0, TO_UPPER); -} - -UnicodeString & -UnicodeString::toUpper(const Locale &locale) { - return caseMap(0, locale, 0, TO_UPPER); -} - -#if !UCONFIG_NO_BREAK_ITERATION - -UnicodeString & -UnicodeString::toTitle(BreakIterator *titleIter) { - return caseMap(titleIter, Locale::getDefault(), 0, TO_TITLE); -} - -UnicodeString & -UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) { - return caseMap(titleIter, locale, 0, TO_TITLE); -} - -#endif - -UnicodeString & -UnicodeString::foldCase(uint32_t options) { - return caseMap(0, Locale::getDefault(), options, FOLD_CASE); -} - -UnicodeString & -UnicodeString::caseMap(BreakIterator *titleIter, - const Locale& locale, - uint32_t options, - int32_t toWhichCase) { - if(fLength <= 0) { - // nothing to do - return *this; - } - - // We need to allocate a new buffer for the internal string case mapping function. - // This is very similar to how doReplace() below keeps the old array pointer - // and deletes the old array itself after it is done. - // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array. - UChar *oldArray = fArray; - int32_t oldLength = fLength; - int32_t *bufferToDelete = 0; - - // Make sure that if the string is in fStackBuffer we do not overwrite it! - int32_t capacity; - if(fLength <= US_STACKBUF_SIZE) { - if(fArray == fStackBuffer) { - capacity = 2 * US_STACKBUF_SIZE; // make sure that cloneArrayIfNeeded() allocates a new buffer - } else { - capacity = US_STACKBUF_SIZE; - } - } else { - capacity = fLength + 20; - } - if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) { - return *this; - } - - UErrorCode errorCode; - -#if !UCONFIG_NO_BREAK_ITERATION - // set up the titlecasing break iterator - UBreakIterator *cTitleIter = 0; - - if(toWhichCase == TO_TITLE) { - if(titleIter != 0) { - cTitleIter = (UBreakIterator *)titleIter; - } else { - errorCode = U_ZERO_ERROR; - cTitleIter = ubrk_open(UBRK_WORD, locale.getName(), - oldArray, oldLength, - &errorCode); - if(U_FAILURE(errorCode)) { - uprv_free(bufferToDelete); - setToBogus(); - return *this; - } - } - } -#endif - - // Case-map, and if the result is too long, then reallocate and repeat. - do { - errorCode = U_ZERO_ERROR; - if(toWhichCase==TO_LOWER) { - fLength = u_internalStrToLower(fArray, fCapacity, - oldArray, oldLength, - 0, oldLength, - locale.getName(), - &errorCode); - } else if(toWhichCase==TO_UPPER) { - fLength = u_internalStrToUpper(fArray, fCapacity, - oldArray, oldLength, - locale.getName(), - &errorCode); -#if !UCONFIG_NO_BREAK_ITERATION - } else if(toWhichCase==TO_TITLE) { - fLength = u_internalStrToTitle(fArray, fCapacity, - oldArray, oldLength, - cTitleIter, locale.getName(), - &errorCode); -#endif - } else { - fLength = u_internalStrFoldCase(fArray, fCapacity, - oldArray, oldLength, - options, - &errorCode); - } - } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(fLength, fLength, FALSE)); - -#if !UCONFIG_NO_BREAK_ITERATION - if(cTitleIter != 0 && titleIter == 0) { - ubrk_close(cTitleIter); - } -#endif - - if (bufferToDelete) { - uprv_free(bufferToDelete); - } - if(U_FAILURE(errorCode)) { - setToBogus(); + getArrayStart()[offset] = c; } return *this; } @@ -1248,7 +1033,7 @@ UnicodeString::doReplace(int32_t start, int32_t srcStart, int32_t srcLength) { - if(isBogus()) { + if(!isWritable()) { return *this; } @@ -1259,20 +1044,45 @@ UnicodeString::doReplace(int32_t start, srcLength = u_strlen(srcChars + srcStart); } - int32_t *bufferToDelete = 0; + int32_t oldLength = this->length(); - // the following may change fArray but will not copy the current contents; - // therefore we need to keep the current fArray - UChar *oldArray = fArray; - int32_t oldLength = fLength; + // calculate the size of the string after the replace + int32_t newSize; + + // optimize append() onto a large-enough, owned string + if(start >= oldLength) { + newSize = oldLength + srcLength; + if(newSize <= getCapacity() && isBufferWritable()) { + us_arrayCopy(srcChars, srcStart, getArrayStart(), oldLength, srcLength); + setLength(newSize); + return *this; + } else { + // pin the indices to legal values + start = oldLength; + length = 0; + } + } else { + // pin the indices to legal values + pinIndices(start, length); - // pin the indices to legal values - pinIndices(start, length); + newSize = oldLength - length + srcLength; + } - // calculate the size of the string after the replace - int32_t newSize = oldLength - length + srcLength; + // the following may change fArray but will not copy the current contents; + // therefore we need to keep the current fArray + UChar oldStackBuffer[US_STACKBUF_SIZE]; + UChar *oldArray; + if((fFlags&kUsingStackBuffer) && (newSize > US_STACKBUF_SIZE)) { + // copy the stack buffer contents because it will be overwritten with + // fUnion.fFields values + u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength); + oldArray = oldStackBuffer; + } else { + oldArray = getArrayStart(); + } // clone our array and allocate a bigger array if needed + int32_t *bufferToDelete = 0; if(!cloneArrayIfNeeded(newSize, newSize + (newSize >> 2) + kGrowSize, FALSE, &bufferToDelete) ) { @@ -1281,23 +1091,24 @@ UnicodeString::doReplace(int32_t start, // now do the replace - if(fArray != oldArray) { + UChar *newArray = getArrayStart(); + if(newArray != oldArray) { // if fArray changed, then we need to copy everything except what will change - us_arrayCopy(oldArray, 0, fArray, 0, start); + us_arrayCopy(oldArray, 0, newArray, 0, start); us_arrayCopy(oldArray, start + length, - fArray, start + srcLength, + newArray, start + srcLength, oldLength - (start + length)); } else if(length != srcLength) { // fArray did not change; copy only the portion that isn't changing, leaving a hole us_arrayCopy(oldArray, start + length, - fArray, start + srcLength, + newArray, start + srcLength, oldLength - (start + length)); } // now fill in the hole with the new string - us_arrayCopy(srcChars, srcStart, getArrayStart(), start, srcLength); + us_arrayCopy(srcChars, srcStart, newArray, start, srcLength); - fLength = newSize; + setLength(newSize); // delayed delete in case srcChars == fArray when we started, and // to keep oldArray alive for the above operations @@ -1327,9 +1138,12 @@ UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) { return; // Nothing to do; avoid bogus malloc call } UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) ); - extractBetween(start, limit, text, 0); - insert(dest, text, 0, limit - start); - uprv_free(text); + // Check to make sure text is not null. + if (text != NULL) { + extractBetween(start, limit, text, 0); + insert(dest, text, 0, limit - start); + uprv_free(text); + } } /** @@ -1353,7 +1167,7 @@ UnicodeString& UnicodeString::doReverse(int32_t start, int32_t length) { - if(fLength <= 1 || !cloneArrayIfNeeded()) { + if(this->length() <= 1 || !cloneArrayIfNeeded()) { return *this; } @@ -1361,7 +1175,7 @@ UnicodeString::doReverse(int32_t start, pinIndices(start, length); UChar *left = getArrayStart() + start; - UChar *right = getArrayStart() + start + length; + UChar *right = left + length; UChar swap; UBool hasSupplementary = FALSE; @@ -1376,7 +1190,7 @@ UnicodeString::doReverse(int32_t start, UChar swap2; left = getArrayStart() + start; - right = getArrayStart() + start + length - 1; // -1 so that we can look at *(left+1) if left= targetLength || !cloneArrayIfNeeded(targetLength)) { + int32_t oldLength = length(); + if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) { return FALSE; } else { // move contents up by padding width - int32_t start = targetLength - fLength; - us_arrayCopy(fArray, 0, fArray, start, fLength); + UChar *array = getArrayStart(); + int32_t start = targetLength - oldLength; + us_arrayCopy(array, 0, array, start, oldLength); // fill in padding character while(--start >= 0) { - fArray[start] = padChar; + array[start] = padChar; } - fLength = targetLength; + setLength(targetLength); return TRUE; } } @@ -1414,66 +1230,21 @@ UBool UnicodeString::padTrailing(int32_t targetLength, UChar padChar) { - if(fLength >= targetLength || !cloneArrayIfNeeded(targetLength)) { + int32_t oldLength = length(); + if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) { return FALSE; } else { // fill in padding character + UChar *array = getArrayStart(); int32_t length = targetLength; - while(--length >= fLength) { - fArray[length] = padChar; + while(--length >= oldLength) { + array[length] = padChar; } - fLength = targetLength; + setLength(targetLength); return TRUE; } } -UnicodeString& -UnicodeString::trim() -{ - if(isBogus()) { - return *this; - } - - UChar32 c; - int32_t i = fLength, length; - - // first cut off trailing white space - for(;;) { - length = i; - if(i <= 0) { - break; - } - UTF_PREV_CHAR(fArray, 0, i, c); - if(!(c == 0x20 || u_isWhitespace(c))) { - break; - } - } - if(length < fLength) { - fLength = length; - } - - // find leading white space - int32_t start; - i = 0; - for(;;) { - start = i; - if(i >= length) { - break; - } - UTF_NEXT_CHAR(fArray, i, length, c); - if(!(c == 0x20 || u_isWhitespace(c))) { - break; - } - } - - // move string forward over leading white space - if(start > 0) { - doReplace(0, start, 0, 0, 0); - } - - return *this; -} - //======================================== // Hashing //======================================== @@ -1482,279 +1253,13 @@ UnicodeString::doHashCode() const { /* Delegate hash computation to uhash. This makes UnicodeString * hashing consistent with UChar* hashing. */ - int32_t hashCode = uhash_hashUCharsN(getArrayStart(), fLength); + int32_t hashCode = uhash_hashUCharsN(getArrayStart(), length()); if (hashCode == kInvalidHashCode) { hashCode = kEmptyHashCode; } return hashCode; } -//======================================== -// Codeset conversion -//======================================== -int32_t -UnicodeString::extract(int32_t start, - int32_t length, - char *target, - uint32_t dstSize, - const char *codepage) const -{ - // if the arguments are illegal, then do nothing - if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) { - return 0; - } - - // pin the indices to legal values - pinIndices(start, length); - - // create the converter - UConverter *converter; - UErrorCode status = U_ZERO_ERROR; - - // just write the NUL if the string length is 0 - if(length == 0) { - if(dstSize >= 0x80000000) { - // careful: dstSize is unsigned! (0xffffffff means "unlimited") - // make sure that the NUL-termination works (takes int32_t) - dstSize=0x7fffffff; - } - return u_terminateChars(target, dstSize, 0, &status); - } - - // if the codepage is the default, use our cache - // if it is an empty string, then use the "invariant character" conversion - if (codepage == 0) { - converter = u_getDefaultConverter(&status); - } else if (*codepage == 0) { - // use the "invariant characters" conversion - int32_t destLength; - // careful: dstSize is unsigned! (0xffffffff means "unlimited") - if(dstSize >= 0x80000000) { - destLength = length; - // make sure that the NUL-termination works (takes int32_t) - dstSize=0x7fffffff; - } else if(length <= (int32_t)dstSize) { - destLength = length; - } else { - destLength = (int32_t)dstSize; - } - u_UCharsToChars(getArrayStart() + start, target, destLength); - return u_terminateChars(target, (int32_t)dstSize, length, &status); - } else { - converter = ucnv_open(codepage, &status); - } - - length = doExtract(start, length, target, (int32_t)dstSize, converter, status); - - // close the converter - if (codepage == 0) { - u_releaseDefaultConverter(converter); - } else { - ucnv_close(converter); - } - - return length; -} - -int32_t -UnicodeString::extract(char *dest, int32_t destCapacity, - UConverter *cnv, - UErrorCode &errorCode) const { - if(U_FAILURE(errorCode)) { - return 0; - } - - if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - // nothing to do? - if(fLength<=0) { - return u_terminateChars(dest, destCapacity, 0, &errorCode); - } - - // get the converter - UBool isDefaultConverter; - if(cnv==0) { - isDefaultConverter=TRUE; - cnv=u_getDefaultConverter(&errorCode); - if(U_FAILURE(errorCode)) { - return 0; - } - } else { - isDefaultConverter=FALSE; - ucnv_resetFromUnicode(cnv); - } - - // convert - int32_t length=doExtract(0, fLength, dest, destCapacity, cnv, errorCode); - - // release the converter - if(isDefaultConverter) { - u_releaseDefaultConverter(cnv); - } - - return length; -} - -void -UnicodeString::extractBetween(int32_t start, - int32_t limit, - UnicodeString& target) const -{ doExtract(start, limit - start, target); } - -int32_t -UnicodeString::doExtract(int32_t start, int32_t length, - char *dest, int32_t destCapacity, - UConverter *cnv, - UErrorCode &errorCode) const { - if(U_FAILURE(errorCode)) { - if(destCapacity!=0) { - *dest=0; - } - return 0; - } - - const UChar *src=fArray+start, *srcLimit=src+length; - char *originalDest=dest; - const char *destLimit; - - if(destCapacity==0) { - destLimit=dest=0; - } else if(destCapacity==-1) { - // Pin the limit to U_MAX_PTR if the "magic" destCapacity is used. - destLimit=(char*)U_MAX_PTR(dest); - // for NUL-termination, translate into highest int32_t - destCapacity=0x7fffffff; - } else { - destLimit=dest+destCapacity; - } - - // perform the conversion - ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode); - length=(int32_t)(dest-originalDest); - - // if an overflow occurs, then get the preflighting length - if(errorCode==U_BUFFER_OVERFLOW_ERROR) { - char buffer[1024]; - - destLimit=buffer+sizeof(buffer); - do { - dest=buffer; - errorCode=U_ZERO_ERROR; - ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode); - length+=(int32_t)(dest-buffer); - } while(errorCode==U_BUFFER_OVERFLOW_ERROR); - } - - return u_terminateChars(originalDest, destCapacity, length, &errorCode); -} - -void -UnicodeString::doCodepageCreate(const char *codepageData, - int32_t dataLength, - const char *codepage) -{ - // if there's nothing to convert, do nothing - if(codepageData == 0 || dataLength <= 0) { - return; - } - - UErrorCode status = U_ZERO_ERROR; - - // create the converter - // if the codepage is the default, use our cache - // if it is an empty string, then use the "invariant character" conversion - UConverter *converter = (codepage == 0 ? - u_getDefaultConverter(&status) : - *codepage == 0 ? - 0 : - ucnv_open(codepage, &status)); - - // if we failed, set the appropriate flags and return - if(U_FAILURE(status)) { - setToBogus(); - return; - } - - // perform the conversion - if(converter == 0) { - // use the "invariant characters" conversion - if(cloneArrayIfNeeded(dataLength, dataLength, FALSE)) { - u_charsToUChars(codepageData, getArrayStart(), dataLength); - fLength = dataLength; - } else { - setToBogus(); - } - return; - } - - // convert using the real converter - doCodepageCreate(codepageData, dataLength, converter, status); - if(U_FAILURE(status)) { - setToBogus(); - } - - // close the converter - if(codepage == 0) { - u_releaseDefaultConverter(converter); - } else { - ucnv_close(converter); - } -} - -void -UnicodeString::doCodepageCreate(const char *codepageData, - int32_t dataLength, - UConverter *converter, - UErrorCode &status) { - if(U_FAILURE(status)) { - return; - } - - // set up the conversion parameters - const char *mySource = codepageData; - const char *mySourceEnd = mySource + dataLength; - UChar *myTarget; - - // estimate the size needed: - // 1.25 UChar's per source byte should cover most cases - int32_t arraySize = dataLength + (dataLength >> 2); - - // we do not care about the current contents - UBool doCopyArray = FALSE; - for(;;) { - if(!cloneArrayIfNeeded(arraySize, arraySize, doCopyArray)) { - setToBogus(); - break; - } - - // perform the conversion - myTarget = fArray + fLength; - ucnv_toUnicode(converter, &myTarget, fArray + fCapacity, - &mySource, mySourceEnd, 0, TRUE, &status); - - // update the conversion parameters - fLength = (int32_t)(myTarget - fArray); - - // allocate more space and copy data, if needed - if(status == U_BUFFER_OVERFLOW_ERROR) { - // reset the error code - status = U_ZERO_ERROR; - - // keep the previous conversion results - doCopyArray = TRUE; - - // estimate the new size needed, larger than before - // try 2 UChar's per remaining source byte - arraySize = (int32_t)(fLength + 2 * (mySourceEnd - mySource)); - } else { - break; - } - } -} - //======================================== // External Buffer //======================================== @@ -1763,8 +1268,8 @@ UChar * UnicodeString::getBuffer(int32_t minCapacity) { if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) { fFlags|=kOpenGetBuffer; - fLength=0; - return fArray; + fShortLength=0; + return getArrayStart(); } else { return 0; } @@ -1774,18 +1279,18 @@ void UnicodeString::releaseBuffer(int32_t newLength) { if(fFlags&kOpenGetBuffer && newLength>=-1) { // set the new fLength + int32_t capacity=getCapacity(); if(newLength==-1) { // the new length is the string length, capped by fCapacity - const UChar *p=fArray, *limit=fArray+fCapacity; + const UChar *array=getArrayStart(), *p=array, *limit=array+capacity; while(pcapacity) { + newLength=capacity; } + setLength(newLength); fFlags&=~kOpenGetBuffer; } } @@ -1802,13 +1307,13 @@ UnicodeString::cloneArrayIfNeeded(int32_t newCapacity, // default parameters need to be static, therefore // the defaults are -1 to have convenience defaults if(newCapacity == -1) { - newCapacity = fCapacity; + newCapacity = getCapacity(); } // while a getBuffer(minCapacity) is "open", // prevent any modifications of the string by returning FALSE here // if the string is bogus, then only an assignment or similar can revive it - if((fFlags&(kOpenGetBuffer|kIsBogus))!=0) { + if(!isWritable()) { return FALSE; } @@ -1822,12 +1327,8 @@ UnicodeString::cloneArrayIfNeeded(int32_t newCapacity, if(forceClone || fFlags & kBufferIsReadonly || fFlags & kRefCounted && refCount() > 1 || - newCapacity > fCapacity + newCapacity > getCapacity() ) { - // save old values - UChar *array = fArray; - uint16_t flags = fFlags; - // check growCapacity for default value and use of the stack buffer if(growCapacity == -1) { growCapacity = newCapacity; @@ -1835,25 +1336,46 @@ UnicodeString::cloneArrayIfNeeded(int32_t newCapacity, growCapacity = US_STACKBUF_SIZE; } + // save old values + UChar oldStackBuffer[US_STACKBUF_SIZE]; + UChar *oldArray; + uint8_t flags = fFlags; + + if(flags&kUsingStackBuffer) { + if(doCopyArray && growCapacity > US_STACKBUF_SIZE) { + // copy the stack buffer contents because it will be overwritten with + // fUnion.fFields values + us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength); + oldArray = oldStackBuffer; + } else { + oldArray = 0; // no need to copy from stack buffer to itself + } + } else { + oldArray = fUnion.fFields.fArray; + } + // allocate a new array if(allocate(growCapacity) || newCapacity < growCapacity && allocate(newCapacity) ) { - if(doCopyArray) { + if(doCopyArray && oldArray != 0) { // copy the contents // do not copy more than what fits - it may be smaller than before - if(fCapacity < fLength) { - fLength = fCapacity; + int32_t minLength = length(); + newCapacity = getCapacity(); + if(newCapacity < minLength) { + minLength = newCapacity; + setLength(minLength); } - us_arrayCopy(array, 0, fArray, 0, fLength); + us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength); } else { - fLength = 0; + fShortLength = 0; } // release the old array if(flags & kRefCounted) { // the array is refCounted; decrement and release if 0 - int32_t *pRefCount = ((int32_t *)array - 1); + int32_t *pRefCount = ((int32_t *)oldArray - 1); if(umtx_atomic_dec(pRefCount) == 0) { if(pBufferToDelete == 0) { uprv_free(pRefCount); @@ -1866,7 +1388,9 @@ UnicodeString::cloneArrayIfNeeded(int32_t newCapacity, } else { // not enough memory for growCapacity and not even for the smaller newCapacity // reset the old values for setToBogus() to release the array - fArray = array; + if(!(flags&kUsingStackBuffer)) { + fUnion.fFields.fArray = oldArray; + } fFlags = flags; setToBogus(); return FALSE; @@ -1875,3 +1399,18 @@ UnicodeString::cloneArrayIfNeeded(int32_t newCapacity, return TRUE; } U_NAMESPACE_END + +#ifdef U_STATIC_IMPLEMENTATION +/* +This should never be called. It is defined here to make sure that the +virtual vector deleting destructor is defined within unistr.cpp. +The vector deleting destructor is already a part of UObject, +but defining it here makes sure that it is included with this object file. +This makes sure that static library dependencies are kept to a minimum. +*/ +static void uprv_UnicodeStringDummy(void) { + U_NAMESPACE_USE + delete [] (new UnicodeString[2]); +} +#endif +