+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
-* Copyright (C) 1999-2013, International Business Machines Corporation and
+* Copyright (C) 1999-2016, International Business Machines Corporation and
* others. All Rights Reserved.
******************************************************************************
*
UChar *dst, int32_t dstStart, int32_t count)
{
if(count>0) {
- uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src)));
+ uprv_memmove(dst+dstStart, src+srcStart, (size_t)count*sizeof(*src));
}
}
void
UnicodeString::releaseArray() {
- if((fFlags & kRefCounted) && removeRef() == 0) {
+ if((fUnion.fFields.fLengthAndFlags & kRefCounted) && removeRef() == 0) { // only for ref-counted buffers, and only when removeRef() returns 0
uprv_free((int32_t *)fUnion.fFields.fArray - 1);
}
}
// The default constructor is inline in unistr.h.
-UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
- : fShortLength(0),
- fFlags(0)
-{
+UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) { // fills the string with count copies of code point c
+ fUnion.fFields.fLengthAndFlags = 0; // allocate() below assigns the real storage flags on every path
if(count <= 0 || (uint32_t)c > 0x10ffff) {
// just allocate and do not do anything else
allocate(capacity);
- } else {
- // count > 0, allocate and fill the new string with count c's
- int32_t unitCount = U16_LENGTH(c), length = count * unitCount;
+ } else if(c <= 0xffff) { // c fits in one UChar, so the length in code units equals count
+ int32_t length = count;
if(capacity < length) {
capacity = length;
}
if(allocate(capacity)) {
UChar *array = getArrayStart();
- int32_t i = 0;
-
- // fill the new string with c
- if(unitCount == 1) {
- // fill with length UChars
- while(i < length) {
- array[i++] = (UChar)c;
- }
- } else {
- // get the code units for c
- UChar units[U16_MAX_LENGTH];
- U16_APPEND_UNSAFE(units, i, c);
-
- // now it must be i==unitCount
- i = 0;
-
- // for Unicode, unitCount can only be 1, 2, 3, or 4
- // 1 is handled above
- while(i < length) {
- int32_t unitIdx = 0;
- while(unitIdx < unitCount) {
- array[i++]=units[unitIdx++];
- }
- }
+ UChar unit = (UChar)c;
+ for(int32_t i = 0; i < length; ++i) {
+ array[i] = unit;
+ }
+ setLength(length); // the length is set only after a successful allocation
+ }
+ } else { // supplementary code point, write surrogate pairs
+ if(count > (INT32_MAX / 2)) {
+ // We would get more than 2G UChars.
+ allocate(capacity);
+ return;
+ }
+ int32_t length = count * 2; // cannot overflow: count <= INT32_MAX / 2 was checked just above
+ if(capacity < length) {
+ capacity = length;
+ }
+ if(allocate(capacity)) {
+ UChar *array = getArrayStart();
+ UChar lead = U16_LEAD(c);
+ UChar trail = U16_TRAIL(c);
+ for(int32_t i = 0; i < length; i += 2) { // one lead/trail pair per copy of c
+ array[i] = lead;
+ array[i + 1] = trail;
}
+ setLength(length);
}
- setLength(length);
}
}
-UnicodeString::UnicodeString(UChar ch)
- : fShortLength(1),
- fFlags(kShortString)
-{
- fUnion.fStackBuffer[0] = ch;
+UnicodeString::UnicodeString(UChar ch) { // builds a string holding the single code unit ch
+ fUnion.fFields.fLengthAndFlags = kLength1 | kShortString; // replaces the former fShortLength(1) + fFlags(kShortString) pair
+ fUnion.fStackFields.fBuffer[0] = ch;
}
-UnicodeString::UnicodeString(UChar32 ch)
- : fShortLength(0),
- fFlags(kShortString)
-{
+UnicodeString::UnicodeString(UChar32 ch) { // builds a string from one code point
+ fUnion.fFields.fLengthAndFlags = kShortString; // start as a short string; the length is set below only on success
int32_t i = 0;
UBool isError = FALSE;
- U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);
+ U16_APPEND(fUnion.fStackFields.fBuffer, i, US_STACKBUF_SIZE, ch, isError); // writes into the stack buffer, bounded by US_STACKBUF_SIZE
// We test isError so that the compiler does not complain that we don't.
// If isError then i==0 which is what we want anyway.
if(!isError) {
- fShortLength = (int8_t)i;
+ setShortLength(i); // i was advanced by U16_APPEND and is used as the new length
}
}
-UnicodeString::UnicodeString(const UChar *text)
- : fShortLength(0),
- fFlags(kShortString)
-{
- doReplace(0, 0, text, 0, -1);
+UnicodeString::UnicodeString(const UChar *text) {
+ fUnion.fFields.fLengthAndFlags = kShortString;
+ doAppend(text, 0, -1);
}
UnicodeString::UnicodeString(const UChar *text,
- int32_t textLength)
- : fShortLength(0),
- fFlags(kShortString)
-{
- doReplace(0, 0, text, 0, textLength);
+ int32_t textLength) {
+ fUnion.fFields.fLengthAndFlags = kShortString;
+ doAppend(text, 0, textLength);
}
UnicodeString::UnicodeString(UBool isTerminated,
- const UChar *text,
- int32_t textLength)
- : fShortLength(0),
- fFlags(kReadonlyAlias)
-{
+ ConstChar16Ptr textPtr,
+ int32_t textLength) {
+ fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
+ const UChar *text = textPtr;
if(text == NULL) {
// treat as an empty string, do not alias
setToEmpty();
// text is terminated, or else it would have failed the above test
textLength = u_strlen(text);
}
- setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
+ setArray(const_cast<UChar *>(text), textLength,
+ isTerminated ? textLength + 1 : textLength);
}
}
UnicodeString::UnicodeString(UChar *buff,
int32_t buffLength,
- int32_t buffCapacity)
- : fShortLength(0),
- fFlags(kWritableAlias)
-{
+ int32_t buffCapacity) {
+ fUnion.fFields.fLengthAndFlags = kWritableAlias;
if(buff == NULL) {
// treat as an empty string, do not alias
setToEmpty();
}
}
-UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)
- : fShortLength(0),
- fFlags(kShortString)
-{
+UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) {
+ fUnion.fFields.fLengthAndFlags = kShortString;
if(src==NULL) {
// treat as an empty string
} else {
#if U_CHARSET_IS_UTF8
-UnicodeString::UnicodeString(const char *codepageData)
- : fShortLength(0),
- fFlags(kShortString) {
+UnicodeString::UnicodeString(const char *codepageData) {
+ fUnion.fFields.fLengthAndFlags = kShortString;
if(codepageData != 0) {
setToUTF8(codepageData);
}
}
-UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength)
- : fShortLength(0),
- fFlags(kShortString) {
+UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) {
+ fUnion.fFields.fLengthAndFlags = kShortString;
// if there's nothing to convert, do nothing
if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
return;
// else see unistr_cnv.cpp
#endif
-UnicodeString::UnicodeString(const UnicodeString& that)
- : Replaceable(),
- fShortLength(0),
- fFlags(kShortString)
-{
+UnicodeString::UnicodeString(const UnicodeString& that) {
+ fUnion.fFields.fLengthAndFlags = kShortString;
copyFrom(that);
}
+// Move constructor: takes over the fields of src via copyFieldsFrom(),
+// asking it to set src to bogus where it supports that.
+UnicodeString::UnicodeString(UnicodeString &&src) U_NOEXCEPT {
+  this->copyFieldsFrom(src, TRUE);
+}
+
UnicodeString::UnicodeString(const UnicodeString& that,
- int32_t srcStart)
- : Replaceable(),
- fShortLength(0),
- fFlags(kShortString)
-{
+ int32_t srcStart) {
+ fUnion.fFields.fLengthAndFlags = kShortString;
setTo(that, srcStart);
}
UnicodeString::UnicodeString(const UnicodeString& that,
int32_t srcStart,
- int32_t srcLength)
- : Replaceable(),
- fShortLength(0),
- fFlags(kShortString)
-{
+ int32_t srcLength) {
+ fUnion.fFields.fLengthAndFlags = kShortString;
setTo(that, srcStart, srcLength);
}
// array allocation
//========================================
+namespace {
+
+// Fixed headroom, in UChars, that getGrowCapacity() adds on top of a quarter
+// of the requested length.
+const int32_t kGrowSize = 128;
+
+// Upper bound for a heap buffer capacity, in UChars.
+// A buffer holds one int32_t reference counter, the UChars themselves, and one
+// extra UChar for a NUL terminator (so that getTerminatedBuffer() need not
+// reallocate). The byte total is rounded up to a multiple of 16 and has to fit
+// into a 32-bit size_t (at least when on a 32-bit platform).
+// Hence capacity <= (0xfffffff0 - 4) / 2 - 1 = 0x7ffffff5.
+// (Without the rounding, more complicated checks could allow up to 0x7ffffffd,
+// but that does not seem worth it.)
+const int32_t kMaxCapacity = 0x7ffffff5;
+
+// Returns the capacity to request when a string grows to newLength:
+// newLength plus a quarter of newLength plus kGrowSize, capped at kMaxCapacity.
+int32_t getGrowCapacity(int32_t newLength) {
+  const int32_t headroom = (newLength >> 2) + kGrowSize;
+  const int32_t available = kMaxCapacity - newLength;
+  return (headroom <= available) ? (newLength + headroom) : kMaxCapacity;
+}
+
+}  // namespace
+
UBool
UnicodeString::allocate(int32_t capacity) {
if(capacity <= US_STACKBUF_SIZE) {
- fFlags = kShortString;
- } else {
- // count bytes for the refCounter and the string capacity, and
- // round up to a multiple of 16; then divide by 4 and allocate int32_t's
- // to be safely aligned for the refCount
- // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer()
- int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2);
- int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words );
- if(array != 0) {
+ fUnion.fFields.fLengthAndFlags = kShortString; // request fits within US_STACKBUF_SIZE: no heap allocation
+ return TRUE;
+ }
+ if(capacity <= kMaxCapacity) { // larger requests skip straight to the bogus/FALSE exit at the bottom
+ ++capacity; // for the NUL
+ // Switch to size_t which is unsigned so that we can allocate up to 4GB.
+ // Reference counter + UChars.
+ size_t numBytes = sizeof(int32_t) + (size_t)capacity * U_SIZEOF_UCHAR;
+ // Round up to a multiple of 16.
+ numBytes = (numBytes + 15) & ~15;
+ int32_t *array = (int32_t *) uprv_malloc(numBytes);
+ if(array != NULL) {
// set initial refCount and point behind the refCount
*array++ = 1;
+ numBytes -= sizeof(int32_t); // bytes left for UChars once the reference counter is excluded
// have fArray point to the first UChar
fUnion.fFields.fArray = (UChar *)array;
- fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
- fFlags = kLongString;
- } else {
- fShortLength = 0;
- fUnion.fFields.fArray = 0;
- fUnion.fFields.fCapacity = 0;
- fFlags = kIsBogus;
- return FALSE;
+ fUnion.fFields.fCapacity = (int32_t)(numBytes / U_SIZEOF_UCHAR); // includes the rounding slack and the extra NUL slot
+ fUnion.fFields.fLengthAndFlags = kLongString;
+ return TRUE;
}
}
- return TRUE;
+ fUnion.fFields.fLengthAndFlags = kIsBogus; // reached when capacity > kMaxCapacity or uprv_malloc() returned NULL
+ fUnion.fFields.fArray = 0;
+ fUnion.fFields.fCapacity = 0;
+ return FALSE;
}
//========================================
// Destructor
//========================================
+
+#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
+static u_atomic_int32_t finalLengthCounts[0x400]; // UnicodeString::kMaxShortLength+1
+static u_atomic_int32_t beyondCount(0);
+
+// Prints the counters collected by ~UnicodeString(): one row for each final
+// length from 0 through 59, then a single ">59" row that sums the remaining
+// finalLengthCounts entries together with beyondCount.
+U_CAPI void unistr_printLengths() {
+  const int32_t kLastListedLength = 59;
+  for(int32_t len = 0; len <= kLastListedLength; ++len) {
+    printf("%2d, %9d\n", len, (int32_t)finalLengthCounts[len]);
+  }
+  int32_t longer = beyondCount;
+  for(int32_t len = kLastListedLength + 1; len < UPRV_LENGTHOF(finalLengthCounts); ++len) {
+    longer += finalLengthCounts[len];
+  }
+  printf(">59, %9d\n", longer);
+}
+#endif
+
UnicodeString::~UnicodeString()
{
+#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
+ // Count lengths of strings at the end of their lifetime.
+ // Useful for discussion of a desirable stack buffer size.
+ // Count the contents length, not the optional NUL terminator nor further capacity.
+ // Ignore open-buffer strings and strings which alias external storage.
+ if((fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kReadonlyAlias|kWritableAlias)) == 0) {
+ if(hasShortLength()) { // short lengths get one counter per length value
+ umtx_atomic_inc(finalLengthCounts + getShortLength());
+ } else { // all other strings share the single beyondCount counter
+ umtx_atomic_inc(&beyondCount);
+ }
+ }
+#endif
+
releaseArray();
}
// Factory methods
//========================================
-UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) {
+UnicodeString UnicodeString::fromUTF8(StringPiece utf8) {
UnicodeString result;
result.setToUTF8(utf8);
return result;
UnicodeString &
UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
// if assigning to ourselves, do nothing
- if(this == 0 || this == &src) {
+ if(this == &src) {
return *this;
}
// is the right side bogus?
- if(&src == 0 || src.isBogus()) {
+ if(src.isBogus()) {
setToBogus();
return *this;
}
return *this;
}
- // we always copy the length
- int32_t srcLength = src.length();
- setLength(srcLength);
-
// fLength>0 and not an "open" src.getBuffer(minCapacity)
- switch(src.fFlags) {
+ fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
+ switch(src.fUnion.fFields.fLengthAndFlags & kAllStorageFlags) {
case kShortString:
// short string using the stack buffer, do the same
- fFlags = kShortString;
- uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, srcLength * U_SIZEOF_UCHAR);
+ uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
+ getShortLength() * U_SIZEOF_UCHAR);
break;
case kLongString:
// src uses a refCounted string buffer, use that buffer with refCount
- // src is const, use a cast - we don't really change it
+ // src is const, use a cast - we don't actually change it
((UnicodeString &)src).addRef();
// copy all fields, share the reference-counted buffer
fUnion.fFields.fArray = src.fUnion.fFields.fArray;
fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
- fFlags = src.fFlags;
+ if(!hasShortLength()) {
+ fUnion.fFields.fLength = src.fUnion.fFields.fLength;
+ }
break;
case kReadonlyAlias:
if(fastCopy) {
// -> maintain the readonly alias as such
fUnion.fFields.fArray = src.fUnion.fFields.fArray;
fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
- fFlags = src.fFlags;
+ if(!hasShortLength()) {
+ fUnion.fFields.fLength = src.fUnion.fFields.fLength;
+ }
break;
}
// else if(!fastCopy) fall through to case kWritableAlias
// -> allocate a new buffer and copy the contents
- case kWritableAlias:
+ U_FALLTHROUGH;
+ case kWritableAlias: {
// src is a writable alias; we make a copy of that instead
+ int32_t srcLength = src.length();
if(allocate(srcLength)) {
- uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR);
+ u_memcpy(getArrayStart(), src.getArrayStart(), srcLength);
+ setLength(srcLength);
break;
}
// if there is not enough memory, then fall through to setting to bogus
+ U_FALLTHROUGH;
+ }
default:
// if src is bogus, set ourselves to bogus
- // do not call setToBogus() here because fArray and fFlags are not consistent here
- fShortLength = 0;
+ // do not call setToBogus() here because fArray and flags are not consistent here
+ fUnion.fFields.fLengthAndFlags = kIsBogus;
fUnion.fFields.fArray = 0;
fUnion.fFields.fCapacity = 0;
- fFlags = kIsBogus;
break;
}
return *this;
}
+// Move assignment: drops the current buffer, then takes over src's fields.
+// As in the standard library, self move assignment is not checked for
+// explicitly; it neither crashes nor leaks, but the object may end up bogus.
+UnicodeString &UnicodeString::operator=(UnicodeString &&src) U_NOEXCEPT {
+  this->releaseArray();
+  this->copyFieldsFrom(src, TRUE);
+  return *this;
+}
+
+// Field-wise transfer from src into this object: what move assignment does,
+// minus any memory management (nothing is released or reference-counted here).
+// With setSrcToBogus, a src that does not use its stack buffer is marked bogus
+// afterwards; a stack-buffer src is copied and left as it is.
+void UnicodeString::copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT {
+  fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
+  const int16_t copiedFlags = fUnion.fFields.fLengthAndFlags;
+
+  if(copiedFlags & kUsingStackBuffer) {
+    // Short string in the stack buffer: the contents themselves must be copied.
+    // Copying a buffer onto itself should be harmless, but skipping it avoids
+    // "overlap in memcpy" warnings on self assignment.
+    if(this != &src) {
+      uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
+                  getShortLength() * U_SIZEOF_UCHAR);
+    }
+    return;
+  }
+
+  // Every other storage kind: take over the array pointer, the capacity and,
+  // when the length is not encoded in the flags word, the separate length.
+  fUnion.fFields.fArray = src.fUnion.fFields.fArray;
+  fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
+  if(!hasShortLength()) {
+    fUnion.fFields.fLength = src.fUnion.fFields.fLength;
+  }
+
+  if(setSrcToBogus) {
+    // Mark src bogus without releasing any memory.
+    src.fUnion.fFields.fLengthAndFlags = kIsBogus;
+    src.fUnion.fFields.fArray = NULL;
+    src.fUnion.fFields.fCapacity = 0;
+  }
+}
+
+// Exchanges the contents of this string and other by rotating the raw fields
+// through a scratch object; no source is reset between the three copies.
+void UnicodeString::swap(UnicodeString &other) U_NOEXCEPT {
+  // An empty short string, which is known not to need releaseArray().
+  UnicodeString saved;
+  saved.copyFieldsFrom(*this, FALSE);
+  this->copyFieldsFrom(other, FALSE);
+  other.copyFieldsFrom(saved, FALSE);
+  // saved now duplicates other's fields. Reset it to an empty string so that
+  // its destructor does not release other's memory a second time.
+  saved.fUnion.fFields.fLengthAndFlags = kShortString;
+}
+
//========================================
// Miscellaneous operations
//========================================
UnicodeString UnicodeString::unescape() const {
UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
+ if (result.isBogus()) {
+ return result;
+ }
const UChar *array = getBuffer();
int32_t len = length();
int32_t prev = 0;
}
int32_t
-UnicodeString::extract(UChar *dest, int32_t destCapacity,
+UnicodeString::extract(Char16Ptr dest, int32_t destCapacity,
UErrorCode &errorCode) const {
int32_t len = length();
if(U_SUCCESS(errorCode)) {
} else {
const UChar *array = getArrayStart();
if(len>0 && len<=destCapacity && array!=dest) {
- uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR);
+ u_memcpy(dest, array, len);
}
return u_terminateUChars(dest, destCapacity, len, &errorCode);
}
pinIndices(start, len);
const UChar *array = getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer
if(array==NULL) {
- array=fUnion.fStackBuffer; // anything not NULL because that would make an empty string
+ array=fUnion.fStackFields.fBuffer; // anything not NULL because that would make an empty string
len=-2; // bogus result string
}
return UnicodeString(FALSE, array + start, len);
{
releaseArray();
- fShortLength = 0;
+ fUnion.fFields.fLengthAndFlags = kIsBogus;
fUnion.fFields.fArray = 0;
fUnion.fFields.fCapacity = 0;
- fFlags = kIsBogus;
}
// turn a bogus string into an empty one
void
UnicodeString::unBogus() {
- if(fFlags & kIsBogus) {
+ if(fUnion.fFields.fLengthAndFlags & kIsBogus) { // strings that are not bogus are left unchanged
setToEmpty();
}
}
-const UChar *
+const char16_t *
UnicodeString::getTerminatedBuffer() {
if(!isWritable()) {
- return 0;
+ return nullptr;
}
UChar *array = getArrayStart();
int32_t len = length();
if(len < getCapacity()) {
- if(fFlags & kBufferIsReadonly) {
+ if(fUnion.fFields.fLengthAndFlags & kBufferIsReadonly) {
// If len<capacity on a read-only alias, then array[len] is
// either the original NUL (if constructed with (TRUE, s, length))
// or one of the original string contents characters (if later truncated),
if(array[len] == 0) {
return array;
}
- } else if(((fFlags & kRefCounted) == 0 || refCount() == 1)) {
+ } else if(((fUnion.fFields.fLengthAndFlags & kRefCounted) == 0 || refCount() == 1)) {
// kRefCounted: Do not write the NUL if the buffer is shared.
// That is mostly safe, except when the length of one copy was modified
// without copy-on-write, e.g., via truncate(newLength) or remove(void).
return array;
}
}
- if(cloneArrayIfNeeded(len+1)) {
+ if(len<INT32_MAX && cloneArrayIfNeeded(len+1)) {
array = getArrayStart();
array[len] = 0;
return array;
} else {
- return NULL;
+ return nullptr;
}
}
// setTo() analogous to the readonly-aliasing constructor with the same signature
UnicodeString &
UnicodeString::setTo(UBool isTerminated,
- const UChar *text,
+ ConstChar16Ptr textPtr,
int32_t textLength)
{
- if(fFlags & kOpenGetBuffer) {
+ if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
// do not modify a string that has an "open" getBuffer(minCapacity)
return *this;
}
+ const UChar *text = textPtr;
if(text == NULL) {
// treat as an empty string, do not alias
releaseArray();
// text is terminated, or else it would have failed the above test
textLength = u_strlen(text);
}
+ fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
-
- fFlags = kReadonlyAlias;
return *this;
}
UnicodeString::setTo(UChar *buffer,
int32_t buffLength,
int32_t buffCapacity) {
- if(fFlags & kOpenGetBuffer) {
+ if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
// do not modify a string that has an "open" getBuffer(minCapacity)
return *this;
}
releaseArray();
+ fUnion.fFields.fLengthAndFlags = kWritableAlias;
setArray(buffer, buffLength, buffCapacity);
- fFlags = kWritableAlias;
return *this;
}
-UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) {
+UnicodeString &UnicodeString::setToUTF8(StringPiece utf8) {
unBogus();
int32_t length = utf8.length();
int32_t capacity;
UBool isError = FALSE;
U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
// We test isError so that the compiler does not complain that we don't.
- // If isError then _length==0 which turns the doReplace() into a no-op anyway.
- return isError ? *this : doReplace(length(), 0, buffer, 0, _length);
+ // If isError then _length==0 which turns the doAppend() into a no-op anyway.
+ return isError ? *this : doAppend(buffer, 0, _length);
}
UnicodeString&
int32_t srcStart,
int32_t srcLength)
{
- if(!src.isBogus()) {
- // pin the indices to legal values
- src.pinIndices(srcStart, srcLength);
+ // pin the indices to legal values
+ src.pinIndices(srcStart, srcLength);
- // get the characters from src
- // and replace the range in ourselves with them
- return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
- } else {
- // remove the range
- return doReplace(start, length, 0, 0, 0);
- }
+ // get the characters from src
+ // and replace the range in ourselves with them
+ return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
}
UnicodeString&
int32_t oldLength = this->length();
// optimize (read-only alias).remove(0, start) and .remove(start, end)
- if((fFlags&kBufferIsReadonly) && srcLength == 0) {
+ if((fUnion.fFields.fLengthAndFlags&kBufferIsReadonly) && srcLength == 0) {
if(start == 0) {
// remove prefix by adjusting the array pointer
pinIndex(length);
}
}
+ if(start == oldLength) {
+ return doAppend(srcChars, srcStart, srcLength);
+ }
+
if(srcChars == 0) {
- srcStart = srcLength = 0;
- } else if(srcLength < 0) {
- // get the srcLength if necessary
- srcLength = u_strlen(srcChars + srcStart);
+ srcLength = 0;
+ } else {
+ // Perform all remaining operations relative to srcChars + srcStart.
+ // From this point forward, do not use srcStart.
+ srcChars += srcStart;
+ if (srcLength < 0) {
+ // get the srcLength if necessary
+ srcLength = u_strlen(srcChars);
+ }
}
- // calculate the size of the string after the replace
- int32_t newLength;
+ // pin the indices to legal values
+ pinIndices(start, length);
- // optimize append() onto a large-enough, owned string
- if(start >= oldLength) {
- if(srcLength == 0) {
- return *this;
- }
- newLength = oldLength + srcLength;
- if(newLength <= getCapacity() && isBufferWritable()) {
- UChar *oldArray = getArrayStart();
- // Do not copy characters when
- // UChar *buffer=str.getAppendBuffer(...);
- // is followed by
- // str.append(buffer, length);
- // or
- // str.appendString(buffer, length)
- // or similar.
- if(srcChars + srcStart != oldArray + start || start > oldLength) {
- us_arrayCopy(srcChars, srcStart, oldArray, oldLength, srcLength);
- }
- setLength(newLength);
+ // Calculate the size of the string after the replace.
+ // Avoid int32_t overflow.
+ int32_t newLength = oldLength - length;
+ if(srcLength > (INT32_MAX - newLength)) {
+ setToBogus();
+ return *this;
+ }
+ newLength += srcLength;
+
+ // Check for insertion into ourself
+ const UChar *oldArray = getArrayStart();
+ if (isBufferWritable() &&
+ oldArray < srcChars + srcLength &&
+ srcChars < oldArray + oldLength) {
+ // Copy into a new UnicodeString and start over
+ UnicodeString copy(srcChars, srcLength);
+ if (copy.isBogus()) {
+ setToBogus();
return *this;
- } else {
- // pin the indices to legal values
- start = oldLength;
- length = 0;
}
- } else {
- // pin the indices to legal values
- pinIndices(start, length);
-
- newLength = oldLength - length + srcLength;
+ return doReplace(start, length, copy.getArrayStart(), 0, srcLength);
}
- // the following may change fArray but will not copy the current contents;
+ // cloneArrayIfNeeded(doCopyArray=FALSE) may change fArray but will not copy the current contents;
// therefore we need to keep the current fArray
UChar oldStackBuffer[US_STACKBUF_SIZE];
- UChar *oldArray;
- if((fFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
+ if((fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
// copy the stack buffer contents because it will be overwritten with
// fUnion.fFields values
- u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength);
+ u_memcpy(oldStackBuffer, oldArray, oldLength);
oldArray = oldStackBuffer;
- } else {
- oldArray = getArrayStart();
}
// clone our array and allocate a bigger array if needed
int32_t *bufferToDelete = 0;
- if(!cloneArrayIfNeeded(newLength, newLength + (newLength >> 2) + kGrowSize,
+ if(!cloneArrayIfNeeded(newLength, getGrowCapacity(newLength),
FALSE, &bufferToDelete)
) {
return *this;
}
// now fill in the hole with the new string
- us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);
+ us_arrayCopy(srcChars, 0, newArray, start, srcLength);
setLength(newLength);
return *this;
}
+// Versions of doReplace() only for append() variants.
+// doReplace() and doAppend() optimize for different cases.
+
+// Appends the range [srcStart, srcStart + srcLength) of src to this string.
+// A zero srcLength returns immediately; otherwise the range is pinned to
+// src's bounds and the work is handed to the UChar-pointer overload.
+UnicodeString&
+UnicodeString::doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength) {
+  if(srcLength != 0) {
+    // Clamp the requested range to legal values before using it.
+    src.pinIndices(srcStart, srcLength);
+    return doAppend(src.getArrayStart(), srcStart, srcLength);
+  }
+  return *this;
+}
+
+// Appends srcLength code units, starting at srcChars[srcStart], to this string.
+// A negative srcLength means the source is NUL-terminated; its length is then
+// determined with u_strlen().
+// Nothing happens if this string is not writable, srcChars is NULL, or there
+// is nothing to append.
+// The string is set to bogus if the combined length would exceed INT32_MAX, or
+// if the temporary copy needed for an overlapping source cannot be allocated.
+// Returns *this.
+UnicodeString&
+UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLength) {
+  if(!isWritable() || srcLength == 0 || srcChars == NULL) {
+    return *this;
+  }
+
+  // Perform all remaining operations relative to srcChars + srcStart.
+  // From this point forward, do not use srcStart.
+  srcChars += srcStart;
+
+  if(srcLength < 0) {
+    // get the srcLength if necessary
+    if((srcLength = u_strlen(srcChars)) == 0) {
+      return *this;
+    }
+  }
+
+  int32_t oldLength = length();
+
+  // Calculate the size of the string after the append.
+  // Avoid int32_t overflow, the same way doReplace() does: an unchecked
+  // oldLength + srcLength could wrap to a negative newLength, which would
+  // pass the capacity test below and write past the end of the buffer.
+  if(srcLength > (INT32_MAX - oldLength)) {
+    setToBogus();
+    return *this;
+  }
+  int32_t newLength = oldLength + srcLength;
+
+  // Check for append onto ourself
+  const UChar* oldArray = getArrayStart();
+  if (isBufferWritable() &&
+      oldArray < srcChars + srcLength &&
+      srcChars < oldArray + oldLength) {
+    // The source overlaps our own contents, which a reallocation could move.
+    // Copy into a new UnicodeString and start over
+    UnicodeString copy(srcChars, srcLength);
+    if (copy.isBogus()) {
+      setToBogus();
+      return *this;
+    }
+    return doAppend(copy.getArrayStart(), 0, srcLength);
+  }
+
+  // optimize append() onto a large-enough, owned string
+  if((newLength <= getCapacity() && isBufferWritable()) ||
+      cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) {
+    UChar *newArray = getArrayStart();
+    // Do not copy characters when
+    //   UChar *buffer=str.getAppendBuffer(...);
+    // is followed by
+    //   str.append(buffer, length);
+    // or
+    //   str.appendString(buffer, length)
+    // or similar.
+    if(srcChars != newArray + oldLength) {
+      us_arrayCopy(srcChars, 0, newArray, oldLength, srcLength);
+    }
+    setLength(newLength);
+  }
+  return *this;
+}
+
/**
* Replaceable API
*/
// External Buffer
//========================================
-UChar *
+char16_t *
UnicodeString::getBuffer(int32_t minCapacity) {
if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
- fFlags|=kOpenGetBuffer;
- fShortLength=0;
+ fUnion.fFields.fLengthAndFlags|=kOpenGetBuffer; // marks the buffer as open
+ setZeroLength(); // replaces the former fShortLength=0
return getArrayStart();
} else {
- return 0;
+ return nullptr; // minCapacity < -1, or cloneArrayIfNeeded() failed
}
}
void
UnicodeString::releaseBuffer(int32_t newLength) {
- if(fFlags&kOpenGetBuffer && newLength>=-1) {
+ if(fUnion.fFields.fLengthAndFlags&kOpenGetBuffer && newLength>=-1) {
// set the new fLength
int32_t capacity=getCapacity();
if(newLength==-1) {
newLength=capacity;
}
setLength(newLength);
- fFlags&=~kOpenGetBuffer;
+ fUnion.fFields.fLengthAndFlags&=~kOpenGetBuffer;
}
}
* Return FALSE if memory could not be allocated.
*/
if(forceClone ||
- fFlags & kBufferIsReadonly ||
- (fFlags & kRefCounted && refCount() > 1) ||
+ fUnion.fFields.fLengthAndFlags & kBufferIsReadonly ||
+ (fUnion.fFields.fLengthAndFlags & kRefCounted && refCount() > 1) ||
newCapacity > getCapacity()
) {
// check growCapacity for default value and use of the stack buffer
// save old values
UChar oldStackBuffer[US_STACKBUF_SIZE];
UChar *oldArray;
- uint8_t flags = fFlags;
+ int32_t oldLength = length();
+ int16_t flags = fUnion.fFields.fLengthAndFlags;
if(flags&kUsingStackBuffer) {
U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */
if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
// copy the stack buffer contents because it will be overwritten with
// fUnion.fFields values
- us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength);
+ us_arrayCopy(fUnion.fStackFields.fBuffer, 0, oldStackBuffer, 0, oldLength);
oldArray = oldStackBuffer;
} else {
- oldArray = 0; // no need to copy from stack buffer to itself
+ oldArray = NULL; // no need to copy from the stack buffer to itself
}
} else {
oldArray = fUnion.fFields.fArray;
if(allocate(growCapacity) ||
(newCapacity < growCapacity && allocate(newCapacity))
) {
- if(doCopyArray && oldArray != 0) {
+ if(doCopyArray) {
// copy the contents
// do not copy more than what fits - it may be smaller than before
- int32_t minLength = length();
+ int32_t minLength = oldLength;
newCapacity = getCapacity();
if(newCapacity < minLength) {
minLength = newCapacity;
- setLength(minLength);
}
- us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
+ if(oldArray != NULL) {
+ us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
+ }
+ setLength(minLength);
} else {
- fShortLength = 0;
+ setZeroLength();
}
// release the old array
if(!(flags&kUsingStackBuffer)) {
fUnion.fFields.fArray = oldArray;
}
- fFlags = flags;
+ fUnion.fFields.fLengthAndFlags = flags;
setToBogus();
return FALSE;
}
UBool
UnicodeStringAppendable::appendCodeUnit(UChar c) {
- return str.doReplace(str.length(), 0, &c, 0, 1).isWritable();
+ return str.doAppend(&c, 0, 1).isWritable();
}
UBool
int32_t cLength = 0;
UBool isError = FALSE;
U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);
- return !isError && str.doReplace(str.length(), 0, buffer, 0, cLength).isWritable();
+ return !isError && str.doAppend(buffer, 0, cLength).isWritable();
}
UBool
UnicodeStringAppendable::appendString(const UChar *s, int32_t length) {
- return str.doReplace(str.length(), 0, s, 0, length).isWritable();
+ return str.doAppend(s, 0, length).isWritable();
}
UBool
return NULL;
}
int32_t oldLength = str.length();
- if(str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
+ if(minCapacity <= (kMaxCapacity - oldLength) &&
+ desiredCapacityHint <= (kMaxCapacity - oldLength) &&
+ str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
*resultCapacity = str.getCapacity() - oldLength;
return str.getArrayStart() + oldLength;
}