X-Git-Url: https://git.saurik.com/apple/javascriptcore.git/blobdiff_plain/ba379fdc102753d6be2c4d937058fe40257329fe..1981f5dfe8d77d97469d20652f712a09400c48ed:/runtime/UString.cpp?ds=sidebyside diff --git a/runtime/UString.cpp b/runtime/UString.cpp index e1e51df..5b1e9a0 100644 --- a/runtime/UString.cpp +++ b/runtime/UString.cpp @@ -25,25 +25,22 @@ #include "UString.h" #include "JSGlobalObjectFunctions.h" -#include "Collector.h" -#include "dtoa.h" +#include "Heap.h" #include "Identifier.h" #include "Operations.h" #include -#include #include -#include +#include #include #include #include #include #include +#include #include +#include #include -#if HAVE(STRING_H) -#include -#endif #if HAVE(STRINGS_H) #include #endif @@ -52,781 +49,63 @@ using namespace WTF; using namespace WTF::Unicode; using namespace std; -// This can be tuned differently per platform by putting platform #ifs right here. -// If you don't define this macro at all, then copyChars will just call directly -// to memcpy. -#define USTRING_COPY_CHARS_INLINE_CUTOFF 20 - namespace JSC { - -extern const double NaN; -extern const double Inf; - -// This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings. -static const int minLengthToShare = 10; - -static inline size_t overflowIndicator() { return std::numeric_limits::max(); } -static inline size_t maxUChars() { return std::numeric_limits::max() / sizeof(UChar); } -static inline UChar* allocChars(size_t length) -{ - ASSERT(length); - if (length > maxUChars()) - return 0; - return static_cast(tryFastMalloc(sizeof(UChar) * length)); -} +COMPILE_ASSERT(sizeof(UString) == sizeof(void*), UString_should_stay_small); -static inline UChar* reallocChars(UChar* buffer, size_t length) +// Construct a string with UTF-16 data. +UString::UString(const UChar* characters, unsigned length) + : m_impl(characters ? StringImpl::create(characters, length) : 0) { - ASSERT(length); - if (length > maxUChars()) - return 0; - return static_cast(tryFastRealloc(buffer, sizeof(UChar) * length)); } -static inline void copyChars(UChar* destination, const UChar* source, unsigned numCharacters) +// Construct a string with UTF-16 data, from a null-terminated source. +UString::UString(const UChar* characters) { -#ifdef USTRING_COPY_CHARS_INLINE_CUTOFF - if (numCharacters <= USTRING_COPY_CHARS_INLINE_CUTOFF) { - for (unsigned i = 0; i < numCharacters; ++i) - destination[i] = source[i]; + if (!characters) return; - } -#endif - memcpy(destination, source, numCharacters * sizeof(UChar)); -} -COMPILE_ASSERT(sizeof(UChar) == 2, uchar_is_2_bytes); + int length = 0; + while (characters[length] != UChar(0)) + ++length; -CString::CString(const char* c) - : m_length(strlen(c)) - , m_data(new char[m_length + 1]) -{ - memcpy(m_data, c, m_length + 1); + m_impl = StringImpl::create(characters, length); } -CString::CString(const char* c, size_t length) - : m_length(length) - , m_data(new char[length + 1]) +// Construct a string with latin1 data. +UString::UString(const LChar* characters, unsigned length) + : m_impl(characters ? StringImpl::create(characters, length) : 0) { - memcpy(m_data, c, m_length); - m_data[m_length] = 0; } -CString::CString(const CString& b) +UString::UString(const char* characters, unsigned length) + : m_impl(characters ? StringImpl::create(reinterpret_cast(characters), length) : 0) { - m_length = b.m_length; - if (b.m_data) { - m_data = new char[m_length + 1]; - memcpy(m_data, b.m_data, m_length + 1); - } else - m_data = 0; } -CString::~CString() +// Construct a string with latin1 data, from a null-terminated source. +UString::UString(const LChar* characters) + : m_impl(characters ? StringImpl::create(characters) : 0) { - delete [] m_data; } -CString CString::adopt(char* c, size_t length) +UString::UString(const char* characters) + : m_impl(characters ? StringImpl::create(reinterpret_cast(characters)) : 0) { - CString s; - s.m_data = c; - s.m_length = length; - return s; -} - -CString& CString::append(const CString& t) -{ - char* n; - n = new char[m_length + t.m_length + 1]; - if (m_length) - memcpy(n, m_data, m_length); - if (t.m_length) - memcpy(n + m_length, t.m_data, t.m_length); - m_length += t.m_length; - n[m_length] = 0; - - delete [] m_data; - m_data = n; - - return *this; -} - -CString& CString::operator=(const char* c) -{ - if (m_data) - delete [] m_data; - m_length = strlen(c); - m_data = new char[m_length + 1]; - memcpy(m_data, c, m_length + 1); - - return *this; -} - -CString& CString::operator=(const CString& str) -{ - if (this == &str) - return *this; - - if (m_data) - delete [] m_data; - m_length = str.m_length; - if (str.m_data) { - m_data = new char[m_length + 1]; - memcpy(m_data, str.m_data, m_length + 1); - } else - m_data = 0; - - return *this; -} - -bool operator==(const CString& c1, const CString& c2) -{ - size_t len = c1.size(); - return len == c2.size() && (len == 0 || memcmp(c1.c_str(), c2.c_str(), len) == 0); -} - -// These static strings are immutable, except for rc, whose initial value is chosen to -// reduce the possibility of it becoming zero due to ref/deref not being thread-safe. -static UChar sharedEmptyChar; -UString::BaseString* UString::Rep::nullBaseString; -UString::BaseString* UString::Rep::emptyBaseString; -UString* UString::nullUString; - -static void initializeStaticBaseString(UString::BaseString& base) -{ - base.rc = INT_MAX / 2; - base.m_identifierTableAndFlags.setFlag(UString::Rep::StaticFlag); - base.checkConsistency(); -} - -void initializeUString() -{ - UString::Rep::nullBaseString = new UString::BaseString(0, 0); - initializeStaticBaseString(*UString::Rep::nullBaseString); - - UString::Rep::emptyBaseString = new UString::BaseString(&sharedEmptyChar, 0); - initializeStaticBaseString(*UString::Rep::emptyBaseString); - - UString::nullUString = new UString; -} - -static char* statBuffer = 0; // Only used for debugging via UString::ascii(). - -PassRefPtr UString::Rep::createCopying(const UChar* d, int l) -{ - UChar* copyD = static_cast(fastMalloc(l * sizeof(UChar))); - copyChars(copyD, d, l); - return create(copyD, l); -} - -PassRefPtr UString::Rep::createFromUTF8(const char* string) -{ - if (!string) - return &UString::Rep::null(); - - size_t length = strlen(string); - Vector buffer(length); - UChar* p = buffer.data(); - if (conversionOK != convertUTF8ToUTF16(&string, string + length, &p, p + length)) - return &UString::Rep::null(); - - return UString::Rep::createCopying(buffer.data(), p - buffer.data()); -} - -PassRefPtr UString::Rep::create(UChar* string, int length, PassRefPtr sharedBuffer) -{ - PassRefPtr rep = create(string, length); - rep->baseString()->setSharedBuffer(sharedBuffer); - rep->checkConsistency(); - return rep; -} - -UString::SharedUChar* UString::Rep::sharedBuffer() -{ - UString::BaseString* base = baseString(); - if (len < minLengthToShare) - return 0; - - return base->sharedBuffer(); -} - -void UString::Rep::destroy() -{ - checkConsistency(); - - // Static null and empty strings can never be destroyed, but we cannot rely on - // reference counting, because ref/deref are not thread-safe. - if (!isStatic()) { - if (identifierTable()) - Identifier::remove(this); - - UString::BaseString* base = baseString(); - if (base == this) { - if (m_sharedBuffer) - m_sharedBuffer->deref(); - else - fastFree(base->buf); - } else - base->deref(); - - delete this; - } -} - -// Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's -// or anything like that. -const unsigned PHI = 0x9e3779b9U; - -// Paul Hsieh's SuperFastHash -// http://www.azillionmonkeys.com/qed/hash.html -unsigned UString::Rep::computeHash(const UChar* s, int len) -{ - unsigned l = len; - uint32_t hash = PHI; - uint32_t tmp; - - int rem = l & 1; - l >>= 1; - - // Main loop - for (; l > 0; l--) { - hash += s[0]; - tmp = (s[1] << 11) ^ hash; - hash = (hash << 16) ^ tmp; - s += 2; - hash += hash >> 11; - } - - // Handle end case - if (rem) { - hash += s[0]; - hash ^= hash << 11; - hash += hash >> 17; - } - - // Force "avalanching" of final 127 bits - hash ^= hash << 3; - hash += hash >> 5; - hash ^= hash << 2; - hash += hash >> 15; - hash ^= hash << 10; - - // this avoids ever returning a hash code of 0, since that is used to - // signal "hash not computed yet", using a value that is likely to be - // effectively the same as 0 when the low bits are masked - if (hash == 0) - hash = 0x80000000; - - return hash; -} - -// Paul Hsieh's SuperFastHash -// http://www.azillionmonkeys.com/qed/hash.html -unsigned UString::Rep::computeHash(const char* s, int l) -{ - // This hash is designed to work on 16-bit chunks at a time. But since the normal case - // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they - // were 16-bit chunks, which should give matching results - - uint32_t hash = PHI; - uint32_t tmp; - - size_t rem = l & 1; - l >>= 1; - - // Main loop - for (; l > 0; l--) { - hash += static_cast(s[0]); - tmp = (static_cast(s[1]) << 11) ^ hash; - hash = (hash << 16) ^ tmp; - s += 2; - hash += hash >> 11; - } - - // Handle end case - if (rem) { - hash += static_cast(s[0]); - hash ^= hash << 11; - hash += hash >> 17; - } - - // Force "avalanching" of final 127 bits - hash ^= hash << 3; - hash += hash >> 5; - hash ^= hash << 2; - hash += hash >> 15; - hash ^= hash << 10; - - // this avoids ever returning a hash code of 0, since that is used to - // signal "hash not computed yet", using a value that is likely to be - // effectively the same as 0 when the low bits are masked - if (hash == 0) - hash = 0x80000000; - - return hash; -} - -#ifndef NDEBUG -void UString::Rep::checkConsistency() const -{ - const UString::BaseString* base = baseString(); - - // There is no recursion for base strings. - ASSERT(base == base->baseString()); - - if (isStatic()) { - // There are only two static strings: null and empty. - ASSERT(!len); - - // Static strings cannot get in identifier tables, because they are globally shared. - ASSERT(!identifierTable()); - } - - // The string fits in buffer. - ASSERT(base->usedPreCapacity <= base->preCapacity); - ASSERT(base->usedCapacity <= base->capacity); - ASSERT(-offset <= base->usedPreCapacity); - ASSERT(offset + len <= base->usedCapacity); -} -#endif - -UString::SharedUChar* UString::BaseString::sharedBuffer() -{ - if (!m_sharedBuffer) - setSharedBuffer(SharedUChar::create(new OwnFastMallocPtr(buf))); - return m_sharedBuffer; -} - -void UString::BaseString::setSharedBuffer(PassRefPtr sharedBuffer) -{ - // The manual steps below are because m_sharedBuffer can't be a RefPtr. m_sharedBuffer - // is in a union with another variable to avoid making BaseString any larger. - if (m_sharedBuffer) - m_sharedBuffer->deref(); - m_sharedBuffer = sharedBuffer.releaseRef(); -} - -bool UString::BaseString::slowIsBufferReadOnly() -{ - // The buffer may not be modified as soon as the underlying data has been shared with another class. - if (m_sharedBuffer->isShared()) - return true; - - // At this point, we know it that the underlying buffer isn't shared outside of this base class, - // so get rid of m_sharedBuffer. - OwnPtr > mallocPtr(m_sharedBuffer->release()); - UChar* unsharedBuf = const_cast(mallocPtr->release()); - setSharedBuffer(0); - preCapacity += (buf - unsharedBuf); - buf = unsharedBuf; - return false; -} - -// Put these early so they can be inlined. -static inline size_t expandedSize(size_t capacitySize, size_t precapacitySize) -{ - // Combine capacitySize & precapacitySize to produce a single size to allocate, - // check that doing so does not result in overflow. - size_t size = capacitySize + precapacitySize; - if (size < capacitySize) - return overflowIndicator(); - - // Small Strings (up to 4 pages): - // Expand the allocation size to 112.5% of the amount requested. This is largely sicking - // to our previous policy, however 112.5% is cheaper to calculate. - if (size < 0x4000) { - size_t expandedSize = ((size + (size >> 3)) | 15) + 1; - // Given the limited range within which we calculate the expansion in this - // fashion the above calculation should never overflow. - ASSERT(expandedSize >= size); - ASSERT(expandedSize < maxUChars()); - return expandedSize; - } - - // Medium Strings (up to 128 pages): - // For pages covering multiple pages over-allocation is less of a concern - any unused - // space will not be paged in if it is not used, so this is purely a VM overhead. For - // these strings allocate 2x the requested size. - if (size < 0x80000) { - size_t expandedSize = ((size + size) | 0xfff) + 1; - // Given the limited range within which we calculate the expansion in this - // fashion the above calculation should never overflow. - ASSERT(expandedSize >= size); - ASSERT(expandedSize < maxUChars()); - return expandedSize; - } - - // Large Strings (to infinity and beyond!): - // Revert to our 112.5% policy - probably best to limit the amount of unused VM we allow - // any individual string be responsible for. - size_t expandedSize = ((size + (size >> 3)) | 0xfff) + 1; - - // Check for overflow - any result that is at least as large as requested (but - // still below the limit) is okay. - if ((expandedSize >= size) && (expandedSize < maxUChars())) - return expandedSize; - return overflowIndicator(); } -static inline bool expandCapacity(UString::Rep* rep, int requiredLength) +UString UString::number(int i) { - rep->checkConsistency(); - ASSERT(!rep->baseString()->isBufferReadOnly()); + LChar buf[1 + sizeof(i) * 3]; + LChar* end = buf + WTF_ARRAY_LENGTH(buf); + LChar* p = end; - UString::BaseString* base = rep->baseString(); - - if (requiredLength > base->capacity) { - size_t newCapacity = expandedSize(requiredLength, base->preCapacity); - UChar* oldBuf = base->buf; - base->buf = reallocChars(base->buf, newCapacity); - if (!base->buf) { - base->buf = oldBuf; - return false; - } - base->capacity = newCapacity - base->preCapacity; - } - if (requiredLength > base->usedCapacity) - base->usedCapacity = requiredLength; - - rep->checkConsistency(); - return true; -} - -bool UString::Rep::reserveCapacity(int capacity) -{ - // If this is an empty string there is no point 'growing' it - just allocate a new one. - // If the BaseString is shared with another string that is using more capacity than this - // string is, then growing the buffer won't help. - // If the BaseString's buffer is readonly, then it isn't allowed to grow. - UString::BaseString* base = baseString(); - if (!base->buf || !base->capacity || (offset + len) != base->usedCapacity || base->isBufferReadOnly()) - return false; - - // If there is already sufficient capacity, no need to grow! - if (capacity <= base->capacity) - return true; - - checkConsistency(); - - size_t newCapacity = expandedSize(capacity, base->preCapacity); - UChar* oldBuf = base->buf; - base->buf = reallocChars(base->buf, newCapacity); - if (!base->buf) { - base->buf = oldBuf; - return false; - } - base->capacity = newCapacity - base->preCapacity; - - checkConsistency(); - return true; -} - -void UString::expandCapacity(int requiredLength) -{ - if (!JSC::expandCapacity(m_rep.get(), requiredLength)) - makeNull(); -} - -void UString::expandPreCapacity(int requiredPreCap) -{ - m_rep->checkConsistency(); - ASSERT(!m_rep->baseString()->isBufferReadOnly()); - - BaseString* base = m_rep->baseString(); - - if (requiredPreCap > base->preCapacity) { - size_t newCapacity = expandedSize(requiredPreCap, base->capacity); - int delta = newCapacity - base->capacity - base->preCapacity; - - UChar* newBuf = allocChars(newCapacity); - if (!newBuf) { - makeNull(); - return; - } - copyChars(newBuf + delta, base->buf, base->capacity + base->preCapacity); - fastFree(base->buf); - base->buf = newBuf; - - base->preCapacity = newCapacity - base->capacity; - } - if (requiredPreCap > base->usedPreCapacity) - base->usedPreCapacity = requiredPreCap; - - m_rep->checkConsistency(); -} - -static PassRefPtr createRep(const char* c) -{ - if (!c) - return &UString::Rep::null(); - - if (!c[0]) - return &UString::Rep::empty(); - - size_t length = strlen(c); - UChar* d = allocChars(length); - if (!d) - return &UString::Rep::null(); - else { - for (size_t i = 0; i < length; i++) - d[i] = static_cast(c[i]); // use unsigned char to zero-extend instead of sign-extend - return UString::Rep::create(d, static_cast(length)); - } - -} - -UString::UString(const char* c) - : m_rep(createRep(c)) -{ -} - -UString::UString(const UChar* c, int length) -{ - if (length == 0) - m_rep = &Rep::empty(); - else - m_rep = Rep::createCopying(c, length); -} - -UString::UString(UChar* c, int length, bool copy) -{ - if (length == 0) - m_rep = &Rep::empty(); - else if (copy) - m_rep = Rep::createCopying(c, length); - else - m_rep = Rep::create(c, length); -} - -UString::UString(const Vector& buffer) -{ - if (!buffer.size()) - m_rep = &Rep::empty(); - else - m_rep = Rep::createCopying(buffer.data(), buffer.size()); -} - -static ALWAYS_INLINE int newCapacityWithOverflowCheck(const int currentCapacity, const int extendLength, const bool plusOne = false) -{ - ASSERT_WITH_MESSAGE(extendLength >= 0, "extendedLength = %d", extendLength); - - const int plusLength = plusOne ? 1 : 0; - if (currentCapacity > std::numeric_limits::max() - extendLength - plusLength) - CRASH(); - - return currentCapacity + extendLength + plusLength; -} - -static ALWAYS_INLINE PassRefPtr concatenate(PassRefPtr r, const UChar* tData, int tSize) -{ - RefPtr rep = r; - - rep->checkConsistency(); - - int thisSize = rep->size(); - int thisOffset = rep->offset; - int length = thisSize + tSize; - UString::BaseString* base = rep->baseString(); - - // possible cases: - if (tSize == 0) { - // t is empty - } else if (thisSize == 0) { - // this is empty - rep = UString::Rep::createCopying(tData, tSize); - } else if (rep == base && !base->isShared()) { - // this is direct and has refcount of 1 (so we can just alter it directly) - if (!expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length))) - rep = &UString::Rep::null(); - if (rep->data()) { - copyChars(rep->data() + thisSize, tData, tSize); - rep->len = length; - rep->_hash = 0; - } - } else if (thisOffset + thisSize == base->usedCapacity && thisSize >= minShareSize && !base->isBufferReadOnly()) { - // this reaches the end of the buffer - extend it if it's long enough to append to - if (!expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length))) - rep = &UString::Rep::null(); - if (rep->data()) { - copyChars(rep->data() + thisSize, tData, tSize); - rep = UString::Rep::create(rep, 0, length); - } - } else { - // This is shared in some way that prevents us from modifying base, so we must make a whole new string. - size_t newCapacity = expandedSize(length, 0); - UChar* d = allocChars(newCapacity); - if (!d) - rep = &UString::Rep::null(); - else { - copyChars(d, rep->data(), thisSize); - copyChars(d + thisSize, tData, tSize); - rep = UString::Rep::create(d, length); - rep->baseString()->capacity = newCapacity; - } - } - - rep->checkConsistency(); - - return rep.release(); -} - -static ALWAYS_INLINE PassRefPtr concatenate(PassRefPtr r, const char* t) -{ - RefPtr rep = r; - - rep->checkConsistency(); - - int thisSize = rep->size(); - int thisOffset = rep->offset; - int tSize = static_cast(strlen(t)); - int length = thisSize + tSize; - UString::BaseString* base = rep->baseString(); - - // possible cases: - if (thisSize == 0) { - // this is empty - rep = createRep(t); - } else if (tSize == 0) { - // t is empty, we'll just return *this below. - } else if (rep == base && !base->isShared()) { - // this is direct and has refcount of 1 (so we can just alter it directly) - expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length)); - UChar* d = rep->data(); - if (d) { - for (int i = 0; i < tSize; ++i) - d[thisSize + i] = static_cast(t[i]); // use unsigned char to zero-extend instead of sign-extend - rep->len = length; - rep->_hash = 0; - } - } else if (thisOffset + thisSize == base->usedCapacity && thisSize >= minShareSize && !base->isBufferReadOnly()) { - // this string reaches the end of the buffer - extend it - expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length)); - UChar* d = rep->data(); - if (d) { - for (int i = 0; i < tSize; ++i) - d[thisSize + i] = static_cast(t[i]); // use unsigned char to zero-extend instead of sign-extend - rep = UString::Rep::create(rep, 0, length); - } - } else { - // This is shared in some way that prevents us from modifying base, so we must make a whole new string. - size_t newCapacity = expandedSize(length, 0); - UChar* d = allocChars(newCapacity); - if (!d) - rep = &UString::Rep::null(); - else { - copyChars(d, rep->data(), thisSize); - for (int i = 0; i < tSize; ++i) - d[thisSize + i] = static_cast(t[i]); // use unsigned char to zero-extend instead of sign-extend - rep = UString::Rep::create(d, length); - rep->baseString()->capacity = newCapacity; - } - } - - rep->checkConsistency(); - - return rep.release(); -} - -PassRefPtr concatenate(UString::Rep* a, UString::Rep* b) -{ - a->checkConsistency(); - b->checkConsistency(); - - int aSize = a->size(); - int bSize = b->size(); - int aOffset = a->offset; - - // possible cases: - - UString::BaseString* aBase = a->baseString(); - if (bSize == 1 && aOffset + aSize == aBase->usedCapacity && aOffset + aSize < aBase->capacity && !aBase->isBufferReadOnly()) { - // b is a single character (common fast case) - ++aBase->usedCapacity; - a->data()[aSize] = b->data()[0]; - return UString::Rep::create(a, 0, aSize + 1); - } - - // a is empty - if (aSize == 0) - return b; - // b is empty - if (bSize == 0) - return a; - - int bOffset = b->offset; - int length = aSize + bSize; - - UString::BaseString* bBase = b->baseString(); - if (aOffset + aSize == aBase->usedCapacity && aSize >= minShareSize && 4 * aSize >= bSize - && (-bOffset != bBase->usedPreCapacity || aSize >= bSize) && !aBase->isBufferReadOnly()) { - // - a reaches the end of its buffer so it qualifies for shared append - // - also, it's at least a quarter the length of b - appending to a much shorter - // string does more harm than good - // - however, if b qualifies for prepend and is longer than a, we'd rather prepend - - UString x(a); - x.expandCapacity(newCapacityWithOverflowCheck(aOffset, length)); - if (!a->data() || !x.data()) - return 0; - copyChars(a->data() + aSize, b->data(), bSize); - PassRefPtr result = UString::Rep::create(a, 0, length); - - a->checkConsistency(); - b->checkConsistency(); - result->checkConsistency(); - - return result; - } - - if (-bOffset == bBase->usedPreCapacity && bSize >= minShareSize && 4 * bSize >= aSize && !bBase->isBufferReadOnly()) { - // - b reaches the beginning of its buffer so it qualifies for shared prepend - // - also, it's at least a quarter the length of a - prepending to a much shorter - // string does more harm than good - UString y(b); - y.expandPreCapacity(-bOffset + aSize); - if (!b->data() || !y.data()) - return 0; - copyChars(b->data() - aSize, a->data(), aSize); - PassRefPtr result = UString::Rep::create(b, -aSize, length); - - a->checkConsistency(); - b->checkConsistency(); - result->checkConsistency(); - - return result; - } - - // a does not qualify for append, and b does not qualify for prepend, gotta make a whole new string - size_t newCapacity = expandedSize(length, 0); - UChar* d = allocChars(newCapacity); - if (!d) - return 0; - copyChars(d, a->data(), aSize); - copyChars(d + aSize, b->data(), bSize); - PassRefPtr result = UString::Rep::create(d, length); - result->baseString()->capacity = newCapacity; - - a->checkConsistency(); - b->checkConsistency(); - result->checkConsistency(); - - return result; -} - -PassRefPtr concatenate(UString::Rep* rep, int i) -{ - UChar buf[1 + sizeof(i) * 3]; - UChar* end = buf + sizeof(buf) / sizeof(UChar); - UChar* p = end; - if (i == 0) *--p = '0'; else if (i == INT_MIN) { char minBuf[1 + sizeof(i) * 3]; - sprintf(minBuf, "%d", INT_MIN); - return concatenate(rep, minBuf); + snprintf(minBuf, sizeof(minBuf), "%d", INT_MIN); + return UString(minBuf); } else { bool negative = false; if (i < 0) { @@ -841,89 +120,24 @@ PassRefPtr concatenate(UString::Rep* rep, int i) *--p = '-'; } - return concatenate(rep, p, static_cast(end - p)); - + return UString(p, static_cast(end - p)); } -PassRefPtr concatenate(UString::Rep* rep, double d) +UString UString::number(long long i) { - // avoid ever printing -NaN, in JS conceptually there is only one NaN value - if (isnan(d)) - return concatenate(rep, "NaN"); - - if (d == 0.0) // stringify -0 as 0 - d = 0.0; - - char buf[80]; - int decimalPoint; - int sign; - - char result[80]; - WTF::dtoa(result, d, 0, &decimalPoint, &sign, NULL); - int length = static_cast(strlen(result)); - - int i = 0; - if (sign) - buf[i++] = '-'; - - if (decimalPoint <= 0 && decimalPoint > -6) { - buf[i++] = '0'; - buf[i++] = '.'; - for (int j = decimalPoint; j < 0; j++) - buf[i++] = '0'; - strcpy(buf + i, result); - } else if (decimalPoint <= 21 && decimalPoint > 0) { - if (length <= decimalPoint) { - strcpy(buf + i, result); - i += length; - for (int j = 0; j < decimalPoint - length; j++) - buf[i++] = '0'; - buf[i] = '\0'; - } else { - strncpy(buf + i, result, decimalPoint); - i += decimalPoint; - buf[i++] = '.'; - strcpy(buf + i, result + decimalPoint); - } - } else if (result[0] < '0' || result[0] > '9') - strcpy(buf + i, result); - else { - buf[i++] = result[0]; - if (length > 1) { - buf[i++] = '.'; - strcpy(buf + i, result + 1); - i += length - 1; - } - - buf[i++] = 'e'; - buf[i++] = (decimalPoint >= 0) ? '+' : '-'; - // decimalPoint can't be more than 3 digits decimal given the - // nature of float representation - int exponential = decimalPoint - 1; - if (exponential < 0) - exponential = -exponential; - if (exponential >= 100) - buf[i++] = static_cast('0' + exponential / 100); - if (exponential >= 10) - buf[i++] = static_cast('0' + (exponential % 100) / 10); - buf[i++] = static_cast('0' + exponential % 10); - buf[i++] = '\0'; - } - - return concatenate(rep, buf); -} + LChar buf[1 + sizeof(i) * 3]; + LChar* end = buf + WTF_ARRAY_LENGTH(buf); + LChar* p = end; -UString UString::from(int i) -{ - UChar buf[1 + sizeof(i) * 3]; - UChar* end = buf + sizeof(buf) / sizeof(UChar); - UChar* p = end; - if (i == 0) *--p = '0'; - else if (i == INT_MIN) { + else if (i == std::numeric_limits::min()) { char minBuf[1 + sizeof(i) * 3]; - snprintf(minBuf, 1 + sizeof(i) * 3, "%d", INT_MIN); +#if OS(WINDOWS) + snprintf(minBuf, sizeof(minBuf), "%I64d", std::numeric_limits::min()); +#else + snprintf(minBuf, sizeof(minBuf), "%lld", std::numeric_limits::min()); +#endif return UString(minBuf); } else { bool negative = false; @@ -939,15 +153,15 @@ UString UString::from(int i) *--p = '-'; } - return UString(p, static_cast(end - p)); + return UString(p, static_cast(end - p)); } -UString UString::from(unsigned int u) +UString UString::number(unsigned u) { - UChar buf[sizeof(u) * 3]; - UChar* end = buf + sizeof(buf) / sizeof(UChar); - UChar* p = end; - + LChar buf[sizeof(u) * 3]; + LChar* end = buf + WTF_ARRAY_LENGTH(buf); + LChar* p = end; + if (u == 0) *--p = '0'; else { @@ -956,21 +170,21 @@ UString UString::from(unsigned int u) u /= 10; } } - - return UString(p, static_cast(end - p)); + + return UString(p, static_cast(end - p)); } -UString UString::from(long l) +UString UString::number(long l) { - UChar buf[1 + sizeof(l) * 3]; - UChar* end = buf + sizeof(buf) / sizeof(UChar); - UChar* p = end; + LChar buf[1 + sizeof(l) * 3]; + LChar* end = buf + WTF_ARRAY_LENGTH(buf); + LChar* p = end; if (l == 0) *--p = '0'; else if (l == LONG_MIN) { char minBuf[1 + sizeof(l) * 3]; - snprintf(minBuf, 1 + sizeof(l) * 3, "%ld", LONG_MIN); + snprintf(minBuf, sizeof(minBuf), "%ld", LONG_MIN); return UString(minBuf); } else { bool negative = false; @@ -986,766 +200,276 @@ UString UString::from(long l) *--p = '-'; } - return UString(p, static_cast(end - p)); + return UString(p, end - p); } -UString UString::from(double d) +UString UString::number(double d) { - // avoid ever printing -NaN, in JS conceptually there is only one NaN value - if (isnan(d)) - return "NaN"; - - char buf[80]; - int decimalPoint; - int sign; - - char result[80]; - WTF::dtoa(result, d, 0, &decimalPoint, &sign, NULL); - int length = static_cast(strlen(result)); - - int i = 0; - if (sign) - buf[i++] = '-'; - - if (decimalPoint <= 0 && decimalPoint > -6) { - buf[i++] = '0'; - buf[i++] = '.'; - for (int j = decimalPoint; j < 0; j++) - buf[i++] = '0'; - strlcpy(buf + i, result, sizeof(buf) - i); - } else if (decimalPoint <= 21 && decimalPoint > 0) { - if (length <= decimalPoint) { - strlcpy(buf + i, result, sizeof(buf) - i); - i += length; - for (int j = 0; j < decimalPoint - length; j++) - buf[i++] = '0'; - buf[i] = '\0'; - } else { - int len = (decimalPoint <= static_cast(sizeof(buf)) - i ? decimalPoint : sizeof(buf) - i); - strncpy(buf + i, result, len); - i += len; - buf[i++] = '.'; - strlcpy(buf + i, result + decimalPoint, sizeof(buf) - i); - } - } else if (result[0] < '0' || result[0] > '9') - strlcpy(buf + i, result, sizeof(buf) - i); - else { - buf[i++] = result[0]; - if (length > 1) { - buf[i++] = '.'; - strlcpy(buf + i, result + 1, sizeof(buf) - i); - i += length - 1; - } - - buf[i++] = 'e'; - buf[i++] = (decimalPoint >= 0) ? '+' : '-'; - // decimalPoint can't be more than 3 digits decimal given the - // nature of float representation - int exponential = decimalPoint - 1; - if (exponential < 0) - exponential = -exponential; - if (exponential >= 100) - buf[i++] = static_cast('0' + exponential / 100); - if (exponential >= 10) - buf[i++] = static_cast('0' + (exponential % 100) / 10); - buf[i++] = static_cast('0' + exponential % 10); - buf[i++] = '\0'; - ASSERT(i <= static_cast(sizeof(buf))); - } - - return UString(buf); + NumberToStringBuffer buffer; + return UString(numberToString(d, buffer)); } -UString UString::spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const +UString UString::substringSharingImpl(unsigned offset, unsigned length) const { - m_rep->checkConsistency(); - - if (rangeCount == 1 && separatorCount == 0) { - int thisSize = size(); - int position = substringRanges[0].position; - int length = substringRanges[0].length; - if (position <= 0 && length >= thisSize) - return *this; - return UString::Rep::create(m_rep, max(0, position), min(thisSize, length)); - } - - int totalLength = 0; - for (int i = 0; i < rangeCount; i++) - totalLength += substringRanges[i].length; - for (int i = 0; i < separatorCount; i++) - totalLength += separators[i].size(); - - if (totalLength == 0) - return ""; - - UChar* buffer = allocChars(totalLength); - if (!buffer) - return null(); + // FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UChar). - int maxCount = max(rangeCount, separatorCount); - int bufferPos = 0; - for (int i = 0; i < maxCount; i++) { - if (i < rangeCount) { - copyChars(buffer + bufferPos, data() + substringRanges[i].position, substringRanges[i].length); - bufferPos += substringRanges[i].length; - } - if (i < separatorCount) { - copyChars(buffer + bufferPos, separators[i].data(), separators[i].size()); - bufferPos += separators[i].size(); - } - } - - return UString::Rep::create(buffer, totalLength); -} - -UString UString::replaceRange(int rangeStart, int rangeLength, const UString& replacement) const -{ - m_rep->checkConsistency(); + unsigned stringLength = this->length(); + offset = min(offset, stringLength); + length = min(length, stringLength - offset); - int replacementLength = replacement.size(); - int totalLength = size() - rangeLength + replacementLength; - if (totalLength == 0) - return ""; - - UChar* buffer = allocChars(totalLength); - if (!buffer) - return null(); - - copyChars(buffer, data(), rangeStart); - copyChars(buffer + rangeStart, replacement.data(), replacementLength); - int rangeEnd = rangeStart + rangeLength; - copyChars(buffer + rangeStart + replacementLength, data() + rangeEnd, size() - rangeEnd); - - return UString::Rep::create(buffer, totalLength); -} - - -UString& UString::append(const UString &t) -{ - m_rep->checkConsistency(); - t.rep()->checkConsistency(); - - int thisSize = size(); - int thisOffset = m_rep->offset; - int tSize = t.size(); - int length = thisSize + tSize; - BaseString* base = m_rep->baseString(); - - // possible cases: - if (thisSize == 0) { - // this is empty - *this = t; - } else if (tSize == 0) { - // t is empty - } else if (m_rep == base && !base->isShared()) { - // this is direct and has refcount of 1 (so we can just alter it directly) - expandCapacity(newCapacityWithOverflowCheck(thisOffset, length)); - if (data()) { - copyChars(m_rep->data() + thisSize, t.data(), tSize); - m_rep->len = length; - m_rep->_hash = 0; - } - } else if (thisOffset + thisSize == base->usedCapacity && thisSize >= minShareSize && !base->isBufferReadOnly()) { - // this reaches the end of the buffer - extend it if it's long enough to append to - expandCapacity(newCapacityWithOverflowCheck(thisOffset, length)); - if (data()) { - copyChars(m_rep->data() + thisSize, t.data(), tSize); - m_rep = Rep::create(m_rep, 0, length); - } - } else { - // This is shared in some way that prevents us from modifying base, so we must make a whole new string. - size_t newCapacity = expandedSize(length, 0); - UChar* d = allocChars(newCapacity); - if (!d) - makeNull(); - else { - copyChars(d, data(), thisSize); - copyChars(d + thisSize, t.data(), tSize); - m_rep = Rep::create(d, length); - m_rep->baseString()->capacity = newCapacity; - } - } - - m_rep->checkConsistency(); - t.rep()->checkConsistency(); - - return *this; -} - -UString& UString::append(const UChar* tData, int tSize) -{ - m_rep = concatenate(m_rep.release(), tData, tSize); - return *this; -} - -UString& UString::appendNumeric(int i) -{ - m_rep = concatenate(rep(), i); - return *this; + if (!offset && length == stringLength) + return *this; + return UString(StringImpl::create(m_impl, offset, length)); } -UString& UString::appendNumeric(double d) +bool operator==(const UString& s1, const char *s2) { - m_rep = concatenate(rep(), d); - return *this; -} + if (s1.isEmpty()) + return !s2; -UString& UString::append(const char* t) -{ - m_rep = concatenate(m_rep.release(), t); - return *this; + return equal(s1.impl(), s2); } -UString& UString::append(UChar c) +// This method assumes that all simple checks have been performed by +// the inlined operator==() in the header file. +bool equalSlowCase(const UString& s1, const UString& s2) { - m_rep->checkConsistency(); - - int thisOffset = m_rep->offset; - int length = size(); - BaseString* base = m_rep->baseString(); + StringImpl* rep1 = s1.impl(); + StringImpl* rep2 = s2.impl(); + unsigned size1 = rep1->length(); - // possible cases: - if (length == 0) { - // this is empty - must make a new m_rep because we don't want to pollute the shared empty one - size_t newCapacity = expandedSize(1, 0); - UChar* d = allocChars(newCapacity); - if (!d) - makeNull(); - else { - d[0] = c; - m_rep = Rep::create(d, 1); - m_rep->baseString()->capacity = newCapacity; - } - } else if (m_rep == base && !base->isShared()) { - // this is direct and has refcount of 1 (so we can just alter it directly) - expandCapacity(newCapacityWithOverflowCheck(thisOffset, length, true)); - UChar* d = m_rep->data(); - if (d) { - d[length] = c; - m_rep->len = length + 1; - m_rep->_hash = 0; - } - } else if (thisOffset + length == base->usedCapacity && length >= minShareSize && !base->isBufferReadOnly()) { - // this reaches the end of the string - extend it and share - expandCapacity(newCapacityWithOverflowCheck(thisOffset, length, true)); - UChar* d = m_rep->data(); - if (d) { - d[length] = c; - m_rep = Rep::create(m_rep, 0, length + 1); + // At this point we know + // (a) that the strings are the same length and + // (b) that they are greater than zero length. + bool s1Is8Bit = rep1->is8Bit(); + bool s2Is8Bit = rep2->is8Bit(); + + if (s1Is8Bit) { + const LChar* d1 = rep1->characters8(); + if (s2Is8Bit) { + const LChar* d2 = rep2->characters8(); + + if (d1 == d2) // Check to see if the data pointers are the same. + return true; + + // Do quick checks for sizes 1 and 2. + switch (size1) { + case 1: + return d1[0] == d2[0]; + case 2: + return (d1[0] == d2[0]) & (d1[1] == d2[1]); + default: + return (!memcmp(d1, d2, size1 * sizeof(LChar))); + } } - } else { - // This is shared in some way that prevents us from modifying base, so we must make a whole new string. - size_t newCapacity = expandedSize(length + 1, 0); - UChar* d = allocChars(newCapacity); - if (!d) - makeNull(); - else { - copyChars(d, data(), length); - d[length] = c; - m_rep = Rep::create(d, length + 1); - m_rep->baseString()->capacity = newCapacity; + + const UChar* d2 = rep2->characters16(); + + for (unsigned i = 0; i < size1; i++) { + if (d1[i] != d2[i]) + return false; } + return true; } - - m_rep->checkConsistency(); - - return *this; -} - -bool UString::getCString(CStringBuffer& buffer) const -{ - int length = size(); - int neededSize = length + 1; - buffer.resize(neededSize); - char* buf = buffer.data(); - - UChar ored = 0; - const UChar* p = data(); - char* q = buf; - const UChar* limit = p + length; - while (p != limit) { - UChar c = p[0]; - ored |= c; - *q = static_cast(c); - ++p; - ++q; - } - *q = '\0'; - - return !(ored & 0xFF00); -} - -char* UString::ascii() const -{ - int length = size(); - int neededSize = length + 1; - delete[] statBuffer; - statBuffer = new char[neededSize]; - - const UChar* p = data(); - char* q = statBuffer; - const UChar* limit = p + length; - while (p != limit) { - *q = static_cast(p[0]); - ++p; - ++q; - } - *q = '\0'; - - return statBuffer; -} - -UString& UString::operator=(const char* c) -{ - if (!c) { - m_rep = &Rep::null(); - return *this; - } - - if (!c[0]) { - m_rep = &Rep::empty(); - return *this; - } - - int l = static_cast(strlen(c)); - UChar* d; - BaseString* base = m_rep->baseString(); - if (!base->isShared() && l <= base->capacity && m_rep == base && m_rep->offset == 0 && base->preCapacity == 0) { - d = base->buf; - m_rep->_hash = 0; - m_rep->len = l; - } else { - d = allocChars(l); - if (!d) { - makeNull(); - return *this; + + if (s2Is8Bit) { + const UChar* d1 = rep1->characters16(); + const LChar* d2 = rep2->characters8(); + + for (unsigned i = 0; i < size1; i++) { + if (d1[i] != d2[i]) + return false; } - m_rep = Rep::create(d, l); - } - for (int i = 0; i < l; i++) - d[i] = static_cast(c[i]); // use unsigned char to zero-extend instead of sign-extend - - return *this; -} - -bool UString::is8Bit() const -{ - const UChar* u = data(); - const UChar* limit = u + size(); - while (u < limit) { - if (u[0] > 0xFF) - return false; - ++u; - } - - return true; -} - -UChar UString::operator[](int pos) const -{ - if (pos >= size()) - return '\0'; - return data()[pos]; -} - -double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const -{ - if (size() == 1) { - UChar c = data()[0]; - if (isASCIIDigit(c)) - return c - '0'; - if (isASCIISpace(c) && tolerateEmptyString) - return 0; - return NaN; + return true; + } - - // FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk - // after the number, so this is too strict a check. - CStringBuffer s; - if (!getCString(s)) - return NaN; - const char* c = s.data(); - - // skip leading white space - while (isASCIISpace(*c)) - c++; - - // empty string ? - if (*c == '\0') - return tolerateEmptyString ? 0.0 : NaN; - - double d; - - // hex number ? - if (*c == '0' && (*(c + 1) == 'x' || *(c + 1) == 'X')) { - const char* firstDigitPosition = c + 2; - c++; - d = 0.0; - while (*(++c)) { - if (*c >= '0' && *c <= '9') - d = d * 16.0 + *c - '0'; - else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f')) - d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0; - else - break; - } - - if (d >= mantissaOverflowLowerBound) - d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16); - } else { - // regular number ? - char* end; - d = WTF::strtod(c, &end); - if ((d != 0.0 || end != c) && d != Inf && d != -Inf) { - c = end; - } else { - double sign = 1.0; - - if (*c == '+') - c++; - else if (*c == '-') { - sign = -1.0; - c++; - } - - // We used strtod() to do the conversion. However, strtod() handles - // infinite values slightly differently than JavaScript in that it - // converts the string "inf" with any capitalization to infinity, - // whereas the ECMA spec requires that it be converted to NaN. - - if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') { - d = sign * Inf; - c += 8; - } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i') - c = end; - else - return NaN; - } + + const UChar* d1 = rep1->characters16(); + const UChar* d2 = rep2->characters16(); + + if (d1 == d2) // Check to see if the data pointers are the same. + return true; + + // Do quick checks for sizes 1 and 2. + switch (size1) { + case 1: + return d1[0] == d2[0]; + case 2: + return (d1[0] == d2[0]) & (d1[1] == d2[1]); + default: + return (!memcmp(d1, d2, size1 * sizeof(UChar))); } - - // allow trailing white space - while (isASCIISpace(*c)) - c++; - // don't allow anything after - unless tolerant=true - if (!tolerateTrailingJunk && *c != '\0') - d = NaN; - - return d; -} - -double UString::toDouble(bool tolerateTrailingJunk) const -{ - return toDouble(tolerateTrailingJunk, true); } -double UString::toDouble() const -{ - return toDouble(false, true); -} - -uint32_t UString::toUInt32(bool* ok) const +bool operator<(const UString& s1, const UString& s2) { - double d = toDouble(); - bool b = true; - - if (d != static_cast(d)) { - b = false; - d = 0; + const unsigned l1 = s1.length(); + const unsigned l2 = s2.length(); + const unsigned lmin = l1 < l2 ? l1 : l2; + if (s1.is8Bit() && s2.is8Bit()) { + const LChar* c1 = s1.characters8(); + const LChar* c2 = s2.characters8(); + unsigned length = 0; + while (length < lmin && *c1 == *c2) { + c1++; + c2++; + length++; + } + if (length < lmin) + return (c1[0] < c2[0]); + + return (l1 < l2); + } + const UChar* c1 = s1.characters(); + const UChar* c2 = s2.characters(); + unsigned length = 0; + while (length < lmin && *c1 == *c2) { + c1++; + c2++; + length++; } + if (length < lmin) + return (c1[0] < c2[0]); - if (ok) - *ok = b; - - return static_cast(d); + return (l1 < l2); } -uint32_t UString::toUInt32(bool* ok, bool tolerateEmptyString) const +bool operator>(const UString& s1, const UString& s2) { - double d = toDouble(false, tolerateEmptyString); - bool b = true; - - if (d != static_cast(d)) { - b = false; - d = 0; + const unsigned l1 = s1.length(); + const unsigned l2 = s2.length(); + const unsigned lmin = l1 < l2 ? l1 : l2; + const UChar* c1 = s1.characters(); + const UChar* c2 = s2.characters(); + unsigned l = 0; + while (l < lmin && *c1 == *c2) { + c1++; + c2++; + l++; } + if (l < lmin) + return (c1[0] > c2[0]); - if (ok) - *ok = b; - - return static_cast(d); + return (l1 > l2); } -uint32_t UString::toStrictUInt32(bool* ok) const +CString UString::ascii() const { - if (ok) - *ok = false; - - // Empty string is not OK. - int len = m_rep->len; - if (len == 0) - return 0; - const UChar* p = m_rep->data(); - unsigned short c = p[0]; + // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are + // preserved, characters outside of this range are converted to '?'. - // If the first digit is 0, only 0 itself is OK. - if (c == '0') { - if (len == 1 && ok) - *ok = true; - return 0; - } - - // Convert to UInt32, checking for overflow. - uint32_t i = 0; - while (1) { - // Process character, turning it into a digit. - if (c < '0' || c > '9') - return 0; - const unsigned d = c - '0'; - - // Multiply by 10, checking for overflow out of 32 bits. - if (i > 0xFFFFFFFFU / 10) - return 0; - i *= 10; - - // Add in the digit, checking for overflow out of 32 bits. - const unsigned max = 0xFFFFFFFFU - d; - if (i > max) - return 0; - i += d; + unsigned length = this->length(); - // Handle end of string. - if (--len == 0) { - if (ok) - *ok = true; - return i; + if (this->is8Bit()) { + const LChar* characters = this->characters8(); + + char* characterBuffer; + CString result = CString::newUninitialized(length, characterBuffer); + + for (unsigned i = 0; i < length; ++i) { + LChar ch = characters[i]; + characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch; } - - // Get next character. - c = *(++p); + + return result; } -} -int UString::find(const UString& f, int pos) const -{ - int fsz = f.size(); + const UChar* characters = this->characters16(); - if (pos < 0) - pos = 0; + char* characterBuffer; + CString result = CString::newUninitialized(length, characterBuffer); - if (fsz == 1) { - UChar ch = f[0]; - const UChar* end = data() + size(); - for (const UChar* c = data() + pos; c < end; c++) { - if (*c == ch) - return static_cast(c - data()); - } - return -1; + for (unsigned i = 0; i < length; ++i) { + UChar ch = characters[i]; + characterBuffer[i] = ch && (ch < 0x20 || ch >= 0x7f) ? '?' : ch; } - int sz = size(); - if (sz < fsz) - return -1; - if (fsz == 0) - return pos; - const UChar* end = data() + sz - fsz; - int fsizeminusone = (fsz - 1) * sizeof(UChar); - const UChar* fdata = f.data(); - unsigned short fchar = fdata[0]; - ++fdata; - for (const UChar* c = data() + pos; c <= end; c++) { - if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone)) - return static_cast(c - data()); - } - - return -1; + return result; } -int UString::find(UChar ch, int pos) const +CString UString::latin1() const { - if (pos < 0) - pos = 0; - const UChar* end = data() + size(); - for (const UChar* c = data() + pos; c < end; c++) { - if (*c == ch) - return static_cast(c - data()); - } - - return -1; -} + // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are + // preserved, characters outside of this range are converted to '?'. -int UString::rfind(const UString& f, int pos) const -{ - int sz = size(); - int fsz = f.size(); - if (sz < fsz) - return -1; - if (pos < 0) - pos = 0; - if (pos > sz - fsz) - pos = sz - fsz; - if (fsz == 0) - return pos; - int fsizeminusone = (fsz - 1) * sizeof(UChar); - const UChar* fdata = f.data(); - for (const UChar* c = data() + pos; c >= data(); c--) { - if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone)) - return static_cast(c - data()); - } + unsigned length = this->length(); + const UChar* characters = this->characters(); - return -1; -} + char* characterBuffer; + CString result = CString::newUninitialized(length, characterBuffer); -int UString::rfind(UChar ch, int pos) const -{ - if (isEmpty()) - return -1; - if (pos + 1 >= size()) - pos = size() - 1; - for (const UChar* c = data() + pos; c >= data(); c--) { - if (*c == ch) - return static_cast(c - data()); + for (unsigned i = 0; i < length; ++i) { + UChar ch = characters[i]; + characterBuffer[i] = ch > 0xff ? '?' : ch; } - return -1; -} - -UString UString::substr(int pos, int len) const -{ - int s = size(); - - if (pos < 0) - pos = 0; - else if (pos >= s) - pos = s; - if (len < 0) - len = s; - if (pos + len >= s) - len = s - pos; - - if (pos == 0 && len == s) - return *this; - - return UString(Rep::create(m_rep, pos, len)); + return result; } -bool operator==(const UString& s1, const char *s2) +// Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available. +static inline void putUTF8Triple(char*& buffer, UChar ch) { - if (s2 == 0) - return s1.isEmpty(); - - const UChar* u = s1.data(); - const UChar* uend = u + s1.size(); - while (u != uend && *s2) { - if (u[0] != (unsigned char)*s2) - return false; - s2++; - u++; - } - - return u == uend && *s2 == 0; + ASSERT(ch >= 0x0800); + *buffer++ = static_cast(((ch >> 12) & 0x0F) | 0xE0); + *buffer++ = static_cast(((ch >> 6) & 0x3F) | 0x80); + *buffer++ = static_cast((ch & 0x3F) | 0x80); } -bool operator<(const UString& s1, const UString& s2) +CString UString::utf8(bool strict) const { - const int l1 = s1.size(); - const int l2 = s2.size(); - const int lmin = l1 < l2 ? l1 : l2; - const UChar* c1 = s1.data(); - const UChar* c2 = s2.data(); - int l = 0; - while (l < lmin && *c1 == *c2) { - c1++; - c2++; - l++; - } - if (l < lmin) - return (c1[0] < c2[0]); + unsigned length = this->length(); - return (l1 < l2); -} + if (!length) + return CString("", 0); -bool operator>(const UString& s1, const UString& s2) -{ - const int l1 = s1.size(); - const int l2 = s2.size(); - const int lmin = l1 < l2 ? l1 : l2; - const UChar* c1 = s1.data(); - const UChar* c2 = s2.data(); - int l = 0; - while (l < lmin && *c1 == *c2) { - c1++; - c2++; - l++; - } - if (l < lmin) - return (c1[0] > c2[0]); + // Allocate a buffer big enough to hold all the characters + // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes). + // Optimization ideas, if we find this function is hot: + // * We could speculatively create a CStringBuffer to contain 'length' + // characters, and resize if necessary (i.e. if the buffer contains + // non-ascii characters). (Alternatively, scan the buffer first for + // ascii characters, so we know this will be sufficient). + // * We could allocate a CStringBuffer with an appropriate size to + // have a good chance of being able to write the string into the + // buffer without reallocing (say, 1.5 x length). + if (length > numeric_limits::max() / 3) + return CString(); - return (l1 > l2); -} + Vector bufferVector(length * 3); + char* buffer = bufferVector.data(); -int compare(const UString& s1, const UString& s2) -{ - const int l1 = s1.size(); - const int l2 = s2.size(); - const int lmin = l1 < l2 ? l1 : l2; - const UChar* c1 = s1.data(); - const UChar* c2 = s2.data(); - int l = 0; - while (l < lmin && *c1 == *c2) { - c1++; - c2++; - l++; - } + if (is8Bit()) { + const LChar* characters = this->characters8(); - if (l < lmin) - return (c1[0] > c2[0]) ? 1 : -1; + ConversionResult result = convertLatin1ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size()); + ASSERT_UNUSED(result, result != targetExhausted); // (length * 3) should be sufficient for any conversion + } else { + const UChar* characters = this->characters16(); - if (l1 == l2) - return 0; + ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict); + ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion - return (l1 > l2) ? 1 : -1; -} + // Only produced from strict conversion. + if (result == sourceIllegal) + return CString(); -bool equal(const UString::Rep* r, const UString::Rep* b) -{ - int length = r->len; - if (length != b->len) - return false; - const UChar* d = r->data(); - const UChar* s = b->data(); - for (int i = 0; i != length; ++i) { - if (d[i] != s[i]) - return false; + // Check for an unconverted high surrogate. + if (result == sourceExhausted) { + if (strict) + return CString(); + // This should be one unpaired high surrogate. Treat it the same + // was as an unpaired high surrogate would have been handled in + // the middle of a string with non-strict conversion - which is + // to say, simply encode it to UTF-8. + ASSERT((characters + 1) == (this->characters() + length)); + ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF)); + // There should be room left, since one UChar hasn't been converted. + ASSERT((buffer + 3) <= (buffer + bufferVector.size())); + putUTF8Triple(buffer, *characters); + } } - return true; -} - -CString UString::UTF8String(bool strict) const -{ - // Allocate a buffer big enough to hold all the characters. - const int length = size(); - Vector buffer(length * 3); - - // Convert to runs of 8-bit characters. - char* p = buffer.data(); - const UChar* d = reinterpret_cast(&data()[0]); - ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict); - if (result != conversionOK) - return CString(); - - return CString(buffer.data(), p - buffer.data()); -} -// For use in error handling code paths -- having this not be inlined helps avoid PIC branches to fetch the global on Mac OS X. -NEVER_INLINE void UString::makeNull() -{ - m_rep = &Rep::null(); -} - -// For use in error handling code paths -- having this not be inlined helps avoid PIC branches to fetch the global on Mac OS X. -NEVER_INLINE UString::Rep* UString::nullRep() -{ - return &Rep::null(); + return CString(bufferVector.data(), buffer - bufferVector.data()); } } // namespace JSC