X-Git-Url: https://git.saurik.com/apple/javascriptcore.git/blobdiff_plain/4e4e5a6f2694187498445a6ac6f1634ce8141119..14957cd040308e3eeec43d26bae5d76da13fcd85:/wtf/text/WTFString.cpp diff --git a/wtf/text/WTFString.cpp b/wtf/text/WTFString.cpp index 842d755..3ab4ff5 100644 --- a/wtf/text/WTFString.cpp +++ b/wtf/text/WTFString.cpp @@ -1,6 +1,6 @@ /* * (C) 1999 Lars Knoll (knoll@kde.org) - * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. + * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010 Apple Inc. All rights reserved. * Copyright (C) 2007-2009 Torch Mobile, Inc. * * This library is free software; you can redistribute it and/or @@ -22,7 +22,6 @@ #include "config.h" #include "WTFString.h" -#include #include #include #include @@ -32,23 +31,47 @@ #include #include -using namespace WTF; -using namespace WTF::Unicode; +using namespace std; -namespace WebCore { +namespace WTF { +using namespace Unicode; +using namespace std; + +// Construct a string with UTF-16 data. +String::String(const UChar* characters, unsigned length) + : m_impl(characters ? StringImpl::create(characters, length) : 0) +{ +} + +// Construct a string with UTF-16 data, from a null-terminated source. String::String(const UChar* str) { if (!str) return; - int len = 0; + size_t len = 0; while (str[len] != UChar(0)) len++; + + if (len > numeric_limits::max()) + CRASH(); m_impl = StringImpl::create(str, len); } +// Construct a string with latin1 data. +String::String(const char* characters, unsigned length) + : m_impl(characters ? StringImpl::create(characters, length) : 0) +{ +} + +// Construct a string with latin1 data, from a null-terminated source. +String::String(const char* characters) + : m_impl(characters ? StringImpl::create(characters) : 0) +{ +} + void String::append(const String& str) { if (str.isEmpty()) @@ -61,8 +84,9 @@ void String::append(const String& str) if (str.m_impl) { if (m_impl) { UChar* data; - RefPtr newImpl = - StringImpl::createUninitialized(m_impl->length() + str.length(), data); + if (str.length() > numeric_limits::max() - m_impl->length()) + CRASH(); + RefPtr newImpl = StringImpl::createUninitialized(m_impl->length() + str.length(), data); memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar)); memcpy(data + m_impl->length(), str.characters(), str.length() * sizeof(UChar)); m_impl = newImpl.release(); @@ -79,8 +103,9 @@ void String::append(char c) // call to fastMalloc every single time. if (m_impl) { UChar* data; - RefPtr newImpl = - StringImpl::createUninitialized(m_impl->length() + 1, data); + if (m_impl->length() >= numeric_limits::max()) + CRASH(); + RefPtr newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data); memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar)); data[m_impl->length()] = c; m_impl = newImpl.release(); @@ -96,8 +121,9 @@ void String::append(UChar c) // call to fastMalloc every single time. if (m_impl) { UChar* data; - RefPtr newImpl = - StringImpl::createUninitialized(m_impl->length() + 1, data); + if (m_impl->length() >= numeric_limits::max()) + CRASH(); + RefPtr newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data); memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar)); data[m_impl->length()] = c; m_impl = newImpl.release(); @@ -105,25 +131,9 @@ void String::append(UChar c) m_impl = StringImpl::create(&c, 1); } -String operator+(const String& a, const String& b) +int codePointCompare(const String& a, const String& b) { - if (a.isEmpty()) - return b; - if (b.isEmpty()) - return a; - String c = a; - c += b; - return c; -} - -String operator+(const String& s, const char* cs) -{ - return s + String(cs); -} - -String operator+(const char* cs, const String& s) -{ - return String(cs) + s; + return codePointCompare(a.impl(), b.impl()); } void String::insert(const String& str, unsigned pos) @@ -152,8 +162,9 @@ void String::append(const UChar* charactersToAppend, unsigned lengthToAppend) ASSERT(charactersToAppend); UChar* data; - RefPtr newImpl = - StringImpl::createUninitialized(length() + lengthToAppend, data); + if (lengthToAppend > numeric_limits::max() - length()) + CRASH(); + RefPtr newImpl = StringImpl::createUninitialized(length() + lengthToAppend, data); memcpy(data, characters(), length() * sizeof(UChar)); memcpy(data + length(), charactersToAppend, lengthToAppend * sizeof(UChar)); m_impl = newImpl.release(); @@ -173,8 +184,9 @@ void String::insert(const UChar* charactersToInsert, unsigned lengthToInsert, un ASSERT(charactersToInsert); UChar* data; - RefPtr newImpl = - StringImpl::createUninitialized(length() + lengthToInsert, data); + if (lengthToInsert > numeric_limits::max() - length()) + CRASH(); + RefPtr newImpl = StringImpl::createUninitialized(length() + lengthToInsert, data); memcpy(data, characters(), position * sizeof(UChar)); memcpy(data + position, charactersToInsert, lengthToInsert * sizeof(UChar)); memcpy(data + position + lengthToInsert, characters() + position, (length() - position) * sizeof(UChar)); @@ -207,8 +219,7 @@ void String::remove(unsigned position, int lengthToRemove) if (static_cast(lengthToRemove) > length() - position) lengthToRemove = length() - position; UChar* data; - RefPtr newImpl = - StringImpl::createUninitialized(length() - lengthToRemove, data); + RefPtr newImpl = StringImpl::createUninitialized(length() - lengthToRemove, data); memcpy(data, characters(), position * sizeof(UChar)); memcpy(data + position, characters() + position + lengthToRemove, (length() - lengthToRemove - position) * sizeof(UChar)); @@ -222,6 +233,19 @@ String String::substring(unsigned pos, unsigned len) const return m_impl->substring(pos, len); } +String String::substringSharingImpl(unsigned offset, unsigned length) const +{ + // FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UChar). + + unsigned stringLength = this->length(); + offset = min(offset, stringLength); + length = min(length, stringLength - offset); + + if (!offset && length == stringLength) + return *this; + return String(StringImpl::create(m_impl, offset, length)); +} + String String::lower() const { if (!m_impl) @@ -299,7 +323,8 @@ String String::format(const char *format, ...) va_end(args); - return buffer; + QByteArray ba = buffer.toUtf8(); + return StringImpl::create(ba.constData(), ba.length()); #elif OS(WINCE) va_list args; @@ -515,24 +540,28 @@ intptr_t String::toIntPtr(bool* ok) const return m_impl->toIntPtr(ok); } -double String::toDouble(bool* ok) const +double String::toDouble(bool* ok, bool* didReadNumber) const { if (!m_impl) { if (ok) *ok = false; + if (didReadNumber) + *didReadNumber = false; return 0.0; } - return m_impl->toDouble(ok); + return m_impl->toDouble(ok, didReadNumber); } -float String::toFloat(bool* ok) const +float String::toFloat(bool* ok, bool* didReadNumber) const { if (!m_impl) { if (ok) *ok = false; + if (didReadNumber) + *didReadNumber = false; return 0.0f; } - return m_impl->toFloat(ok); + return m_impl->toFloat(ok, didReadNumber); } String String::threadsafeCopy() const @@ -553,54 +582,59 @@ void String::split(const String& separator, bool allowEmptyEntries, Vector(length())) + if (allowEmptyEntries || startPos != length()) result.append(substring(startPos)); } void String::split(const String& separator, Vector& result) const { - return split(separator, false, result); + split(separator, false, result); } void String::split(UChar separator, bool allowEmptyEntries, Vector& result) const { result.clear(); - int startPos = 0; - int endPos; - while ((endPos = find(separator, startPos)) != -1) { + unsigned startPos = 0; + size_t endPos; + while ((endPos = find(separator, startPos)) != notFound) { if (allowEmptyEntries || startPos != endPos) result.append(substring(startPos, endPos - startPos)); startPos = endPos + 1; } - if (allowEmptyEntries || startPos != static_cast(length())) + if (allowEmptyEntries || startPos != length()) result.append(substring(startPos)); } void String::split(UChar separator, Vector& result) const { - return split(String(&separator, 1), false, result); + split(String(&separator, 1), false, result); } -Vector String::ascii() const +CString String::ascii() const { - if (m_impl) - return m_impl->ascii(); - - const char* nullMsg = "(null impl)"; - Vector buffer; - for (int i = 0; nullMsg[i]; ++i) - buffer.append(nullMsg[i]); - - buffer.append('\0'); - return buffer; + // Printable ASCII characters 32..127 and the null character are + // preserved, characters outside of this range are converted to '?'. + + unsigned length = this->length(); + const UChar* characters = this->characters(); + + char* characterBuffer; + CString result = CString::newUninitialized(length, characterBuffer); + + for (unsigned i = 0; i < length; ++i) { + UChar ch = characters[i]; + characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch; + } + + return result; } CString String::latin1() const @@ -616,7 +650,7 @@ CString String::latin1() const for (unsigned i = 0; i < length; ++i) { UChar ch = characters[i]; - characterBuffer[i] = ch > 255 ? '?' : ch; + characterBuffer[i] = ch > 0xff ? '?' : ch; } return result; @@ -631,7 +665,7 @@ static inline void putUTF8Triple(char*& buffer, UChar ch) *buffer++ = static_cast((ch & 0x3F) | 0x80); } -CString String::utf8() const +CString String::utf8(bool strict) const { unsigned length = this->length(); const UChar* characters = this->characters(); @@ -646,19 +680,27 @@ CString String::utf8() const // * We could allocate a CStringBuffer with an appropriate size to // have a good chance of being able to write the string into the // buffer without reallocing (say, 1.5 x length). + if (length > numeric_limits::max() / 3) + return CString(); Vector bufferVector(length * 3); char* buffer = bufferVector.data(); - ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), false); - ASSERT(result != sourceIllegal); // Only produced from strict conversion. + ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict); ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion - // If a high surrogate is left unconverted, treat it the same was as an unpaired high surrogate - // would have been handled in the middle of a string with non-strict conversion - which is to say, - // simply encode it to UTF-8. + // Only produced from strict conversion. + if (result == sourceIllegal) + return CString(); + + // Check for an unconverted high surrogate. if (result == sourceExhausted) { - // This should be one unpaired high surrogate. - ASSERT((characters + 1) == (characters + length)); + if (strict) + return CString(); + // This should be one unpaired high surrogate. Treat it the same + // was as an unpaired high surrogate would have been handled in + // the middle of a string with non-strict conversion - which is + // to say, simply encode it to UTF-8. + ASSERT((characters + 1) == (this->characters() + length)); ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF)); // There should be room left, since one UChar hasn't been converted. ASSERT((buffer + 3) <= (buffer + bufferVector.size())); @@ -670,6 +712,9 @@ CString String::utf8() const String String::fromUTF8(const char* stringStart, size_t length) { + if (length > numeric_limits::max()) + CRASH(); + if (!stringStart) return String(); @@ -729,8 +774,8 @@ static bool isCharacterAllowedInBase(UChar c, int base) template static inline IntegralType toIntegralType(const UChar* data, size_t length, bool* ok, int base) { - static const IntegralType integralMax = std::numeric_limits::max(); - static const bool isSigned = std::numeric_limits::is_signed; + static const IntegralType integralMax = numeric_limits::max(); + static const bool isSigned = numeric_limits::is_signed; const IntegralType maxMultiplier = integralMax / base; IntegralType value = 0; @@ -875,11 +920,13 @@ intptr_t charactersToIntPtr(const UChar* data, size_t length, bool* ok) return toIntegralType(data, lengthOfCharactersAsInteger(data, length), ok, 10); } -double charactersToDouble(const UChar* data, size_t length, bool* ok) +double charactersToDouble(const UChar* data, size_t length, bool* ok, bool* didReadNumber) { if (!length) { if (ok) *ok = false; + if (didReadNumber) + *didReadNumber = false; return 0.0; } @@ -887,27 +934,64 @@ double charactersToDouble(const UChar* data, size_t length, bool* ok) for (unsigned i = 0; i < length; ++i) bytes[i] = data[i] < 0x7F ? data[i] : '?'; bytes[length] = '\0'; + char* start = bytes.data(); char* end; - double val = WTF::strtod(bytes.data(), &end); + double val = WTF::strtod(start, &end); if (ok) *ok = (end == 0 || *end == '\0'); + if (didReadNumber) + *didReadNumber = end - start; return val; } -float charactersToFloat(const UChar* data, size_t length, bool* ok) +float charactersToFloat(const UChar* data, size_t length, bool* ok, bool* didReadNumber) { // FIXME: This will return ok even when the string fits into a double but not a float. - return static_cast(charactersToDouble(data, length, ok)); + return static_cast(charactersToDouble(data, length, ok, didReadNumber)); +} + +const String& emptyString() +{ + DEFINE_STATIC_LOCAL(String, emptyString, (StringImpl::empty())); + return emptyString; } -} // namespace WebCore +} // namespace WTF #ifndef NDEBUG -// For use in the debugger - leaks memory -WebCore::String* string(const char*); +// For use in the debugger +String* string(const char*); +Vector asciiDebug(StringImpl* impl); +Vector asciiDebug(String& string); -WebCore::String* string(const char* s) +String* string(const char* s) { - return new WebCore::String(s); + // leaks memory! + return new String(s); } + +Vector asciiDebug(StringImpl* impl) +{ + if (!impl) + return asciiDebug(String("[null]").impl()); + + Vector buffer; + unsigned length = impl->length(); + const UChar* characters = impl->characters(); + + buffer.resize(length + 1); + for (unsigned i = 0; i < length; ++i) { + UChar ch = characters[i]; + buffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch; + } + buffer[length] = '\0'; + + return buffer; +} + +Vector asciiDebug(String& string) +{ + return asciiDebug(string.impl()); +} + #endif