X-Git-Url: https://git.saurik.com/apple/javascriptcore.git/blobdiff_plain/ba379fdc102753d6be2c4d937058fe40257329fe..1981f5dfe8d77d97469d20652f712a09400c48ed:/runtime/UString.h diff --git a/runtime/UString.h b/runtime/UString.h index d01b75d..7677161 100644 --- a/runtime/UString.h +++ b/runtime/UString.h @@ -1,572 +1,284 @@ /* - * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) - * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. - * Copyright (C) 2009 Google Inc. All rights reserved. + * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) + * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. + * Copyright (C) 2009 Google Inc. All rights reserved. * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. + * You should have received a copy of the GNU Library General Public License + * along with this library; see the file COPYING.LIB. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. * */ #ifndef UString_h #define UString_h -#include "Collector.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include namespace JSC { - using WTF::PlacementNewAdoptType; - using WTF::PlacementNewAdopt; - - class IdentifierTable; - - class CString { - public: - CString() - : m_length(0) - , m_data(0) - { - } - - CString(const char*); - CString(const char*, size_t); - CString(const CString&); - - ~CString(); - - static CString adopt(char*, size_t); // buffer should be allocated with new[]. - - CString& append(const CString&); - CString& operator=(const char* c); - CString& operator=(const CString&); - CString& operator+=(const CString& c) { return append(c); } - - size_t size() const { return m_length; } - const char* c_str() const { return m_data; } - - private: - size_t m_length; - char* m_data; - }; - - typedef Vector CStringBuffer; - - class UString { - friend class JIT; - - public: - typedef CrossThreadRefCounted > SharedUChar; - struct BaseString; - struct Rep : Noncopyable { - friend class JIT; - - static PassRefPtr create(UChar* buffer, int length) - { - return adoptRef(new BaseString(buffer, length)); - } - - static PassRefPtr createEmptyBuffer(size_t size) - { - // Guard against integer overflow - if (size < (std::numeric_limits::max() / sizeof(UChar))) { - if (void * buf = tryFastMalloc(size * sizeof(UChar))) - return adoptRef(new BaseString(static_cast(buf), 0, size)); - } - return adoptRef(new BaseString(0, 0, 0)); - } - - static PassRefPtr createCopying(const UChar*, int); - static PassRefPtr create(PassRefPtr base, int offset, int length); - - // Constructs a string from a UTF-8 string, using strict conversion (see comments in UTF8.h). - // Returns UString::Rep::null for null input or conversion failure. - static PassRefPtr createFromUTF8(const char*); - - // Uses SharedUChar to have joint ownership over the UChar*. - static PassRefPtr create(UChar*, int, PassRefPtr); - - SharedUChar* sharedBuffer(); - void destroy(); - - bool baseIsSelf() const { return m_identifierTableAndFlags.isFlagSet(BaseStringFlag); } - UChar* data() const; - int size() const { return len; } - - unsigned hash() const { if (_hash == 0) _hash = computeHash(data(), len); return _hash; } - unsigned computedHash() const { ASSERT(_hash); return _hash; } // fast path for Identifiers - - static unsigned computeHash(const UChar*, int length); - static unsigned computeHash(const char*, int length); - static unsigned computeHash(const char* s) { return computeHash(s, strlen(s)); } - - IdentifierTable* identifierTable() const { return m_identifierTableAndFlags.get(); } - void setIdentifierTable(IdentifierTable* table) { ASSERT(!isStatic()); m_identifierTableAndFlags.set(table); } - - bool isStatic() const { return m_identifierTableAndFlags.isFlagSet(StaticFlag); } - void setStatic(bool); - void setBaseString(PassRefPtr); - BaseString* baseString(); - const BaseString* baseString() const; - - Rep* ref() { ++rc; return this; } - ALWAYS_INLINE void deref() { if (--rc == 0) destroy(); } - - void checkConsistency() const; - enum UStringFlags { - StaticFlag, - BaseStringFlag - }; - - // unshared data - int offset; - int len; - int rc; // For null and empty static strings, this field does not reflect a correct count, because ref/deref are not thread-safe. A special case in destroy() guarantees that these do not get deleted. - mutable unsigned _hash; - PtrAndFlags m_identifierTableAndFlags; - - static BaseString& null() { return *nullBaseString; } - static BaseString& empty() { return *emptyBaseString; } - - bool reserveCapacity(int capacity); - - protected: - // Constructor for use by BaseString subclass; they use the union with m_baseString for another purpose. - Rep(int length) - : offset(0) - , len(length) - , rc(1) - , _hash(0) - , m_baseString(0) - { - } - - Rep(PassRefPtr base, int offsetInBase, int length) - : offset(offsetInBase) - , len(length) - , rc(1) - , _hash(0) - , m_baseString(base.releaseRef()) - { - checkConsistency(); - } - - union { - // If !baseIsSelf() - BaseString* m_baseString; - // If baseIsSelf() - SharedUChar* m_sharedBuffer; - }; - - private: - // For SmallStringStorage which allocates an array and does initialization manually. - Rep() { } - - friend class SmallStringsStorage; - friend void initializeUString(); - JS_EXPORTDATA static BaseString* nullBaseString; - JS_EXPORTDATA static BaseString* emptyBaseString; - }; - - - struct BaseString : public Rep { - bool isShared() { return rc != 1 || isBufferReadOnly(); } - void setSharedBuffer(PassRefPtr); - - bool isBufferReadOnly() - { - if (!m_sharedBuffer) - return false; - return slowIsBufferReadOnly(); - } - - // potentially shared data. - UChar* buf; - int preCapacity; - int usedPreCapacity; - int capacity; - int usedCapacity; - - size_t reportedCost; - - private: - BaseString(UChar* buffer, int length, int additionalCapacity = 0) - : Rep(length) - , buf(buffer) - , preCapacity(0) - , usedPreCapacity(0) - , capacity(length + additionalCapacity) - , usedCapacity(length) - , reportedCost(0) - { - m_identifierTableAndFlags.setFlag(BaseStringFlag); - checkConsistency(); - } - - SharedUChar* sharedBuffer(); - bool slowIsBufferReadOnly(); - - friend struct Rep; - friend class SmallStringsStorage; - friend void initializeUString(); - }; - - public: - UString(); - UString(const char*); - UString(const UChar*, int length); - UString(UChar*, int length, bool copy); - - UString(const UString& s) - : m_rep(s.m_rep) - { - } - - UString(const Vector& buffer); - - ~UString() - { - } - - // Special constructor for cases where we overwrite an object in place. - UString(PlacementNewAdoptType) - : m_rep(PlacementNewAdopt) - { - } +class UString { +public: + // Construct a null string, distinguishable from an empty string. + UString() { } - static UString from(int); - static UString from(unsigned int); - static UString from(long); - static UString from(double); + // Construct a string with UTF-16 data. + JS_EXPORT_PRIVATE UString(const UChar* characters, unsigned length); - struct Range { - public: - Range(int pos, int len) - : position(pos) - , length(len) - { - } + // Construct a string with UTF-16 data, from a null-terminated source. + JS_EXPORT_PRIVATE UString(const UChar*); - Range() - { - } + // Construct a string with latin1 data. + UString(const LChar* characters, unsigned length); + JS_EXPORT_PRIVATE UString(const char* characters, unsigned length); - int position; - int length; - }; + // Construct a string with latin1 data, from a null-terminated source. + UString(const LChar* characters); + JS_EXPORT_PRIVATE UString(const char* characters); - UString spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const; + // Construct a string referencing an existing StringImpl. + UString(StringImpl* impl) : m_impl(impl) { } + UString(PassRefPtr impl) : m_impl(impl) { } + UString(RefPtr impl) : m_impl(impl) { } - UString replaceRange(int rangeStart, int RangeEnd, const UString& replacement) const; + // Inline the destructor. + ALWAYS_INLINE ~UString() { } - UString& append(const UString&); - UString& append(const char*); - UString& append(UChar); - UString& append(char c) { return append(static_cast(static_cast(c))); } - UString& append(const UChar*, int size); - UString& appendNumeric(int); - UString& appendNumeric(double); + void swap(UString& o) { m_impl.swap(o.m_impl); } - bool getCString(CStringBuffer&) const; + template + static UString adopt(Vector& vector) { return StringImpl::adopt(vector); } - // NOTE: This method should only be used for *debugging* purposes as it - // is neither Unicode safe nor free from side effects nor thread-safe. - char* ascii() const; + bool isNull() const { return !m_impl; } + bool isEmpty() const { return !m_impl || !m_impl->length(); } - /** - * Convert the string to UTF-8, assuming it is UTF-16 encoded. - * In non-strict mode, this function is tolerant of badly formed UTF-16, it - * can create UTF-8 strings that are invalid because they have characters in - * the range U+D800-U+DDFF, U+FFFE, or U+FFFF, but the UTF-8 string is - * guaranteed to be otherwise valid. - * In strict mode, error is returned as null CString. - */ - CString UTF8String(bool strict = false) const; + StringImpl* impl() const { return m_impl.get(); } - UString& operator=(const char*c); - - UString& operator+=(const UString& s) { return append(s); } - UString& operator+=(const char* s) { return append(s); } - - const UChar* data() const { return m_rep->data(); } - - bool isNull() const { return (m_rep == &Rep::null()); } - bool isEmpty() const { return (!m_rep->len); } - - bool is8Bit() const; + unsigned length() const + { + if (!m_impl) + return 0; + return m_impl->length(); + } - int size() const { return m_rep->size(); } + const UChar* characters() const + { + if (!m_impl) + return 0; + return m_impl->characters(); + } - UChar operator[](int pos) const; + const LChar* characters8() const + { + if (!m_impl) + return 0; + ASSERT(m_impl->is8Bit()); + return m_impl->characters8(); + } - double toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const; - double toDouble(bool tolerateTrailingJunk) const; - double toDouble() const; + const UChar* characters16() const + { + if (!m_impl) + return 0; + ASSERT(!m_impl->is8Bit()); + return m_impl->characters16(); + } - uint32_t toUInt32(bool* ok = 0) const; - uint32_t toUInt32(bool* ok, bool tolerateEmptyString) const; - uint32_t toStrictUInt32(bool* ok = 0) const; + template + inline const CharType* getCharacters() const; - unsigned toArrayIndex(bool* ok = 0) const; + bool is8Bit() const { return m_impl->is8Bit(); } - int find(const UString& f, int pos = 0) const; - int find(UChar, int pos = 0) const; - int rfind(const UString& f, int pos) const; - int rfind(UChar, int pos) const; + JS_EXPORT_PRIVATE CString ascii() const; + CString latin1() const; + JS_EXPORT_PRIVATE CString utf8(bool strict = false) const; - UString substr(int pos = 0, int len = -1) const; + UChar operator[](unsigned index) const + { + if (!m_impl || index >= m_impl->length()) + return 0; + if (is8Bit()) + return m_impl->characters8()[index]; + return m_impl->characters16()[index]; + } - static const UString& null() { return *nullUString; } + JS_EXPORT_PRIVATE static UString number(int); + JS_EXPORT_PRIVATE static UString number(unsigned); + JS_EXPORT_PRIVATE static UString number(long); + static UString number(long long); + JS_EXPORT_PRIVATE static UString number(double); - Rep* rep() const { return m_rep.get(); } - static Rep* nullRep(); + // Find a single character or string, also with match function & latin1 forms. + size_t find(UChar c, unsigned start = 0) const + { return m_impl ? m_impl->find(c, start) : notFound; } - UString(PassRefPtr r) - : m_rep(r) - { - ASSERT(m_rep); - } + size_t find(const UString& str) const + { return m_impl ? m_impl->find(str.impl()) : notFound; } + size_t find(const UString& str, unsigned start) const + { return m_impl ? m_impl->find(str.impl(), start) : notFound; } - size_t cost() const; - - // Attempt to grow this string such that it can grow to a total length of 'capacity' - // without reallocation. This may fail a number of reasons - if the BasicString is - // shared and another string is using part of the capacity beyond our end point, if - // the realloc fails, or if this string is empty and has no storage. - // - // This method returns a boolean indicating success. - bool reserveCapacity(int capacity) - { - return m_rep->reserveCapacity(capacity); - } + size_t find(const LChar* str, unsigned start = 0) const + { return m_impl ? m_impl->find(str, start) : notFound; } - private: - void expandCapacity(int requiredLength); - void expandPreCapacity(int requiredPreCap); - void makeNull(); + // Find the last instance of a single character or string. + size_t reverseFind(UChar c, unsigned start = UINT_MAX) const + { return m_impl ? m_impl->reverseFind(c, start) : notFound; } + size_t reverseFind(const UString& str, unsigned start = UINT_MAX) const + { return m_impl ? m_impl->reverseFind(str.impl(), start) : notFound; } - RefPtr m_rep; - static UString* nullUString; + JS_EXPORT_PRIVATE UString substringSharingImpl(unsigned pos, unsigned len = UINT_MAX) const; - friend void initializeUString(); - friend bool operator==(const UString&, const UString&); - friend PassRefPtr concatenate(Rep*, Rep*); // returns 0 if out of memory - }; - PassRefPtr concatenate(UString::Rep*, UString::Rep*); - PassRefPtr concatenate(UString::Rep*, int); - PassRefPtr concatenate(UString::Rep*, double); +private: + RefPtr m_impl; +}; - inline bool operator==(const UString& s1, const UString& s2) - { - int size = s1.size(); - switch (size) { - case 0: - return !s2.size(); - case 1: - return s2.size() == 1 && s1.data()[0] == s2.data()[0]; - case 2: { - if (s2.size() != 2) - return false; - const UChar* d1 = s1.data(); - const UChar* d2 = s2.data(); - return (d1[0] == d2[0]) & (d1[1] == d2[1]); - } - default: - return s2.size() == size && memcmp(s1.data(), s2.data(), size * sizeof(UChar)) == 0; - } - } +template<> +inline const LChar* UString::getCharacters() const { return characters8(); } +template<> +inline const UChar* UString::getCharacters() const { return characters(); } - inline bool operator!=(const UString& s1, const UString& s2) - { - return !JSC::operator==(s1, s2); - } +NEVER_INLINE bool equalSlowCase(const UString& s1, const UString& s2); - bool operator<(const UString& s1, const UString& s2); - bool operator>(const UString& s1, const UString& s2); +ALWAYS_INLINE bool operator==(const UString& s1, const UString& s2) +{ + StringImpl* rep1 = s1.impl(); + StringImpl* rep2 = s2.impl(); - bool operator==(const UString& s1, const char* s2); + if (rep1 == rep2) // If they're the same rep, they're equal. + return true; - inline bool operator!=(const UString& s1, const char* s2) - { - return !JSC::operator==(s1, s2); - } + unsigned size1 = 0; + unsigned size2 = 0; - inline bool operator==(const char *s1, const UString& s2) - { - return operator==(s2, s1); - } + if (rep1) + size1 = rep1->length(); - inline bool operator!=(const char *s1, const UString& s2) - { - return !JSC::operator==(s1, s2); - } + if (rep2) + size2 = rep2->length(); - bool operator==(const CString&, const CString&); + if (size1 != size2) // If the lengths are not the same, we're done. + return false; - inline UString operator+(const UString& s1, const UString& s2) - { - RefPtr result = concatenate(s1.rep(), s2.rep()); - return UString(result ? result.release() : UString::nullRep()); - } + if (!size1) + return true; - int compare(const UString&, const UString&); + if (size1 == 1) + return (*rep1)[0u] == (*rep2)[0u]; - bool equal(const UString::Rep*, const UString::Rep*); + return equalSlowCase(s1, s2); +} - inline PassRefPtr UString::Rep::create(PassRefPtr rep, int offset, int length) - { - ASSERT(rep); - rep->checkConsistency(); - int repOffset = rep->offset; +inline bool operator!=(const UString& s1, const UString& s2) +{ + return !JSC::operator==(s1, s2); +} - PassRefPtr base = rep->baseString(); +JS_EXPORT_PRIVATE bool operator<(const UString& s1, const UString& s2); +JS_EXPORT_PRIVATE bool operator>(const UString& s1, const UString& s2); - ASSERT(-(offset + repOffset) <= base->usedPreCapacity); - ASSERT(offset + repOffset + length <= base->usedCapacity); +JS_EXPORT_PRIVATE bool operator==(const UString& s1, const char* s2); - // Steal the single reference this Rep was created with. - return adoptRef(new Rep(base, repOffset + offset, length)); - } +inline bool operator!=(const UString& s1, const char* s2) +{ + return !JSC::operator==(s1, s2); +} - inline UChar* UString::Rep::data() const - { - const BaseString* base = baseString(); - return base->buf + base->preCapacity + offset; - } +inline bool operator==(const char *s1, const UString& s2) +{ + return operator==(s2, s1); +} - inline void UString::Rep::setStatic(bool v) - { - ASSERT(!identifierTable()); - if (v) - m_identifierTableAndFlags.setFlag(StaticFlag); - else - m_identifierTableAndFlags.clearFlag(StaticFlag); - } +inline bool operator!=(const char *s1, const UString& s2) +{ + return !JSC::operator==(s1, s2); +} - inline void UString::Rep::setBaseString(PassRefPtr base) - { - ASSERT(base != this); - ASSERT(!baseIsSelf()); - m_baseString = base.releaseRef(); - } +inline int codePointCompare(const UString& s1, const UString& s2) +{ + return codePointCompare(s1.impl(), s2.impl()); +} - inline UString::BaseString* UString::Rep::baseString() +struct UStringHash { + static unsigned hash(StringImpl* key) { return key->hash(); } + static bool equal(const StringImpl* a, const StringImpl* b) { - return !baseIsSelf() ? m_baseString : reinterpret_cast(this) ; - } + if (a == b) + return true; + if (!a || !b) + return false; + + unsigned aLength = a->length(); + unsigned bLength = b->length(); + if (aLength != bLength) + return false; + + // FIXME: perhaps we should have a more abstract macro that indicates when + // going 4 bytes at a time is unsafe +#if CPU(ARM) || CPU(SH4) || CPU(MIPS) || CPU(SPARC) + const UChar* aChars = a->characters(); + const UChar* bChars = b->characters(); + for (unsigned i = 0; i != aLength; ++i) { + if (*aChars++ != *bChars++) + return false; + } + return true; +#else + /* Do it 4-bytes-at-a-time on architectures where it's safe */ + const uint32_t* aChars = reinterpret_cast(a->characters()); + const uint32_t* bChars = reinterpret_cast(b->characters()); + + unsigned halfLength = aLength >> 1; + for (unsigned i = 0; i != halfLength; ++i) + if (*aChars++ != *bChars++) + return false; - inline const UString::BaseString* UString::Rep::baseString() const - { - return const_cast(this)->baseString(); - } + if (aLength & 1 && *reinterpret_cast(aChars) != *reinterpret_cast(bChars)) + return false; -#ifdef NDEBUG - inline void UString::Rep::checkConsistency() const - { - } + return true; #endif - - inline UString::UString() - : m_rep(&Rep::null()) - { } - // Rule from ECMA 15.2 about what an array index is. - // Must exactly match string form of an unsigned integer, and be less than 2^32 - 1. - inline unsigned UString::toArrayIndex(bool* ok) const + static unsigned hash(const RefPtr& key) { return key->hash(); } + static bool equal(const RefPtr& a, const RefPtr& b) { - unsigned i = toStrictUInt32(ok); - if (ok && i >= 0xFFFFFFFFU) - *ok = false; - return i; + return equal(a.get(), b.get()); } - // We'd rather not do shared substring append for small strings, since - // this runs too much risk of a tiny initial string holding down a - // huge buffer. - // FIXME: this should be size_t but that would cause warnings until we - // fix UString sizes to be size_t instead of int - static const int minShareSize = Heap::minExtraCostSize / sizeof(UChar); - - inline size_t UString::cost() const + static unsigned hash(const UString& key) { return key.impl()->hash(); } + static bool equal(const UString& a, const UString& b) { - BaseString* base = m_rep->baseString(); - size_t capacity = (base->capacity + base->preCapacity) * sizeof(UChar); - size_t reportedCost = base->reportedCost; - ASSERT(capacity >= reportedCost); - - size_t capacityDelta = capacity - reportedCost; - - if (capacityDelta < static_cast(minShareSize)) - return 0; - - base->reportedCost = capacity; - - return capacityDelta; + return equal(a.impl(), b.impl()); } - struct IdentifierRepHash : PtrHash > { - static unsigned hash(const RefPtr& key) { return key->computedHash(); } - static unsigned hash(JSC::UString::Rep* key) { return key->computedHash(); } - }; + static const bool safeToCompareToEmptyOrDeleted = false; +}; - void initializeUString(); } // namespace JSC namespace WTF { - template struct DefaultHash; - template struct StrHash; - - template<> struct StrHash { - static unsigned hash(const JSC::UString::Rep* key) { return key->hash(); } - static bool equal(const JSC::UString::Rep* a, const JSC::UString::Rep* b) { return JSC::equal(a, b); } - static const bool safeToCompareToEmptyOrDeleted = false; - }; - - template<> struct StrHash > : public StrHash { - using StrHash::hash; - static unsigned hash(const RefPtr& key) { return key->hash(); } - using StrHash::equal; - static bool equal(const RefPtr& a, const RefPtr& b) { return JSC::equal(a.get(), b.get()); } - static bool equal(const JSC::UString::Rep* a, const RefPtr& b) { return JSC::equal(a, b.get()); } - static bool equal(const RefPtr& a, const JSC::UString::Rep* b) { return JSC::equal(a.get(), b); } +// UStringHash is the default hash for UString +template struct DefaultHash; +template<> struct DefaultHash { + typedef JSC::UStringHash Hash; +}; - static const bool safeToCompareToEmptyOrDeleted = false; - }; - - template<> struct DefaultHash { - typedef StrHash Hash; - }; - - template<> struct DefaultHash > { - typedef StrHash > Hash; - - }; +template <> struct VectorTraits : SimpleClassVectorTraits { }; } // namespace WTF #endif +