+++ /dev/null
-/*
- * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
- * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved.
- * Copyright (C) 2009 Google Inc. All rights reserved.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- *
- */
-
-#ifndef StringImpl_h
-#define StringImpl_h
-
-#include <limits.h>
-#include <wtf/ASCIICType.h>
-#include <wtf/CrossThreadRefCounted.h>
-#include <wtf/Forward.h>
-#include <wtf/OwnFastMallocPtr.h>
-#include <wtf/StdLibExtras.h>
-#include <wtf/StringHasher.h>
-#include <wtf/Vector.h>
-#include <wtf/text/StringImplBase.h>
-#include <wtf/unicode/Unicode.h>
-
-#if USE(CF)
-typedef const struct __CFString * CFStringRef;
-#endif
-
-#ifdef __OBJC__
-@class NSString;
-#endif
-
-// FIXME: This is a temporary layering violation while we move string code to WTF.
-// The file moves land in one patch; follow-up patches will change the namespaces.
-namespace JSC {
-struct IdentifierCStringTranslator; // forward-declared so WTF::StringImpl can name it as a friend
-struct IdentifierUCharBufferTranslator; // forward-declared so WTF::StringImpl can name it as a friend
-}
-
-namespace WTF {
-
-struct CStringTranslator; // declared a friend of StringImpl
-struct HashAndCharactersTranslator; // declared a friend of StringImpl
-struct HashAndUTF8CharactersTranslator; // declared a friend of StringImpl
-struct UCharBufferTranslator; // declared a friend of StringImpl
-
-enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive }; // not used by StringImpl itself; exported by the using-declarations at the end of this header
-
-typedef OwnFastMallocPtr<const UChar> SharableUChar; // NOTE(review): presumably a fastMalloc-owned UChar buffer - confirm in OwnFastMallocPtr.h
-typedef CrossThreadRefCounted<SharableUChar> SharedUChar; // type of StringImpl::m_sharedBuffer, used by BufferShared strings
-typedef bool (*CharacterMatchFunctionPtr)(UChar); // per-character predicate accepted by StringImpl::removeCharacters() and StringImpl::find()
-
-// StringImpl is a reference-counted run of UChars plus a lazily computed hash.
-// Where the characters live is recorded as a BufferOwnership value in the
-// m_refCountAndFlags word; each private constructor below sets up exactly one
-// ownership mode (static, BufferInternal, BufferOwned, BufferSubstring or
-// BufferShared). Instances are obtained through the static create*/adopt
-// functions rather than by calling the constructors directly.
-class StringImpl : public StringImplBase {
-    friend struct JSC::IdentifierCStringTranslator;
-    friend struct JSC::IdentifierUCharBufferTranslator;
-    friend struct WTF::CStringTranslator;
-    friend struct WTF::HashAndCharactersTranslator;
-    friend struct WTF::HashAndUTF8CharactersTranslator;
-    friend struct WTF::UCharBufferTranslator;
-    friend class AtomicStringImpl;
-private:
-    // Used to construct static strings, which have a special refCount that can never hit zero.
-    // This means that the static string will never be destroyed, which is important because
-    // static strings will be shared across threads & ref-counted in a non-threadsafe manner.
-    StringImpl(const UChar* characters, unsigned length, StaticStringConstructType)
-        : StringImplBase(length, ConstructStaticString)
-        , m_data(characters)
-        , m_buffer(0)
-        , m_hash(0)
-    {
-        // Ensure that the hash is computed so that AtomicStringHash can call existingHash()
-        // with impunity. The empty string is special because it is never entered into
-        // AtomicString's HashKey, but still needs to compare correctly.
-        hash();
-    }
-
-    // Create a normal string with internal storage (BufferInternal).
-    // The characters start immediately after the StringImpl object itself, so
-    // the caller must have allocated room for them (see tryCreateUninitialized()).
-    StringImpl(unsigned length)
-        : StringImplBase(length, BufferInternal)
-        , m_data(reinterpret_cast<const UChar*>(this + 1))
-        , m_buffer(0)
-        , m_hash(0)
-    {
-        ASSERT(m_data);
-        ASSERT(m_length);
-    }
-
-    // Create a StringImpl adopting ownership of the provided buffer (BufferOwned).
-    // The buffer must be non-null and the length non-zero.
-    StringImpl(const UChar* characters, unsigned length)
-        : StringImplBase(length, BufferOwned)
-        , m_data(characters)
-        , m_buffer(0)
-        , m_hash(0)
-    {
-        ASSERT(m_data);
-        ASSERT(m_length);
-    }
-
-    // Used to create new strings that are a substring of an existing StringImpl (BufferSubstring).
-    // Takes over the reference carried by |base|; the base must not itself be a substring.
-    StringImpl(const UChar* characters, unsigned length, PassRefPtr<StringImpl> base)
-        : StringImplBase(length, BufferSubstring)
-        , m_data(characters)
-        , m_substringBuffer(base.leakRef())
-        , m_hash(0)
-    {
-        ASSERT(m_data);
-        ASSERT(m_length);
-        ASSERT(m_substringBuffer->bufferOwnership() != BufferSubstring);
-    }
-
-    // Used to construct new strings sharing an existing SharedUChar (BufferShared).
-    // Takes over the reference carried by |sharedBuffer|.
-    StringImpl(const UChar* characters, unsigned length, PassRefPtr<SharedUChar> sharedBuffer)
-        : StringImplBase(length, BufferShared)
-        , m_data(characters)
-        , m_sharedBuffer(sharedBuffer.leakRef())
-        , m_hash(0)
-    {
-        ASSERT(m_data);
-        ASSERT(m_length);
-    }
-
-    // For use only by AtomicString's XXXTranslator helpers.
-    // Stores a hash the caller has already computed; it must match what
-    // StringHasher::computeHash() yields for this string, and may only be set
-    // once on a non-static string.
-    void setHash(unsigned hash)
-    {
-        ASSERT(!isStatic());
-        ASSERT(!m_hash);
-        ASSERT(hash == StringHasher::computeHash(m_data, m_length));
-        m_hash = hash;
-    }
-
-public:
-    ~StringImpl();
-
-    static PassRefPtr<StringImpl> create(const UChar*, unsigned length);
-    static PassRefPtr<StringImpl> create(const char*, unsigned length);
-    static PassRefPtr<StringImpl> create(const char*);
-    static PassRefPtr<StringImpl> create(const UChar*, unsigned length, PassRefPtr<SharedUChar> sharedBuffer);
-
-    // Creates a string that refers to |length| characters of |rep| starting at
-    // |offset| without copying them. A zero length yields empty(). If |rep| is
-    // already a substring the new string references rep's base, so substring
-    // chains never grow deeper than one level.
-    static ALWAYS_INLINE PassRefPtr<StringImpl> create(PassRefPtr<StringImpl> rep, unsigned offset, unsigned length)
-    {
-        ASSERT(rep);
-        ASSERT(length <= rep->length());
-
-        if (!length)
-            return empty();
-
-        // The requested range must lie inside rep. offset and length are checked
-        // separately so that the test itself cannot wrap around.
-        ASSERT(offset <= rep->length());
-        ASSERT(length <= rep->length() - offset);
-
-        StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep->m_substringBuffer : rep.get();
-        return adoptRef(new StringImpl(rep->m_data + offset, length, ownerRep));
-    }
-
-    static PassRefPtr<StringImpl> createUninitialized(unsigned length, UChar*& data);
-
-    // Allocates a StringImpl together with room for |length| UChars in one
-    // block and points |output| at the uninitialized characters. Returns
-    // empty() for a zero length, and a null pointer when the size computation
-    // would overflow or the allocation fails; |output| is set to 0 in all of
-    // those cases.
-    static ALWAYS_INLINE PassRefPtr<StringImpl> tryCreateUninitialized(unsigned length, UChar*& output)
-    {
-        if (!length) {
-            output = 0;
-            return empty();
-        }
-
-        // Reject lengths for which sizeof(StringImpl) + length * sizeof(UChar) cannot be represented in an unsigned.
-        if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar))) {
-            output = 0;
-            return 0;
-        }
-        StringImpl* resultImpl;
-        if (!tryFastMalloc(sizeof(UChar) * length + sizeof(StringImpl)).getValue(resultImpl)) {
-            output = 0;
-            return 0;
-        }
-        // The characters follow the object, matching the BufferInternal constructor.
-        output = reinterpret_cast<UChar*>(resultImpl + 1);
-        return adoptRef(new(resultImpl) StringImpl(length));
-    }
-
-    static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl, m_data); }
-    static PassRefPtr<StringImpl> createWithTerminatingNullCharacter(const StringImpl&);
-    static PassRefPtr<StringImpl> createStrippingNullCharacters(const UChar*, unsigned length);
-
-    // Takes over the buffer of |vector| (via releaseBuffer()) instead of copying
-    // it. An empty vector yields empty(); a size that does not fit in an
-    // unsigned crashes deliberately.
-    template<size_t inlineCapacity>
-    static PassRefPtr<StringImpl> adopt(Vector<UChar, inlineCapacity>& vector)
-    {
-        if (size_t size = vector.size()) {
-            ASSERT(vector.data());
-            if (size > std::numeric_limits<unsigned>::max())
-                CRASH();
-            return adoptRef(new StringImpl(vector.releaseBuffer(), size));
-        }
-        return empty();
-    }
-    static PassRefPtr<StringImpl> adopt(StringBuffer&);
-
-    SharedUChar* sharedBuffer();
-    const UChar* characters() const { return m_data; }
-
-    // Returns m_length the first time it is called while the "should report
-    // cost" flag is set, clearing the flag, and 0 on every later call.
-    size_t cost()
-    {
-        // For substrings, return the cost of the base string.
-        if (bufferOwnership() == BufferSubstring)
-            return m_substringBuffer->cost();
-
-        if (m_refCountAndFlags & s_refCountFlagShouldReportedCost) {
-            m_refCountAndFlags &= ~s_refCountFlagShouldReportedCost;
-            return m_length;
-        }
-        return 0;
-    }
-
-    bool isIdentifier() const { return m_refCountAndFlags & s_refCountFlagIsIdentifier; }
-    // Sets or clears the identifier flag; must not be called on a static string.
-    void setIsIdentifier(bool isIdentifier)
-    {
-        ASSERT(!isStatic());
-        if (isIdentifier)
-            m_refCountAndFlags |= s_refCountFlagIsIdentifier;
-        else
-            m_refCountAndFlags &= ~s_refCountFlagIsIdentifier;
-    }
-
-    bool hasTerminatingNullCharacter() const { return m_refCountAndFlags & s_refCountFlagHasTerminatingNullCharacter; }
-
-    bool isAtomic() const { return m_refCountAndFlags & s_refCountFlagIsAtomic; }
-    // Sets or clears the atomic flag; must not be called on a static string.
-    void setIsAtomic(bool isAtomic)
-    {
-        ASSERT(!isStatic());
-        if (isAtomic)
-            m_refCountAndFlags |= s_refCountFlagIsAtomic;
-        else
-            m_refCountAndFlags &= ~s_refCountFlagIsAtomic;
-    }
-
-    // Returns the hash of the characters, computing and caching it in the
-    // mutable m_hash the first time it is needed (0 means "not computed yet").
-    unsigned hash() const
-    {
-        if (!m_hash)
-            m_hash = StringHasher::computeHash(m_data, m_length);
-        return m_hash;
-    }
-    // Returns the cached hash; the caller guarantees it has already been computed.
-    unsigned existingHash() const { ASSERT(m_hash); return m_hash; }
-
-    // Drops one reference and deletes the object once neither a reference count
-    // nor the static flag remains in m_refCountAndFlags.
-    ALWAYS_INLINE void deref()
-    {
-        m_refCountAndFlags -= s_refCountIncrement;
-        if (!(m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic)))
-            delete this;
-    }
-    // True when exactly one reference is held and the string is not static.
-    ALWAYS_INLINE bool hasOneRef() const { return (m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic)) == s_refCountIncrement; }
-
-    static StringImpl* empty();
-
-    // Copies |numCharacters| UChars from |source| to |destination|: element by
-    // element for runs of at most s_copyCharsInlineCutOff characters, with
-    // memcpy() for longer ones.
-    static void copyChars(UChar* destination, const UChar* source, unsigned numCharacters)
-    {
-        if (numCharacters <= s_copyCharsInlineCutOff) {
-            for (unsigned i = 0; i < numCharacters; ++i)
-                destination[i] = source[i];
-        } else
-            memcpy(destination, source, numCharacters * sizeof(UChar));
-    }
-
-    // Returns a StringImpl suitable for use on another thread.
-    PassRefPtr<StringImpl> crossThreadString();
-    // Makes a deep copy. Helpful only if you need to use a String on another thread
-    // (use crossThreadString if the method call doesn't need to be threadsafe).
-    // Since StringImpl objects are immutable, there's no other reason to make a copy.
-    PassRefPtr<StringImpl> threadsafeCopy() const;
-
-    PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX);
-
-    // Unchecked in release builds: |i| must be less than the string length.
-    UChar operator[](unsigned i) { ASSERT(i < m_length); return m_data[i]; }
-    UChar32 characterStartingAt(unsigned);
-
-    bool containsOnlyWhitespace();
-
-    // Number parsing. The *Strict variants take an explicit base; the non-strict
-    // ones below ignore trailing garbage. |ok|, when supplied, presumably reports
-    // whether parsing succeeded (the definitions are not in this header).
-    int toIntStrict(bool* ok = 0, int base = 10);
-    unsigned toUIntStrict(bool* ok = 0, int base = 10);
-    int64_t toInt64Strict(bool* ok = 0, int base = 10);
-    uint64_t toUInt64Strict(bool* ok = 0, int base = 10);
-    intptr_t toIntPtrStrict(bool* ok = 0, int base = 10);
-
-    int toInt(bool* ok = 0); // ignores trailing garbage
-    unsigned toUInt(bool* ok = 0); // ignores trailing garbage
-    int64_t toInt64(bool* ok = 0); // ignores trailing garbage
-    uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage
-    intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage
-
-    double toDouble(bool* ok = 0, bool* didReadNumber = 0);
-    float toFloat(bool* ok = 0, bool* didReadNumber = 0);
-
-    PassRefPtr<StringImpl> lower();
-    PassRefPtr<StringImpl> upper();
-
-    enum LastCharacterBehavior { ObscureLastCharacter, DisplayLastCharacter };
-
-    PassRefPtr<StringImpl> secure(UChar, LastCharacterBehavior = ObscureLastCharacter);
-    PassRefPtr<StringImpl> foldCase();
-
-    PassRefPtr<StringImpl> stripWhiteSpace();
-    PassRefPtr<StringImpl> simplifyWhiteSpace();
-
-    PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr);
-
-    // Forward searches start at |index| (default: the beginning).
-    size_t find(UChar, unsigned index = 0);
-    size_t find(CharacterMatchFunctionPtr, unsigned index = 0);
-    size_t find(const char*, unsigned index = 0);
-    size_t find(StringImpl*, unsigned index = 0);
-    size_t findIgnoringCase(const char*, unsigned index = 0);
-    size_t findIgnoringCase(StringImpl*, unsigned index = 0);
-
-    // Reverse searches take |index| with a default of UINT_MAX.
-    size_t reverseFind(UChar, unsigned index = UINT_MAX);
-    size_t reverseFind(StringImpl*, unsigned index = UINT_MAX);
-    size_t reverseFindIgnoringCase(StringImpl*, unsigned index = UINT_MAX);
-
-    // Implemented as a reverse search limited to index 0, so the only position
-    // that can be reported as a match is the very start of the string.
-    bool startsWith(StringImpl* str, bool caseSensitive = true) { return (caseSensitive ? reverseFind(str, 0) : reverseFindIgnoringCase(str, 0)) == 0; }
-    bool endsWith(StringImpl*, bool caseSensitive = true);
-
-    PassRefPtr<StringImpl> replace(UChar, UChar);
-    PassRefPtr<StringImpl> replace(UChar, StringImpl*);
-    PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*);
-    PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*);
-
-    int wordCount(int maxWordsToCount = INT_MAX);
-
-    WTF::Unicode::Direction defaultWritingDirection(bool* hasStrongDirectionality = 0);
-
-#if USE(CF)
-    CFStringRef createCFString();
-#endif
-#ifdef __OBJC__
-    operator NSString*();
-#endif
-
-private:
-    // This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings.
-    static const unsigned s_copyCharsInlineCutOff = 20;
-
-    static PassRefPtr<StringImpl> createStrippingNullCharactersSlowCase(const UChar*, unsigned length);
-
-    // Decodes the ownership mode / static flag stored in m_refCountAndFlags.
-    BufferOwnership bufferOwnership() const { return static_cast<BufferOwnership>(m_refCountAndFlags & s_refCountMaskBufferOwnership); }
-    bool isStatic() const { return m_refCountAndFlags & s_refCountFlagStatic; }
-
-    // First character of the string; never reassigned after construction in this header.
-    const UChar* m_data;
-    // Which member is meaningful depends on bufferOwnership(): m_substringBuffer
-    // for BufferSubstring, m_sharedBuffer for BufferShared; the other
-    // constructors initialise the union through m_buffer with 0.
-    union {
-        void* m_buffer;
-        StringImpl* m_substringBuffer;
-        SharedUChar* m_sharedBuffer;
-    };
-    // Cached hash; 0 until hash() or setHash() fills it in.
-    mutable unsigned m_hash;
-};
-
-// Equality of two StringImpls, or of a StringImpl and a const char* buffer.
-// Only the overload with the arguments swapped is defined in this header; it
-// forwards to the (StringImpl, const char*) form.
-bool equal(const StringImpl*, const StringImpl*);
-bool equal(const StringImpl*, const char*);
-inline bool equal(const char* a, StringImpl* b)
-{
-    return equal(b, a);
-}
-
-// Case-insensitive counterparts. The (UChar*, char*, length) form compares raw
-// buffers of an explicit length; both swapped-argument overloads simply
-// forward to the form declared just before them.
-bool equalIgnoringCase(StringImpl*, StringImpl*);
-bool equalIgnoringCase(StringImpl*, const char*);
-inline bool equalIgnoringCase(const char* a, StringImpl* b)
-{
-    return equalIgnoringCase(b, a);
-}
-bool equalIgnoringCase(const UChar* a, const char* b, unsigned length);
-inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length)
-{
-    return equalIgnoringCase(b, a, length);
-}
-
-// Declared here, defined out of line.
-bool equalIgnoringNullity(StringImpl*, StringImpl*);
-
-// Compares the contents of a UChar vector with a StringImpl. A null
-// StringImpl is considered equal to an empty vector; otherwise the two must
-// have the same length and identical code units.
-template<size_t inlineCapacity>
-bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b)
-{
-    if (!b)
-        return !a.size();
-    if (a.size() != b->length())
-        return false;
-    // memcmp() takes a byte count, so the UChar count has to be scaled by the
-    // element size; otherwise only the first half of the characters is compared.
-    return !memcmp(a.data(), b->characters(), b->length() * sizeof(UChar));
-}
-
-int codePointCompare(const StringImpl*, const StringImpl*); // defined out of line; NOTE(review): presumably a three-way ordering by code point - confirm against the definition
-
-// Classifies a single code unit as whitespace.
-static inline bool isSpaceOrNewline(UChar c)
-{
-    // Code units up to 0x7F (the ASCII range) are handled by isASCIISpace().
-    // This will include newlines, which aren't included in Unicode DirWS.
-    if (c <= 0x7F)
-        return WTF::isASCIISpace(c);
-
-    // Everything else counts as space when its Unicode direction class is WhiteSpaceNeutral.
-    return WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral;
-}
-
-// This is a hot function because it's used when parsing HTML.
-// |characters| must be non-null and |length| non-zero (both are asserted).
-inline PassRefPtr<StringImpl> StringImpl::createStrippingNullCharacters(const UChar* characters, unsigned length)
-{
-    ASSERT(characters);
-    ASSERT(length);
-
-    // The common case is input without any null characters. Detect that with
-    // a quick scan and hand the untouched buffer to StringImpl::create, which
-    // will memcpy the whole buffer; that beats assigning character by
-    // character while scanning.
-
-    // Fast case: stop scanning as soon as the first null is seen.
-    bool sawNull = false;
-    unsigned index = 0;
-    while (!sawNull && index < length) {
-        int codeUnit = characters[index]; // more efficient than using UChar here (at least on Intel Mac OS)
-        sawNull = !codeUnit;
-        ++index;
-    }
-
-    if (sawNull)
-        return StringImpl::createStrippingNullCharactersSlowCase(characters, length);
-
-    return StringImpl::create(characters, length);
-}
-
-struct StringHash; // forward declaration; only named as a typedef target in this header
-
-// StringHash is the default hash for StringImpl* and RefPtr<StringImpl>: the two explicit specializations below name it as their Hash type.
-template<typename T> struct DefaultHash; // primary template is declared, not defined, in this header
-template<> struct DefaultHash<StringImpl*> {
- typedef StringHash Hash;
-};
-template<> struct DefaultHash<RefPtr<StringImpl> > {
- typedef StringHash Hash;
-};
-
-}
-
-using WTF::StringImpl; // this and the using-declarations below make the WTF names usable unqualified by code that includes this header
-using WTF::equal;
-using WTF::TextCaseSensitivity;
-using WTF::TextCaseSensitive;
-using WTF::TextCaseInsensitive;
-
-#endif