wtf/text/StringImpl.h

   1 /*
   2  * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
   3  * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved.
   4  * Copyright (C) 2009 Google Inc. All rights reserved.
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Library General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Library General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Library General Public License
  17  * along with this library; see the file COPYING.LIB.  If not, write to
  18  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  19  * Boston, MA 02110-1301, USA.
  20  *
  21  */
  22
  23 #ifndef StringImpl_h
  24 #define StringImpl_h
  25
  26 #include <limits.h>
  27 #include <wtf/ASCIICType.h>
  28 #include <wtf/CrossThreadRefCounted.h>
  29 #include <wtf/OwnFastMallocPtr.h>
  30 #include <wtf/StdLibExtras.h>
  31 #include <wtf/StringHashFunctions.h>
  32 #include <wtf/Vector.h>
  33 #include <wtf/text/StringImplBase.h>
  34 #include <wtf/unicode/Unicode.h>
  35
  36 #if PLATFORM(CF)
     // Local declaration of the CoreFoundation string handle type; it is the return type of
     // StringImpl::createCFString(), which is declared under the same PLATFORM(CF) guard.
  37 typedef const struct __CFString * CFStringRef;
  38 #endif
  39
  40 #ifdef __OBJC__
     // Forward declaration of the Objective-C NSString class, needed for StringImpl's
     // "operator NSString*()" declared under the same __OBJC__ guard.
  41 @class NSString;
  42 #endif
  43
  44 // FIXME: This is a temporary layering violation while we move string code to WTF.
  45 // Landing the file moves in one patch, will follow on with patches to change the namespaces.
  46 namespace JSC {
  47
     // Forward declarations only: StringImpl names both structs as friends.
  48 struct IdentifierCStringTranslator;
  49 struct IdentifierUCharBufferTranslator;
  50
  51 }
  52
  53 // FIXME: This is a temporary layering violation while we move string code to WTF.
  54 // Landing the file moves in one patch, will follow on with patches to change the namespaces.
  55 namespace WebCore {
  56
  57 class StringBuffer; // Taken by reference by StringImpl::adopt(StringBuffer&).
  58
     // Forward declarations. CStringTranslator, HashAndCharactersTranslator and
     // UCharBufferTranslator are named as friends of StringImpl; StringHash is the hash type
     // selected for StringImpl keys by the WTF::DefaultHash specializations at the end of this file.
  59 struct CStringTranslator;
  60 struct HashAndCharactersTranslator;
  61 struct StringHash;
  62 struct UCharBufferTranslator;
  63
     // Case-sensitivity selector. Not referenced anywhere else in this header.
  64 enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive };
  65
     // SharableUChar: a const UChar buffer held through OwnFastMallocPtr.
     // SharedUChar: SharableUChar wrapped in CrossThreadRefCounted; this is the buffer type taken
     // by StringImpl's BufferShared constructor and returned by StringImpl::sharedBuffer().
  66 typedef OwnFastMallocPtr<const UChar> SharableUChar;
  67 typedef CrossThreadRefCounted<SharableUChar> SharedUChar;
     // Predicate over a single UChar; accepted by StringImpl::find() and removeCharacters().
  68 typedef bool (*CharacterMatchFunctionPtr)(UChar);
  69
  70 class StringImpl : public StringImplBase {
         // Reference-counted string storage. Each instance records where its characters live
         // (m_data), a lazily computed hash (m_hash) and, depending on bufferOwnership(), a
         // pointer to the base string (BufferSubstring) or to a SharedUChar (BufferShared).
         // m_length and m_refCountAndFlags are not declared here; they come from StringImplBase.
         //
         // The friends listed below get access to the private members (setHash(), for one, is
         // documented as being for AtomicString's translator helpers).
  71     friend struct JSC::IdentifierCStringTranslator;
  72     friend struct JSC::IdentifierUCharBufferTranslator;
  73     friend struct CStringTranslator;
  74     friend struct HashAndCharactersTranslator;
  75     friend struct UCharBufferTranslator;
  76     friend class AtomicStringImpl;
  77 private:
  78     // Used to construct static strings, which have a special refCount that can never hit zero.
  79     // This means that the static string will never be destroyed, which is important because
  80     // static strings will be shared across threads & ref-counted in a non-threadsafe manner.
  81     StringImpl(const UChar* characters, unsigned length, StaticStringConstructType)
  82         : StringImplBase(length, ConstructStaticString)
  83         , m_data(characters)
  84         , m_buffer(0)
  85         , m_hash(0)
  86     {
  87         // Ensure that the hash is computed so that AtomicStringHash can call existingHash()
  88         // with impunity. The empty string is special because it is never entered into
  89         // AtomicString's HashKey, but still needs to compare correctly.
  90         hash();
  91     }
  92
  93     // Create a normal string with internal storage (BufferInternal)
         // The characters live immediately after this object ("this + 1"); see
         // tryCreateUninitialized(), which allocates the object and its characters as one block.
  94     StringImpl(unsigned length)
  95         : StringImplBase(length, BufferInternal)
  96         , m_data(reinterpret_cast<const UChar*>(this + 1))
  97         , m_buffer(0)
  98         , m_hash(0)
  99     {
 100         ASSERT(m_data);
 101         ASSERT(m_length);
 102     }
 103
 104     // Create a StringImpl adopting ownership of the provided buffer (BufferOwned)
 105     StringImpl(const UChar* characters, unsigned length)
 106         : StringImplBase(length, BufferOwned)
 107         , m_data(characters)
 108         , m_buffer(0)
 109         , m_hash(0)
 110     {
 111         ASSERT(m_data);
 112         ASSERT(m_length);
 113     }
 114
 115     // Used to create new strings that are a substring of an existing StringImpl (BufferSubstring)
         // m_substringBuffer receives the pointer released from "base" (presumably together with
         // the reference it carried). The base must not itself be a substring (asserted below).
 116     StringImpl(const UChar* characters, unsigned length, PassRefPtr<StringImpl> base)
 117         : StringImplBase(length, BufferSubstring)
 118         , m_data(characters)
 119         , m_substringBuffer(base.releaseRef())
 120         , m_hash(0)
 121     {
 122         ASSERT(m_data);
 123         ASSERT(m_length);
 124         ASSERT(m_substringBuffer->bufferOwnership() != BufferSubstring);
 125     }
 126
 127     // Used to construct new strings sharing an existing SharedUChar (BufferShared)
 128     StringImpl(const UChar* characters, unsigned length, PassRefPtr<SharedUChar> sharedBuffer)
 129         : StringImplBase(length, BufferShared)
 130         , m_data(characters)
 131         , m_sharedBuffer(sharedBuffer.releaseRef())
 132         , m_hash(0)
 133     {
 134         ASSERT(m_data);
 135         ASSERT(m_length);
 136     }
 137
 138     // For use only by AtomicString's XXXTranslator helpers.
         // Stores a precomputed hash. The asserts require a non-static string that has no hash
         // yet, and a value equal to what computeHash() yields for the current characters.
 139     void setHash(unsigned hash)
 140     {
 141         ASSERT(!isStatic());
 142         ASSERT(!m_hash);
 143         ASSERT(hash == computeHash(m_data, m_length));
 144         m_hash = hash;
 145     }
 146
 147 public:
 148     ~StringImpl();
 149
         // Factory functions. Those without a body here are defined out of line.
 150     static PassRefPtr<StringImpl> create(const UChar*, unsigned length);
 151     static PassRefPtr<StringImpl> create(const char*, unsigned length);
 152     static PassRefPtr<StringImpl> create(const char*);
 153     static PassRefPtr<StringImpl> create(const UChar*, unsigned length, PassRefPtr<SharedUChar> sharedBuffer);
         // Creates a string that views "length" characters of "rep" starting at "offset" without
         // copying them. Returns empty() when length is zero. If rep is itself a substring, the
         // new string is based on rep's own base, so that a substring never has a substring as
         // its base (the BufferSubstring constructor asserts this).
 154     static PassRefPtr<StringImpl> create(PassRefPtr<StringImpl> rep, unsigned offset, unsigned length)
 155     {
 156         ASSERT(rep);
 157         ASSERT(length <= rep->length());
 158
 159         if (!length)
 160             return empty();
 161
             // The whole window must lie inside rep. length <= rep->length() was asserted above,
             // so the subtraction cannot wrap.
             ASSERT(offset <= rep->length() - length);

 162         StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep->m_substringBuffer : rep.get();
 163         return adoptRef(new StringImpl(rep->m_data + offset, length, ownerRep));
 164     }
 165
 166     static PassRefPtr<StringImpl> createUninitialized(unsigned length, UChar*& data);
         // Allocates a StringImpl and room for "length" UChars as a single tryFastMalloc block.
         // On success "output" points at the (uninitialized) character storage. For a zero
         // length, returns empty() with output set to 0. If the size computation would overflow
         // or the allocation fails, sets output to 0 and returns 0.
 167     static PassRefPtr<StringImpl> tryCreateUninitialized(unsigned length, UChar*& output)
 168     {
 169         if (!length) {
 170             output = 0;
 171             return empty();
 172         }
 173
             // Reject lengths for which sizeof(StringImpl) + length * sizeof(UChar) would not fit in an unsigned.
 174         if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar))) {
 175             output = 0;
 176             return 0;
 177         }
 178         StringImpl* resultImpl;
 179         if (!tryFastMalloc(sizeof(UChar) * length + sizeof(StringImpl)).getValue(resultImpl)) {
 180             output = 0;
 181             return 0;
 182         }
             // The characters follow the object; construct the StringImpl in place at the start of the block.
 183         output = reinterpret_cast<UChar*>(resultImpl + 1);
 184         return adoptRef(new(resultImpl) StringImpl(length));
 185     }
 186
         // Offset of m_data within StringImpl, as computed by OBJECT_OFFSETOF.
 187     static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl, m_data); }
 188     static PassRefPtr<StringImpl> createWithTerminatingNullCharacter(const StringImpl&);
 189     static PassRefPtr<StringImpl> createStrippingNullCharacters(const UChar*, unsigned length);
 190
         // Takes over the buffer of "vector" (via releaseBuffer()) as a BufferOwned string.
         // An empty vector yields empty() and is left untouched. CRASH()es if the vector's size
         // does not fit in an unsigned.
 191     template<size_t inlineCapacity>
 192     static PassRefPtr<StringImpl> adopt(Vector<UChar, inlineCapacity>& vector)
 193     {
 194         if (size_t size = vector.size()) {
 195             ASSERT(vector.data());
 196             if (size > std::numeric_limits<unsigned>::max())
 197                 CRASH();
 198             return adoptRef(new StringImpl(vector.releaseBuffer(), size));
 199         }
 200         return empty();
 201     }
 202     static PassRefPtr<StringImpl> adopt(StringBuffer&);
 203
 204     SharedUChar* sharedBuffer();
 205     const UChar* characters() const { return m_data; }
 206
         // Returns m_length the first time it is called on a string whose
         // s_refCountFlagShouldReportedCost flag is set, and clears that flag; returns 0 on
         // later calls or when the flag was never set.
 207     size_t cost()
 208     {
 209         // For substrings, return the cost of the base string.
 210         if (bufferOwnership() == BufferSubstring)
 211             return m_substringBuffer->cost();
 212
 213         if (m_refCountAndFlags & s_refCountFlagShouldReportedCost) {
 214             m_refCountAndFlags &= ~s_refCountFlagShouldReportedCost;
 215             return m_length;
 216         }
 217         return 0;
 218     }
 219
         // Accessors for the "is identifier" flag bit. The setter asserts the string is not static.
 220     bool isIdentifier() const { return m_refCountAndFlags & s_refCountFlagIsIdentifier; }
 221     void setIsIdentifier(bool isIdentifier)
 222     {
 223         ASSERT(!isStatic());
 224         if (isIdentifier)
 225             m_refCountAndFlags |= s_refCountFlagIsIdentifier;
 226         else
 227             m_refCountAndFlags &= ~s_refCountFlagIsIdentifier;
 228     }
 229
 230     bool hasTerminatingNullCharacter() const { return m_refCountAndFlags & s_refCountFlagHasTerminatingNullCharacter; }
 231
         // Accessors for the "is atomic" flag bit. The setter asserts the string is not static.
 232     bool isAtomic() const { return m_refCountAndFlags & s_refCountFlagIsAtomic; }
 233     void setIsAtomic(bool isAtomic)
 234     {
 235         ASSERT(!isStatic());
 236         if (isAtomic)
 237             m_refCountAndFlags |= s_refCountFlagIsAtomic;
 238         else
 239             m_refCountAndFlags &= ~s_refCountFlagIsAtomic;
 240     }
 241
         // hash() computes the hash on first use and caches it in m_hash; an m_hash of 0 is
         // treated as "not computed yet". existingHash() asserts that a hash is already cached.
 242     unsigned hash() const { if (!m_hash) m_hash = computeHash(m_data, m_length); return m_hash; }
 243     unsigned existingHash() const { ASSERT(m_hash); return m_hash; }
 244     static unsigned computeHash(const UChar* data, unsigned length) { return WTF::stringHash(data, length); }
 245     static unsigned computeHash(const char* data, unsigned length) { return WTF::stringHash(data, length); }
 246     static unsigned computeHash(const char* data) { return WTF::stringHash(data); }
 247
         // deref() subtracts one reference (s_refCountIncrement) and deletes this object once
         // neither any ref-count bit nor the static flag remains set. hasOneRef() is true when
         // those same bits equal exactly one reference.
 248     ALWAYS_INLINE void deref() { m_refCountAndFlags -= s_refCountIncrement; if (!(m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic))) delete this; }
 249     ALWAYS_INLINE bool hasOneRef() const { return (m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic)) == s_refCountIncrement; }
 250
 251     static StringImpl* empty();
 252
         // Copies numCharacters UChars from source to destination: element by element for up
         // to s_copyCharsInlineCutOff characters, with memcpy for anything longer.
 253     static void copyChars(UChar* destination, const UChar* source, unsigned numCharacters)
 254     {
 255         if (numCharacters <= s_copyCharsInlineCutOff) {
 256             for (unsigned i = 0; i < numCharacters; ++i)
 257                 destination[i] = source[i];
 258         } else
 259             memcpy(destination, source, numCharacters * sizeof(UChar));
 260     }
 261
 262     // Returns a StringImpl suitable for use on another thread.
 263     PassRefPtr<StringImpl> crossThreadString();
 264     // Makes a deep copy. Helpful only if you need to use a String on another thread
 265     // (use crossThreadString if the method call doesn't need to be threadsafe).
 266     // Since StringImpl objects are immutable, there's no other reason to make a copy.
 267     PassRefPtr<StringImpl> threadsafeCopy() const;
 268
 269     PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX);
 270
         // Unchecked in release builds: the index is only validated by the ASSERT.
 271     UChar operator[](unsigned i) { ASSERT(i < m_length); return m_data[i]; }
 272     UChar32 characterStartingAt(unsigned);
 273
 274     bool containsOnlyWhitespace();
 275
 276     int toIntStrict(bool* ok = 0, int base = 10);
 277     unsigned toUIntStrict(bool* ok = 0, int base = 10);
 278     int64_t toInt64Strict(bool* ok = 0, int base = 10);
 279     uint64_t toUInt64Strict(bool* ok = 0, int base = 10);
 280     intptr_t toIntPtrStrict(bool* ok = 0, int base = 10);
 281
 282     int toInt(bool* ok = 0); // ignores trailing garbage
 283     unsigned toUInt(bool* ok = 0); // ignores trailing garbage
 284     int64_t toInt64(bool* ok = 0); // ignores trailing garbage
 285     uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage
 286     intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage
 287
 288     double toDouble(bool* ok = 0);
 289     float toFloat(bool* ok = 0);
 290
 291     PassRefPtr<StringImpl> lower();
 292     PassRefPtr<StringImpl> upper();
 293     PassRefPtr<StringImpl> secure(UChar, bool hideLastCharacter = true);
 294     PassRefPtr<StringImpl> foldCase();
 295
 296     PassRefPtr<StringImpl> stripWhiteSpace();
 297     PassRefPtr<StringImpl> simplifyWhiteSpace();
 298
 299     PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr);
 300
 301     int find(const char*, int index = 0, bool caseSensitive = true);
 302     int find(UChar, int index = 0);
 303     int find(CharacterMatchFunctionPtr, int index = 0);
 304     int find(StringImpl*, int index, bool caseSensitive = true);
 305
 306     int reverseFind(UChar, int index);
 307     int reverseFind(StringImpl*, int index, bool caseSensitive = true);
 308
         // startsWith() is a reverseFind() started at index 0 that must report position 0.
         // NOTE(review): this relies on reverseFind() only being able to match at position 0
         // when started there - confirm against the implementation.
 309     bool startsWith(StringImpl* str, bool caseSensitive = true) { return reverseFind(str, 0, caseSensitive) == 0; }
 310     bool endsWith(StringImpl*, bool caseSensitive = true);
 311
 312     PassRefPtr<StringImpl> replace(UChar, UChar);
 313     PassRefPtr<StringImpl> replace(UChar, StringImpl*);
 314     PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*);
 315     PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*);
 316
 317     Vector<char> ascii();
 318     int wordCount(int maxWordsToCount = INT_MAX);
 319
 320     WTF::Unicode::Direction defaultWritingDirection();
 321
 322 #if PLATFORM(CF)
 323     CFStringRef createCFString();
 324 #endif
 325 #ifdef __OBJC__
 326     operator NSString*();
 327 #endif
 328
 329 private:
 330     // Largest character count that copyChars() copies with its inline loop; longer runs use memcpy.
         // NOTE(review): this constant used to carry the remark "This number must be at least 2
         // to avoid sharing empty, null as well as 1 character strings from SmallStrings." Nothing
         // in this header uses it as a sharing threshold - confirm whether that still applies.
 331     static const unsigned s_copyCharsInlineCutOff = 20;
 332
 333     static PassRefPtr<StringImpl> createStrippingNullCharactersSlowCase(const UChar*, unsigned length);
 334
         // Both helpers decode bits of the m_refCountAndFlags word.
 335     BufferOwnership bufferOwnership() const { return static_cast<BufferOwnership>(m_refCountAndFlags & s_refCountMaskBufferOwnership); }
 336     bool isStatic() const { return m_refCountAndFlags & s_refCountFlagStatic; }
         // Start of this string's characters.
 337     const UChar* m_data;
         // Which member is meaningful depends on bufferOwnership(): m_substringBuffer for
         // BufferSubstring, m_sharedBuffer for BufferShared. The remaining constructors store 0 in m_buffer.
 338     union {
 339         void* m_buffer;
 340         StringImpl* m_substringBuffer;
 341         SharedUChar* m_sharedBuffer;
 342     };
         // Cached hash; 0 until computed. Mutable so that hash() can stay const.
 343     mutable unsigned m_hash;
 344 };
 345
 346 bool equal(const StringImpl*, const StringImpl*); // Equality overloads: declared here, not defined in this header.
 347 bool equal(const StringImpl*, const char*);
     // Convenience overload with the arguments the other way round; forwards to equal(b, a).
 348 inline bool equal(const char* a, StringImpl* b) { return equal(b, a); }
 349
     // Case-insensitive counterparts. The inline overloads again only swap their arguments
     // and forward to the declarations above them.
 350 bool equalIgnoringCase(StringImpl*, StringImpl*);
 351 bool equalIgnoringCase(StringImpl*, const char*);
 352 inline bool equalIgnoringCase(const char* a, StringImpl* b) { return equalIgnoringCase(b, a); }
 353 bool equalIgnoringCase(const UChar* a, const char* b, unsigned length);
 354 inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, a, length); }
 355
     // Declared only. NOTE(review): presumably treats a null string and an empty string as
     // equal - confirm against the implementation.
 356 bool equalIgnoringNullity(StringImpl*, StringImpl*);
 357
 358 static inline bool isSpaceOrNewline(UChar c)
 359 {
         // ASCII code points (anything up to 0x7F) are classified by isASCIISpace(). That
         // covers newlines, which the Unicode WhiteSpaceNeutral direction class leaves out.
         if (c <= 0x7F)
             return WTF::isASCIISpace(c);

         // Every other character counts as a space when its Unicode direction is WhiteSpaceNeutral.
         return WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral;
 363 }
 364
 365 // Hot path: this function is used when parsing HTML.
 366 inline PassRefPtr<StringImpl> StringImpl::createStrippingNullCharacters(const UChar* characters, unsigned length)
 367 {
 368     ASSERT(characters);
 369     ASSERT(length);
 370
         // Look for a null character first. When there is none (the case being optimized
         // for), the whole buffer is handed to StringImpl::create, which memcpys it in one
         // go; that is faster than assigning character by character while scanning.
         unsigned index = 0;
         while (index < length) {
             int current = characters[index]; // int is more efficient than UChar here (at least on Intel Mac OS)
             // The first null found sends the entire input to the slow case, which does the stripping.
             if (!current)
                 return StringImpl::createStrippingNullCharactersSlowCase(characters, length);
             ++index;
         }

         return StringImpl::create(characters, length);
 386 }
 387
 388 }
 389
 390 using WebCore::equal; // Lets code at global scope call WebCore::equal() without qualification.
 391
 392 namespace WTF {
 393
 394     // WebCore::StringHash is the default hash for StringImpl* and RefPtr<StringImpl>
         // The primary DefaultHash template is only forward-declared here; the two explicit
         // specializations below supply the Hash typedef for those two key types.
 395     template<typename T> struct DefaultHash;
 396     template<> struct DefaultHash<WebCore::StringImpl*> {
 397         typedef WebCore::StringHash Hash;
 398     };
 399     template<> struct DefaultHash<RefPtr<WebCore::StringImpl> > {
 400         typedef WebCore::StringHash Hash;
 401     };
 402
 403 }
 404
 405 #endif