WebCore/StringImpl.h

   1 /*
   2  * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
   3  * Copyright (C) 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Library General Public
   7  * License as published by the Free Software Foundation; either
   8  * version 2 of the License, or (at your option) any later version.
   9  *
  10  * This library is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Library General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Library General Public License
  16  * along with this library; see the file COPYING.LIB.  If not, write to
  17  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  18  * Boston, MA 02110-1301, USA.
  19  *
  20  */
  21
  22 #ifndef StringImpl_h
  23 #define StringImpl_h
  24
  25 #include <limits.h>
  26 #include <wtf/ASCIICType.h>
  27 #include <wtf/Forward.h>
  28 #include <wtf/RefCounted.h>
  29 #include <wtf/Vector.h>
  30 #include <wtf/unicode/Unicode.h>
  31
  32 #if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
  33 typedef const struct __CFString * CFStringRef;
  34 #endif
  35
  36 #ifdef __OBJC__
  37 @class NSString;
  38 #endif
  39
  40 namespace WebCore {
  41
  42 class AtomicString;
  43 class StringBuffer;
  44
  45 struct CStringTranslator;
  46 struct HashAndCharactersTranslator;
  47 struct StringHash;
  48 struct UCharBufferTranslator;
  49
  50 enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive };
  51
  52 typedef bool (*CharacterMatchFunctionPtr)(UChar);
  53
  54 class StringImpl : public RefCounted<StringImpl> {
  55     friend class AtomicString;
  56     friend struct CStringTranslator;
  57     friend struct HashAndCharactersTranslator;
  58     friend struct UCharBufferTranslator;
  59 private:
  60     friend class ThreadGlobalData;
  61     StringImpl();
  62     StringImpl(const UChar*, unsigned length);
  63     StringImpl(const char*, unsigned length);
  64
  65     struct AdoptBuffer { };
  66     StringImpl(UChar*, unsigned length, AdoptBuffer);
  67
  68     struct WithTerminatingNullCharacter { };
  69     StringImpl(const StringImpl&, WithTerminatingNullCharacter);
  70
  71     // For AtomicString.
  72     StringImpl(const UChar*, unsigned length, unsigned hash);
  73     StringImpl(const char*, unsigned length, unsigned hash);
  74
  75 public:
  76     ~StringImpl();
  77
  78     static PassRefPtr<StringImpl> create(const UChar*, unsigned length);
  79     static PassRefPtr<StringImpl> create(const char*, unsigned length);
  80     static PassRefPtr<StringImpl> create(const char*);
  81
  82     static PassRefPtr<StringImpl> createWithTerminatingNullCharacter(const StringImpl&);
  83
  84     static PassRefPtr<StringImpl> createStrippingNullCharacters(const UChar*, unsigned length);
  85     static PassRefPtr<StringImpl> adopt(StringBuffer&);
  86     static PassRefPtr<StringImpl> adopt(Vector<UChar>&);
  87
  88     const UChar* characters() { return m_data; }
  89     unsigned length() { return m_length; }
  90
  91     bool hasTerminatingNullCharacter() { return m_hasTerminatingNullCharacter; }
  92
  93     unsigned hash() { if (m_hash == 0) m_hash = computeHash(m_data, m_length); return m_hash; }
  94     unsigned existingHash() const { ASSERT(m_hash); return m_hash; }
  95     static unsigned computeHash(const UChar*, unsigned len);
  96     static unsigned computeHash(const char*);
  97
  98     // Makes a deep copy. Helpful only if you need to use a String on another thread.
  99     // Since StringImpl objects are immutable, there's no other reason to make a copy.
 100     PassRefPtr<StringImpl> copy();
 101
 102     // Makes a deep copy like copy() but only for a substring.
 103     // (This ensures that you always get something suitable for a thread while subtring
 104     // may not.  For example, in the empty string case, substring returns empty() which
 105     // is not safe for another thread.)
 106     PassRefPtr<StringImpl> substringCopy(unsigned pos, unsigned len  = UINT_MAX);
 107
 108     PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX);
 109
 110     UChar operator[](unsigned i) { ASSERT(i < m_length); return m_data[i]; }
 111     UChar32 characterStartingAt(unsigned);
 112
 113     bool containsOnlyWhitespace();
 114
 115     int toIntStrict(bool* ok = 0, int base = 10);
 116     unsigned toUIntStrict(bool* ok = 0, int base = 10);
 117     int64_t toInt64Strict(bool* ok = 0, int base = 10);
 118     uint64_t toUInt64Strict(bool* ok = 0, int base = 10);
 119
 120     int toInt(bool* ok = 0); // ignores trailing garbage
 121     unsigned toUInt(bool* ok = 0); // ignores trailing garbage
 122     int64_t toInt64(bool* ok = 0); // ignores trailing garbage
 123     uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage
 124
 125     double toDouble(bool* ok = 0);
 126     float toFloat(bool* ok = 0);
 127
 128     bool isLower();
 129     PassRefPtr<StringImpl> lower();
 130     PassRefPtr<StringImpl> upper();
 131     PassRefPtr<StringImpl> secure(UChar aChar, bool last = true);
 132     PassRefPtr<StringImpl> capitalize(UChar previousCharacter);
 133     PassRefPtr<StringImpl> foldCase();
 134
 135     PassRefPtr<StringImpl> stripWhiteSpace();
 136     PassRefPtr<StringImpl> simplifyWhiteSpace();
 137
 138     PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr);
 139
 140     int find(const char*, int index = 0, bool caseSensitive = true);
 141     int find(UChar, int index = 0);
 142     int find(CharacterMatchFunctionPtr, int index = 0);
 143     int find(StringImpl*, int index, bool caseSensitive = true);
 144
 145     int reverseFind(UChar, int index);
 146     int reverseFind(StringImpl*, int index, bool caseSensitive = true);
 147
 148     bool startsWith(StringImpl* m_data, bool caseSensitive = true) { return reverseFind(m_data, 0, caseSensitive) == 0; }
 149     bool endsWith(StringImpl*, bool caseSensitive = true);
 150
 151     PassRefPtr<StringImpl> replace(UChar, UChar);
 152     PassRefPtr<StringImpl> replace(UChar, StringImpl*);
 153     PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*);
 154     PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*);
 155
 156     static StringImpl* empty();
 157
 158     Vector<char> ascii();
 159     int wordCount(int maxWordsToCount = INT_MAX);
 160
 161     WTF::Unicode::Direction defaultWritingDirection();
 162
 163 #if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
 164     CFStringRef createCFString();
 165 #endif
 166 #ifdef __OBJC__
 167     operator NSString*();
 168 #endif
 169
 170 private:
 171     unsigned m_length;
 172     const UChar* m_data;
 173     mutable unsigned m_hash;
 174     bool m_inTable;
 175     bool m_hasTerminatingNullCharacter;
 176 };
 177
 178 bool equal(StringImpl*, StringImpl*);
 179 bool equal(StringImpl*, const char*);
 180 inline bool equal(const char* a, StringImpl* b) { return equal(b, a); }
 181
 182 bool equalIgnoringCase(StringImpl*, StringImpl*);
 183 bool equalIgnoringCase(StringImpl*, const char*);
 184 inline bool equalIgnoringCase(const char* a, StringImpl* b) { return equalIgnoringCase(b, a); }
 185
// 2^32 divided by the golden ratio - an arbitrary non-zero start value for the
// hash, chosen to avoid mapping all 0's to all 0's or anything like that.
 188 const unsigned phi = 0x9e3779b9U;
 189
 190 // Paul Hsieh's SuperFastHash
 191 // http://www.azillionmonkeys.com/qed/hash.html
 192 inline unsigned StringImpl::computeHash(const UChar* data, unsigned length)
 193 {
 194     unsigned hash = phi;
 195
 196     // Main loop.
 197     for (unsigned pairCount = length >> 1; pairCount; pairCount--) {
 198         hash += data[0];
 199         unsigned tmp = (data[1] << 11) ^ hash;
 200         hash = (hash << 16) ^ tmp;
 201         data += 2;
 202         hash += hash >> 11;
 203     }
 204
 205     // Handle end case.
 206     if (length & 1) {
 207         hash += data[0];
 208         hash ^= hash << 11;
 209         hash += hash >> 17;
 210     }
 211
 212     // Force "avalanching" of final 127 bits.
 213     hash ^= hash << 3;
 214     hash += hash >> 5;
 215     hash ^= hash << 2;
 216     hash += hash >> 15;
 217     hash ^= hash << 10;
 218
 219     // This avoids ever returning a hash code of 0, since that is used to
 220     // signal "hash not computed yet", using a value that is likely to be
 221     // effectively the same as 0 when the low bits are masked.
 222     hash |= !hash << 31;
 223
 224     return hash;
 225 }
 226
 227 // Paul Hsieh's SuperFastHash
 228 // http://www.azillionmonkeys.com/qed/hash.html
 229 inline unsigned StringImpl::computeHash(const char* data)
 230 {
 231     // This hash is designed to work on 16-bit chunks at a time. But since the normal case
 232     // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
 233     // were 16-bit chunks, which should give matching results
 234
 235     unsigned hash = phi;
 236
 237     // Main loop
 238     for (;;) {
 239         unsigned char b0 = data[0];
 240         if (!b0)
 241             break;
 242         unsigned char b1 = data[1];
 243         if (!b1) {
 244             hash += b0;
 245             hash ^= hash << 11;
 246             hash += hash >> 17;
 247             break;
 248         }
 249         hash += b0;
 250         unsigned tmp = (b1 << 11) ^ hash;
 251         hash = (hash << 16) ^ tmp;
 252         data += 2;
 253         hash += hash >> 11;
 254     }
 255
 256     // Force "avalanching" of final 127 bits.
 257     hash ^= hash << 3;
 258     hash += hash >> 5;
 259     hash ^= hash << 2;
 260     hash += hash >> 15;
 261     hash ^= hash << 10;
 262
 263     // This avoids ever returning a hash code of 0, since that is used to
 264     // signal "hash not computed yet", using a value that is likely to be
 265     // effectively the same as 0 when the low bits are masked.
 266     hash |= !hash << 31;
 267
 268     return hash;
 269 }
 270
 271 static inline bool isSpaceOrNewline(UChar c)
 272 {
 273     // Use isASCIISpace() for basic Latin-1.
 274     // This will include newlines, which aren't included in Unicode DirWS.
 275     return c <= 0x7F ? WTF::isASCIISpace(c) : WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral;
 276 }
 277
 278 }
 279
 280 namespace WTF {
 281
 282     // WebCore::StringHash is the default hash for StringImpl* and RefPtr<StringImpl>
 283     template<typename T> struct DefaultHash;
 284     template<> struct DefaultHash<WebCore::StringImpl*> {
 285         typedef WebCore::StringHash Hash;
 286     };
 287     template<> struct DefaultHash<RefPtr<WebCore::StringImpl> > {
 288         typedef WebCore::StringHash Hash;
 289     };
 290
 291 }
 292
 293 #endif