2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB. If not, write to
17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
26 #include <wtf/ASCIICType.h>
27 #include <wtf/Forward.h>
28 #include <wtf/RefCounted.h>
29 #include <wtf/Vector.h>
30 #include <wtf/unicode/Unicode.h>
32 #if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
33 typedef const struct __CFString
* CFStringRef
;
45 struct CStringTranslator
;
46 struct HashAndCharactersTranslator
;
48 struct UCharBufferTranslator
;
// Two-valued selector: TextCaseSensitive or TextCaseInsensitive.
// NOTE(review): not referenced anywhere in the visible part of this header;
// presumably consumed by comparison/search code elsewhere — confirm against callers.
50 enum TextCaseSensitivity
{ TextCaseSensitive
, TextCaseInsensitive
};
// Pointer to a predicate that takes a single UChar and returns bool.
// Used in this header as the parameter type of StringImpl::removeCharacters()
// and of one StringImpl::find() overload.
52 typedef bool (*CharacterMatchFunctionPtr
)(UChar
);
54 class StringImpl
: public RefCounted
<StringImpl
> {
55 friend class AtomicString
;
56 friend struct CStringTranslator
;
57 friend struct HashAndCharactersTranslator
;
58 friend struct UCharBufferTranslator
;
60 friend class ThreadGlobalData
;
62 StringImpl(const UChar
*, unsigned length
);
63 StringImpl(const char*, unsigned length
);
65 struct AdoptBuffer
{ };
66 StringImpl(UChar
*, unsigned length
, AdoptBuffer
);
68 struct WithTerminatingNullCharacter
{ };
69 StringImpl(const StringImpl
&, WithTerminatingNullCharacter
);
72 StringImpl(const UChar
*, unsigned length
, unsigned hash
);
73 StringImpl(const char*, unsigned length
, unsigned hash
);
78 static PassRefPtr
<StringImpl
> create(const UChar
*, unsigned length
);
79 static PassRefPtr
<StringImpl
> create(const char*, unsigned length
);
80 static PassRefPtr
<StringImpl
> create(const char*);
82 static PassRefPtr
<StringImpl
> createWithTerminatingNullCharacter(const StringImpl
&);
84 static PassRefPtr
<StringImpl
> createStrippingNullCharacters(const UChar
*, unsigned length
);
85 static PassRefPtr
<StringImpl
> adopt(StringBuffer
&);
86 static PassRefPtr
<StringImpl
> adopt(Vector
<UChar
>&);
// Returns the m_data pointer, exposed to the caller as read-only (const UChar*).
88 const UChar
* characters() { return m_data
; }
// Returns m_length. operator[] uses the same value as the exclusive upper bound
// for indexes into m_data.
89 unsigned length() { return m_length
; }
// Returns the m_hasTerminatingNullCharacter flag.
// NOTE(review): presumably set for strings built via
// createWithTerminatingNullCharacter() — confirm in the implementation file.
91 bool hasTerminatingNullCharacter() { return m_hasTerminatingNullCharacter
; }
93 unsigned hash() { if (m_hash
== 0) m_hash
= computeHash(m_data
, m_length
); return m_hash
; }
// Returns the cached hash without computing it. The ASSERT requires m_hash to be
// non-zero, i.e. the hash must already have been populated (for example by hash(),
// which treats 0 as "not computed yet").
94 unsigned existingHash() const { ASSERT(m_hash
); return m_hash
; }
95 static unsigned computeHash(const UChar
*, unsigned len
);
96 static unsigned computeHash(const char*);
98 // Makes a deep copy. Helpful only if you need to use a String on another thread.
99 // Since StringImpl objects are immutable, there's no other reason to make a copy.
100 PassRefPtr
<StringImpl
> copy();
102 // Makes a deep copy like copy() but only for a substring.
103 // (This ensures that you always get something suitable for a thread while substring
104 // may not. For example, in the empty string case, substring returns empty() which
105 // is not safe for another thread.)
106 PassRefPtr
<StringImpl
> substringCopy(unsigned pos
, unsigned len
= UINT_MAX
);
108 PassRefPtr
<StringImpl
> substring(unsigned pos
, unsigned len
= UINT_MAX
);
// Returns the UChar stored at index i of m_data.
// The ASSERT checks i < m_length; whether that check is active depends on how
// ASSERT is defined for the build (not visible in this header).
110 UChar
operator[](unsigned i
) { ASSERT(i
< m_length
); return m_data
[i
]; }
111 UChar32
characterStartingAt(unsigned);
113 bool containsOnlyWhitespace();
115 int toIntStrict(bool* ok
= 0, int base
= 10);
116 unsigned toUIntStrict(bool* ok
= 0, int base
= 10);
117 int64_t toInt64Strict(bool* ok
= 0, int base
= 10);
118 uint64_t toUInt64Strict(bool* ok
= 0, int base
= 10);
120 int toInt(bool* ok
= 0); // ignores trailing garbage
121 unsigned toUInt(bool* ok
= 0); // ignores trailing garbage
122 int64_t toInt64(bool* ok
= 0); // ignores trailing garbage
123 uint64_t toUInt64(bool* ok
= 0); // ignores trailing garbage
125 double toDouble(bool* ok
= 0);
126 float toFloat(bool* ok
= 0);
129 PassRefPtr
<StringImpl
> lower();
130 PassRefPtr
<StringImpl
> upper();
131 PassRefPtr
<StringImpl
> secure(UChar aChar
, bool last
= true);
132 PassRefPtr
<StringImpl
> capitalize(UChar previousCharacter
);
133 PassRefPtr
<StringImpl
> foldCase();
135 PassRefPtr
<StringImpl
> stripWhiteSpace();
136 PassRefPtr
<StringImpl
> simplifyWhiteSpace();
138 PassRefPtr
<StringImpl
> removeCharacters(CharacterMatchFunctionPtr
);
140 int find(const char*, int index
= 0, bool caseSensitive
= true);
141 int find(UChar
, int index
= 0);
142 int find(CharacterMatchFunctionPtr
, int index
= 0);
143 int find(StringImpl
*, int index
, bool caseSensitive
= true);
145 int reverseFind(UChar
, int index
);
146 int reverseFind(StringImpl
*, int index
, bool caseSensitive
= true);
148 bool startsWith(StringImpl
* m_data
, bool caseSensitive
= true) { return reverseFind(m_data
, 0, caseSensitive
) == 0; }
149 bool endsWith(StringImpl
*, bool caseSensitive
= true);
151 PassRefPtr
<StringImpl
> replace(UChar
, UChar
);
152 PassRefPtr
<StringImpl
> replace(UChar
, StringImpl
*);
153 PassRefPtr
<StringImpl
> replace(StringImpl
*, StringImpl
*);
154 PassRefPtr
<StringImpl
> replace(unsigned index
, unsigned len
, StringImpl
*);
156 static StringImpl
* empty();
158 Vector
<char> ascii();
159 int wordCount(int maxWordsToCount
= INT_MAX
);
161 WTF::Unicode::Direction
defaultWritingDirection();
163 #if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
164 CFStringRef
createCFString();
167 operator NSString
*();
173 mutable unsigned m_hash
;
175 bool m_hasTerminatingNullCharacter
;
178 bool equal(StringImpl
*, StringImpl
*);
179 bool equal(StringImpl
*, const char*);
// Argument-order convenience overload: forwards to equal(StringImpl*, const char*)
// with the two arguments swapped, so callers may pass the C string first.
180 inline bool equal(const char* a
, StringImpl
* b
) { return equal(b
, a
); }
182 bool equalIgnoringCase(StringImpl
*, StringImpl
*);
183 bool equalIgnoringCase(StringImpl
*, const char*);
// Argument-order convenience overload: forwards to
// equalIgnoringCase(StringImpl*, const char*) with the two arguments swapped,
// so callers may pass the C string first.
184 inline bool equalIgnoringCase(const char* a
, StringImpl
* b
) { return equalIgnoringCase(b
, a
); }
186 // Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
187 // or anything like that.
// NOTE(review): no use of phi appears in the visible lines; presumably it is the
// initial value of `hash` in the computeHash() functions below — confirm.
188 const unsigned phi
= 0x9e3779b9U
;
190 // Paul Hsieh's SuperFastHash
191 // http://www.azillionmonkeys.com/qed/hash.html
192 inline unsigned StringImpl::computeHash(const UChar
* data
, unsigned length
)
197 for (unsigned pairCount
= length
>> 1; pairCount
; pairCount
--) {
199 unsigned tmp
= (data
[1] << 11) ^ hash
;
200 hash
= (hash
<< 16) ^ tmp
;
212 // Force "avalanching" of final 127 bits.
219 // This avoids ever returning a hash code of 0, since that is used to
220 // signal "hash not computed yet", using a value that is likely to be
221 // effectively the same as 0 when the low bits are masked.
227 // Paul Hsieh's SuperFastHash
228 // http://www.azillionmonkeys.com/qed/hash.html
229 inline unsigned StringImpl::computeHash(const char* data
)
231 // This hash is designed to work on 16-bit chunks at a time. But since the normal case
232 // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
233 // were 16-bit chunks, which should give matching results
239 unsigned char b0
= data
[0];
242 unsigned char b1
= data
[1];
250 unsigned tmp
= (b1
<< 11) ^ hash
;
251 hash
= (hash
<< 16) ^ tmp
;
256 // Force "avalanching" of final 127 bits.
263 // This avoids ever returning a hash code of 0, since that is used to
264 // signal "hash not computed yet", using a value that is likely to be
265 // effectively the same as 0 when the low bits are masked.
271 static inline bool isSpaceOrNewline(UChar c
)
273 // Use isASCIISpace() for basic Latin-1.
274 // This will include newlines, which aren't included in Unicode DirWS.
275 return c
<= 0x7F ? WTF::isASCIISpace(c
) : WTF::Unicode::direction(c
) == WTF::Unicode::WhiteSpaceNeutral
;
282 // WebCore::StringHash is the default hash for StringImpl* and RefPtr<StringImpl>
283 template<typename T
> struct DefaultHash
;
284 template<> struct DefaultHash
<WebCore::StringImpl
*> {
285 typedef WebCore::StringHash Hash
;
287 template<> struct DefaultHash
<RefPtr
<WebCore::StringImpl
> > {
288 typedef WebCore::StringHash Hash
;