]> git.saurik.com Git - iphone-api.git/blob - WebCore/StringImpl.h
Adding the WebCore headers (for Cydget).
[iphone-api.git] / WebCore / StringImpl.h
1 /*
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB. If not, write to
17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 *
20 */
21
22 #ifndef StringImpl_h
23 #define StringImpl_h
24
25 #include <limits.h>
26 #include <wtf/ASCIICType.h>
27 #include <wtf/Forward.h>
28 #include <wtf/RefCounted.h>
29 #include <wtf/Vector.h>
30 #include <wtf/unicode/Unicode.h>
31
32 #if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
33 typedef const struct __CFString * CFStringRef;
34 #endif
35
36 #ifdef __OBJC__
37 @class NSString;
38 #endif
39
40 namespace WebCore {
41
42 class AtomicString;
43 class StringBuffer;
44
45 struct CStringTranslator;
46 struct HashAndCharactersTranslator;
47 struct StringHash;
48 struct UCharBufferTranslator;
49
50 enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive };
51
52 typedef bool (*CharacterMatchFunctionPtr)(UChar);
53
54 class StringImpl : public RefCounted<StringImpl> {
55 friend class AtomicString;
56 friend struct CStringTranslator;
57 friend struct HashAndCharactersTranslator;
58 friend struct UCharBufferTranslator;
59 private:
60 friend class ThreadGlobalData;
61 StringImpl();
62 StringImpl(const UChar*, unsigned length);
63 StringImpl(const char*, unsigned length);
64
65 struct AdoptBuffer { };
66 StringImpl(UChar*, unsigned length, AdoptBuffer);
67
68 struct WithTerminatingNullCharacter { };
69 StringImpl(const StringImpl&, WithTerminatingNullCharacter);
70
71 // For AtomicString.
72 StringImpl(const UChar*, unsigned length, unsigned hash);
73 StringImpl(const char*, unsigned length, unsigned hash);
74
75 public:
76 ~StringImpl();
77
78 static PassRefPtr<StringImpl> create(const UChar*, unsigned length);
79 static PassRefPtr<StringImpl> create(const char*, unsigned length);
80 static PassRefPtr<StringImpl> create(const char*);
81
82 static PassRefPtr<StringImpl> createWithTerminatingNullCharacter(const StringImpl&);
83
84 static PassRefPtr<StringImpl> createStrippingNullCharacters(const UChar*, unsigned length);
85 static PassRefPtr<StringImpl> adopt(StringBuffer&);
86 static PassRefPtr<StringImpl> adopt(Vector<UChar>&);
87
88 const UChar* characters() { return m_data; }
89 unsigned length() { return m_length; }
90
91 bool hasTerminatingNullCharacter() { return m_hasTerminatingNullCharacter; }
92
93 unsigned hash() { if (m_hash == 0) m_hash = computeHash(m_data, m_length); return m_hash; }
94 unsigned existingHash() const { ASSERT(m_hash); return m_hash; }
95 static unsigned computeHash(const UChar*, unsigned len);
96 static unsigned computeHash(const char*);
97
98 // Makes a deep copy. Helpful only if you need to use a String on another thread.
99 // Since StringImpl objects are immutable, there's no other reason to make a copy.
100 PassRefPtr<StringImpl> copy();
101
102 // Makes a deep copy like copy() but only for a substring.
103 // (This ensures that you always get something suitable for a thread while subtring
104 // may not. For example, in the empty string case, substring returns empty() which
105 // is not safe for another thread.)
106 PassRefPtr<StringImpl> substringCopy(unsigned pos, unsigned len = UINT_MAX);
107
108 PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX);
109
110 UChar operator[](unsigned i) { ASSERT(i < m_length); return m_data[i]; }
111 UChar32 characterStartingAt(unsigned);
112
113 bool containsOnlyWhitespace();
114
115 int toIntStrict(bool* ok = 0, int base = 10);
116 unsigned toUIntStrict(bool* ok = 0, int base = 10);
117 int64_t toInt64Strict(bool* ok = 0, int base = 10);
118 uint64_t toUInt64Strict(bool* ok = 0, int base = 10);
119
120 int toInt(bool* ok = 0); // ignores trailing garbage
121 unsigned toUInt(bool* ok = 0); // ignores trailing garbage
122 int64_t toInt64(bool* ok = 0); // ignores trailing garbage
123 uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage
124
125 double toDouble(bool* ok = 0);
126 float toFloat(bool* ok = 0);
127
128 bool isLower();
129 PassRefPtr<StringImpl> lower();
130 PassRefPtr<StringImpl> upper();
131 PassRefPtr<StringImpl> secure(UChar aChar, bool last = true);
132 PassRefPtr<StringImpl> capitalize(UChar previousCharacter);
133 PassRefPtr<StringImpl> foldCase();
134
135 PassRefPtr<StringImpl> stripWhiteSpace();
136 PassRefPtr<StringImpl> simplifyWhiteSpace();
137
138 PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr);
139
140 int find(const char*, int index = 0, bool caseSensitive = true);
141 int find(UChar, int index = 0);
142 int find(CharacterMatchFunctionPtr, int index = 0);
143 int find(StringImpl*, int index, bool caseSensitive = true);
144
145 int reverseFind(UChar, int index);
146 int reverseFind(StringImpl*, int index, bool caseSensitive = true);
147
148 bool startsWith(StringImpl* m_data, bool caseSensitive = true) { return reverseFind(m_data, 0, caseSensitive) == 0; }
149 bool endsWith(StringImpl*, bool caseSensitive = true);
150
151 PassRefPtr<StringImpl> replace(UChar, UChar);
152 PassRefPtr<StringImpl> replace(UChar, StringImpl*);
153 PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*);
154 PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*);
155
156 static StringImpl* empty();
157
158 Vector<char> ascii();
159 int wordCount(int maxWordsToCount = INT_MAX);
160
161 WTF::Unicode::Direction defaultWritingDirection();
162
163 #if PLATFORM(CF) || (PLATFORM(QT) && PLATFORM(DARWIN))
164 CFStringRef createCFString();
165 #endif
166 #ifdef __OBJC__
167 operator NSString*();
168 #endif
169
170 private:
171 unsigned m_length;
172 const UChar* m_data;
173 mutable unsigned m_hash;
174 bool m_inTable;
175 bool m_hasTerminatingNullCharacter;
176 };
177
178 bool equal(StringImpl*, StringImpl*);
179 bool equal(StringImpl*, const char*);
180 inline bool equal(const char* a, StringImpl* b) { return equal(b, a); }
181
182 bool equalIgnoringCase(StringImpl*, StringImpl*);
183 bool equalIgnoringCase(StringImpl*, const char*);
184 inline bool equalIgnoringCase(const char* a, StringImpl* b) { return equalIgnoringCase(b, a); }
185
// Seed for the string hash functions below, derived from the golden ratio
// (0x9E3779B9 is 2^32 divided by the golden ratio, truncated). It is an
// arbitrary non-zero start value, chosen so that all-zero input does not
// map to an all-zero hash or anything like that.
const unsigned phi = 0x9E3779B9U;
189
190 // Paul Hsieh's SuperFastHash
191 // http://www.azillionmonkeys.com/qed/hash.html
192 inline unsigned StringImpl::computeHash(const UChar* data, unsigned length)
193 {
194 unsigned hash = phi;
195
196 // Main loop.
197 for (unsigned pairCount = length >> 1; pairCount; pairCount--) {
198 hash += data[0];
199 unsigned tmp = (data[1] << 11) ^ hash;
200 hash = (hash << 16) ^ tmp;
201 data += 2;
202 hash += hash >> 11;
203 }
204
205 // Handle end case.
206 if (length & 1) {
207 hash += data[0];
208 hash ^= hash << 11;
209 hash += hash >> 17;
210 }
211
212 // Force "avalanching" of final 127 bits.
213 hash ^= hash << 3;
214 hash += hash >> 5;
215 hash ^= hash << 2;
216 hash += hash >> 15;
217 hash ^= hash << 10;
218
219 // This avoids ever returning a hash code of 0, since that is used to
220 // signal "hash not computed yet", using a value that is likely to be
221 // effectively the same as 0 when the low bits are masked.
222 hash |= !hash << 31;
223
224 return hash;
225 }
226
227 // Paul Hsieh's SuperFastHash
228 // http://www.azillionmonkeys.com/qed/hash.html
229 inline unsigned StringImpl::computeHash(const char* data)
230 {
231 // This hash is designed to work on 16-bit chunks at a time. But since the normal case
232 // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
233 // were 16-bit chunks, which should give matching results
234
235 unsigned hash = phi;
236
237 // Main loop
238 for (;;) {
239 unsigned char b0 = data[0];
240 if (!b0)
241 break;
242 unsigned char b1 = data[1];
243 if (!b1) {
244 hash += b0;
245 hash ^= hash << 11;
246 hash += hash >> 17;
247 break;
248 }
249 hash += b0;
250 unsigned tmp = (b1 << 11) ^ hash;
251 hash = (hash << 16) ^ tmp;
252 data += 2;
253 hash += hash >> 11;
254 }
255
256 // Force "avalanching" of final 127 bits.
257 hash ^= hash << 3;
258 hash += hash >> 5;
259 hash ^= hash << 2;
260 hash += hash >> 15;
261 hash ^= hash << 10;
262
263 // This avoids ever returning a hash code of 0, since that is used to
264 // signal "hash not computed yet", using a value that is likely to be
265 // effectively the same as 0 when the low bits are masked.
266 hash |= !hash << 31;
267
268 return hash;
269 }
270
271 static inline bool isSpaceOrNewline(UChar c)
272 {
273 // Use isASCIISpace() for basic Latin-1.
274 // This will include newlines, which aren't included in Unicode DirWS.
275 return c <= 0x7F ? WTF::isASCIISpace(c) : WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral;
276 }
277
278 }
279
280 namespace WTF {
281
282 // WebCore::StringHash is the default hash for StringImpl* and RefPtr<StringImpl>
283 template<typename T> struct DefaultHash;
284 template<> struct DefaultHash<WebCore::StringImpl*> {
285 typedef WebCore::StringHash Hash;
286 };
287 template<> struct DefaultHash<RefPtr<WebCore::StringImpl> > {
288 typedef WebCore::StringHash Hash;
289 };
290
291 }
292
293 #endif