]> git.saurik.com Git - apple/javascriptcore.git/blob - wtf/text/StringImpl.h
4354cabaf2ba3736a188e2110fa7ce74e6ff69ae
[apple/javascriptcore.git] / wtf / text / StringImpl.h
1 /*
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Google Inc. All rights reserved.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23 #ifndef StringImpl_h
24 #define StringImpl_h
25
26 #include <limits.h>
27 #include <wtf/ASCIICType.h>
28 #include <wtf/CrossThreadRefCounted.h>
29 #include <wtf/OwnFastMallocPtr.h>
30 #include <wtf/StdLibExtras.h>
31 #include <wtf/StringHashFunctions.h>
32 #include <wtf/Vector.h>
33 #include <wtf/text/StringImplBase.h>
34 #include <wtf/unicode/Unicode.h>
35
36 #if PLATFORM(CF)
37 typedef const struct __CFString * CFStringRef;
38 #endif
39
40 #ifdef __OBJC__
41 @class NSString;
42 #endif
43
44 // FIXME: This is a temporary layering violation while we move string code to WTF.
45 // Landing the file moves in one patch, will follow on with patches to change the namespaces.
46 namespace JSC {
47
// Forward declarations only: these two translator structs are named as friends of
// WebCore::StringImpl later in this header; they are not defined here.
48 struct IdentifierCStringTranslator;
49 struct IdentifierUCharBufferTranslator;
50
51 }
52
53 // FIXME: This is a temporary layering violation while we move string code to WTF.
54 // Landing the file moves in one patch, will follow on with patches to change the namespaces.
55 namespace WebCore {
56
// Forward declarations for types this header only names: StringBuffer is a parameter of
// StringImpl::adopt(), the translators are friends of StringImpl, and StringHash is used by
// the WTF::DefaultHash specializations at the end of the file.
57 class StringBuffer;
58
59 struct CStringTranslator;
60 struct HashAndCharactersTranslator;
61 struct StringHash;
62 struct UCharBufferTranslator;
63
// Two-valued case-sensitivity selector. It is not referenced anywhere else in this header.
64 enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive };
65
// SharedUChar is the buffer type held by StringImpl::m_sharedBuffer and accepted by the
// BufferShared constructor / create() overload; it wraps SharableUChar in CrossThreadRefCounted.
66 typedef OwnFastMallocPtr<const UChar> SharableUChar;
67 typedef CrossThreadRefCounted<SharableUChar> SharedUChar;
// Predicate over a single UChar; taken by StringImpl::find() and StringImpl::removeCharacters().
68 typedef bool (*CharacterMatchFunctionPtr)(UChar);
69
70 class StringImpl : public StringImplBase {
71 friend struct JSC::IdentifierCStringTranslator;
72 friend struct JSC::IdentifierUCharBufferTranslator;
73 friend struct CStringTranslator;
74 friend struct HashAndCharactersTranslator;
75 friend struct UCharBufferTranslator;
76 friend class AtomicStringImpl;
77 private:
78 // Used to construct static strings, which have an special refCount that can never hit zero.
79 // This means that the static string will never be destroyed, which is important because
80 // static strings will be shared across threads & ref-counted in a non-threadsafe manner.
81 StringImpl(const UChar* characters, unsigned length, StaticStringConstructType)
82 : StringImplBase(length, ConstructStaticString)
83 , m_data(characters)
84 , m_buffer(0)
85 , m_hash(0)
86 {
87 // Ensure that the hash is computed so that AtomicStringHash can call existingHash()
88 // with impunity. The empty string is special because it is never entered into
89 // AtomicString's HashKey, but still needs to compare correctly.
90 hash();
91 }
92
93 // Create a normal string with internal storage (BufferInternal)
94 StringImpl(unsigned length)
95 : StringImplBase(length, BufferInternal)
96 , m_data(reinterpret_cast<const UChar*>(this + 1))
97 , m_buffer(0)
98 , m_hash(0)
99 {
100 ASSERT(m_data);
101 ASSERT(m_length);
102 }
103
104 // Create a StringImpl adopting ownership of the provided buffer (BufferOwned)
105 StringImpl(const UChar* characters, unsigned length)
106 : StringImplBase(length, BufferOwned)
107 , m_data(characters)
108 , m_buffer(0)
109 , m_hash(0)
110 {
111 ASSERT(m_data);
112 ASSERT(m_length);
113 }
114
115 // Used to create new strings that are a substring of an existing StringImpl (BufferSubstring)
116 StringImpl(const UChar* characters, unsigned length, PassRefPtr<StringImpl> base)
117 : StringImplBase(length, BufferSubstring)
118 , m_data(characters)
119 , m_substringBuffer(base.releaseRef())
120 , m_hash(0)
121 {
122 ASSERT(m_data);
123 ASSERT(m_length);
124 ASSERT(m_substringBuffer->bufferOwnership() != BufferSubstring);
125 }
126
127 // Used to construct new strings sharing an existing SharedUChar (BufferShared)
128 StringImpl(const UChar* characters, unsigned length, PassRefPtr<SharedUChar> sharedBuffer)
129 : StringImplBase(length, BufferShared)
130 , m_data(characters)
131 , m_sharedBuffer(sharedBuffer.releaseRef())
132 , m_hash(0)
133 {
134 ASSERT(m_data);
135 ASSERT(m_length);
136 }
137
138 // For use only by AtomicString's XXXTranslator helpers.
139 void setHash(unsigned hash)
140 {
141 ASSERT(!isStatic());
142 ASSERT(!m_hash);
143 ASSERT(hash == computeHash(m_data, m_length));
144 m_hash = hash;
145 }
146
147 public:
148 ~StringImpl();
149
150 static PassRefPtr<StringImpl> create(const UChar*, unsigned length);
151 static PassRefPtr<StringImpl> create(const char*, unsigned length);
152 static PassRefPtr<StringImpl> create(const char*);
153 static PassRefPtr<StringImpl> create(const UChar*, unsigned length, PassRefPtr<SharedUChar> sharedBuffer);
154 static PassRefPtr<StringImpl> create(PassRefPtr<StringImpl> rep, unsigned offset, unsigned length)
155 {
156 ASSERT(rep);
157 ASSERT(length <= rep->length());
158
159 if (!length)
160 return empty();
161
162 StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep->m_substringBuffer : rep.get();
163 return adoptRef(new StringImpl(rep->m_data + offset, length, ownerRep));
164 }
165
166 static PassRefPtr<StringImpl> createUninitialized(unsigned length, UChar*& data);
167 static PassRefPtr<StringImpl> tryCreateUninitialized(unsigned length, UChar*& output)
168 {
169 if (!length) {
170 output = 0;
171 return empty();
172 }
173
174 if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar))) {
175 output = 0;
176 return 0;
177 }
178 StringImpl* resultImpl;
179 if (!tryFastMalloc(sizeof(UChar) * length + sizeof(StringImpl)).getValue(resultImpl)) {
180 output = 0;
181 return 0;
182 }
183 output = reinterpret_cast<UChar*>(resultImpl + 1);
184 return adoptRef(new(resultImpl) StringImpl(length));
185 }
186
187 static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl, m_data); }
188 static PassRefPtr<StringImpl> createWithTerminatingNullCharacter(const StringImpl&);
189 static PassRefPtr<StringImpl> createStrippingNullCharacters(const UChar*, unsigned length);
190
191 template<size_t inlineCapacity>
192 static PassRefPtr<StringImpl> adopt(Vector<UChar, inlineCapacity>& vector)
193 {
194 if (size_t size = vector.size()) {
195 ASSERT(vector.data());
196 if (size > std::numeric_limits<unsigned>::max())
197 CRASH();
198 return adoptRef(new StringImpl(vector.releaseBuffer(), size));
199 }
200 return empty();
201 }
202 static PassRefPtr<StringImpl> adopt(StringBuffer&);
203
204 SharedUChar* sharedBuffer();
205 const UChar* characters() const { return m_data; }
206
207 size_t cost()
208 {
209 // For substrings, return the cost of the base string.
210 if (bufferOwnership() == BufferSubstring)
211 return m_substringBuffer->cost();
212
213 if (m_refCountAndFlags & s_refCountFlagShouldReportedCost) {
214 m_refCountAndFlags &= ~s_refCountFlagShouldReportedCost;
215 return m_length;
216 }
217 return 0;
218 }
219
220 bool isIdentifier() const { return m_refCountAndFlags & s_refCountFlagIsIdentifier; }
221 void setIsIdentifier(bool isIdentifier)
222 {
223 ASSERT(!isStatic());
224 if (isIdentifier)
225 m_refCountAndFlags |= s_refCountFlagIsIdentifier;
226 else
227 m_refCountAndFlags &= ~s_refCountFlagIsIdentifier;
228 }
229
230 bool hasTerminatingNullCharacter() const { return m_refCountAndFlags & s_refCountFlagHasTerminatingNullCharacter; }
231
232 bool isAtomic() const { return m_refCountAndFlags & s_refCountFlagIsAtomic; }
233 void setIsAtomic(bool isIdentifier)
234 {
235 ASSERT(!isStatic());
236 if (isIdentifier)
237 m_refCountAndFlags |= s_refCountFlagIsAtomic;
238 else
239 m_refCountAndFlags &= ~s_refCountFlagIsAtomic;
240 }
241
242 unsigned hash() const { if (!m_hash) m_hash = computeHash(m_data, m_length); return m_hash; }
243 unsigned existingHash() const { ASSERT(m_hash); return m_hash; }
244 static unsigned computeHash(const UChar* data, unsigned length) { return WTF::stringHash(data, length); }
245 static unsigned computeHash(const char* data, unsigned length) { return WTF::stringHash(data, length); }
246 static unsigned computeHash(const char* data) { return WTF::stringHash(data); }
247
248 ALWAYS_INLINE void deref() { m_refCountAndFlags -= s_refCountIncrement; if (!(m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic))) delete this; }
249 ALWAYS_INLINE bool hasOneRef() const { return (m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic)) == s_refCountIncrement; }
250
251 static StringImpl* empty();
252
253 static void copyChars(UChar* destination, const UChar* source, unsigned numCharacters)
254 {
255 if (numCharacters <= s_copyCharsInlineCutOff) {
256 for (unsigned i = 0; i < numCharacters; ++i)
257 destination[i] = source[i];
258 } else
259 memcpy(destination, source, numCharacters * sizeof(UChar));
260 }
261
262 // Returns a StringImpl suitable for use on another thread.
263 PassRefPtr<StringImpl> crossThreadString();
264 // Makes a deep copy. Helpful only if you need to use a String on another thread
265 // (use crossThreadString if the method call doesn't need to be threadsafe).
266 // Since StringImpl objects are immutable, there's no other reason to make a copy.
267 PassRefPtr<StringImpl> threadsafeCopy() const;
268
269 PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX);
270
271 UChar operator[](unsigned i) { ASSERT(i < m_length); return m_data[i]; }
272 UChar32 characterStartingAt(unsigned);
273
274 bool containsOnlyWhitespace();
275
276 int toIntStrict(bool* ok = 0, int base = 10);
277 unsigned toUIntStrict(bool* ok = 0, int base = 10);
278 int64_t toInt64Strict(bool* ok = 0, int base = 10);
279 uint64_t toUInt64Strict(bool* ok = 0, int base = 10);
280 intptr_t toIntPtrStrict(bool* ok = 0, int base = 10);
281
282 int toInt(bool* ok = 0); // ignores trailing garbage
283 unsigned toUInt(bool* ok = 0); // ignores trailing garbage
284 int64_t toInt64(bool* ok = 0); // ignores trailing garbage
285 uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage
286 intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage
287
288 double toDouble(bool* ok = 0);
289 float toFloat(bool* ok = 0);
290
291 PassRefPtr<StringImpl> lower();
292 PassRefPtr<StringImpl> upper();
293 PassRefPtr<StringImpl> secure(UChar, bool hideLastCharacter = true);
294 PassRefPtr<StringImpl> foldCase();
295
296 PassRefPtr<StringImpl> stripWhiteSpace();
297 PassRefPtr<StringImpl> simplifyWhiteSpace();
298
299 PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr);
300
301 int find(const char*, int index = 0, bool caseSensitive = true);
302 int find(UChar, int index = 0);
303 int find(CharacterMatchFunctionPtr, int index = 0);
304 int find(StringImpl*, int index, bool caseSensitive = true);
305
306 int reverseFind(UChar, int index);
307 int reverseFind(StringImpl*, int index, bool caseSensitive = true);
308
309 bool startsWith(StringImpl* str, bool caseSensitive = true) { return reverseFind(str, 0, caseSensitive) == 0; }
310 bool endsWith(StringImpl*, bool caseSensitive = true);
311
312 PassRefPtr<StringImpl> replace(UChar, UChar);
313 PassRefPtr<StringImpl> replace(UChar, StringImpl*);
314 PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*);
315 PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*);
316
317 Vector<char> ascii();
318 int wordCount(int maxWordsToCount = INT_MAX);
319
320 WTF::Unicode::Direction defaultWritingDirection();
321
322 #if PLATFORM(CF)
323 CFStringRef createCFString();
324 #endif
325 #ifdef __OBJC__
326 operator NSString*();
327 #endif
328
329 private:
330 // This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings.
331 static const unsigned s_copyCharsInlineCutOff = 20;
332
333 static PassRefPtr<StringImpl> createStrippingNullCharactersSlowCase(const UChar*, unsigned length);
334
335 BufferOwnership bufferOwnership() const { return static_cast<BufferOwnership>(m_refCountAndFlags & s_refCountMaskBufferOwnership); }
336 bool isStatic() const { return m_refCountAndFlags & s_refCountFlagStatic; }
337 const UChar* m_data;
338 union {
339 void* m_buffer;
340 StringImpl* m_substringBuffer;
341 SharedUChar* m_sharedBuffer;
342 };
343 mutable unsigned m_hash;
344 };
345
// Free comparison functions over StringImpl. Only the inline overloads are defined in this
// header; each of them swaps its arguments and forwards to the overload with the StringImpl
// (or UChar) operand first, so callers may pass the operands in either order.
346 bool equal(const StringImpl*, const StringImpl*);
347 bool equal(const StringImpl*, const char*);
348 inline bool equal(const char* a, StringImpl* b) { return equal(b, a); }
349
350 bool equalIgnoringCase(StringImpl*, StringImpl*);
351 bool equalIgnoringCase(StringImpl*, const char*);
352 inline bool equalIgnoringCase(const char* a, StringImpl* b) { return equalIgnoringCase(b, a); }
// Raw-buffer variants: both buffers are compared over the same explicit 'length'.
353 bool equalIgnoringCase(const UChar* a, const char* b, unsigned length);
354 inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, a, length); }
355
// NOTE(review): presumably treats a null StringImpl and an empty one as equal; the
// definition is not in this header, so confirm against the implementation.
356 bool equalIgnoringNullity(StringImpl*, StringImpl*);
357
358 static inline bool isSpaceOrNewline(UChar c)
359 {
360 // Use isASCIISpace() for basic Latin-1.
361 // This will include newlines, which aren't included in Unicode DirWS.
362 return c <= 0x7F ? WTF::isASCIISpace(c) : WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral;
363 }
364
365 // This is a hot function because it's used when parsing HTML.
366 inline PassRefPtr<StringImpl> StringImpl::createStrippingNullCharacters(const UChar* characters, unsigned length)
367 {
368 ASSERT(characters);
369 ASSERT(length);
370
371 // Optimize for the case where there are no Null characters by quickly
372 // searching for nulls, and then using StringImpl::create, which will
373 // memcpy the whole buffer. This is faster than assigning character by
374 // character during the loop.
375
376 // Fast case.
377 int foundNull = 0;
378 for (unsigned i = 0; !foundNull && i < length; i++) {
379 int c = characters[i]; // more efficient than using UChar here (at least on Intel Mac OS)
380 foundNull |= !c;
381 }
382 if (!foundNull)
383 return StringImpl::create(characters, length);
384
385 return StringImpl::createStrippingNullCharactersSlowCase(characters, length);
386 }
387
388 }
389
390 using WebCore::equal;
391
392 namespace WTF {
393
394 // WebCore::StringHash is the default hash for StringImpl* and RefPtr<StringImpl>
// The primary DefaultHash template is only declared here, which is enough to specialize it;
// StringHash itself is likewise only forward-declared in this header.
395 template<typename T> struct DefaultHash;
396 template<> struct DefaultHash<WebCore::StringImpl*> {
397 typedef WebCore::StringHash Hash;
398 };
399 template<> struct DefaultHash<RefPtr<WebCore::StringImpl> > {
400 typedef WebCore::StringHash Hash;
401 };
402
403 }
404
405 #endif