2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Google Inc. All rights reserved.
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
27 #include <wtf/ASCIICType.h>
28 #include <wtf/CrossThreadRefCounted.h>
29 #include <wtf/OwnFastMallocPtr.h>
30 #include <wtf/StdLibExtras.h>
31 #include <wtf/StringHashFunctions.h>
32 #include <wtf/Vector.h>
33 #include <wtf/text/StringImplBase.h>
34 #include <wtf/unicode/Unicode.h>
// Forward declaration of the CoreFoundation string handle type returned by
// StringImpl::createCFString().
typedef const struct __CFString* CFStringRef;
44 // FIXME: This is a temporary layering violation while we move string code to WTF.
45 // Landing the file moves in one patch, will follow on with patches to change the namespaces.
// Forward declarations of the identifier translator structs that StringImpl
// befriends through JSC::-qualified names.
// NOTE(review): no enclosing `namespace JSC` is visible in this listing — confirm
// these declarations sit inside it.
48 struct IdentifierCStringTranslator
;
49 struct IdentifierUCharBufferTranslator
;
53 // FIXME: This is a temporary layering violation while we move string code to WTF.
54 // Landing the file moves in one patch, will follow on with patches to change the namespaces.
// Forward declarations of the translator structs that StringImpl declares as
// friends (unqualified, i.e. in the same namespace as StringImpl).
59 struct CStringTranslator
;
60 struct HashAndCharactersTranslator
;
62 struct UCharBufferTranslator
;
// Selects whether a text comparison distinguishes upper and lower case.
enum TextCaseSensitivity {
    TextCaseSensitive,
    TextCaseInsensitive
};
66 typedef OwnFastMallocPtr
<const UChar
> SharableUChar
;
67 typedef CrossThreadRefCounted
<SharableUChar
> SharedUChar
;
68 typedef bool (*CharacterMatchFunctionPtr
)(UChar
);
70 class StringImpl
: public StringImplBase
{
71 friend struct JSC::IdentifierCStringTranslator
;
72 friend struct JSC::IdentifierUCharBufferTranslator
;
73 friend struct CStringTranslator
;
74 friend struct HashAndCharactersTranslator
;
75 friend struct UCharBufferTranslator
;
76 friend class AtomicStringImpl
;
78 // Used to construct static strings, which have a special refCount that can never hit zero.
79 // This means that the static string will never be destroyed, which is important because
80 // static strings will be shared across threads & ref-counted in a non-threadsafe manner.
81 StringImpl(const UChar
* characters
, unsigned length
, StaticStringConstructType
)
82 : StringImplBase(length
, ConstructStaticString
)
87 // Ensure that the hash is computed so that AtomicStringHash can call existingHash()
88 // with impunity. The empty string is special because it is never entered into
89 // AtomicString's HashKey, but still needs to compare correctly.
93 // Create a normal string with internal storage (BufferInternal)
94 StringImpl(unsigned length
)
95 : StringImplBase(length
, BufferInternal
)
96 , m_data(reinterpret_cast<const UChar
*>(this + 1))
104 // Create a StringImpl adopting ownership of the provided buffer (BufferOwned)
105 StringImpl(const UChar
* characters
, unsigned length
)
106 : StringImplBase(length
, BufferOwned
)
115 // Used to create new strings that are a substring of an existing StringImpl (BufferSubstring)
116 StringImpl(const UChar
* characters
, unsigned length
, PassRefPtr
<StringImpl
> base
)
117 : StringImplBase(length
, BufferSubstring
)
119 , m_substringBuffer(base
.releaseRef())
124 ASSERT(m_substringBuffer
->bufferOwnership() != BufferSubstring
);
127 // Used to construct new strings sharing an existing SharedUChar (BufferShared)
128 StringImpl(const UChar
* characters
, unsigned length
, PassRefPtr
<SharedUChar
> sharedBuffer
)
129 : StringImplBase(length
, BufferShared
)
131 , m_sharedBuffer(sharedBuffer
.releaseRef())
138 // For use only by AtomicString's XXXTranslator helpers.
139 void setHash(unsigned hash
)
143 ASSERT(hash
== computeHash(m_data
, m_length
));
150 static PassRefPtr
<StringImpl
> create(const UChar
*, unsigned length
);
151 static PassRefPtr
<StringImpl
> create(const char*, unsigned length
);
152 static PassRefPtr
<StringImpl
> create(const char*);
153 static PassRefPtr
<StringImpl
> create(const UChar
*, unsigned length
, PassRefPtr
<SharedUChar
> sharedBuffer
);
154 static PassRefPtr
<StringImpl
> create(PassRefPtr
<StringImpl
> rep
, unsigned offset
, unsigned length
)
157 ASSERT(length
<= rep
->length());
162 StringImpl
* ownerRep
= (rep
->bufferOwnership() == BufferSubstring
) ? rep
->m_substringBuffer
: rep
.get();
163 return adoptRef(new StringImpl(rep
->m_data
+ offset
, length
, ownerRep
));
166 static PassRefPtr
<StringImpl
> createUninitialized(unsigned length
, UChar
*& data
);
167 static PassRefPtr
<StringImpl
> tryCreateUninitialized(unsigned length
, UChar
*& output
)
174 if (length
> ((std::numeric_limits
<unsigned>::max() - sizeof(StringImpl
)) / sizeof(UChar
))) {
178 StringImpl
* resultImpl
;
179 if (!tryFastMalloc(sizeof(UChar
) * length
+ sizeof(StringImpl
)).getValue(resultImpl
)) {
183 output
= reinterpret_cast<UChar
*>(resultImpl
+ 1);
184 return adoptRef(new(resultImpl
) StringImpl(length
));
187 static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl
, m_data
); }
188 static PassRefPtr
<StringImpl
> createWithTerminatingNullCharacter(const StringImpl
&);
189 static PassRefPtr
<StringImpl
> createStrippingNullCharacters(const UChar
*, unsigned length
);
191 template<size_t inlineCapacity
>
192 static PassRefPtr
<StringImpl
> adopt(Vector
<UChar
, inlineCapacity
>& vector
)
194 if (size_t size
= vector
.size()) {
195 ASSERT(vector
.data());
196 if (size
> std::numeric_limits
<unsigned>::max())
198 return adoptRef(new StringImpl(vector
.releaseBuffer(), size
));
202 static PassRefPtr
<StringImpl
> adopt(StringBuffer
&);
204 SharedUChar
* sharedBuffer();
205 const UChar
* characters() const { return m_data
; }
209 // For substrings, return the cost of the base string.
210 if (bufferOwnership() == BufferSubstring
)
211 return m_substringBuffer
->cost();
213 if (m_refCountAndFlags
& s_refCountFlagShouldReportedCost
) {
214 m_refCountAndFlags
&= ~s_refCountFlagShouldReportedCost
;
220 bool isIdentifier() const { return m_refCountAndFlags
& s_refCountFlagIsIdentifier
; }
221 void setIsIdentifier(bool isIdentifier
)
225 m_refCountAndFlags
|= s_refCountFlagIsIdentifier
;
227 m_refCountAndFlags
&= ~s_refCountFlagIsIdentifier
;
230 bool hasTerminatingNullCharacter() const { return m_refCountAndFlags
& s_refCountFlagHasTerminatingNullCharacter
; }
232 bool isAtomic() const { return m_refCountAndFlags
& s_refCountFlagIsAtomic
; }
// Setter for the s_refCountFlagIsAtomic bit of m_refCountAndFlags: one statement
// below sets the bit, the other clears it.
// NOTE(review): the parameter is named isIdentifier although this setter controls
// the atomic flag — looks like a copy-paste from setIsIdentifier(); consider
// renaming it.
// NOTE(review): the condition choosing between the set and clear statements is
// not visible in this listing — confirm against the full source.
233 void setIsAtomic(bool isIdentifier
)
237 m_refCountAndFlags
|= s_refCountFlagIsAtomic
;
239 m_refCountAndFlags
&= ~s_refCountFlagIsAtomic
;
242 unsigned hash() const { if (!m_hash
) m_hash
= computeHash(m_data
, m_length
); return m_hash
; }
243 unsigned existingHash() const { ASSERT(m_hash
); return m_hash
; }
244 static unsigned computeHash(const UChar
* data
, unsigned length
) { return WTF::stringHash(data
, length
); }
245 static unsigned computeHash(const char* data
, unsigned length
) { return WTF::stringHash(data
, length
); }
246 static unsigned computeHash(const char* data
) { return WTF::stringHash(data
); }
248 ALWAYS_INLINE
void deref() { m_refCountAndFlags
-= s_refCountIncrement
; if (!(m_refCountAndFlags
& (s_refCountMask
| s_refCountFlagStatic
))) delete this; }
249 ALWAYS_INLINE
bool hasOneRef() const { return (m_refCountAndFlags
& (s_refCountMask
| s_refCountFlagStatic
)) == s_refCountIncrement
; }
251 static StringImpl
* empty();
253 static void copyChars(UChar
* destination
, const UChar
* source
, unsigned numCharacters
)
255 if (numCharacters
<= s_copyCharsInlineCutOff
) {
256 for (unsigned i
= 0; i
< numCharacters
; ++i
)
257 destination
[i
] = source
[i
];
259 memcpy(destination
, source
, numCharacters
* sizeof(UChar
));
262 // Returns a StringImpl suitable for use on another thread.
263 PassRefPtr
<StringImpl
> crossThreadString();
264 // Makes a deep copy. Helpful only if you need to use a String on another thread
265 // (use crossThreadString if the method call doesn't need to be threadsafe).
266 // Since StringImpl objects are immutable, there's no other reason to make a copy.
267 PassRefPtr
<StringImpl
> threadsafeCopy() const;
269 PassRefPtr
<StringImpl
> substring(unsigned pos
, unsigned len
= UINT_MAX
);
271 UChar
operator[](unsigned i
) { ASSERT(i
< m_length
); return m_data
[i
]; }
272 UChar32
characterStartingAt(unsigned);
274 bool containsOnlyWhitespace();
// Numeric conversions. Each takes an optional `ok` out-pointer (default 0).
// The *Strict variants additionally take a numeric base (default 10).
// NOTE(review): presumably the strict variants reject trailing garbage, unlike
// the non-strict forms below — confirm in the implementation.
int toIntStrict(bool* ok = 0, int base = 10);
unsigned toUIntStrict(bool* ok = 0, int base = 10);
int64_t toInt64Strict(bool* ok = 0, int base = 10);
uint64_t toUInt64Strict(bool* ok = 0, int base = 10);
intptr_t toIntPtrStrict(bool* ok = 0, int base = 10);

int toInt(bool* ok = 0); // ignores trailing garbage
unsigned toUInt(bool* ok = 0); // ignores trailing garbage
int64_t toInt64(bool* ok = 0); // ignores trailing garbage
uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage
intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage

double toDouble(bool* ok = 0);
float toFloat(bool* ok = 0);
291 PassRefPtr
<StringImpl
> lower();
292 PassRefPtr
<StringImpl
> upper();
293 PassRefPtr
<StringImpl
> secure(UChar
, bool hideLastCharacter
= true);
294 PassRefPtr
<StringImpl
> foldCase();
296 PassRefPtr
<StringImpl
> stripWhiteSpace();
297 PassRefPtr
<StringImpl
> simplifyWhiteSpace();
299 PassRefPtr
<StringImpl
> removeCharacters(CharacterMatchFunctionPtr
);
301 int find(const char*, int index
= 0, bool caseSensitive
= true);
302 int find(UChar
, int index
= 0);
303 int find(CharacterMatchFunctionPtr
, int index
= 0);
304 int find(StringImpl
*, int index
, bool caseSensitive
= true);
306 int reverseFind(UChar
, int index
);
307 int reverseFind(StringImpl
*, int index
, bool caseSensitive
= true);
309 bool startsWith(StringImpl
* str
, bool caseSensitive
= true) { return reverseFind(str
, 0, caseSensitive
) == 0; }
310 bool endsWith(StringImpl
*, bool caseSensitive
= true);
312 PassRefPtr
<StringImpl
> replace(UChar
, UChar
);
313 PassRefPtr
<StringImpl
> replace(UChar
, StringImpl
*);
314 PassRefPtr
<StringImpl
> replace(StringImpl
*, StringImpl
*);
315 PassRefPtr
<StringImpl
> replace(unsigned index
, unsigned len
, StringImpl
*);
317 Vector
<char> ascii();
318 int wordCount(int maxWordsToCount
= INT_MAX
);
320 WTF::Unicode::Direction
defaultWritingDirection();
323 CFStringRef
createCFString();
326 operator NSString
*();
330 // This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings.
331 static const unsigned s_copyCharsInlineCutOff
= 20;
333 static PassRefPtr
<StringImpl
> createStrippingNullCharactersSlowCase(const UChar
*, unsigned length
);
335 BufferOwnership
bufferOwnership() const { return static_cast<BufferOwnership
>(m_refCountAndFlags
& s_refCountMaskBufferOwnership
); }
336 bool isStatic() const { return m_refCountAndFlags
& s_refCountFlagStatic
; }
340 StringImpl
* m_substringBuffer
;
341 SharedUChar
* m_sharedBuffer
;
343 mutable unsigned m_hash
;
346 bool equal(const StringImpl
*, const StringImpl
*);
347 bool equal(const StringImpl
*, const char*);
348 inline bool equal(const char* a
, StringImpl
* b
) { return equal(b
, a
); }
350 bool equalIgnoringCase(StringImpl
*, StringImpl
*);
351 bool equalIgnoringCase(StringImpl
*, const char*);
352 inline bool equalIgnoringCase(const char* a
, StringImpl
* b
) { return equalIgnoringCase(b
, a
); }
353 bool equalIgnoringCase(const UChar
* a
, const char* b
, unsigned length
);
354 inline bool equalIgnoringCase(const char* a
, const UChar
* b
, unsigned length
) { return equalIgnoringCase(b
, a
, length
); }
356 bool equalIgnoringNullity(StringImpl
*, StringImpl
*);
358 static inline bool isSpaceOrNewline(UChar c
)
360 // Use isASCIISpace() for basic Latin-1.
361 // This will include newlines, which aren't included in Unicode DirWS.
362 return c
<= 0x7F ? WTF::isASCIISpace(c
) : WTF::Unicode::direction(c
) == WTF::Unicode::WhiteSpaceNeutral
;
365 // This is a hot function because it's used when parsing HTML.
366 inline PassRefPtr
<StringImpl
> StringImpl::createStrippingNullCharacters(const UChar
* characters
, unsigned length
)
371 // Optimize for the case where there are no Null characters by quickly
372 // searching for nulls, and then using StringImpl::create, which will
373 // memcpy the whole buffer. This is faster than assigning character by
374 // character during the loop.
378 for (unsigned i
= 0; !foundNull
&& i
< length
; i
++) {
379 int c
= characters
[i
]; // more efficient than using UChar here (at least on Intel Mac OS)
383 return StringImpl::create(characters
, length
);
385 return StringImpl::createStrippingNullCharactersSlowCase(characters
, length
);
390 using WebCore::equal
;
394 // WebCore::StringHash is the default hash for StringImpl* and RefPtr<StringImpl>
395 template<typename T
> struct DefaultHash
;
396 template<> struct DefaultHash
<WebCore::StringImpl
*> {
397 typedef WebCore::StringHash Hash
;
399 template<> struct DefaultHash
<RefPtr
<WebCore::StringImpl
> > {
400 typedef WebCore::StringHash Hash
;