2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Google Inc. All rights reserved.
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
27 #include <wtf/ASCIICType.h>
28 #include <wtf/CrossThreadRefCounted.h>
29 #include <wtf/OwnFastMallocPtr.h>
30 #include <wtf/StdLibExtras.h>
31 #include <wtf/StringHashFunctions.h>
32 #include <wtf/Vector.h>
33 #include <wtf/text/StringImplBase.h>
34 #include <wtf/unicode/Unicode.h>
// Opaque CoreFoundation string handle, declared here so CFStringRef can be named
// without including a CoreFoundation header.
37 typedef const struct __CFString
* CFStringRef
;
44 // FIXME: This is a temporary layering violation while we move string code to WTF.
45 // Landing the file moves in one patch, will follow on with patches to change the namespaces.
// Forward declarations of the translators that StringImpl befriends as JSC::... below.
// NOTE(review): the enclosing namespace lines are not visible in this extract.
48 struct IdentifierCStringTranslator
;
49 struct IdentifierUCharBufferTranslator
;
53 // FIXME: This is a temporary layering violation while we move string code to WTF.
54 // Landing the file moves in one patch, will follow on with patches to change the namespaces.
// Forward declarations of the translators that StringImpl befriends without qualification below.
59 struct CStringTranslator
;
60 struct HashAndCharactersTranslator
;
62 struct UCharBufferTranslator
;
// Case-sensitivity selector. The enumerators take the implicit values 0 and 1.
enum TextCaseSensitivity {
    TextCaseSensitive,
    TextCaseInsensitive
};
66 typedef OwnFastMallocPtr
<const UChar
> SharableUChar
;
67 typedef CrossThreadRefCounted
<SharableUChar
> SharedUChar
;
68 typedef bool (*CharacterMatchFunctionPtr
)(UChar
);
70 class StringImpl
: public StringImplBase
{
71 friend struct JSC::IdentifierCStringTranslator
;
72 friend struct JSC::IdentifierUCharBufferTranslator
;
73 friend struct CStringTranslator
;
74 friend struct HashAndCharactersTranslator
;
75 friend struct UCharBufferTranslator
;
76 friend class AtomicStringImpl
;
78 // Used to construct static strings, which have a special refCount that can never hit zero.
79 // This means that the static string will never be destroyed, which is important because
80 // static strings will be shared across threads & ref-counted in a non-threadsafe manner.
// NOTE(review): only the StringImplBase(length, ConstructStaticString) initializer is
// visible in this extract; the remaining member initializers and the constructor body
// (to which the comment further down belongs) are not shown.
81 StringImpl(const UChar
* characters
, unsigned length
, StaticStringConstructType
)
82 : StringImplBase(length
, ConstructStaticString
)
87 // Ensure that the hash is computed so that AtomicStringHash can call existingHash()
88 // with impunity. The empty string is special because it is never entered into
89 // AtomicString's HashKey, but still needs to compare correctly.
93 // Create a normal string with internal storage (BufferInternal)
// m_data is pointed at the address immediately after this object (this + 1), so the
// character storage is expected to sit in the same allocation, directly behind the
// StringImpl. tryCreateUninitialized() allocates sizeof(StringImpl) plus the character
// bytes and placement-constructs through this constructor.
// NOTE(review): any further initializers and the constructor body are not visible here.
94 StringImpl(unsigned length
)
95 : StringImplBase(length
, BufferInternal
)
96 , m_data(reinterpret_cast<const UChar
*>(this + 1))
104 // Create a StringImpl adopting ownership of the provided buffer (BufferOwned)
// NOTE(review): only the base-class initializer is visible in this extract; how
// `characters` is stored and the constructor body are not shown.
105 StringImpl(const UChar
* characters
, unsigned length
)
106 : StringImplBase(length
, BufferOwned
)
115 // Used to create new strings that are a substring of an existing StringImpl (BufferSubstring)
// `base` is consumed: the raw pointer returned by base.releaseRef() is kept in
// m_substringBuffer.
// NOTE(review): some initializers and the braces of the body are not visible here.
116 StringImpl(const UChar
* characters
, unsigned length
, PassRefPtr
<StringImpl
> base
)
117 : StringImplBase(length
, BufferSubstring
)
119 , m_substringBuffer(base
.releaseRef())
// The base must not itself be a substring; create(rep, offset, length) resolves the
// underlying owner before calling this constructor.
124 ASSERT(m_substringBuffer
->bufferOwnership() != BufferSubstring
);
127 // Used to construct new strings sharing an existing SharedUChar (BufferShared)
// `sharedBuffer` is consumed: the raw pointer returned by sharedBuffer.releaseRef() is
// kept in m_sharedBuffer.
// NOTE(review): the remaining initializers and the constructor body are not visible here.
128 StringImpl(const UChar
* characters
, unsigned length
, PassRefPtr
<SharedUChar
> sharedBuffer
)
129 : StringImplBase(length
, BufferShared
)
131 , m_sharedBuffer(sharedBuffer
.releaseRef())
138 // For use only by AtomicString's XXXTranslator helpers.
// NOTE(review): the statement that actually stores the hash is not visible in this extract.
139 void setHash(unsigned hash
)
// The supplied hash must be exactly what computeHash() yields for this string's characters.
143 ASSERT(hash
== computeHash(m_data
, m_length
));
150 static PassRefPtr
<StringImpl
> create(const UChar
*, unsigned length
);
151 static PassRefPtr
<StringImpl
> create(const char*, unsigned length
);
152 static PassRefPtr
<StringImpl
> create(const char*);
153 static PassRefPtr
<StringImpl
> create(const UChar
*, unsigned length
, PassRefPtr
<SharedUChar
> sharedBuffer
);
// Creates a BufferSubstring string viewing `length` characters of `rep`, starting at `offset`.
// NOTE(review): several lines of this body are not visible in this extract. In the
// visible lines only `length` is checked against rep->length(); `offset + length` is not.
154 static PassRefPtr
<StringImpl
> create(PassRefPtr
<StringImpl
> rep
, unsigned offset
, unsigned length
)
157 ASSERT(length
<= rep
->length());
// If rep is itself a substring, attach to the buffer it refers to instead, so that a
// substring never uses another substring as its base.
162 StringImpl
* ownerRep
= (rep
->bufferOwnership() == BufferSubstring
) ? rep
->m_substringBuffer
: rep
.get();
163 return adoptRef(new StringImpl(rep
->m_data
+ offset
, length
, ownerRep
));
166 static PassRefPtr
<StringImpl
> createUninitialized(unsigned length
, UChar
*& data
);
// Allocates a StringImpl and its character storage in a single tryFastMalloc() block and
// hands the character storage back through `output`.
// NOTE(review): the statements executed when either `if` below is taken (the failure
// paths) and the function's braces are not visible in this extract.
167 static PassRefPtr
<StringImpl
> tryCreateUninitialized(unsigned length
, UChar
*& output
)
// Reject lengths for which sizeof(UChar) * length + sizeof(StringImpl) would not fit in size_t.
174 if (length
> ((std::numeric_limits
<size_t>::max() - sizeof(StringImpl
)) / sizeof(UChar
)))
176 StringImpl
* resultImpl
;
177 if (!tryFastMalloc(sizeof(UChar
) * length
+ sizeof(StringImpl
)).getValue(resultImpl
))
// The characters live immediately after the StringImpl object in the same allocation.
179 output
= reinterpret_cast<UChar
*>(resultImpl
+ 1);
// Placement-construct the object in the memory just obtained.
180 return adoptRef(new(resultImpl
) StringImpl(length
));
183 static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl
, m_data
); }
184 static PassRefPtr
<StringImpl
> createWithTerminatingNullCharacter(const StringImpl
&);
185 static PassRefPtr
<StringImpl
> createStrippingNullCharacters(const UChar
*, unsigned length
);
// Builds a StringImpl from a Vector<UChar>. For a non-empty vector the buffer returned by
// vector.releaseBuffer() is passed to the (characters, length) constructor together with
// the vector's size.
// NOTE(review): the closing brace of the `if` and the path taken for an empty vector are
// not visible in this extract.
187 template<size_t inlineCapacity
>
188 static PassRefPtr
<StringImpl
> adopt(Vector
<UChar
, inlineCapacity
>& vector
)
190 if (size_t size
= vector
.size()) {
191 ASSERT(vector
.data());
192 return adoptRef(new StringImpl(vector
.releaseBuffer(), size
));
196 static PassRefPtr
<StringImpl
> adopt(StringBuffer
&);
198 SharedUChar
* sharedBuffer();
199 const UChar
* characters() const { return m_data
; }
// NOTE(review): this is a fragment — the enclosing function's signature (presumably
// cost(), given the recursive call below) and the remainder of its body are not visible
// in this extract.
203 // For substrings, return the cost of the base string.
204 if (bufferOwnership() == BufferSubstring
)
205 return m_substringBuffer
->cost();
// The "should report cost" flag is cleared the first time it is found set.
207 if (m_refCountAndFlags
& s_refCountFlagShouldReportedCost
) {
208 m_refCountAndFlags
&= ~s_refCountFlagShouldReportedCost
;
214 bool isIdentifier() const { return m_refCountAndFlags
& s_refCountFlagIsIdentifier
; }
// Sets or clears the s_refCountFlagIsIdentifier bit in m_refCountAndFlags.
// NOTE(review): the braces and the condition that selects between the two statements
// below are not visible in this extract.
215 void setIsIdentifier(bool isIdentifier
)
219 m_refCountAndFlags
|= s_refCountFlagIsIdentifier
;
221 m_refCountAndFlags
&= ~s_refCountFlagIsIdentifier
;
224 bool hasTerminatingNullCharacter() const { return m_refCountAndFlags
& s_refCountFlagHasTerminatingNullCharacter
; }
226 bool isAtomic() const { return m_refCountAndFlags
& s_refCountFlagIsAtomic
; }
// Sets or clears the s_refCountFlagIsAtomic bit in m_refCountAndFlags.
// NOTE(review): the parameter is named `isIdentifier` although it controls the *atomic*
// flag — this looks like a copy/paste from setIsIdentifier(); consider renaming it
// (together with the condition that reads it).
// NOTE(review): the braces and the condition that selects between the two statements
// below are not visible in this extract.
227 void setIsAtomic(bool isIdentifier
)
231 m_refCountAndFlags
|= s_refCountFlagIsAtomic
;
233 m_refCountAndFlags
&= ~s_refCountFlagIsAtomic
;
236 unsigned hash() const { if (!m_hash
) m_hash
= computeHash(m_data
, m_length
); return m_hash
; }
237 unsigned existingHash() const { ASSERT(m_hash
); return m_hash
; }
238 static unsigned computeHash(const UChar
* data
, unsigned length
) { return WTF::stringHash(data
, length
); }
239 static unsigned computeHash(const char* data
, unsigned length
) { return WTF::stringHash(data
, length
); }
240 static unsigned computeHash(const char* data
) { return WTF::stringHash(data
); }
242 ALWAYS_INLINE
void deref() { m_refCountAndFlags
-= s_refCountIncrement
; if (!(m_refCountAndFlags
& (s_refCountMask
| s_refCountFlagStatic
))) delete this; }
243 ALWAYS_INLINE
bool hasOneRef() const { return (m_refCountAndFlags
& (s_refCountMask
| s_refCountFlagStatic
)) == s_refCountIncrement
; }
245 static StringImpl
* empty();
// Copies numCharacters UChars from source to destination. Counts up to
// s_copyCharsInlineCutOff are copied with an element-by-element loop; the memcpy() below
// copies the same range in one call.
// NOTE(review): the function braces, the end of the `if` block and whatever separates the
// loop from the memcpy() (presumably an early return or else) are not visible in this
// extract.
247 static void copyChars(UChar
* destination
, const UChar
* source
, unsigned numCharacters
)
249 if (numCharacters
<= s_copyCharsInlineCutOff
) {
250 for (unsigned i
= 0; i
< numCharacters
; ++i
)
251 destination
[i
] = source
[i
];
253 memcpy(destination
, source
, numCharacters
* sizeof(UChar
));
256 // Returns a StringImpl suitable for use on another thread.
257 PassRefPtr
<StringImpl
> crossThreadString();
258 // Makes a deep copy. Helpful only if you need to use a String on another thread
259 // (use crossThreadString if the method call doesn't need to be threadsafe).
260 // Since StringImpl objects are immutable, there's no other reason to make a copy.
261 PassRefPtr
<StringImpl
> threadsafeCopy() const;
263 PassRefPtr
<StringImpl
> substring(unsigned pos
, unsigned len
= UINT_MAX
);
265 UChar
operator[](unsigned i
) { ASSERT(i
< m_length
); return m_data
[i
]; }
266 UChar32
characterStartingAt(unsigned);
268 bool containsOnlyWhitespace();
// Numeric conversions (declared only). Every function takes an optional `ok` out-pointer
// defaulting to 0; the *Strict variants also take a `base` defaulting to 10.
int toIntStrict(bool* ok = 0, int base = 10);
unsigned toUIntStrict(bool* ok = 0, int base = 10);
int64_t toInt64Strict(bool* ok = 0, int base = 10);
uint64_t toUInt64Strict(bool* ok = 0, int base = 10);
intptr_t toIntPtrStrict(bool* ok = 0, int base = 10);

// The non-strict integer variants ignore trailing garbage.
int toInt(bool* ok = 0);
unsigned toUInt(bool* ok = 0);
int64_t toInt64(bool* ok = 0);
uint64_t toUInt64(bool* ok = 0);
intptr_t toIntPtr(bool* ok = 0);

double toDouble(bool* ok = 0);
float toFloat(bool* ok = 0);
285 PassRefPtr
<StringImpl
> lower();
286 PassRefPtr
<StringImpl
> upper();
287 PassRefPtr
<StringImpl
> secure(UChar aChar
, bool last
= true);
288 PassRefPtr
<StringImpl
> foldCase();
290 PassRefPtr
<StringImpl
> stripWhiteSpace();
291 PassRefPtr
<StringImpl
> simplifyWhiteSpace();
293 PassRefPtr
<StringImpl
> removeCharacters(CharacterMatchFunctionPtr
);
295 int find(const char*, int index
= 0, bool caseSensitive
= true);
296 int find(UChar
, int index
= 0);
297 int find(CharacterMatchFunctionPtr
, int index
= 0);
298 int find(StringImpl
*, int index
, bool caseSensitive
= true);
300 int reverseFind(UChar
, int index
);
301 int reverseFind(StringImpl
*, int index
, bool caseSensitive
= true);
303 bool startsWith(StringImpl
* str
, bool caseSensitive
= true) { return reverseFind(str
, 0, caseSensitive
) == 0; }
304 bool endsWith(StringImpl
*, bool caseSensitive
= true);
306 PassRefPtr
<StringImpl
> replace(UChar
, UChar
);
307 PassRefPtr
<StringImpl
> replace(UChar
, StringImpl
*);
308 PassRefPtr
<StringImpl
> replace(StringImpl
*, StringImpl
*);
309 PassRefPtr
<StringImpl
> replace(unsigned index
, unsigned len
, StringImpl
*);
311 Vector
<char> ascii();
312 int wordCount(int maxWordsToCount
= INT_MAX
);
314 WTF::Unicode::Direction
defaultWritingDirection();
317 CFStringRef
createCFString();
320 operator NSString
*();
324 // This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings.
325 static const unsigned s_copyCharsInlineCutOff
= 20;
327 static PassRefPtr
<StringImpl
> createStrippingNullCharactersSlowCase(const UChar
*, unsigned length
);
329 BufferOwnership
bufferOwnership() const { return static_cast<BufferOwnership
>(m_refCountAndFlags
& s_refCountMaskBufferOwnership
); }
330 bool isStatic() const { return m_refCountAndFlags
& s_refCountFlagStatic
; }
// NOTE(review): the surrounding member layout (including the m_data declaration and any
// enclosing aggregate) is not visible in this extract.
// Base string referenced by a BufferSubstring string; set by the substring constructor.
334 StringImpl
* m_substringBuffer
;
// Shared buffer referenced by a BufferShared string; set by the shared-buffer constructor.
335 SharedUChar
* m_sharedBuffer
;
// Cached hash; mutable so the const hash() accessor can fill it in lazily.
337 mutable unsigned m_hash
;
340 bool equal(const StringImpl
*, const StringImpl
*);
341 bool equal(const StringImpl
*, const char*);
342 inline bool equal(const char* a
, StringImpl
* b
) { return equal(b
, a
); }
344 bool equalIgnoringCase(StringImpl
*, StringImpl
*);
345 bool equalIgnoringCase(StringImpl
*, const char*);
346 inline bool equalIgnoringCase(const char* a
, StringImpl
* b
) { return equalIgnoringCase(b
, a
); }
347 bool equalIgnoringCase(const UChar
* a
, const char* b
, unsigned length
);
348 inline bool equalIgnoringCase(const char* a
, const UChar
* b
, unsigned length
) { return equalIgnoringCase(b
, a
, length
); }
350 bool equalIgnoringNullity(StringImpl
*, StringImpl
*);
352 static inline bool isSpaceOrNewline(UChar c
)
354 // Use isASCIISpace() for basic Latin-1.
355 // This will include newlines, which aren't included in Unicode DirWS.
356 return c
<= 0x7F ? WTF::isASCIISpace(c
) : WTF::Unicode::direction(c
) == WTF::Unicode::WhiteSpaceNeutral
;
359 // This is a hot function because it's used when parsing HTML.
// NOTE(review): several lines of this body are not visible in this extract — the function
// braces, the declaration of `foundNull`, the statement inside the loop that updates it,
// and the condition that chooses between the two return statements.
360 inline PassRefPtr
<StringImpl
> StringImpl::createStrippingNullCharacters(const UChar
* characters
, unsigned length
)
365 // Optimize for the case where there are no Null characters by quickly
366 // searching for nulls, and then using StringImpl::create, which will
367 // memcpy the whole buffer. This is faster than assigning character by
368 // character during the loop.
// The scan stops as soon as foundNull becomes true.
372 for (unsigned i
= 0; !foundNull
&& i
< length
; i
++) {
373 int c
= characters
[i
]; // more efficient than using UChar here (at least on Intel Mac OS)
// Fast path: copy the input unchanged through StringImpl::create().
377 return StringImpl::create(characters
, length
);
// Slow path: delegate to createStrippingNullCharactersSlowCase().
379 return StringImpl::createStrippingNullCharactersSlowCase(characters
, length
);
// Re-exports WebCore::equal into the current scope.
// NOTE(review): the namespace boundaries around this using-declaration and the
// specializations below, as well as the closing lines of each specialization, are not
// visible in this extract.
384 using WebCore::equal
;
388 // WebCore::StringHash is the default hash for StringImpl* and RefPtr<StringImpl>
389 template<typename T
> struct DefaultHash
;
// Specialization for raw StringImpl pointers.
390 template<> struct DefaultHash
<WebCore::StringImpl
*> {
391 typedef WebCore::StringHash Hash
;
// Specialization for RefPtr<StringImpl>.
393 template<> struct DefaultHash
<RefPtr
<WebCore::StringImpl
> > {
394 typedef WebCore::StringHash Hash
;