2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Google Inc. All rights reserved.
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
27 #include <wtf/ASCIICType.h>
28 #include <wtf/CrossThreadRefCounted.h>
29 #include <wtf/Forward.h>
30 #include <wtf/OwnFastMallocPtr.h>
31 #include <wtf/StdLibExtras.h>
32 #include <wtf/StringHasher.h>
33 #include <wtf/Vector.h>
34 #include <wtf/text/StringImplBase.h>
35 #include <wtf/unicode/Unicode.h>
38 typedef const struct __CFString
* CFStringRef
;
45 // FIXME: This is a temporary layering violation while we move string code to WTF.
46 // Landing the file moves in one patch, will follow on with patches to change the namespaces.
48 struct IdentifierCStringTranslator
;
49 struct IdentifierUCharBufferTranslator
;
54 struct CStringTranslator
;
55 struct HashAndCharactersTranslator
;
56 struct HashAndUTF8CharactersTranslator
;
57 struct UCharBufferTranslator
;
59 enum TextCaseSensitivity
{ TextCaseSensitive
, TextCaseInsensitive
};
61 typedef OwnFastMallocPtr
<const UChar
> SharableUChar
;
62 typedef CrossThreadRefCounted
<SharableUChar
> SharedUChar
;
63 typedef bool (*CharacterMatchFunctionPtr
)(UChar
);
65 class StringImpl
: public StringImplBase
{
66 friend struct JSC::IdentifierCStringTranslator
;
67 friend struct JSC::IdentifierUCharBufferTranslator
;
68 friend struct WTF::CStringTranslator
;
69 friend struct WTF::HashAndCharactersTranslator
;
70 friend struct WTF::HashAndUTF8CharactersTranslator
;
71 friend struct WTF::UCharBufferTranslator
;
72 friend class AtomicStringImpl
;
74 // Used to construct static strings, which have a special refCount that can never hit zero.
75 // This means that the static string will never be destroyed, which is important because
76 // static strings will be shared across threads & ref-counted in a non-threadsafe manner.
77 StringImpl(const UChar
* characters
, unsigned length
, StaticStringConstructType
)
78 : StringImplBase(length
, ConstructStaticString
)
83 // Ensure that the hash is computed so that AtomicStringHash can call existingHash()
84 // with impunity. The empty string is special because it is never entered into
85 // AtomicString's HashKey, but still needs to compare correctly.
89 // Create a normal string with internal storage (BufferInternal)
90 StringImpl(unsigned length
)
91 : StringImplBase(length
, BufferInternal
)
92 , m_data(reinterpret_cast<const UChar
*>(this + 1))
100 // Create a StringImpl adopting ownership of the provided buffer (BufferOwned)
101 StringImpl(const UChar
* characters
, unsigned length
)
102 : StringImplBase(length
, BufferOwned
)
111 // Used to create new strings that are a substring of an existing StringImpl (BufferSubstring)
112 StringImpl(const UChar
* characters
, unsigned length
, PassRefPtr
<StringImpl
> base
)
113 : StringImplBase(length
, BufferSubstring
)
115 , m_substringBuffer(base
.leakRef())
120 ASSERT(m_substringBuffer
->bufferOwnership() != BufferSubstring
);
123 // Used to construct new strings sharing an existing SharedUChar (BufferShared)
124 StringImpl(const UChar
* characters
, unsigned length
, PassRefPtr
<SharedUChar
> sharedBuffer
)
125 : StringImplBase(length
, BufferShared
)
127 , m_sharedBuffer(sharedBuffer
.leakRef())
134 // For use only by AtomicString's XXXTranslator helpers.
135 void setHash(unsigned hash
)
139 ASSERT(hash
== StringHasher::computeHash(m_data
, m_length
));
146 static PassRefPtr
<StringImpl
> create(const UChar
*, unsigned length
);
147 static PassRefPtr
<StringImpl
> create(const char*, unsigned length
);
148 static PassRefPtr
<StringImpl
> create(const char*);
149 static PassRefPtr
<StringImpl
> create(const UChar
*, unsigned length
, PassRefPtr
<SharedUChar
> sharedBuffer
);
150 static ALWAYS_INLINE PassRefPtr
<StringImpl
> create(PassRefPtr
<StringImpl
> rep
, unsigned offset
, unsigned length
)
153 ASSERT(length
<= rep
->length());
158 StringImpl
* ownerRep
= (rep
->bufferOwnership() == BufferSubstring
) ? rep
->m_substringBuffer
: rep
.get();
159 return adoptRef(new StringImpl(rep
->m_data
+ offset
, length
, ownerRep
));
162 static PassRefPtr
<StringImpl
> createUninitialized(unsigned length
, UChar
*& data
);
163 static ALWAYS_INLINE PassRefPtr
<StringImpl
> tryCreateUninitialized(unsigned length
, UChar
*& output
)
170 if (length
> ((std::numeric_limits
<unsigned>::max() - sizeof(StringImpl
)) / sizeof(UChar
))) {
174 StringImpl
* resultImpl
;
175 if (!tryFastMalloc(sizeof(UChar
) * length
+ sizeof(StringImpl
)).getValue(resultImpl
)) {
179 output
= reinterpret_cast<UChar
*>(resultImpl
+ 1);
180 return adoptRef(new(resultImpl
) StringImpl(length
));
183 static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl
, m_data
); }
184 static PassRefPtr
<StringImpl
> createWithTerminatingNullCharacter(const StringImpl
&);
185 static PassRefPtr
<StringImpl
> createStrippingNullCharacters(const UChar
*, unsigned length
);
187 template<size_t inlineCapacity
>
188 static PassRefPtr
<StringImpl
> adopt(Vector
<UChar
, inlineCapacity
>& vector
)
190 if (size_t size
= vector
.size()) {
191 ASSERT(vector
.data());
192 if (size
> std::numeric_limits
<unsigned>::max())
194 return adoptRef(new StringImpl(vector
.releaseBuffer(), size
));
198 static PassRefPtr
<StringImpl
> adopt(StringBuffer
&);
200 SharedUChar
* sharedBuffer();
201 const UChar
* characters() const { return m_data
; }
205 // For substrings, return the cost of the base string.
206 if (bufferOwnership() == BufferSubstring
)
207 return m_substringBuffer
->cost();
209 if (m_refCountAndFlags
& s_refCountFlagShouldReportedCost
) {
210 m_refCountAndFlags
&= ~s_refCountFlagShouldReportedCost
;
216 bool isIdentifier() const { return m_refCountAndFlags
& s_refCountFlagIsIdentifier
; }
217 void setIsIdentifier(bool isIdentifier
)
221 m_refCountAndFlags
|= s_refCountFlagIsIdentifier
;
223 m_refCountAndFlags
&= ~s_refCountFlagIsIdentifier
;
226 bool hasTerminatingNullCharacter() const { return m_refCountAndFlags
& s_refCountFlagHasTerminatingNullCharacter
; }
228 bool isAtomic() const { return m_refCountAndFlags
& s_refCountFlagIsAtomic
; }
229 void setIsAtomic(bool isIdentifier
)
233 m_refCountAndFlags
|= s_refCountFlagIsAtomic
;
235 m_refCountAndFlags
&= ~s_refCountFlagIsAtomic
;
238 unsigned hash() const { if (!m_hash
) m_hash
= StringHasher::computeHash(m_data
, m_length
); return m_hash
; }
239 unsigned existingHash() const { ASSERT(m_hash
); return m_hash
; }
241 ALWAYS_INLINE
void deref() { m_refCountAndFlags
-= s_refCountIncrement
; if (!(m_refCountAndFlags
& (s_refCountMask
| s_refCountFlagStatic
))) delete this; }
242 ALWAYS_INLINE
bool hasOneRef() const { return (m_refCountAndFlags
& (s_refCountMask
| s_refCountFlagStatic
)) == s_refCountIncrement
; }
244 static StringImpl
* empty();
246 static void copyChars(UChar
* destination
, const UChar
* source
, unsigned numCharacters
)
248 if (numCharacters
<= s_copyCharsInlineCutOff
) {
249 for (unsigned i
= 0; i
< numCharacters
; ++i
)
250 destination
[i
] = source
[i
];
252 memcpy(destination
, source
, numCharacters
* sizeof(UChar
));
255 // Returns a StringImpl suitable for use on another thread.
256 PassRefPtr
<StringImpl
> crossThreadString();
257 // Makes a deep copy. Helpful only if you need to use a String on another thread
258 // (use crossThreadString if the method call doesn't need to be threadsafe).
259 // Since StringImpl objects are immutable, there's no other reason to make a copy.
260 PassRefPtr
<StringImpl
> threadsafeCopy() const;
262 PassRefPtr
<StringImpl
> substring(unsigned pos
, unsigned len
= UINT_MAX
);
264 UChar
operator[](unsigned i
) { ASSERT(i
< m_length
); return m_data
[i
]; }
265 UChar32
characterStartingAt(unsigned);
267 bool containsOnlyWhitespace();
269 int toIntStrict(bool* ok
= 0, int base
= 10);
270 unsigned toUIntStrict(bool* ok
= 0, int base
= 10);
271 int64_t toInt64Strict(bool* ok
= 0, int base
= 10);
272 uint64_t toUInt64Strict(bool* ok
= 0, int base
= 10);
273 intptr_t toIntPtrStrict(bool* ok
= 0, int base
= 10);
275 int toInt(bool* ok
= 0); // ignores trailing garbage
276 unsigned toUInt(bool* ok
= 0); // ignores trailing garbage
277 int64_t toInt64(bool* ok
= 0); // ignores trailing garbage
278 uint64_t toUInt64(bool* ok
= 0); // ignores trailing garbage
279 intptr_t toIntPtr(bool* ok
= 0); // ignores trailing garbage
281 double toDouble(bool* ok
= 0, bool* didReadNumber
= 0);
282 float toFloat(bool* ok
= 0, bool* didReadNumber
= 0);
284 PassRefPtr
<StringImpl
> lower();
285 PassRefPtr
<StringImpl
> upper();
287 enum LastCharacterBehavior
{ ObscureLastCharacter
, DisplayLastCharacter
};
289 PassRefPtr
<StringImpl
> secure(UChar
, LastCharacterBehavior
= ObscureLastCharacter
);
290 PassRefPtr
<StringImpl
> foldCase();
292 PassRefPtr
<StringImpl
> stripWhiteSpace();
293 PassRefPtr
<StringImpl
> simplifyWhiteSpace();
295 PassRefPtr
<StringImpl
> removeCharacters(CharacterMatchFunctionPtr
);
297 size_t find(UChar
, unsigned index
= 0);
298 size_t find(CharacterMatchFunctionPtr
, unsigned index
= 0);
299 size_t find(const char*, unsigned index
= 0);
300 size_t find(StringImpl
*, unsigned index
= 0);
301 size_t findIgnoringCase(const char*, unsigned index
= 0);
302 size_t findIgnoringCase(StringImpl
*, unsigned index
= 0);
304 size_t reverseFind(UChar
, unsigned index
= UINT_MAX
);
305 size_t reverseFind(StringImpl
*, unsigned index
= UINT_MAX
);
306 size_t reverseFindIgnoringCase(StringImpl
*, unsigned index
= UINT_MAX
);
308 bool startsWith(StringImpl
* str
, bool caseSensitive
= true) { return (caseSensitive
? reverseFind(str
, 0) : reverseFindIgnoringCase(str
, 0)) == 0; }
309 bool endsWith(StringImpl
*, bool caseSensitive
= true);
311 PassRefPtr
<StringImpl
> replace(UChar
, UChar
);
312 PassRefPtr
<StringImpl
> replace(UChar
, StringImpl
*);
313 PassRefPtr
<StringImpl
> replace(StringImpl
*, StringImpl
*);
314 PassRefPtr
<StringImpl
> replace(unsigned index
, unsigned len
, StringImpl
*);
316 int wordCount(int maxWordsToCount
= INT_MAX
);
318 WTF::Unicode::Direction
defaultWritingDirection(bool* hasStrongDirectionality
= 0);
321 CFStringRef
createCFString();
324 operator NSString
*();
328 // This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings.
329 static const unsigned s_copyCharsInlineCutOff
= 20;
331 static PassRefPtr
<StringImpl
> createStrippingNullCharactersSlowCase(const UChar
*, unsigned length
);
333 BufferOwnership
bufferOwnership() const { return static_cast<BufferOwnership
>(m_refCountAndFlags
& s_refCountMaskBufferOwnership
); }
334 bool isStatic() const { return m_refCountAndFlags
& s_refCountFlagStatic
; }
338 StringImpl
* m_substringBuffer
;
339 SharedUChar
* m_sharedBuffer
;
341 mutable unsigned m_hash
;
344 bool equal(const StringImpl
*, const StringImpl
*);
345 bool equal(const StringImpl
*, const char*);
// Convenience overload that accepts the C string first; it forwards to
// equal(b, a) with the two arguments swapped.
346 inline bool equal(const char* a
, StringImpl
* b
) { return equal(b
, a
); }
348 bool equalIgnoringCase(StringImpl
*, StringImpl
*);
349 bool equalIgnoringCase(StringImpl
*, const char*);
// Convenience overload that accepts the C string first; it forwards to
// equalIgnoringCase(b, a) with the two arguments swapped.
350 inline bool equalIgnoringCase(const char* a
, StringImpl
* b
) { return equalIgnoringCase(b
, a
); }
351 bool equalIgnoringCase(const UChar
* a
, const char* b
, unsigned length
);
// Convenience overload that accepts the C string first; it forwards to
// equalIgnoringCase(b, a, length) with the two buffers swapped and the same
// length.
352 inline bool equalIgnoringCase(const char* a
, const UChar
* b
, unsigned length
) { return equalIgnoringCase(b
, a
, length
); }
354 bool equalIgnoringNullity(StringImpl
*, StringImpl
*);
356 template<size_t inlineCapacity
>
357 bool equalIgnoringNullity(const Vector
<UChar
, inlineCapacity
>& a
, StringImpl
* b
)
361 if (a
.size() != b
->length())
363 return !memcmp(a
.data(), b
->characters(), b
->length());
366 int codePointCompare(const StringImpl
*, const StringImpl
*);
368 static inline bool isSpaceOrNewline(UChar c
)
370 // Use isASCIISpace() for basic Latin-1.
371 // This will include newlines, which aren't included in Unicode DirWS.
372 return c
<= 0x7F ? WTF::isASCIISpace(c
) : WTF::Unicode::direction(c
) == WTF::Unicode::WhiteSpaceNeutral
;
375 // This is a hot function because it's used when parsing HTML.
376 inline PassRefPtr
<StringImpl
> StringImpl::createStrippingNullCharacters(const UChar
* characters
, unsigned length
)
381 // Optimize for the case where there are no Null characters by quickly
382 // searching for nulls, and then using StringImpl::create, which will
383 // memcpy the whole buffer. This is faster than assigning character by
384 // character during the loop.
388 for (unsigned i
= 0; !foundNull
&& i
< length
; i
++) {
389 int c
= characters
[i
]; // more efficient than using UChar here (at least on Intel Mac OS)
393 return StringImpl::create(characters
, length
);
395 return StringImpl::createStrippingNullCharactersSlowCase(characters
, length
);
400 // StringHash is the default hash for StringImpl* and RefPtr<StringImpl>
401 template<typename T
> struct DefaultHash
;
402 template<> struct DefaultHash
<StringImpl
*> {
403 typedef StringHash Hash
;
405 template<> struct DefaultHash
<RefPtr
<StringImpl
> > {
406 typedef StringHash Hash
;
411 using WTF::StringImpl
;
413 using WTF::TextCaseSensitivity
;
414 using WTF::TextCaseSensitive
;
415 using WTF::TextCaseInsensitive
;