2 * (C) 1999 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved.
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB. If not, write to
17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
25 // This file would be called String.h, but that conflicts with <string.h>
26 // on systems without case-sensitive file systems.
28 #include "StringImpl.h"
31 #include <objc/objc.h>
35 typedef const struct __CFString
* CFStringRef
;
42 #include <QDataStream>
54 // AECHAR is defined in AEEStdDef.h, but don't include it here to avoid conflicts.
55 #ifndef _AECHAR_DEFINED
56 typedef uint16 AECHAR
;
57 #define _AECHAR_DEFINED
66 // Declarations of string operations
68 bool charactersAreAllASCII(const UChar
*, size_t);
69 bool charactersAreAllLatin1(const UChar
*, size_t);
70 int charactersToIntStrict(const UChar
*, size_t, bool* ok
= 0, int base
= 10);
71 unsigned charactersToUIntStrict(const UChar
*, size_t, bool* ok
= 0, int base
= 10);
72 int64_t charactersToInt64Strict(const UChar
*, size_t, bool* ok
= 0, int base
= 10);
73 uint64_t charactersToUInt64Strict(const UChar
*, size_t, bool* ok
= 0, int base
= 10);
74 intptr_t charactersToIntPtrStrict(const UChar
*, size_t, bool* ok
= 0, int base
= 10);
76 int charactersToInt(const UChar
*, size_t, bool* ok
= 0); // ignores trailing garbage
77 unsigned charactersToUInt(const UChar
*, size_t, bool* ok
= 0); // ignores trailing garbage
78 int64_t charactersToInt64(const UChar
*, size_t, bool* ok
= 0); // ignores trailing garbage
79 uint64_t charactersToUInt64(const UChar
*, size_t, bool* ok
= 0); // ignores trailing garbage
80 intptr_t charactersToIntPtr(const UChar
*, size_t, bool* ok
= 0); // ignores trailing garbage
82 double charactersToDouble(const UChar
*, size_t, bool* ok
= 0, bool* didReadNumber
= 0);
83 float charactersToFloat(const UChar
*, size_t, bool* ok
= 0, bool* didReadNumber
= 0);
85 template<bool isSpecialCharacter(UChar
)> bool isAllSpecialCharacters(const UChar
*, size_t);
89 // Construct a null string, distinguishable from an empty string.
92 // Construct a string with UTF-16 data.
93 String(const UChar
* characters
, unsigned length
);
95 // Construct a string by copying the contents of a vector. To avoid
96 // copying, consider using String::adopt instead.
97 template<size_t inlineCapacity
>
98 explicit String(const Vector
<UChar
, inlineCapacity
>&);
100 // Construct a string with UTF-16 data, from a null-terminated source.
101 String(const UChar
*);
103 // Construct a string with latin1 data.
104 String(const char* characters
, unsigned length
);
106 // Construct a string with latin1 data, from a null-terminated source.
107 String(const char* characters
);
109 // Construct a string referencing an existing StringImpl.
110 String(StringImpl
* impl
) : m_impl(impl
) { }
111 String(PassRefPtr
<StringImpl
> impl
) : m_impl(impl
) { }
112 String(RefPtr
<StringImpl
> impl
) : m_impl(impl
) { }
114 // Inline the destructor.
115 ALWAYS_INLINE
~String() { }
117 void swap(String
& o
) { m_impl
.swap(o
.m_impl
); }
119 static String
adopt(StringBuffer
& buffer
) { return StringImpl::adopt(buffer
); }
120 template<size_t inlineCapacity
>
121 static String
adopt(Vector
<UChar
, inlineCapacity
>& vector
) { return StringImpl::adopt(vector
); }
123 bool isNull() const { return !m_impl
; }
124 bool isEmpty() const { return !m_impl
|| !m_impl
->length(); }
126 StringImpl
* impl() const { return m_impl
.get(); }
// Returns the length reported by the underlying StringImpl.
// NOTE(review): this listing jumps from original line 128 to 132, so the braces and
// whatever sits between them (presumably a null-impl guard) are not visible here —
// confirm against the real header before editing.
128 unsigned length() const
132 return m_impl
->length();
// Returns the UChar buffer exposed by the underlying StringImpl.
// NOTE(review): original lines 136-138 are absent from this listing, so the braces
// and any null-impl handling are not visible — confirm against the real header.
135 const UChar
* characters() const
139 return m_impl
->characters();
142 CString
ascii() const;
143 CString
latin1() const;
144 CString
utf8(bool strict
= false) const;
// Indexed access to a single UChar.
// The visible condition detects a null impl or an out-of-range index; the statement
// it guards (original line 149) and the braces are missing from this listing.
// NOTE(review): presumably the guarded path returns 0 — confirm.
146 UChar
operator[](unsigned index
) const
148 if (!m_impl
|| index
>= m_impl
->length())
// In-range path: read directly from the impl's character buffer.
150 return m_impl
->characters()[index
];
153 static String
number(short);
154 static String
number(unsigned short);
155 static String
number(int);
156 static String
number(unsigned);
157 static String
number(long);
158 static String
number(unsigned long);
159 static String
number(long long);
160 static String
number(unsigned long long);
161 static String
number(double);
163 // Find a single character or string, also with match function & latin1 forms.
164 size_t find(UChar c
, unsigned start
= 0) const
165 { return m_impl
? m_impl
->find(c
, start
) : notFound
; }
166 size_t find(const String
& str
, unsigned start
= 0) const
167 { return m_impl
? m_impl
->find(str
.impl(), start
) : notFound
; }
168 size_t find(CharacterMatchFunctionPtr matchFunction
, unsigned start
= 0) const
169 { return m_impl
? m_impl
->find(matchFunction
, start
) : notFound
; }
170 size_t find(const char* str
, unsigned start
= 0) const
171 { return m_impl
? m_impl
->find(str
, start
) : notFound
; }
173 // Find the last instance of a single character or string.
174 size_t reverseFind(UChar c
, unsigned start
= UINT_MAX
) const
175 { return m_impl
? m_impl
->reverseFind(c
, start
) : notFound
; }
176 size_t reverseFind(const String
& str
, unsigned start
= UINT_MAX
) const
177 { return m_impl
? m_impl
->reverseFind(str
.impl(), start
) : notFound
; }
179 // Case insensitive string matching.
180 size_t findIgnoringCase(const char* str
, unsigned start
= 0) const
181 { return m_impl
? m_impl
->findIgnoringCase(str
, start
) : notFound
; }
182 size_t findIgnoringCase(const String
& str
, unsigned start
= 0) const
183 { return m_impl
? m_impl
->findIgnoringCase(str
.impl(), start
) : notFound
; }
184 size_t reverseFindIgnoringCase(const String
& str
, unsigned start
= UINT_MAX
) const
185 { return m_impl
? m_impl
->reverseFindIgnoringCase(str
.impl(), start
) : notFound
; }
187 // Wrappers for find & reverseFind adding dynamic sensitivity check.
188 size_t find(const char* str
, unsigned start
, bool caseSensitive
) const
189 { return caseSensitive
? find(str
, start
) : findIgnoringCase(str
, start
); }
190 size_t find(const String
& str
, unsigned start
, bool caseSensitive
) const
191 { return caseSensitive
? find(str
, start
) : findIgnoringCase(str
, start
); }
192 size_t reverseFind(const String
& str
, unsigned start
, bool caseSensitive
) const
193 { return caseSensitive
? reverseFind(str
, start
) : reverseFindIgnoringCase(str
, start
); }
195 const UChar
* charactersWithNullTermination();
197 UChar32
characterStartingAt(unsigned) const; // Ditto.
199 bool contains(UChar c
) const { return find(c
) != notFound
; }
200 bool contains(const char* str
, bool caseSensitive
= true) const { return find(str
, 0, caseSensitive
) != notFound
; }
201 bool contains(const String
& str
, bool caseSensitive
= true) const { return find(str
, 0, caseSensitive
) != notFound
; }
203 bool startsWith(const String
& s
, bool caseSensitive
= true) const
204 { return m_impl
? m_impl
->startsWith(s
.impl(), caseSensitive
) : s
.isEmpty(); }
205 bool endsWith(const String
& s
, bool caseSensitive
= true) const
206 { return m_impl
? m_impl
->endsWith(s
.impl(), caseSensitive
) : s
.isEmpty(); }
208 void append(const String
&);
211 void append(const UChar
*, unsigned length
);
212 void insert(const String
&, unsigned pos
);
213 void insert(const UChar
*, unsigned length
, unsigned pos
);
215 String
& replace(UChar a
, UChar b
) { if (m_impl
) m_impl
= m_impl
->replace(a
, b
); return *this; }
216 String
& replace(UChar a
, const String
& b
) { if (m_impl
) m_impl
= m_impl
->replace(a
, b
.impl()); return *this; }
217 String
& replace(const String
& a
, const String
& b
) { if (m_impl
) m_impl
= m_impl
->replace(a
.impl(), b
.impl()); return *this; }
218 String
& replace(unsigned index
, unsigned len
, const String
& b
) { if (m_impl
) m_impl
= m_impl
->replace(index
, len
, b
.impl()); return *this; }
220 void makeLower() { if (m_impl
) m_impl
= m_impl
->lower(); }
221 void makeUpper() { if (m_impl
) m_impl
= m_impl
->upper(); }
// Reassigns m_impl to the result of StringImpl::secure(aChar, behavior); `behavior`
// defaults to StringImpl::ObscureLastCharacter.
// NOTE(review): original lines 223-224 and the closing brace are absent from this
// listing; the sibling makeLower/makeUpper guard on a null impl, so a similar
// guard presumably belongs here — confirm against the real header.
222 void makeSecure(UChar aChar
, StringImpl::LastCharacterBehavior behavior
= StringImpl::ObscureLastCharacter
)
225 m_impl
= m_impl
->secure(aChar
, behavior
);
228 void truncate(unsigned len
);
229 void remove(unsigned pos
, int len
= 1);
231 String
substring(unsigned pos
, unsigned len
= UINT_MAX
) const;
232 String
substringSharingImpl(unsigned pos
, unsigned len
= UINT_MAX
) const;
233 String
left(unsigned len
) const { return substring(0, len
); }
234 String
right(unsigned len
) const { return substring(length() - len
, len
); }
236 // Returns a lowercase/uppercase version of the string
237 String
lower() const;
238 String
upper() const;
240 String
stripWhiteSpace() const;
241 String
simplifyWhiteSpace() const;
243 String
removeCharacters(CharacterMatchFunctionPtr
) const;
244 template<bool isSpecialCharacter(UChar
)> bool isAllSpecialCharacters() const;
246 // Return the string with case folded for case insensitive comparison.
247 String
foldCase() const;
250 static String
format(const char *, ...) WTF_ATTRIBUTE_PRINTF(1, 2);
252 static String
format(const char *, ...);
255 // Returns an uninitialized string. The characters needs to be written
256 // into the buffer returned in data before the returned string is used.
257 // Failure to do this will have unpredictable results.
258 static String
createUninitialized(unsigned length
, UChar
*& data
) { return StringImpl::createUninitialized(length
, data
); }
260 void split(const String
& separator
, Vector
<String
>& result
) const;
261 void split(const String
& separator
, bool allowEmptyEntries
, Vector
<String
>& result
) const;
262 void split(UChar separator
, Vector
<String
>& result
) const;
263 void split(UChar separator
, bool allowEmptyEntries
, Vector
<String
>& result
) const;
265 int toIntStrict(bool* ok
= 0, int base
= 10) const;
266 unsigned toUIntStrict(bool* ok
= 0, int base
= 10) const;
267 int64_t toInt64Strict(bool* ok
= 0, int base
= 10) const;
268 uint64_t toUInt64Strict(bool* ok
= 0, int base
= 10) const;
269 intptr_t toIntPtrStrict(bool* ok
= 0, int base
= 10) const;
271 int toInt(bool* ok
= 0) const;
272 unsigned toUInt(bool* ok
= 0) const;
273 int64_t toInt64(bool* ok
= 0) const;
274 uint64_t toUInt64(bool* ok
= 0) const;
275 intptr_t toIntPtr(bool* ok
= 0) const;
276 double toDouble(bool* ok
= 0, bool* didReadNumber
= 0) const;
277 float toFloat(bool* ok
= 0, bool* didReadNumber
= 0) const;
279 bool percentage(int& percentage
) const;
281 // Returns a StringImpl suitable for use on another thread.
282 String
crossThreadString() const;
283 // Makes a deep copy. Helpful only if you need to use a String on another thread
284 // (use crossThreadString if the method call doesn't need to be threadsafe).
285 // Since the underlying StringImpl objects are immutable, there's no other reason
286 // to ever prefer copy() over plain old assignment.
287 String
threadsafeCopy() const;
289 // Prevent Strings from being implicitly convertable to bool as it will be ambiguous on any platform that
290 // allows implicit conversion to another pointer type (e.g., Mac allows implicit conversion to NSString*).
291 typedef struct ImplicitConversionFromWTFStringToBoolDisallowedA
* (String::*UnspecifiedBoolTypeA
);
292 typedef struct ImplicitConversionFromWTFStringToBoolDisallowedB
* (String::*UnspecifiedBoolTypeB
);
293 operator UnspecifiedBoolTypeA() const;
294 operator UnspecifiedBoolTypeB() const;
298 CFStringRef
createCFString() const;
304 // This conversion maps NULL to "", which loses the meaning of NULL, but we
305 // need this mapping because AppKit crashes when passed nil NSStrings.
306 operator NSString
*() const { if (!m_impl
) return @
""; return *m_impl
; }
310 String(const QString
&);
311 String(const QStringRef
&);
312 operator QString() const;
316 String(const wxString
&);
317 operator wxString() const;
321 String(const BString
&);
322 operator BString() const;
326 String(const AECHAR
*);
329 // String::fromUTF8 will return a null string if
330 // the input data contains invalid UTF-8 sequences.
331 static String
fromUTF8(const char*, size_t);
332 static String
fromUTF8(const char*);
334 // Tries to convert the passed in string to UTF-8, but will fall back to Latin-1 if the string is not valid UTF-8.
335 static String
fromUTF8WithLatin1Fallback(const char*, size_t);
337 // Determines the writing direction using the Unicode Bidi Algorithm rules P2 and P3.
// Two paths are visible: one forwards to StringImpl::defaultWritingDirection, the
// other clears *hasStrongDirectionality (when the pointer is non-null) and
// returns WTF::Unicode::LeftToRight.
// NOTE(review): original lines 339-340 and the closing brace are absent from this
// listing, so the condition choosing between the two paths is not visible;
// presumably it is a check that m_impl is non-null — confirm.
338 WTF::Unicode::Direction
defaultWritingDirection(bool* hasStrongDirectionality
= 0) const
341 return m_impl
->defaultWritingDirection(hasStrongDirectionality
);
// Fallback path: report "no strong directionality" and default to left-to-right.
342 if (hasStrongDirectionality
)
343 *hasStrongDirectionality
= false;
344 return WTF::Unicode::LeftToRight
;
347 bool containsOnlyASCII() const { return charactersAreAllASCII(characters(), length()); }
348 bool containsOnlyLatin1() const { return charactersAreAllLatin1(characters(), length()); }
350 // Hash table deleted values, which are only constructed and never copied or destroyed.
351 String(WTF::HashTableDeletedValueType
) : m_impl(WTF::HashTableDeletedValue
) { }
352 bool isHashTableDeletedValue() const { return m_impl
.isHashTableDeletedValue(); }
355 RefPtr
<StringImpl
> m_impl
;
359 QDataStream
& operator<<(QDataStream
& stream
, const String
& str
);
360 QDataStream
& operator>>(QDataStream
& stream
, String
& str
);
363 inline String
& operator+=(String
& a
, const String
& b
) { a
.append(b
); return a
; }
365 inline bool operator==(const String
& a
, const String
& b
) { return equal(a
.impl(), b
.impl()); }
366 inline bool operator==(const String
& a
, const char* b
) { return equal(a
.impl(), b
); }
367 inline bool operator==(const char* a
, const String
& b
) { return equal(a
, b
.impl()); }
369 inline bool operator!=(const String
& a
, const String
& b
) { return !equal(a
.impl(), b
.impl()); }
370 inline bool operator!=(const String
& a
, const char* b
) { return !equal(a
.impl(), b
); }
371 inline bool operator!=(const char* a
, const String
& b
) { return !equal(a
, b
.impl()); }
373 inline bool equalIgnoringCase(const String
& a
, const String
& b
) { return equalIgnoringCase(a
.impl(), b
.impl()); }
374 inline bool equalIgnoringCase(const String
& a
, const char* b
) { return equalIgnoringCase(a
.impl(), b
); }
375 inline bool equalIgnoringCase(const char* a
, const String
& b
) { return equalIgnoringCase(a
, b
.impl()); }
377 inline bool equalPossiblyIgnoringCase(const String
& a
, const String
& b
, bool ignoreCase
)
379 return ignoreCase
? equalIgnoringCase(a
, b
) : (a
== b
);
382 inline bool equalIgnoringNullity(const String
& a
, const String
& b
) { return equalIgnoringNullity(a
.impl(), b
.impl()); }
384 template<size_t inlineCapacity
>
385 inline bool equalIgnoringNullity(const Vector
<UChar
, inlineCapacity
>& a
, const String
& b
) { return equalIgnoringNullity(a
, b
.impl()); }
387 inline bool operator!(const String
& str
) { return str
.isNull(); }
389 inline void swap(String
& a
, String
& b
) { a
.swap(b
); }
391 // Definitions of string operations
// Out-of-class definition of the vector-copy constructor. An empty vector leaves
// m_impl null (0); otherwise StringImpl::create is called with the vector's data
// and size.
// NOTE(review): the constructor body (original lines 396-397) is absent from this
// listing; presumably it is an empty `{ }` — confirm.
393 template<size_t inlineCapacity
>
394 String::String(const Vector
<UChar
, inlineCapacity
>& vector
)
395 : m_impl(vector
.size() ? StringImpl::create(vector
.data(), vector
.size()) : 0)
400 // This is for situations in WebKit where the long standing behavior has been
401 // "nil if empty", so we try to maintain longstanding behavior for the sake of
402 // entrenched clients
403 inline NSString
* nsStringNilIfEmpty(const String
& str
) { return str
.isEmpty() ? nil
: (NSString
*)str
; }
406 inline bool charactersAreAllASCII(const UChar
* characters
, size_t length
)
409 for (size_t i
= 0; i
< length
; ++i
)
410 ored
|= characters
[i
];
411 return !(ored
& 0xFF80);
414 inline bool charactersAreAllLatin1(const UChar
* characters
, size_t length
)
417 for (size_t i
= 0; i
< length
; ++i
)
418 ored
|= characters
[i
];
419 return !(ored
& 0xFF00);
422 int codePointCompare(const String
&, const String
&);
// Forward scan of characters[index .. length) for matchCharacter.
// NOTE(review): only the loop header and the comparison survive in this listing
// (original lines 425 and 428-432 are absent); presumably a match returns the
// index and exhaustion returns notFound — confirm against the real header.
424 inline size_t find(const UChar
* characters
, unsigned length
, UChar matchCharacter
, unsigned index
= 0)
426 while (index
< length
) {
427 if (characters
[index
] == matchCharacter
)
// Forward scan of characters[index .. length) for the first unit accepted by
// matchFunction.
// NOTE(review): only the loop header and the predicate call survive in this listing
// (original lines 435 and 438-442 are absent); the return statements and index
// advance are not visible — confirm against the real header.
434 inline size_t find(const UChar
* characters
, unsigned length
, CharacterMatchFunctionPtr matchFunction
, unsigned index
= 0)
436 while (index
< length
) {
437 if (matchFunction(characters
[index
]))
// Backward search for matchCharacter; `index` defaults to UINT_MAX.
// NOTE(review): original lines 445-449 and 451-455 are absent from this listing, so
// the clamping of `index` against `length`, the loop body and the return values
// are not visible. Do not edit without the complete definition.
444 inline size_t reverseFind(const UChar
* characters
, unsigned length
, UChar matchCharacter
, unsigned index
= UINT_MAX
)
450 while (characters
[index
] != matchCharacter
) {
457 inline void append(Vector
<UChar
>& vector
, const String
& string
)
459 vector
.append(string
.characters(), string
.length());
// Appends the decimal digits of an unsigned char (0-255) to `vector`.
// NOTE(review): the case labels, the statements between the digit stores and the
// closing braces are absent from this listing (original lines 463, 467, 469,
// 471-473, 475-477, 479-480). Presumably `number` is divided by 10 between
// stores and the cases fall through — confirm against the real header.
462 inline void appendNumber(Vector
<UChar
>& vector
, unsigned char number
)
// 1, 2 or 3 digits depending on magnitude.
464 int numberLength
= number
> 99 ? 3 : (number
> 9 ? 2 : 1);
// Remember the old end, then make room for the digits.
465 size_t vectorSize
= vector
.size();
466 vector
.grow(vectorSize
+ numberLength
);
// Digits are stored from the last position towards the first.
468 switch (numberLength
) {
470 vector
[vectorSize
+ 2] = number
% 10 + '0';
474 vector
[vectorSize
+ 1] = number
% 10 + '0';
478 vector
[vectorSize
] = number
% 10 + '0';
// Applies the compile-time predicate isSpecialCharacter to each of the `length`
// code units in `characters`.
// NOTE(review): the return statements and closing braces (original lines 483 and
// 486-489) are absent from this listing; presumably the first rejected unit
// returns false and completing the loop returns true — confirm.
482 template<bool isSpecialCharacter(UChar
)> inline bool isAllSpecialCharacters(const UChar
* characters
, size_t length
)
484 for (size_t i
= 0; i
< length
; ++i
) {
485 if (!isSpecialCharacter(characters
[i
]))
491 template<bool isSpecialCharacter(UChar
)> inline bool String::isAllSpecialCharacters() const
493 return WTF::isAllSpecialCharacters
<isSpecialCharacter
>(characters(), length());
496 // StringHash is the default hash for String
497 template<typename T
> struct DefaultHash
;
498 template<> struct DefaultHash
<String
> {
499 typedef StringHash Hash
;
502 template <> struct VectorTraits
<String
> : SimpleClassVectorTraits
{ };
504 // Shared global empty string.
505 const String
& emptyString();
511 using WTF::emptyString
;
513 using WTF::appendNumber
;
514 using WTF::charactersAreAllASCII
;
515 using WTF::charactersAreAllLatin1
;
516 using WTF::charactersToIntStrict
;
517 using WTF::charactersToUIntStrict
;
518 using WTF::charactersToInt64Strict
;
519 using WTF::charactersToUInt64Strict
;
520 using WTF::charactersToIntPtrStrict
;
521 using WTF::charactersToInt
;
522 using WTF::charactersToUInt
;
523 using WTF::charactersToInt64
;
524 using WTF::charactersToUInt64
;
525 using WTF::charactersToIntPtr
;
526 using WTF::charactersToDouble
;
527 using WTF::charactersToFloat
;
529 using WTF::equalIgnoringCase
;
531 using WTF::isAllSpecialCharacters
;
532 using WTF::isSpaceOrNewline
;
533 using WTF::reverseFind
;
535 #include "AtomicString.h"