2 * (C) 1999 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB. If not, write to
17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
25 // This file would be called String.h, but that conflicts with <string.h>
26 // on systems without case-sensitive file systems.
28 #include "StringImpl.h"
31 #include <objc/objc.h>
35 typedef const struct __CFString
* CFStringRef
;
42 #include <QDataStream>
58 // FIXME: This is a temporary layering violation while we move string code to WTF.
59 // Landing the file moves in one patch, will follow on with patches to change the namespaces.
65 // Declarations of string operations
67 bool charactersAreAllASCII(const UChar
*, size_t);
68 int charactersToIntStrict(const UChar
*, size_t, bool* ok
= 0, int base
= 10);
69 unsigned charactersToUIntStrict(const UChar
*, size_t, bool* ok
= 0, int base
= 10);
70 int64_t charactersToInt64Strict(const UChar
*, size_t, bool* ok
= 0, int base
= 10);
71 uint64_t charactersToUInt64Strict(const UChar
*, size_t, bool* ok
= 0, int base
= 10);
72 intptr_t charactersToIntPtrStrict(const UChar
*, size_t, bool* ok
= 0, int base
= 10);
74 int charactersToInt(const UChar
*, size_t, bool* ok
= 0); // ignores trailing garbage
75 unsigned charactersToUInt(const UChar
*, size_t, bool* ok
= 0); // ignores trailing garbage
76 int64_t charactersToInt64(const UChar
*, size_t, bool* ok
= 0); // ignores trailing garbage
77 uint64_t charactersToUInt64(const UChar
*, size_t, bool* ok
= 0); // ignores trailing garbage
78 intptr_t charactersToIntPtr(const UChar
*, size_t, bool* ok
= 0); // ignores trailing garbage
80 double charactersToDouble(const UChar
*, size_t, bool* ok
= 0);
81 float charactersToFloat(const UChar
*, size_t, bool* ok
= 0);
83 int find(const UChar
*, size_t, UChar
, int startPosition
= 0);
84 int reverseFind(const UChar
*, size_t, UChar
, int startPosition
= -1);
88 String() { } // gives null string, distinguishable from an empty string
89 String(const UChar
* str
, unsigned len
)
93 m_impl
= StringImpl::create(str
, len
);
95 String(const char* str
)
99 m_impl
= StringImpl::create(str
);
101 String(const char* str
, unsigned length
)
105 m_impl
= StringImpl::create(str
, length
);
107 String(const UChar
*); // Specifically for null terminated UTF-16
108 String(StringImpl
* i
) : m_impl(i
) { }
109 String(PassRefPtr
<StringImpl
> i
) : m_impl(i
) { }
110 String(RefPtr
<StringImpl
> i
) : m_impl(i
) { }
112 void swap(String
& o
) { m_impl
.swap(o
.m_impl
); }
114 // Hash table deleted values, which are only constructed and never copied or destroyed.
115 String(WTF::HashTableDeletedValueType
) : m_impl(WTF::HashTableDeletedValue
) { }
116 bool isHashTableDeletedValue() const { return m_impl
.isHashTableDeletedValue(); }
118 static String
adopt(StringBuffer
& buffer
) { return StringImpl::adopt(buffer
); }
119 static String
adopt(Vector
<UChar
>& vector
) { return StringImpl::adopt(vector
); }
121 ALWAYS_INLINE
unsigned length() const
125 return m_impl
->length();
128 const UChar
* characters() const
132 return m_impl
->characters();
135 const UChar
* charactersWithNullTermination();
137 UChar
operator[](unsigned i
) const // if i >= length(), returns 0
139 if (!m_impl
|| i
>= m_impl
->length())
141 return m_impl
->characters()[i
];
143 UChar32
characterStartingAt(unsigned) const; // Ditto.
145 bool contains(UChar c
) const { return find(c
) != -1; }
146 bool contains(const char* str
, bool caseSensitive
= true) const { return find(str
, 0, caseSensitive
) != -1; }
147 bool contains(const String
& str
, bool caseSensitive
= true) const { return find(str
, 0, caseSensitive
) != -1; }
149 int find(UChar c
, int start
= 0) const
150 { return m_impl
? m_impl
->find(c
, start
) : -1; }
151 int find(CharacterMatchFunctionPtr matchFunction
, int start
= 0) const
152 { return m_impl
? m_impl
->find(matchFunction
, start
) : -1; }
153 int find(const char* str
, int start
= 0, bool caseSensitive
= true) const
154 { return m_impl
? m_impl
->find(str
, start
, caseSensitive
) : -1; }
155 int find(const String
& str
, int start
= 0, bool caseSensitive
= true) const
156 { return m_impl
? m_impl
->find(str
.impl(), start
, caseSensitive
) : -1; }
158 int reverseFind(UChar c
, int start
= -1) const
159 { return m_impl
? m_impl
->reverseFind(c
, start
) : -1; }
160 int reverseFind(const String
& str
, int start
= -1, bool caseSensitive
= true) const
161 { return m_impl
? m_impl
->reverseFind(str
.impl(), start
, caseSensitive
) : -1; }
163 bool startsWith(const String
& s
, bool caseSensitive
= true) const
164 { return m_impl
? m_impl
->startsWith(s
.impl(), caseSensitive
) : s
.isEmpty(); }
165 bool endsWith(const String
& s
, bool caseSensitive
= true) const
166 { return m_impl
? m_impl
->endsWith(s
.impl(), caseSensitive
) : s
.isEmpty(); }
168 void append(const String
&);
171 void append(const UChar
*, unsigned length
);
172 void insert(const String
&, unsigned pos
);
173 void insert(const UChar
*, unsigned length
, unsigned pos
);
175 String
& replace(UChar a
, UChar b
) { if (m_impl
) m_impl
= m_impl
->replace(a
, b
); return *this; }
176 String
& replace(UChar a
, const String
& b
) { if (m_impl
) m_impl
= m_impl
->replace(a
, b
.impl()); return *this; }
177 String
& replace(const String
& a
, const String
& b
) { if (m_impl
) m_impl
= m_impl
->replace(a
.impl(), b
.impl()); return *this; }
178 String
& replace(unsigned index
, unsigned len
, const String
& b
) { if (m_impl
) m_impl
= m_impl
->replace(index
, len
, b
.impl()); return *this; }
180 void makeLower() { if (m_impl
) m_impl
= m_impl
->lower(); }
181 void makeUpper() { if (m_impl
) m_impl
= m_impl
->upper(); }
182 void makeSecure(UChar aChar
, bool last
= true) { if (m_impl
) m_impl
= m_impl
->secure(aChar
, last
); }
184 void truncate(unsigned len
);
185 void remove(unsigned pos
, int len
= 1);
187 String
substring(unsigned pos
, unsigned len
= UINT_MAX
) const;
188 String
left(unsigned len
) const { return substring(0, len
); }
189 String
right(unsigned len
) const { return substring(length() - len
, len
); }
191 // Returns a lowercase/uppercase version of the string
192 String
lower() const;
193 String
upper() const;
195 String
stripWhiteSpace() const;
196 String
simplifyWhiteSpace() const;
198 String
removeCharacters(CharacterMatchFunctionPtr
) const;
200 // Return the string with case folded for case insensitive comparison.
201 String
foldCase() const;
203 static String
number(short);
204 static String
number(unsigned short);
205 static String
number(int);
206 static String
number(unsigned);
207 static String
number(long);
208 static String
number(unsigned long);
209 static String
number(long long);
210 static String
number(unsigned long long);
211 static String
number(double);
213 static String
format(const char *, ...) WTF_ATTRIBUTE_PRINTF(1, 2);
215 // Returns an uninitialized string. The characters needs to be written
216 // into the buffer returned in data before the returned string is used.
217 // Failure to do this will have unpredictable results.
218 static String
createUninitialized(unsigned length
, UChar
*& data
) { return StringImpl::createUninitialized(length
, data
); }
220 void split(const String
& separator
, Vector
<String
>& result
) const;
221 void split(const String
& separator
, bool allowEmptyEntries
, Vector
<String
>& result
) const;
222 void split(UChar separator
, Vector
<String
>& result
) const;
223 void split(UChar separator
, bool allowEmptyEntries
, Vector
<String
>& result
) const;
225 int toIntStrict(bool* ok
= 0, int base
= 10) const;
226 unsigned toUIntStrict(bool* ok
= 0, int base
= 10) const;
227 int64_t toInt64Strict(bool* ok
= 0, int base
= 10) const;
228 uint64_t toUInt64Strict(bool* ok
= 0, int base
= 10) const;
229 intptr_t toIntPtrStrict(bool* ok
= 0, int base
= 10) const;
231 int toInt(bool* ok
= 0) const;
232 unsigned toUInt(bool* ok
= 0) const;
233 int64_t toInt64(bool* ok
= 0) const;
234 uint64_t toUInt64(bool* ok
= 0) const;
235 intptr_t toIntPtr(bool* ok
= 0) const;
236 double toDouble(bool* ok
= 0) const;
237 float toFloat(bool* ok
= 0) const;
239 bool percentage(int& percentage
) const;
241 // Returns a StringImpl suitable for use on another thread.
242 String
crossThreadString() const;
243 // Makes a deep copy. Helpful only if you need to use a String on another thread
244 // (use crossThreadString if the method call doesn't need to be threadsafe).
245 // Since the underlying StringImpl objects are immutable, there's no other reason
246 // to ever prefer copy() over plain old assignment.
247 String
threadsafeCopy() const;
249 bool isNull() const { return !m_impl
; }
250 ALWAYS_INLINE
bool isEmpty() const { return !m_impl
|| !m_impl
->length(); }
252 StringImpl
* impl() const { return m_impl
.get(); }
256 CFStringRef
createCFString() const;
262 // This conversion maps NULL to "", which loses the meaning of NULL, but we
263 // need this mapping because AppKit crashes when passed nil NSStrings.
264 operator NSString
*() const { if (!m_impl
) return @
""; return *m_impl
; }
268 String(const QString
&);
269 String(const QStringRef
&);
270 operator QString() const;
274 String(const wxString
&);
275 operator wxString() const;
279 String(const BString
&);
280 operator BString() const;
283 Vector
<char> ascii() const;
285 CString
latin1() const;
286 CString
utf8() const;
288 static String
fromUTF8(const char*, size_t);
289 static String
fromUTF8(const char*);
291 // Tries to convert the passed in string to UTF-8, but will fall back to Latin-1 if the string is not valid UTF-8.
292 static String
fromUTF8WithLatin1Fallback(const char*, size_t);
294 // Determines the writing direction using the Unicode Bidi Algorithm rules P2 and P3.
295 WTF::Unicode::Direction
defaultWritingDirection() const { return m_impl
? m_impl
->defaultWritingDirection() : WTF::Unicode::LeftToRight
; }
297 bool containsOnlyASCII() const { return charactersAreAllASCII(characters(), length()); }
300 RefPtr
<StringImpl
> m_impl
;
304 QDataStream
& operator<<(QDataStream
& stream
, const String
& str
);
305 QDataStream
& operator>>(QDataStream
& stream
, String
& str
);
308 String
operator+(const String
&, const String
&);
309 String
operator+(const String
&, const char*);
310 String
operator+(const char*, const String
&);
312 inline String
& operator+=(String
& a
, const String
& b
) { a
.append(b
); return a
; }
314 inline bool operator==(const String
& a
, const String
& b
) { return equal(a
.impl(), b
.impl()); }
315 inline bool operator==(const String
& a
, const char* b
) { return equal(a
.impl(), b
); }
316 inline bool operator==(const char* a
, const String
& b
) { return equal(a
, b
.impl()); }
318 inline bool operator!=(const String
& a
, const String
& b
) { return !equal(a
.impl(), b
.impl()); }
319 inline bool operator!=(const String
& a
, const char* b
) { return !equal(a
.impl(), b
); }
320 inline bool operator!=(const char* a
, const String
& b
) { return !equal(a
, b
.impl()); }
322 inline bool equalIgnoringCase(const String
& a
, const String
& b
) { return equalIgnoringCase(a
.impl(), b
.impl()); }
323 inline bool equalIgnoringCase(const String
& a
, const char* b
) { return equalIgnoringCase(a
.impl(), b
); }
324 inline bool equalIgnoringCase(const char* a
, const String
& b
) { return equalIgnoringCase(a
, b
.impl()); }
326 inline bool equalPossiblyIgnoringCase(const String
& a
, const String
& b
, bool ignoreCase
)
328 return ignoreCase
? equalIgnoringCase(a
, b
) : (a
== b
);
331 inline bool equalIgnoringNullity(const String
& a
, const String
& b
) { return equalIgnoringNullity(a
.impl(), b
.impl()); }
333 inline bool operator!(const String
& str
) { return str
.isNull(); }
335 inline void swap(String
& a
, String
& b
) { a
.swap(b
); }
337 // Definitions of string operations
340 // This is for situations in WebKit where the long standing behavior has been
341 // "nil if empty", so we try to maintain longstanding behavior for the sake of
342 // entrenched clients
343 inline NSString
* nsStringNilIfEmpty(const String
& str
) { return str
.isEmpty() ? nil
: (NSString
*)str
; }
346 inline bool charactersAreAllASCII(const UChar
* characters
, size_t length
)
349 for (size_t i
= 0; i
< length
; ++i
)
350 ored
|= characters
[i
];
351 return !(ored
& 0xFF80);
354 inline int find(const UChar
* characters
, size_t length
, UChar character
, int startPosition
)
356 if (startPosition
>= static_cast<int>(length
))
358 for (size_t i
= startPosition
; i
< length
; ++i
) {
359 if (characters
[i
] == character
)
360 return static_cast<int>(i
);
365 inline int find(const UChar
* characters
, size_t length
, CharacterMatchFunctionPtr matchFunction
, int startPosition
)
367 if (startPosition
>= static_cast<int>(length
))
369 for (size_t i
= startPosition
; i
< length
; ++i
) {
370 if (matchFunction(characters
[i
]))
371 return static_cast<int>(i
);
376 inline int reverseFind(const UChar
* characters
, size_t length
, UChar character
, int startPosition
)
378 if (startPosition
>= static_cast<int>(length
) || !length
)
380 if (startPosition
< 0)
381 startPosition
+= static_cast<int>(length
);
383 if (characters
[startPosition
] == character
)
384 return startPosition
;
389 ASSERT_NOT_REACHED();
393 inline void append(Vector
<UChar
>& vector
, const String
& string
)
395 vector
.append(string
.characters(), string
.length());
398 inline void appendNumber(Vector
<UChar
>& vector
, unsigned char number
)
400 int numberLength
= number
> 99 ? 3 : (number
> 9 ? 2 : 1);
401 size_t vectorSize
= vector
.size();
402 vector
.grow(vectorSize
+ numberLength
);
404 switch (numberLength
) {
406 vector
[vectorSize
+ 2] = number
% 10 + '0';
410 vector
[vectorSize
+ 1] = number
% 10 + '0';
414 vector
[vectorSize
] = number
% 10 + '0';
418 } // namespace WebCore
422 // StringHash is the default hash for String
423 template<typename T
> struct DefaultHash
;
424 template<> struct DefaultHash
<WebCore::String
> {
425 typedef WebCore::StringHash Hash
;