2 * (C) 1999 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010 Apple Inc. All rights reserved.
4 * Copyright (C) 2007-2009 Torch Mobile, Inc.
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
23 #include "WTFString.h"
26 #include <wtf/ASCIICType.h>
27 #include <wtf/text/CString.h>
28 #include <wtf/StringExtras.h>
29 #include <wtf/Vector.h>
31 #include <wtf/unicode/UTF8.h>
32 #include <wtf/unicode/Unicode.h>
35 using namespace WTF::Unicode
;
// Builds a String from a UChar buffer: scans for a UChar(0) terminator to find
// the length, then hands (str, len) to StringImpl::create().
// NOTE(review): this listing appears to have dropped lines (braces, the
// declaration/increment of len, the action taken on overflow) -- confirm
// against the full file.
40 String::String(const UChar
* str
)
// Count characters up to, but not including, the terminating UChar(0).
46 while (str
[len
] != UChar(0))
// Guard against a length larger than numeric_limits<unsigned>::max().
49 if (len
> numeric_limits
<unsigned>::max())
52 m_impl
= StringImpl::create(str
, len
);
// Appends the characters of |str|: allocates a new StringImpl of the combined
// length, copies both character runs into it and replaces m_impl with it.
// NOTE(review): lines are missing from this listing (braces, the declaration
// of |data|, any early-outs, the overflow action) -- confirm against the full file.
55 void String::append(const String
& str
)
60 // FIXME: This is extremely inefficient. So much so that we might want to take this
61 // out of String's API. We can make it better by optimizing the case where exactly
62 // one String is pointing at this StringImpl, but even then it's going to require a
63 // call to fastMalloc every single time.
// Overflow guard: the combined length must not exceed numeric_limits<unsigned>::max().
67 if (str
.length() > numeric_limits
<unsigned>::max() - m_impl
->length())
// |data| is handed to createUninitialized() and then used as the copy destination.
69 RefPtr
<StringImpl
> newImpl
= StringImpl::createUninitialized(m_impl
->length() + str
.length(), data
);
// Existing characters first ...
70 memcpy(data
, m_impl
->characters(), m_impl
->length() * sizeof(UChar
));
// ... then the appended characters directly after them.
71 memcpy(data
+ m_impl
->length(), str
.characters(), str
.length() * sizeof(UChar
));
72 m_impl
= newImpl
.release();
// Appends a single char. Two paths are visible: one that allocates a new
// StringImpl one character longer than the current one and copies the old
// contents plus |c| into it, and one that creates a one-character StringImpl
// from |c| alone.
// NOTE(review): the condition selecting between the two paths, the braces and
// the declaration of |data| are missing from this listing -- confirm.
78 void String::append(char c
)
80 // FIXME: This is extremely inefficient. So much so that we might want to take this
81 // out of String's API. We can make it better by optimizing the case where exactly
82 // one String is pointing at this StringImpl, but even then it's going to require a
83 // call to fastMalloc every single time.
// Overflow guard: there must be room for one more character.
86 if (m_impl
->length() >= numeric_limits
<unsigned>::max())
88 RefPtr
<StringImpl
> newImpl
= StringImpl::createUninitialized(m_impl
->length() + 1, data
);
89 memcpy(data
, m_impl
->characters(), m_impl
->length() * sizeof(UChar
));
// The new character goes in the last slot of the new buffer.
90 data
[m_impl
->length()] = c
;
91 m_impl
= newImpl
.release();
// Alternative path: the string becomes just the single character.
93 m_impl
= StringImpl::create(&c
, 1);
// Appends a single UChar. Two paths are visible: one that allocates a new
// StringImpl one character longer than the current one and copies the old
// contents plus |c| into it, and one that creates a one-character StringImpl
// from |c| alone.
// NOTE(review): the condition selecting between the two paths, the braces and
// the declaration of |data| are missing from this listing -- confirm.
96 void String::append(UChar c
)
98 // FIXME: This is extremely inefficient. So much so that we might want to take this
99 // out of String's API. We can make it better by optimizing the case where exactly
100 // one String is pointing at this StringImpl, but even then it's going to require a
101 // call to fastMalloc every single time.
// Overflow guard: there must be room for one more character.
104 if (m_impl
->length() >= numeric_limits
<unsigned>::max())
106 RefPtr
<StringImpl
> newImpl
= StringImpl::createUninitialized(m_impl
->length() + 1, data
);
107 memcpy(data
, m_impl
->characters(), m_impl
->length() * sizeof(UChar
));
// The new character goes in the last slot of the new buffer.
108 data
[m_impl
->length()] = c
;
109 m_impl
= newImpl
.release();
// Alternative path: the string becomes just the single character.
111 m_impl
= StringImpl::create(&c
, 1);
// operator+ taking two Strings and returning a String.
// NOTE(review): only the signature survives in this listing; the body is not
// visible here, so its behavior must be confirmed against the full file.
114 String
operator+(const String
& a
, const String
& b
)
125 String
operator+(const String
& s
, const char* cs
)
127 return s
+ String(cs
);
130 String
operator+(const char* cs
, const String
& s
)
132 return String(cs
) + s
;
// Inserts |str| at |pos| by delegating to the (characters, length, position)
// overload of insert().
// NOTE(review): the lines between the signature and this call are missing from
// the listing -- confirm what happens before the delegation.
135 void String::insert(const String
& str
, unsigned pos
)
144 insert(str
.characters(), str
.length(), pos
);
// Appends |lengthToAppend| UChars from |charactersToAppend|. Visible paths:
// creating a fresh StringImpl directly from the input, and allocating a new
// StringImpl of length() + lengthToAppend into which the old and new
// characters are copied.
// NOTE(review): braces, the conditions selecting each path, the declaration of
// |data| and the overflow action are missing from this listing -- confirm.
147 void String::append(const UChar
* charactersToAppend
, unsigned lengthToAppend
)
150 if (!charactersToAppend
)
152 m_impl
= StringImpl::create(charactersToAppend
, lengthToAppend
);
159 ASSERT(charactersToAppend
);
// Overflow guard: the combined length must not exceed numeric_limits<unsigned>::max().
161 if (lengthToAppend
> numeric_limits
<unsigned>::max() - length())
163 RefPtr
<StringImpl
> newImpl
= StringImpl::createUninitialized(length() + lengthToAppend
, data
);
// Existing characters first, appended characters directly after them.
164 memcpy(data
, characters(), length() * sizeof(UChar
));
165 memcpy(data
+ length(), charactersToAppend
, lengthToAppend
* sizeof(UChar
));
166 m_impl
= newImpl
.release();
// Inserts |lengthToInsert| UChars at |position|. A position at or past the end
// is handled by append(); otherwise a new StringImpl of
// length() + lengthToInsert is filled with head, inserted run and tail.
// NOTE(review): braces, the declaration of |data|, any early-outs and the
// overflow action are missing from this listing -- confirm.
169 void String::insert(const UChar
* charactersToInsert
, unsigned lengthToInsert
, unsigned position
)
// Inserting at or beyond the end is the same as appending.
171 if (position
>= length()) {
172 append(charactersToInsert
, lengthToInsert
);
181 ASSERT(charactersToInsert
);
// Overflow guard: the combined length must not exceed numeric_limits<unsigned>::max().
183 if (lengthToInsert
> numeric_limits
<unsigned>::max() - length())
185 RefPtr
<StringImpl
> newImpl
= StringImpl::createUninitialized(length() + lengthToInsert
, data
);
// Head: the first |position| existing characters.
186 memcpy(data
, characters(), position
* sizeof(UChar
));
// Middle: the inserted characters.
187 memcpy(data
+ position
, charactersToInsert
, lengthToInsert
* sizeof(UChar
));
// Tail: the remaining length() - position existing characters.
188 memcpy(data
+ position
+ lengthToInsert
, characters() + position
, (length() - position
) * sizeof(UChar
));
189 m_impl
= newImpl
.release();
// Returns the UChar32 obtained from StringImpl::characterStartingAt(i).
// NOTE(review): the statement executed when the guard below is true is missing
// from this listing -- confirm the value returned for a null or out-of-range case.
192 UChar32
String::characterStartingAt(unsigned i
) const
// Guard: null impl, or index at/after the end of the string.
194 if (!m_impl
|| i
>= m_impl
->length())
196 return m_impl
->characterStartingAt(i
);
// Shortens the string to |position| characters by copying the leading
// characters into a newly allocated StringImpl and replacing m_impl.
// NOTE(review): braces, the declaration of |data| and the statement under the
// guard are missing from this listing -- confirm.
199 void String::truncate(unsigned position
)
// Guard for a position at or past the current end.
201 if (position
>= length())
204 RefPtr
<StringImpl
> newImpl
= StringImpl::createUninitialized(position
, data
);
205 memcpy(data
, characters(), position
* sizeof(UChar
));
206 m_impl
= newImpl
.release();
// Removes up to |lengthToRemove| characters starting at |position| by building
// a new StringImpl from the characters before and after the removed range.
// NOTE(review): braces, the declaration of |data| and the statements under the
// first two guards are missing from this listing -- confirm.
209 void String::remove(unsigned position
, int lengthToRemove
)
// Guard for a non-positive removal length.
211 if (lengthToRemove
<= 0)
// Guard for a start position at or past the end.
213 if (position
>= length())
// Clamp the removal so it does not run past the end of the string.
215 if (static_cast<unsigned>(lengthToRemove
) > length() - position
)
216 lengthToRemove
= length() - position
;
218 RefPtr
<StringImpl
> newImpl
= StringImpl::createUninitialized(length() - lengthToRemove
, data
);
// Characters before the removed range.
219 memcpy(data
, characters(), position
* sizeof(UChar
));
// Characters after the removed range, moved down to |position|.
220 memcpy(data
+ position
, characters() + position
+ lengthToRemove
,
221 (length() - lengthToRemove
- position
) * sizeof(UChar
));
222 m_impl
= newImpl
.release();
// Returns the result of StringImpl::substring(pos, len) on m_impl.
// NOTE(review): the lines between the signature and the return are missing
// from this listing -- confirm how a null m_impl is handled.
225 String
String::substring(unsigned pos
, unsigned len
) const
229 return m_impl
->substring(pos
, len
);
// Returns the result of StringImpl::lower() on m_impl.
// NOTE(review): the lines between the signature and the return are missing
// from this listing -- confirm how a null m_impl is handled.
232 String
String::lower() const
236 return m_impl
->lower();
// Returns the result of StringImpl::upper() on m_impl.
// NOTE(review): the lines between the signature and the return are missing
// from this listing -- confirm how a null m_impl is handled.
239 String
String::upper() const
243 return m_impl
->upper();
// Returns the result of StringImpl::stripWhiteSpace() on m_impl.
// NOTE(review): the lines between the signature and the return are missing
// from this listing -- confirm how a null m_impl is handled.
246 String
String::stripWhiteSpace() const
250 return m_impl
->stripWhiteSpace();
// Returns the result of StringImpl::simplifyWhiteSpace() on m_impl.
// NOTE(review): the lines between the signature and the return are missing
// from this listing -- confirm how a null m_impl is handled.
253 String
String::simplifyWhiteSpace() const
257 return m_impl
->simplifyWhiteSpace();
// Returns the result of StringImpl::removeCharacters(findMatch) on m_impl;
// |findMatch| is passed through unchanged.
// NOTE(review): the lines between the signature and the return are missing
// from this listing -- confirm how a null m_impl is handled.
260 String
String::removeCharacters(CharacterMatchFunctionPtr findMatch
) const
264 return m_impl
->removeCharacters(findMatch
);
// Returns the result of StringImpl::foldCase() on m_impl.
// NOTE(review): the lines between the signature and the return are missing
// from this listing -- confirm how a null m_impl is handled.
267 String
String::foldCase() const
271 return m_impl
->foldCase();
// Parses a string ending in '%': everything before the final character is
// converted with charactersToIntStrict() and stored in |result|.
// NOTE(review): the return statements (including those under the two guards)
// are missing from this listing -- confirm the returned values.
274 bool String::percentage(int& result
) const
// Guard: null or empty string.
276 if (!m_impl
|| !m_impl
->length())
// Guard: the last character must be '%'.
279 if ((*m_impl
)[m_impl
->length() - 1] != '%')
// Parse all characters except the trailing '%'.
282 result
= charactersToIntStrict(m_impl
->characters(), m_impl
->length() - 1);
// Returns the character pointer of an impl that has a terminating null
// character. If the current impl already has one its characters are returned
// directly; otherwise m_impl is replaced by the result of
// StringImpl::createWithTerminatingNullCharacter(*m_impl) first.
// Non-const: it may replace m_impl.
// NOTE(review): lines before the first check are missing from this listing --
// confirm how a null m_impl is handled.
286 const UChar
* String::charactersWithNullTermination()
290 if (m_impl
->hasTerminatingNullCharacter())
291 return m_impl
->characters();
292 m_impl
= StringImpl::createWithTerminatingNullCharacter(*m_impl
);
293 return m_impl
->characters();
// printf-style formatting into a String. Three variants are visible:
//  - one that formats through buffer.vsprintf() (QString::vsprintf per the comment);
//  - one that formats with vsnprintf() into a 256-char Vector and resizes it;
//  - one that first measures the output (_vscprintf, or a 1-byte vsnprintf),
//    grows a Vector to len + 1 and formats again.
// NOTE(review): the preprocessor conditionals selecting among the variants,
// the va_list declaration, va_end calls, braces and loop structure are missing
// from this listing -- confirm against the full file.
296 String
String::format(const char *format
, ...)
299 // Use QString::vsprintf to avoid the locale dependent formatting of vsnprintf.
300 // https://bugs.webkit.org/show_bug.cgi?id=18994
302 va_start(args
, format
);
305 buffer
.vsprintf(format
, args
);
313 va_start(args
, format
);
315 Vector
<char, 256> buffer
;
// Start with the Vector's inline capacity as the formatting buffer size.
317 int bufferSize
= 256;
318 buffer
.resize(bufferSize
);
320 int written
= vsnprintf(buffer
.data(), bufferSize
, format
, args
);
326 return StringImpl::create(buffer
.data(), written
);
// Resize and restart the argument list for another formatting attempt.
329 buffer
.resize(bufferSize
);
330 va_start(args
, format
);
335 va_start(args
, format
);
337 Vector
<char, 256> buffer
;
339 // Do the format once to get the length.
341 int result
= _vscprintf(format
, args
);
// Measuring call: formats into a 1-byte buffer just to obtain the needed length.
344 int result
= vsnprintf(&ch
, 1, format
, args
);
345 // We need to call va_end() and then va_start() again here, as the
346 // contents of args is undefined after the call to vsnprintf
347 // according to http://man.cx/snprintf(3)
349 // Not calling va_end/va_start here happens to work on lots of
350 // systems, but fails e.g. on 64bit Linux.
352 va_start(args
, format
);
359 unsigned len
= result
;
// One extra slot beyond len for the terminator vsnprintf writes.
360 buffer
.grow(len
+ 1);
362 // Now do the formatting again, guaranteed to fit.
363 vsnprintf(buffer
.data(), buffer
.size(), format
, args
);
367 return StringImpl::create(buffer
.data(), len
);
371 String
String::number(short n
)
373 return String::format("%hd", n
);
376 String
String::number(unsigned short n
)
378 return String::format("%hu", n
);
381 String
String::number(int n
)
383 return String::format("%d", n
);
386 String
String::number(unsigned n
)
388 return String::format("%u", n
);
391 String
String::number(long n
)
393 return String::format("%ld", n
);
396 String
String::number(unsigned long n
)
398 return String::format("%lu", n
);
401 String
String::number(long long n
)
403 #if OS(WINDOWS) && !PLATFORM(QT)
404 return String::format("%I64i", n
);
406 return String::format("%lli", n
);
410 String
String::number(unsigned long long n
)
412 #if OS(WINDOWS) && !PLATFORM(QT)
413 return String::format("%I64u", n
);
415 return String::format("%llu", n
);
419 String
String::number(double n
)
421 return String::format("%.6lg", n
);
// Returns StringImpl::toIntStrict(ok, base) on m_impl; |ok| and |base| are
// passed through unchanged.
// NOTE(review): the lines between the signature and the return are missing
// from this listing -- confirm how a null m_impl is handled.
424 int String::toIntStrict(bool* ok
, int base
) const
431 return m_impl
->toIntStrict(ok
, base
);
// Returns StringImpl::toUIntStrict(ok, base) on m_impl; |ok| and |base| are
// passed through unchanged.
// NOTE(review): the lines between the signature and the return are missing
// from this listing -- confirm how a null m_impl is handled.
434 unsigned String::toUIntStrict(bool* ok
, int base
) const
441 return m_impl
->toUIntStrict(ok
, base
);
// Returns StringImpl::toInt64Strict(ok, base) on m_impl; |ok| and |base| are
// passed through unchanged.
// NOTE(review): the lines between the signature and the return are missing
// from this listing -- confirm how a null m_impl is handled.
444 int64_t String::toInt64Strict(bool* ok
, int base
) const
451 return m_impl
->toInt64Strict(ok
, base
);
// Returns StringImpl::toUInt64Strict(ok, base) on m_impl; |ok| and |base| are
// passed through unchanged.
// NOTE(review): the lines between the signature and the return are missing
// from this listing -- confirm how a null m_impl is handled.
454 uint64_t String::toUInt64Strict(bool* ok
, int base
) const
461 return m_impl
->toUInt64Strict(ok
, base
);
// Returns StringImpl::toIntPtrStrict(ok, base) on m_impl; |ok| and |base| are
// passed through unchanged.
// NOTE(review): the lines between the signature and the return are missing
// from this listing -- confirm how a null m_impl is handled.
464 intptr_t String::toIntPtrStrict(bool* ok
, int base
) const
471 return m_impl
->toIntPtrStrict(ok
, base
);
// Returns StringImpl::toInt(ok) on m_impl; |ok| is passed through unchanged.
// NOTE(review): the lines between the signature and the return are missing
// from this listing -- confirm how a null m_impl is handled.
475 int String::toInt(bool* ok
) const
482 return m_impl
->toInt(ok
);
// Returns StringImpl::toUInt(ok) on m_impl; |ok| is passed through unchanged.
// NOTE(review): the lines between the signature and the return are missing
// from this listing -- confirm how a null m_impl is handled.
485 unsigned String::toUInt(bool* ok
) const
492 return m_impl
->toUInt(ok
);
// Returns StringImpl::toInt64(ok) on m_impl; |ok| is passed through unchanged.
// NOTE(review): the lines between the signature and the return are missing
// from this listing -- confirm how a null m_impl is handled.
495 int64_t String::toInt64(bool* ok
) const
502 return m_impl
->toInt64(ok
);
// Returns StringImpl::toUInt64(ok) on m_impl; |ok| is passed through unchanged.
// NOTE(review): the lines between the signature and the return are missing
// from this listing -- confirm how a null m_impl is handled.
505 uint64_t String::toUInt64(bool* ok
) const
512 return m_impl
->toUInt64(ok
);
// Returns StringImpl::toIntPtr(ok) on m_impl; |ok| is passed through unchanged.
// NOTE(review): the lines between the signature and the return are missing
// from this listing -- confirm how a null m_impl is handled.
515 intptr_t String::toIntPtr(bool* ok
) const
522 return m_impl
->toIntPtr(ok
);
// Returns StringImpl::toDouble(ok) on m_impl; |ok| is passed through unchanged.
// NOTE(review): the lines between the signature and the return are missing
// from this listing -- confirm how a null m_impl is handled.
525 double String::toDouble(bool* ok
) const
532 return m_impl
->toDouble(ok
);
// Returns StringImpl::toFloat(ok) on m_impl; |ok| is passed through unchanged.
// NOTE(review): the lines between the signature and the return are missing
// from this listing -- confirm how a null m_impl is handled.
535 float String::toFloat(bool* ok
) const
542 return m_impl
->toFloat(ok
);
// Returns the result of StringImpl::threadsafeCopy() on m_impl.
// NOTE(review): the lines between the signature and the return are missing
// from this listing -- confirm how a null m_impl is handled.
545 String
String::threadsafeCopy() const
549 return m_impl
->threadsafeCopy();
// Returns the result of StringImpl::crossThreadString() on m_impl.
// NOTE(review): the lines between the signature and the return are missing
// from this listing -- confirm how a null m_impl is handled.
552 String
String::crossThreadString() const
556 return m_impl
->crossThreadString();
// Splits this string on every occurrence of |separator|, appending the pieces
// to |result|. Empty pieces are appended only when |allowEmptyEntries| is true.
// NOTE(review): the declarations/initialisation of startPos and endPos, any
// preparation of |result| and the closing braces are missing from this listing.
// NOTE(review): startPos advances by separator.length(); if find() can match
// an empty separator at startPos the loop would not advance -- confirm.
559 void String::split(const String
& separator
, bool allowEmptyEntries
, Vector
<String
>& result
) const
// find() returning -1 ends the scan.
565 while ((endPos
= find(separator
, startPos
)) != -1) {
566 if (allowEmptyEntries
|| startPos
!= endPos
)
567 result
.append(substring(startPos
, endPos
- startPos
));
// Continue searching just past the separator that was found.
568 startPos
= endPos
+ separator
.length();
// Trailing piece after the last separator (skipped when empty and not allowed).
570 if (allowEmptyEntries
|| startPos
!= static_cast<int>(length()))
571 result
.append(substring(startPos
));
574 void String::split(const String
& separator
, Vector
<String
>& result
) const
576 return split(separator
, false, result
);
// Splits this string on every occurrence of the single character |separator|,
// appending the pieces to |result|. Empty pieces are appended only when
// |allowEmptyEntries| is true.
// NOTE(review): the declarations/initialisation of startPos and endPos, any
// preparation of |result| and the closing braces are missing from this listing.
579 void String::split(UChar separator
, bool allowEmptyEntries
, Vector
<String
>& result
) const
// find() returning -1 ends the scan.
585 while ((endPos
= find(separator
, startPos
)) != -1) {
586 if (allowEmptyEntries
|| startPos
!= endPos
)
587 result
.append(substring(startPos
, endPos
- startPos
));
// Continue searching one character past the separator that was found.
588 startPos
= endPos
+ 1;
// Trailing piece after the last separator (skipped when empty and not allowed).
590 if (allowEmptyEntries
|| startPos
!= static_cast<int>(length()))
591 result
.append(substring(startPos
));
594 void String::split(UChar separator
, Vector
<String
>& result
) const
596 return split(String(&separator
, 1), false, result
);
// Returns the string as a Vector<char>. One visible path returns
// StringImpl::ascii() on m_impl; the other fills a buffer with the characters
// of the literal "(null impl)".
// NOTE(review): the condition selecting between the paths, the braces and
// everything after the copy loop (termination, return) are missing from this
// listing -- confirm.
599 Vector
<char> String::ascii() const
602 return m_impl
->ascii();
// Placeholder text used for the other path.
604 const char* nullMsg
= "(null impl)";
605 Vector
<char, 2048> buffer
;
// Copy the message up to, but not including, its terminating NUL.
606 for (int i
= 0; nullMsg
[i
]; ++i
)
607 buffer
.append(nullMsg
[i
]);
// Converts the string to a CString of single-byte characters: code units up to
// 255 are stored as-is, anything larger is replaced by '?'.
// NOTE(review): the braces and the return statement are missing from this
// listing -- presumably |result| is returned; confirm.
613 CString
String::latin1() const
615 // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
616 // preserved, characters outside of this range are converted to '?'.
618 unsigned length
= this->length();
619 const UChar
* characters
= this->characters();
// characterBuffer is passed to newUninitialized() and then written through below.
621 char* characterBuffer
;
622 CString result
= CString::newUninitialized(length
, characterBuffer
);
624 for (unsigned i
= 0; i
< length
; ++i
) {
625 UChar ch
= characters
[i
];
626 characterBuffer
[i
] = ch
> 255 ? '?' : ch
;
632 // Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available.
633 static inline void putUTF8Triple(char*& buffer
, UChar ch
)
635 ASSERT(ch
>= 0x0800);
636 *buffer
++ = static_cast<char>(((ch
>> 12) & 0x0F) | 0xE0);
637 *buffer
++ = static_cast<char>(((ch
>> 6) & 0x3F) | 0x80);
638 *buffer
++ = static_cast<char>((ch
& 0x3F) | 0x80);
// Converts the string to UTF-8 and returns it as a CString. The conversion
// runs non-strict (last argument false) into a scratch Vector sized at three
// bytes per UChar; a trailing unconverted UChar is encoded by hand with
// putUTF8Triple().
// NOTE(review): braces and the statement under the size guard are missing from
// this listing -- confirm against the full file.
641 CString
String::utf8() const
643 unsigned length
= this->length();
644 const UChar
* characters
= this->characters();
646 // Allocate a buffer big enough to hold all the characters
647 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
648 // Optimization ideas, if we find this function is hot:
649 // * We could speculatively create a CStringBuffer to contain 'length'
650 // characters, and resize if necessary (i.e. if the buffer contains
651 // non-ascii characters). (Alternatively, scan the buffer first for
652 // ascii characters, so we know this will be sufficient).
653 // * We could allocate a CStringBuffer with an appropriate size to
654 // have a good chance of being able to write the string into the
655 // buffer without reallocing (say, 1.5 x length).
// Guard: length * 3 below must not overflow unsigned.
656 if (length
> numeric_limits
<unsigned>::max() / 3)
658 Vector
<char, 1024> bufferVector(length
* 3);
// |characters| and |buffer| are passed by address and are used afterwards as
// the positions where conversion stopped.
660 char* buffer
= bufferVector
.data();
661 ConversionResult result
= convertUTF16ToUTF8(&characters
, characters
+ length
, &buffer
, buffer
+ bufferVector
.size(), false);
662 ASSERT(result
!= sourceIllegal
); // Only produced from strict conversion.
663 ASSERT(result
!= targetExhausted
); // (length * 3) should be sufficient for any conversion
665 // If a high surrogate is left unconverted, treat it the same way as an unpaired high surrogate
666 // would have been handled in the middle of a string with non-strict conversion - which is to say,
667 // simply encode it to UTF-8.
668 if (result
== sourceExhausted
) {
669 // This should be one unpaired high surrogate.
670 ASSERT((characters
+ 1) == (characters
+ length
));
671 ASSERT((*characters
>= 0xD800) && (*characters
<= 0xDBFF));
672 // There should be room left, since one UChar hasn't been converted.
// NOTE(review): both sides of this comparison start from the current |buffer|,
// so it only checks bufferVector.size() >= 3, not the room actually remaining
// -- confirm intent.
673 ASSERT((buffer
+ 3) <= (buffer
+ bufferVector
.size()));
674 putUTF8Triple(buffer
, *characters
);
// The result length is the number of bytes written into the scratch vector.
677 return CString(bufferVector
.data(), buffer
- bufferVector
.data());
// Builds a String from |length| bytes of UTF-8 starting at |stringStart|.
// A StringImpl of |length| UChars is used as the conversion target; if the
// conversion fills it exactly it is used as-is, otherwise the converted prefix
// is copied into a new String of the exact length.
// NOTE(review): braces, the declaration of |buffer| and the statements under
// the guards (size limit, conversion failure, full buffer) are missing from
// this listing -- confirm the values returned on those paths.
680 String
String::fromUTF8(const char* stringStart
, size_t length
)
// Guard: the byte count must fit in an unsigned length.
682 if (length
> numeric_limits
<unsigned>::max())
688 // We'll use a StringImpl as a buffer; if the source string only contains ascii this should be
689 // the right length, if there are any multi-byte sequences this buffer will be too large.
691 String
stringBuffer(StringImpl::createUninitialized(length
, buffer
));
692 UChar
* bufferEnd
= buffer
+ length
;
694 // Try converting into the buffer.
695 const char* stringCurrent
= stringStart
;
696 if (convertUTF8ToUTF16(&stringCurrent
, stringStart
+ length
, &buffer
, bufferEnd
) != conversionOK
)
699 // stringBuffer is full (the input must have been all ascii) so just return it!
700 if (buffer
== bufferEnd
)
703 // stringBuffer served its purpose as a buffer, copy the contents out into a new string.
// |buffer| was advanced by the conversion; its distance from the start is the UTF-16 length.
704 unsigned utf16Length
= buffer
- stringBuffer
.characters();
705 ASSERT(utf16Length
< length
);
706 return String(stringBuffer
.characters(), utf16Length
);
// Builds a String from a NUL-terminated UTF-8 C string by measuring it with
// strlen() and delegating to fromUTF8(const char*, size_t).
// NOTE(review): the lines between the signature and the return are missing
// from this listing -- confirm how a null |string| is handled.
709 String
String::fromUTF8(const char* string
)
713 return fromUTF8(string
, strlen(string
));
// Attempts a UTF-8 conversion of (string, size) via fromUTF8(); a second
// visible path constructs the String directly from (string, size) instead.
// NOTE(review): the condition choosing the fallback and the other return are
// missing from this listing -- presumably the fallback is taken when the UTF-8
// conversion yields a null String; confirm.
716 String
String::fromUTF8WithLatin1Fallback(const char* string
, size_t size
)
718 String utf8
= fromUTF8(string
, size
);
720 return String(string
, size
);
// Reports whether |c| is a valid digit in the given numeric |base|.
// Visible checks: a digit value (c - '0') below |base|, and for ASCII letters
// a letter within the first (base - 10) letters of either case.
// NOTE(review): the conditions guarding the first return, the base check
// inside the letter branch and the final return are missing from this listing.
726 static bool isCharacterAllowedInBase(UChar c
, int base
)
// Decimal-digit case: the digit's value must be below the base.
731 return c
- '0' < base
;
732 if (isASCIIAlpha(c
)) {
// Letters stand for 10 and up: 'a'/'A' is 10, so base - 10 letters are valid.
735 return (c
>= 'a' && c
< 'a' + base
- 10)
736 || (c
>= 'A' && c
< 'A' + base
- 10);
// Parses an integer of type IntegralType from |length| UChars at |data| in the
// given |base|. Visible steps: skip leading whitespace, accept an optional
// sign ('-' only for signed types), require at least one valid digit,
// accumulate digits with an overflow check, skip trailing whitespace, and
// return the value or 0 depending on isOk.
// NOTE(review): many lines are missing from this listing (braces, loop bodies
// that advance |data|/|length|, the failure exits, negation, how isOk and *ok
// are set) -- confirm against the full file.
741 template <typename IntegralType
>
742 static inline IntegralType
toIntegralType(const UChar
* data
, size_t length
, bool* ok
, int base
)
744 static const IntegralType integralMax
= numeric_limits
<IntegralType
>::max();
745 static const bool isSigned
= numeric_limits
<IntegralType
>::is_signed
;
// Largest accumulated value that can still be multiplied by |base| without
// exceeding integralMax.
746 const IntegralType maxMultiplier
= integralMax
/ base
;
748 IntegralType value
= 0;
750 bool isNegative
= false;
755 // skip leading whitespace
756 while (length
&& isSpaceOrNewline(*data
)) {
// A leading '-' is only recognised for signed target types.
761 if (isSigned
&& length
&& *data
== '-') {
765 } else if (length
&& *data
== '+') {
// At least one valid digit must follow.
770 if (!length
|| !isCharacterAllowedInBase(*data
, base
))
773 while (length
&& isCharacterAllowedInBase(*data
, base
)) {
775 IntegralType digitValue
;
// Digit value for '0'-'9', 'a'-'z' and 'A'-'Z' respectively.
778 digitValue
= c
- '0';
780 digitValue
= c
- 'a' + 10;
782 digitValue
= c
- 'A' + 10;
// Overflow check before accumulating; the "+ isNegative" term permits one
// extra unit of magnitude when the input is negative.
784 if (value
> maxMultiplier
|| (value
== maxMultiplier
&& digitValue
> (integralMax
% base
) + isNegative
))
787 value
= base
* value
+ digitValue
;
792 #pragma warning(push, 0)
793 #pragma warning(disable:4146)
803 // skip trailing space
804 while (length
&& isSpaceOrNewline(*data
)) {
// A failed parse yields 0.
814 return isOk
? value
: 0;
// Scans |data| for an integer-looking prefix: leading whitespace, an optional
// '+' or '-', then ASCII digits.
// NOTE(review): the declaration of |i|, the statements inside the loops and
// after the sign check, and the return are missing from this listing --
// presumably the index where scanning stopped is returned; confirm.
817 static unsigned lengthOfCharactersAsInteger(const UChar
* data
, size_t length
)
821 // Allow leading spaces.
822 for (; i
!= length
; ++i
) {
823 if (!isSpaceOrNewline(data
[i
]))
// Optional sign.
828 if (i
!= length
&& (data
[i
] == '+' || data
[i
] == '-'))
// Digits.
832 for (; i
!= length
; ++i
) {
833 if (!isASCIIDigit(data
[i
]))
840 int charactersToIntStrict(const UChar
* data
, size_t length
, bool* ok
, int base
)
842 return toIntegralType
<int>(data
, length
, ok
, base
);
845 unsigned charactersToUIntStrict(const UChar
* data
, size_t length
, bool* ok
, int base
)
847 return toIntegralType
<unsigned>(data
, length
, ok
, base
);
850 int64_t charactersToInt64Strict(const UChar
* data
, size_t length
, bool* ok
, int base
)
852 return toIntegralType
<int64_t>(data
, length
, ok
, base
);
855 uint64_t charactersToUInt64Strict(const UChar
* data
, size_t length
, bool* ok
, int base
)
857 return toIntegralType
<uint64_t>(data
, length
, ok
, base
);
860 intptr_t charactersToIntPtrStrict(const UChar
* data
, size_t length
, bool* ok
, int base
)
862 return toIntegralType
<intptr_t>(data
, length
, ok
, base
);
865 int charactersToInt(const UChar
* data
, size_t length
, bool* ok
)
867 return toIntegralType
<int>(data
, lengthOfCharactersAsInteger(data
, length
), ok
, 10);
870 unsigned charactersToUInt(const UChar
* data
, size_t length
, bool* ok
)
872 return toIntegralType
<unsigned>(data
, lengthOfCharactersAsInteger(data
, length
), ok
, 10);
875 int64_t charactersToInt64(const UChar
* data
, size_t length
, bool* ok
)
877 return toIntegralType
<int64_t>(data
, lengthOfCharactersAsInteger(data
, length
), ok
, 10);
880 uint64_t charactersToUInt64(const UChar
* data
, size_t length
, bool* ok
)
882 return toIntegralType
<uint64_t>(data
, lengthOfCharactersAsInteger(data
, length
), ok
, 10);
885 intptr_t charactersToIntPtr(const UChar
* data
, size_t length
, bool* ok
)
887 return toIntegralType
<intptr_t>(data
, lengthOfCharactersAsInteger(data
, length
), ok
, 10);
// Parses a double from |length| UChars at |data|: the characters are narrowed
// into a NUL-terminated char buffer and handed to WTF::strtod().
// NOTE(review): braces, the declaration of |end|, any early-outs, the null
// check on |ok| and the return are missing from this listing -- confirm.
890 double charactersToDouble(const UChar
* data
, size_t length
, bool* ok
)
// One extra slot for the terminating NUL.
898 Vector
<char, 256> bytes(length
+ 1);
// Code units below 0x7F are copied as-is; everything else becomes '?'.
899 for (unsigned i
= 0; i
< length
; ++i
)
900 bytes
[i
] = data
[i
] < 0x7F ? data
[i
] : '?';
901 bytes
[length
] = '\0';
903 double val
= WTF::strtod(bytes
.data(), &end
);
// Success means strtod consumed the whole buffer (or reported no end pointer).
905 *ok
= (end
== 0 || *end
== '\0');
909 float charactersToFloat(const UChar
* data
, size_t length
, bool* ok
)
911 // FIXME: This will return ok even when the string fits into a double but not a float.
912 return static_cast<float>(charactersToDouble(data
, length
, ok
));
915 } // namespace WebCore
918 // For use in the debugger - leaks memory
919 WebCore::String
* string(const char*);
921 WebCore::String
* string(const char* s
)
923 return new WebCore::String(s
);