2 * (C) 1999 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
4 * Copyright (C) 2007-2009 Torch Mobile, Inc.
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
23 #include "WTFString.h"
27 #include <wtf/ASCIICType.h>
28 #include <wtf/text/CString.h>
29 #include <wtf/StringExtras.h>
30 #include <wtf/Vector.h>
32 #include <wtf/unicode/UTF8.h>
33 #include <wtf/unicode/Unicode.h>
36 using namespace WTF::Unicode
;
40 String::String(const UChar
* str
)
46 while (str
[len
] != UChar(0))
49 m_impl
= StringImpl::create(str
, len
);
52 void String::append(const String
& str
)
57 // FIXME: This is extremely inefficient. So much so that we might want to take this
58 // out of String's API. We can make it better by optimizing the case where exactly
59 // one String is pointing at this StringImpl, but even then it's going to require a
60 // call to fastMalloc every single time.
64 RefPtr
<StringImpl
> newImpl
=
65 StringImpl::createUninitialized(m_impl
->length() + str
.length(), data
);
66 memcpy(data
, m_impl
->characters(), m_impl
->length() * sizeof(UChar
));
67 memcpy(data
+ m_impl
->length(), str
.characters(), str
.length() * sizeof(UChar
));
68 m_impl
= newImpl
.release();
74 void String::append(char c
)
76 // FIXME: This is extremely inefficient. So much so that we might want to take this
77 // out of String's API. We can make it better by optimizing the case where exactly
78 // one String is pointing at this StringImpl, but even then it's going to require a
79 // call to fastMalloc every single time.
82 RefPtr
<StringImpl
> newImpl
=
83 StringImpl::createUninitialized(m_impl
->length() + 1, data
);
84 memcpy(data
, m_impl
->characters(), m_impl
->length() * sizeof(UChar
));
85 data
[m_impl
->length()] = c
;
86 m_impl
= newImpl
.release();
88 m_impl
= StringImpl::create(&c
, 1);
91 void String::append(UChar c
)
93 // FIXME: This is extremely inefficient. So much so that we might want to take this
94 // out of String's API. We can make it better by optimizing the case where exactly
95 // one String is pointing at this StringImpl, but even then it's going to require a
96 // call to fastMalloc every single time.
99 RefPtr
<StringImpl
> newImpl
=
100 StringImpl::createUninitialized(m_impl
->length() + 1, data
);
101 memcpy(data
, m_impl
->characters(), m_impl
->length() * sizeof(UChar
));
102 data
[m_impl
->length()] = c
;
103 m_impl
= newImpl
.release();
105 m_impl
= StringImpl::create(&c
, 1);
108 String
operator+(const String
& a
, const String
& b
)
119 String
operator+(const String
& s
, const char* cs
)
121 return s
+ String(cs
);
124 String
operator+(const char* cs
, const String
& s
)
126 return String(cs
) + s
;
129 void String::insert(const String
& str
, unsigned pos
)
138 insert(str
.characters(), str
.length(), pos
);
141 void String::append(const UChar
* charactersToAppend
, unsigned lengthToAppend
)
144 if (!charactersToAppend
)
146 m_impl
= StringImpl::create(charactersToAppend
, lengthToAppend
);
153 ASSERT(charactersToAppend
);
155 RefPtr
<StringImpl
> newImpl
=
156 StringImpl::createUninitialized(length() + lengthToAppend
, data
);
157 memcpy(data
, characters(), length() * sizeof(UChar
));
158 memcpy(data
+ length(), charactersToAppend
, lengthToAppend
* sizeof(UChar
));
159 m_impl
= newImpl
.release();
162 void String::insert(const UChar
* charactersToInsert
, unsigned lengthToInsert
, unsigned position
)
164 if (position
>= length()) {
165 append(charactersToInsert
, lengthToInsert
);
174 ASSERT(charactersToInsert
);
176 RefPtr
<StringImpl
> newImpl
=
177 StringImpl::createUninitialized(length() + lengthToInsert
, data
);
178 memcpy(data
, characters(), position
* sizeof(UChar
));
179 memcpy(data
+ position
, charactersToInsert
, lengthToInsert
* sizeof(UChar
));
180 memcpy(data
+ position
+ lengthToInsert
, characters() + position
, (length() - position
) * sizeof(UChar
));
181 m_impl
= newImpl
.release();
184 UChar32
String::characterStartingAt(unsigned i
) const
186 if (!m_impl
|| i
>= m_impl
->length())
188 return m_impl
->characterStartingAt(i
);
191 void String::truncate(unsigned position
)
193 if (position
>= length())
196 RefPtr
<StringImpl
> newImpl
= StringImpl::createUninitialized(position
, data
);
197 memcpy(data
, characters(), position
* sizeof(UChar
));
198 m_impl
= newImpl
.release();
201 void String::remove(unsigned position
, int lengthToRemove
)
203 if (lengthToRemove
<= 0)
205 if (position
>= length())
207 if (static_cast<unsigned>(lengthToRemove
) > length() - position
)
208 lengthToRemove
= length() - position
;
210 RefPtr
<StringImpl
> newImpl
=
211 StringImpl::createUninitialized(length() - lengthToRemove
, data
);
212 memcpy(data
, characters(), position
* sizeof(UChar
));
213 memcpy(data
+ position
, characters() + position
+ lengthToRemove
,
214 (length() - lengthToRemove
- position
) * sizeof(UChar
));
215 m_impl
= newImpl
.release();
218 String
String::substring(unsigned pos
, unsigned len
) const
222 return m_impl
->substring(pos
, len
);
225 String
String::lower() const
229 return m_impl
->lower();
232 String
String::upper() const
236 return m_impl
->upper();
239 String
String::stripWhiteSpace() const
243 return m_impl
->stripWhiteSpace();
246 String
String::simplifyWhiteSpace() const
250 return m_impl
->simplifyWhiteSpace();
253 String
String::removeCharacters(CharacterMatchFunctionPtr findMatch
) const
257 return m_impl
->removeCharacters(findMatch
);
260 String
String::foldCase() const
264 return m_impl
->foldCase();
267 bool String::percentage(int& result
) const
269 if (!m_impl
|| !m_impl
->length())
272 if ((*m_impl
)[m_impl
->length() - 1] != '%')
275 result
= charactersToIntStrict(m_impl
->characters(), m_impl
->length() - 1);
279 const UChar
* String::charactersWithNullTermination()
283 if (m_impl
->hasTerminatingNullCharacter())
284 return m_impl
->characters();
285 m_impl
= StringImpl::createWithTerminatingNullCharacter(*m_impl
);
286 return m_impl
->characters();
289 String
String::format(const char *format
, ...)
292 // Use QString::vsprintf to avoid the locale dependent formatting of vsnprintf.
293 // https://bugs.webkit.org/show_bug.cgi?id=18994
295 va_start(args
, format
);
298 buffer
.vsprintf(format
, args
);
306 va_start(args
, format
);
308 Vector
<char, 256> buffer
;
310 int bufferSize
= 256;
311 buffer
.resize(bufferSize
);
313 int written
= vsnprintf(buffer
.data(), bufferSize
, format
, args
);
319 return StringImpl::create(buffer
.data(), written
);
322 buffer
.resize(bufferSize
);
323 va_start(args
, format
);
328 va_start(args
, format
);
330 Vector
<char, 256> buffer
;
332 // Do the format once to get the length.
334 int result
= _vscprintf(format
, args
);
337 int result
= vsnprintf(&ch
, 1, format
, args
);
338 // We need to call va_end() and then va_start() again here, as the
339 // contents of args is undefined after the call to vsnprintf
340 // according to http://man.cx/snprintf(3)
342 // Not calling va_end/va_start here happens to work on lots of
343 // systems, but fails e.g. on 64bit Linux.
345 va_start(args
, format
);
352 unsigned len
= result
;
353 buffer
.grow(len
+ 1);
355 // Now do the formatting again, guaranteed to fit.
356 vsnprintf(buffer
.data(), buffer
.size(), format
, args
);
360 return StringImpl::create(buffer
.data(), len
);
364 String
String::number(short n
)
366 return String::format("%hd", n
);
369 String
String::number(unsigned short n
)
371 return String::format("%hu", n
);
374 String
String::number(int n
)
376 return String::format("%d", n
);
379 String
String::number(unsigned n
)
381 return String::format("%u", n
);
384 String
String::number(long n
)
386 return String::format("%ld", n
);
389 String
String::number(unsigned long n
)
391 return String::format("%lu", n
);
394 String
String::number(long long n
)
396 #if OS(WINDOWS) && !PLATFORM(QT)
397 return String::format("%I64i", n
);
399 return String::format("%lli", n
);
403 String
String::number(unsigned long long n
)
405 #if OS(WINDOWS) && !PLATFORM(QT)
406 return String::format("%I64u", n
);
408 return String::format("%llu", n
);
412 String
String::number(double n
)
414 return String::format("%.6lg", n
);
417 int String::toIntStrict(bool* ok
, int base
) const
424 return m_impl
->toIntStrict(ok
, base
);
427 unsigned String::toUIntStrict(bool* ok
, int base
) const
434 return m_impl
->toUIntStrict(ok
, base
);
437 int64_t String::toInt64Strict(bool* ok
, int base
) const
444 return m_impl
->toInt64Strict(ok
, base
);
447 uint64_t String::toUInt64Strict(bool* ok
, int base
) const
454 return m_impl
->toUInt64Strict(ok
, base
);
457 intptr_t String::toIntPtrStrict(bool* ok
, int base
) const
464 return m_impl
->toIntPtrStrict(ok
, base
);
468 int String::toInt(bool* ok
) const
475 return m_impl
->toInt(ok
);
478 unsigned String::toUInt(bool* ok
) const
485 return m_impl
->toUInt(ok
);
488 int64_t String::toInt64(bool* ok
) const
495 return m_impl
->toInt64(ok
);
498 uint64_t String::toUInt64(bool* ok
) const
505 return m_impl
->toUInt64(ok
);
508 intptr_t String::toIntPtr(bool* ok
) const
515 return m_impl
->toIntPtr(ok
);
518 double String::toDouble(bool* ok
) const
525 return m_impl
->toDouble(ok
);
528 float String::toFloat(bool* ok
) const
535 return m_impl
->toFloat(ok
);
538 String
String::threadsafeCopy() const
542 return m_impl
->threadsafeCopy();
545 String
String::crossThreadString() const
549 return m_impl
->crossThreadString();
552 void String::split(const String
& separator
, bool allowEmptyEntries
, Vector
<String
>& result
) const
558 while ((endPos
= find(separator
, startPos
)) != -1) {
559 if (allowEmptyEntries
|| startPos
!= endPos
)
560 result
.append(substring(startPos
, endPos
- startPos
));
561 startPos
= endPos
+ separator
.length();
563 if (allowEmptyEntries
|| startPos
!= static_cast<int>(length()))
564 result
.append(substring(startPos
));
567 void String::split(const String
& separator
, Vector
<String
>& result
) const
569 return split(separator
, false, result
);
572 void String::split(UChar separator
, bool allowEmptyEntries
, Vector
<String
>& result
) const
578 while ((endPos
= find(separator
, startPos
)) != -1) {
579 if (allowEmptyEntries
|| startPos
!= endPos
)
580 result
.append(substring(startPos
, endPos
- startPos
));
581 startPos
= endPos
+ 1;
583 if (allowEmptyEntries
|| startPos
!= static_cast<int>(length()))
584 result
.append(substring(startPos
));
587 void String::split(UChar separator
, Vector
<String
>& result
) const
589 return split(String(&separator
, 1), false, result
);
592 Vector
<char> String::ascii() const
595 return m_impl
->ascii();
597 const char* nullMsg
= "(null impl)";
598 Vector
<char, 2048> buffer
;
599 for (int i
= 0; nullMsg
[i
]; ++i
)
600 buffer
.append(nullMsg
[i
]);
606 CString
String::latin1() const
608 // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
609 // preserved, characters outside of this range are converted to '?'.
611 unsigned length
= this->length();
612 const UChar
* characters
= this->characters();
614 char* characterBuffer
;
615 CString result
= CString::newUninitialized(length
, characterBuffer
);
617 for (unsigned i
= 0; i
< length
; ++i
) {
618 UChar ch
= characters
[i
];
619 characterBuffer
[i
] = ch
> 255 ? '?' : ch
;
625 // Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available.
626 static inline void putUTF8Triple(char*& buffer
, UChar ch
)
628 ASSERT(ch
>= 0x0800);
629 *buffer
++ = static_cast<char>(((ch
>> 12) & 0x0F) | 0xE0);
630 *buffer
++ = static_cast<char>(((ch
>> 6) & 0x3F) | 0x80);
631 *buffer
++ = static_cast<char>((ch
& 0x3F) | 0x80);
634 CString
String::utf8() const
636 unsigned length
= this->length();
637 const UChar
* characters
= this->characters();
639 // Allocate a buffer big enough to hold all the characters
640 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
641 // Optimization ideas, if we find this function is hot:
642 // * We could speculatively create a CStringBuffer to contain 'length'
643 // characters, and resize if necessary (i.e. if the buffer contains
644 // non-ascii characters). (Alternatively, scan the buffer first for
645 // ascii characters, so we know this will be sufficient).
646 // * We could allocate a CStringBuffer with an appropriate size to
647 // have a good chance of being able to write the string into the
648 // buffer without reallocing (say, 1.5 x length).
649 Vector
<char, 1024> bufferVector(length
* 3);
651 char* buffer
= bufferVector
.data();
652 ConversionResult result
= convertUTF16ToUTF8(&characters
, characters
+ length
, &buffer
, buffer
+ bufferVector
.size(), false);
653 ASSERT(result
!= sourceIllegal
); // Only produced from strict conversion.
654 ASSERT(result
!= targetExhausted
); // (length * 3) should be sufficient for any conversion
656 // If a high surrogate is left unconverted, treat it the same was as an unpaired high surrogate
657 // would have been handled in the middle of a string with non-strict conversion - which is to say,
658 // simply encode it to UTF-8.
659 if (result
== sourceExhausted
) {
660 // This should be one unpaired high surrogate.
661 ASSERT((characters
+ 1) == (characters
+ length
));
662 ASSERT((*characters
>= 0xD800) && (*characters
<= 0xDBFF));
663 // There should be room left, since one UChar hasn't been converted.
664 ASSERT((buffer
+ 3) <= (buffer
+ bufferVector
.size()));
665 putUTF8Triple(buffer
, *characters
);
668 return CString(bufferVector
.data(), buffer
- bufferVector
.data());
671 String
String::fromUTF8(const char* stringStart
, size_t length
)
676 // We'll use a StringImpl as a buffer; if the source string only contains ascii this should be
677 // the right length, if there are any multi-byte sequences this buffer will be too large.
679 String
stringBuffer(StringImpl::createUninitialized(length
, buffer
));
680 UChar
* bufferEnd
= buffer
+ length
;
682 // Try converting into the buffer.
683 const char* stringCurrent
= stringStart
;
684 if (convertUTF8ToUTF16(&stringCurrent
, stringStart
+ length
, &buffer
, bufferEnd
) != conversionOK
)
687 // stringBuffer is full (the input must have been all ascii) so just return it!
688 if (buffer
== bufferEnd
)
691 // stringBuffer served its purpose as a buffer, copy the contents out into a new string.
692 unsigned utf16Length
= buffer
- stringBuffer
.characters();
693 ASSERT(utf16Length
< length
);
694 return String(stringBuffer
.characters(), utf16Length
);
697 String
String::fromUTF8(const char* string
)
701 return fromUTF8(string
, strlen(string
));
704 String
String::fromUTF8WithLatin1Fallback(const char* string
, size_t size
)
706 String utf8
= fromUTF8(string
, size
);
708 return String(string
, size
);
714 static bool isCharacterAllowedInBase(UChar c
, int base
)
719 return c
- '0' < base
;
720 if (isASCIIAlpha(c
)) {
723 return (c
>= 'a' && c
< 'a' + base
- 10)
724 || (c
>= 'A' && c
< 'A' + base
- 10);
729 template <typename IntegralType
>
730 static inline IntegralType
toIntegralType(const UChar
* data
, size_t length
, bool* ok
, int base
)
732 static const IntegralType integralMax
= std::numeric_limits
<IntegralType
>::max();
733 static const bool isSigned
= std::numeric_limits
<IntegralType
>::is_signed
;
734 const IntegralType maxMultiplier
= integralMax
/ base
;
736 IntegralType value
= 0;
738 bool isNegative
= false;
743 // skip leading whitespace
744 while (length
&& isSpaceOrNewline(*data
)) {
749 if (isSigned
&& length
&& *data
== '-') {
753 } else if (length
&& *data
== '+') {
758 if (!length
|| !isCharacterAllowedInBase(*data
, base
))
761 while (length
&& isCharacterAllowedInBase(*data
, base
)) {
763 IntegralType digitValue
;
766 digitValue
= c
- '0';
768 digitValue
= c
- 'a' + 10;
770 digitValue
= c
- 'A' + 10;
772 if (value
> maxMultiplier
|| (value
== maxMultiplier
&& digitValue
> (integralMax
% base
) + isNegative
))
775 value
= base
* value
+ digitValue
;
780 #pragma warning(push, 0)
781 #pragma warning(disable:4146)
791 // skip trailing space
792 while (length
&& isSpaceOrNewline(*data
)) {
802 return isOk
? value
: 0;
805 static unsigned lengthOfCharactersAsInteger(const UChar
* data
, size_t length
)
809 // Allow leading spaces.
810 for (; i
!= length
; ++i
) {
811 if (!isSpaceOrNewline(data
[i
]))
816 if (i
!= length
&& (data
[i
] == '+' || data
[i
] == '-'))
820 for (; i
!= length
; ++i
) {
821 if (!isASCIIDigit(data
[i
]))
828 int charactersToIntStrict(const UChar
* data
, size_t length
, bool* ok
, int base
)
830 return toIntegralType
<int>(data
, length
, ok
, base
);
833 unsigned charactersToUIntStrict(const UChar
* data
, size_t length
, bool* ok
, int base
)
835 return toIntegralType
<unsigned>(data
, length
, ok
, base
);
838 int64_t charactersToInt64Strict(const UChar
* data
, size_t length
, bool* ok
, int base
)
840 return toIntegralType
<int64_t>(data
, length
, ok
, base
);
843 uint64_t charactersToUInt64Strict(const UChar
* data
, size_t length
, bool* ok
, int base
)
845 return toIntegralType
<uint64_t>(data
, length
, ok
, base
);
848 intptr_t charactersToIntPtrStrict(const UChar
* data
, size_t length
, bool* ok
, int base
)
850 return toIntegralType
<intptr_t>(data
, length
, ok
, base
);
853 int charactersToInt(const UChar
* data
, size_t length
, bool* ok
)
855 return toIntegralType
<int>(data
, lengthOfCharactersAsInteger(data
, length
), ok
, 10);
858 unsigned charactersToUInt(const UChar
* data
, size_t length
, bool* ok
)
860 return toIntegralType
<unsigned>(data
, lengthOfCharactersAsInteger(data
, length
), ok
, 10);
863 int64_t charactersToInt64(const UChar
* data
, size_t length
, bool* ok
)
865 return toIntegralType
<int64_t>(data
, lengthOfCharactersAsInteger(data
, length
), ok
, 10);
868 uint64_t charactersToUInt64(const UChar
* data
, size_t length
, bool* ok
)
870 return toIntegralType
<uint64_t>(data
, lengthOfCharactersAsInteger(data
, length
), ok
, 10);
873 intptr_t charactersToIntPtr(const UChar
* data
, size_t length
, bool* ok
)
875 return toIntegralType
<intptr_t>(data
, lengthOfCharactersAsInteger(data
, length
), ok
, 10);
878 double charactersToDouble(const UChar
* data
, size_t length
, bool* ok
)
886 Vector
<char, 256> bytes(length
+ 1);
887 for (unsigned i
= 0; i
< length
; ++i
)
888 bytes
[i
] = data
[i
] < 0x7F ? data
[i
] : '?';
889 bytes
[length
] = '\0';
891 double val
= WTF::strtod(bytes
.data(), &end
);
893 *ok
= (end
== 0 || *end
== '\0');
897 float charactersToFloat(const UChar
* data
, size_t length
, bool* ok
)
899 // FIXME: This will return ok even when the string fits into a double but not a float.
900 return static_cast<float>(charactersToDouble(data
, length
, ok
));
903 } // namespace WebCore
906 // For use in the debugger - leaks memory
907 WebCore::String
* string(const char*);
909 WebCore::String
* string(const char* s
)
911 return new WebCore::String(s
);