2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5 * Copyright (c) 2009, Google Inc. All rights reserved.
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
27 #include "JSGlobalObjectFunctions.h"
28 #include "Collector.h"
30 #include "Identifier.h"
31 #include "Operations.h"
38 #include <wtf/ASCIICType.h>
39 #include <wtf/Assertions.h>
40 #include <wtf/MathExtras.h>
41 #include <wtf/Vector.h>
42 #include <wtf/unicode/UTF8.h>
52 using namespace WTF::Unicode
;
// This can be tuned differently per platform by putting platform #ifs right here.
// If you don't define this macro at all, then copyChars will just call memcpy directly.
58 #define USTRING_COPY_CHARS_INLINE_CUTOFF 20
62 extern const double NaN
;
63 extern const double Inf
;
// Sentinel size value: the largest value a size_t can hold. expandedSize()
// returns it to report that a requested capacity cannot be represented.
static inline size_t overflowIndicator()
{
    const size_t largestSize = std::numeric_limits<size_t>::max();
    return largestSize;
}
66 static inline size_t maxUChars() { return std::numeric_limits
<size_t>::max() / sizeof(UChar
); }
68 static inline UChar
* allocChars(size_t length
)
71 if (length
> maxUChars())
73 return static_cast<UChar
*>(tryFastMalloc(sizeof(UChar
) * length
));
76 static inline UChar
* reallocChars(UChar
* buffer
, size_t length
)
79 if (length
> maxUChars())
81 return static_cast<UChar
*>(tryFastRealloc(buffer
, sizeof(UChar
) * length
));
84 static inline void copyChars(UChar
* destination
, const UChar
* source
, unsigned numCharacters
)
86 #ifdef USTRING_COPY_CHARS_INLINE_CUTOFF
87 if (numCharacters
<= USTRING_COPY_CHARS_INLINE_CUTOFF
) {
88 for (unsigned i
= 0; i
< numCharacters
; ++i
)
89 destination
[i
] = source
[i
];
93 memcpy(destination
, source
, numCharacters
* sizeof(UChar
));
96 COMPILE_ASSERT(sizeof(UChar
) == 2, uchar_is_2_bytes
)
98 CString::CString(const char* c
)
100 , m_data(new char[m_length
+ 1])
102 memcpy(m_data
, c
, m_length
+ 1);
105 CString::CString(const char* c
, size_t length
)
107 , m_data(new char[length
+ 1])
109 memcpy(m_data
, c
, m_length
);
110 m_data
[m_length
] = 0;
113 CString::CString(const CString
& b
)
115 m_length
= b
.m_length
;
117 m_data
= new char[m_length
+ 1];
118 memcpy(m_data
, b
.m_data
, m_length
+ 1);
128 CString
CString::adopt(char* c
, size_t length
)
136 CString
& CString::append(const CString
& t
)
139 n
= new char[m_length
+ t
.m_length
+ 1];
141 memcpy(n
, m_data
, m_length
);
143 memcpy(n
+ m_length
, t
.m_data
, t
.m_length
);
144 m_length
+= t
.m_length
;
153 CString
& CString::operator=(const char* c
)
157 m_length
= strlen(c
);
158 m_data
= new char[m_length
+ 1];
159 memcpy(m_data
, c
, m_length
+ 1);
164 CString
& CString::operator=(const CString
& str
)
171 m_length
= str
.m_length
;
173 m_data
= new char[m_length
+ 1];
174 memcpy(m_data
, str
.m_data
, m_length
+ 1);
181 bool operator==(const CString
& c1
, const CString
& c2
)
183 size_t len
= c1
.size();
184 return len
== c2
.size() && (len
== 0 || memcmp(c1
.c_str(), c2
.c_str(), len
) == 0);
187 // These static strings are immutable, except for rc, whose initial value is chosen to
188 // reduce the possibility of it becoming zero due to ref/deref not being thread-safe.
189 static UChar sharedEmptyChar
;
190 UString::BaseString
* UString::Rep::nullBaseString
;
191 UString::BaseString
* UString::Rep::emptyBaseString
;
192 UString
* UString::nullUString
;
194 static void initializeStaticBaseString(int len
, UChar
* buf
, UString::BaseString
& base
)
198 base
.rc
= INT_MAX
/ 2;
200 base
.m_identifierTableAndFlags
.setFlag(UString::Rep::StaticFlag
);
201 base
.m_baseString
= 0;
203 base
.preCapacity
= 0;
204 base
.usedPreCapacity
= 0;
206 base
.usedCapacity
= 0;
207 base
.reportedCost
= 0;
208 base
.checkConsistency();
211 void initializeUString()
213 UString::Rep::nullBaseString
= new UString::BaseString
;
214 initializeStaticBaseString(0, 0, *UString::Rep::nullBaseString
);
216 UString::Rep::emptyBaseString
= new UString::BaseString
;
217 initializeStaticBaseString(0, &sharedEmptyChar
, *UString::Rep::emptyBaseString
);
219 UString::nullUString
= new UString
;
222 static char* statBuffer
= 0; // Only used for debugging via UString::ascii().
224 PassRefPtr
<UString::Rep
> UString::Rep::createCopying(const UChar
* d
, int l
)
226 UChar
* copyD
= static_cast<UChar
*>(fastMalloc(l
* sizeof(UChar
)));
227 copyChars(copyD
, d
, l
);
228 return create(copyD
, l
);
231 PassRefPtr
<UString::Rep
> UString::Rep::create(UChar
* d
, int l
)
233 BaseString
* r
= new BaseString
;
243 r
->usedPreCapacity
= 0;
246 r
->checkConsistency();
248 // steal the single reference this Rep was created with
252 PassRefPtr
<UString::Rep
> UString::Rep::create(PassRefPtr
<Rep
> rep
, int offset
, int length
)
255 rep
->checkConsistency();
257 int repOffset
= rep
->offset
;
259 PassRefPtr
<BaseString
> base
= rep
->baseString();
261 ASSERT(-(offset
+ repOffset
) <= base
->usedPreCapacity
);
262 ASSERT(offset
+ repOffset
+ length
<= base
->usedCapacity
);
265 r
->offset
= repOffset
+ offset
;
269 r
->setBaseString(base
);
271 r
->checkConsistency();
273 // steal the single reference this Rep was created with
277 PassRefPtr
<UString::Rep
> UString::Rep::createFromUTF8(const char* string
)
280 return &UString::Rep::null();
282 size_t length
= strlen(string
);
283 Vector
<UChar
, 1024> buffer(length
);
284 UChar
* p
= buffer
.data();
285 if (conversionOK
!= convertUTF8ToUTF16(&string
, string
+ length
, &p
, p
+ length
))
286 return &UString::Rep::null();
288 return UString::Rep::createCopying(buffer
.data(), p
- buffer
.data());
291 void UString::Rep::destroy()
295 // Static null and empty strings can never be destroyed, but we cannot rely on
296 // reference counting, because ref/deref are not thread-safe.
298 if (identifierTable())
299 Identifier::remove(this);
300 UString::BaseString
* base
= baseString();
310 // Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
311 // or anything like that.
312 const unsigned PHI
= 0x9e3779b9U
;
314 // Paul Hsieh's SuperFastHash
315 // http://www.azillionmonkeys.com/qed/hash.html
316 unsigned UString::Rep::computeHash(const UChar
* s
, int len
)
328 tmp
= (s
[1] << 11) ^ hash
;
329 hash
= (hash
<< 16) ^ tmp
;
341 // Force "avalanching" of final 127 bits
348 // this avoids ever returning a hash code of 0, since that is used to
349 // signal "hash not computed yet", using a value that is likely to be
350 // effectively the same as 0 when the low bits are masked
357 // Paul Hsieh's SuperFastHash
358 // http://www.azillionmonkeys.com/qed/hash.html
359 unsigned UString::Rep::computeHash(const char* s
, int l
)
361 // This hash is designed to work on 16-bit chunks at a time. But since the normal case
362 // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
363 // were 16-bit chunks, which should give matching results
373 hash
+= static_cast<unsigned char>(s
[0]);
374 tmp
= (static_cast<unsigned char>(s
[1]) << 11) ^ hash
;
375 hash
= (hash
<< 16) ^ tmp
;
382 hash
+= static_cast<unsigned char>(s
[0]);
387 // Force "avalanching" of final 127 bits
394 // this avoids ever returning a hash code of 0, since that is used to
395 // signal "hash not computed yet", using a value that is likely to be
396 // effectively the same as 0 when the low bits are masked
404 void UString::Rep::checkConsistency() const
406 const UString::BaseString
* base
= baseString();
408 // There is no recursion for base strings.
409 ASSERT(base
== base
->baseString());
412 // There are only two static strings: null and empty.
415 // Static strings cannot get in identifier tables, because they are globally shared.
416 ASSERT(!identifierTable());
419 // The string fits in buffer.
420 ASSERT(base
->usedPreCapacity
<= base
->preCapacity
);
421 ASSERT(base
->usedCapacity
<= base
->capacity
);
422 ASSERT(-offset
<= base
->usedPreCapacity
);
423 ASSERT(offset
+ len
<= base
->usedCapacity
);
427 // put these early so they can be inlined
428 static inline size_t expandedSize(size_t size
, size_t otherSize
)
430 // Do the size calculation in two parts, returning overflowIndicator if
431 // we overflow the maximum value that we can handle.
433 if (size
> maxUChars())
434 return overflowIndicator();
436 size_t expandedSize
= ((size
+ 10) / 10 * 11) + 1;
437 if (maxUChars() - expandedSize
< otherSize
)
438 return overflowIndicator();
440 return expandedSize
+ otherSize
;
443 static inline bool expandCapacity(UString::Rep
* rep
, int requiredLength
)
445 rep
->checkConsistency();
447 UString::BaseString
* base
= rep
->baseString();
449 if (requiredLength
> base
->capacity
) {
450 size_t newCapacity
= expandedSize(requiredLength
, base
->preCapacity
);
451 UChar
* oldBuf
= base
->buf
;
452 base
->buf
= reallocChars(base
->buf
, newCapacity
);
457 base
->capacity
= newCapacity
- base
->preCapacity
;
459 if (requiredLength
> base
->usedCapacity
)
460 base
->usedCapacity
= requiredLength
;
462 rep
->checkConsistency();
466 void UString::expandCapacity(int requiredLength
)
468 if (!JSC::expandCapacity(m_rep
.get(), requiredLength
))
472 void UString::expandPreCapacity(int requiredPreCap
)
474 m_rep
->checkConsistency();
476 BaseString
* base
= m_rep
->baseString();
478 if (requiredPreCap
> base
->preCapacity
) {
479 size_t newCapacity
= expandedSize(requiredPreCap
, base
->capacity
);
480 int delta
= newCapacity
- base
->capacity
- base
->preCapacity
;
482 UChar
* newBuf
= allocChars(newCapacity
);
487 copyChars(newBuf
+ delta
, base
->buf
, base
->capacity
+ base
->preCapacity
);
491 base
->preCapacity
= newCapacity
- base
->capacity
;
493 if (requiredPreCap
> base
->usedPreCapacity
)
494 base
->usedPreCapacity
= requiredPreCap
;
496 m_rep
->checkConsistency();
499 static PassRefPtr
<UString::Rep
> createRep(const char* c
)
502 return &UString::Rep::null();
505 return &UString::Rep::empty();
507 size_t length
= strlen(c
);
508 UChar
* d
= allocChars(length
);
510 return &UString::Rep::null();
512 for (size_t i
= 0; i
< length
; i
++)
513 d
[i
] = static_cast<unsigned char>(c
[i
]); // use unsigned char to zero-extend instead of sign-extend
514 return UString::Rep::create(d
, static_cast<int>(length
));
519 UString::UString(const char* c
)
520 : m_rep(createRep(c
))
524 UString::UString(const UChar
* c
, int length
)
527 m_rep
= &Rep::empty();
529 m_rep
= Rep::createCopying(c
, length
);
532 UString::UString(UChar
* c
, int length
, bool copy
)
535 m_rep
= &Rep::empty();
537 m_rep
= Rep::createCopying(c
, length
);
539 m_rep
= Rep::create(c
, length
);
542 UString::UString(const Vector
<UChar
>& buffer
)
545 m_rep
= &Rep::empty();
547 m_rep
= Rep::createCopying(buffer
.data(), buffer
.size());
550 static ALWAYS_INLINE
int newCapacityWithOverflowCheck(const int currentCapacity
, const int extendLength
, const bool plusOne
= false)
552 ASSERT_WITH_MESSAGE(extendLength
>= 0, "extendedLength = %d", extendLength
);
554 const int plusLength
= plusOne
? 1 : 0;
555 if (currentCapacity
> std::numeric_limits
<int>::max() - extendLength
- plusLength
)
558 return currentCapacity
+ extendLength
+ plusLength
;
561 static ALWAYS_INLINE PassRefPtr
<UString::Rep
> concatenate(PassRefPtr
<UString::Rep
> r
, const UChar
* tData
, int tSize
)
563 RefPtr
<UString::Rep
> rep
= r
;
565 rep
->checkConsistency();
567 int thisSize
= rep
->size();
568 int thisOffset
= rep
->offset
;
569 int length
= thisSize
+ tSize
;
570 UString::BaseString
* base
= rep
->baseString();
575 } else if (thisSize
== 0) {
577 rep
= UString::Rep::createCopying(tData
, tSize
);
578 } else if (rep
== base
&& rep
->rc
== 1) {
579 // this is direct and has refcount of 1 (so we can just alter it directly)
580 if (!expandCapacity(rep
.get(), newCapacityWithOverflowCheck(thisOffset
, length
)))
581 rep
= &UString::Rep::null();
583 copyChars(rep
->data() + thisSize
, tData
, tSize
);
587 } else if (thisOffset
+ thisSize
== base
->usedCapacity
&& thisSize
>= minShareSize
) {
588 // this reaches the end of the buffer - extend it if it's long enough to append to
589 if (!expandCapacity(rep
.get(), newCapacityWithOverflowCheck(thisOffset
, length
)))
590 rep
= &UString::Rep::null();
592 copyChars(rep
->data() + thisSize
, tData
, tSize
);
593 rep
= UString::Rep::create(rep
, 0, length
);
596 // this is shared with someone using more capacity, gotta make a whole new string
597 size_t newCapacity
= expandedSize(length
, 0);
598 UChar
* d
= allocChars(newCapacity
);
600 rep
= &UString::Rep::null();
602 copyChars(d
, rep
->data(), thisSize
);
603 copyChars(d
+ thisSize
, tData
, tSize
);
604 rep
= UString::Rep::create(d
, length
);
605 rep
->baseString()->capacity
= newCapacity
;
609 rep
->checkConsistency();
611 return rep
.release();
614 static ALWAYS_INLINE PassRefPtr
<UString::Rep
> concatenate(PassRefPtr
<UString::Rep
> r
, const char* t
)
616 RefPtr
<UString::Rep
> rep
= r
;
618 rep
->checkConsistency();
620 int thisSize
= rep
->size();
621 int thisOffset
= rep
->offset
;
622 int tSize
= static_cast<int>(strlen(t
));
623 int length
= thisSize
+ tSize
;
624 UString::BaseString
* base
= rep
->baseString();
630 } else if (tSize
== 0) {
631 // t is empty, we'll just return *this below.
632 } else if (rep
== base
&& rep
->rc
== 1) {
633 // this is direct and has refcount of 1 (so we can just alter it directly)
634 expandCapacity(rep
.get(), newCapacityWithOverflowCheck(thisOffset
, length
));
635 UChar
* d
= rep
->data();
637 for (int i
= 0; i
< tSize
; ++i
)
638 d
[thisSize
+ i
] = static_cast<unsigned char>(t
[i
]); // use unsigned char to zero-extend instead of sign-extend
642 } else if (thisOffset
+ thisSize
== base
->usedCapacity
&& thisSize
>= minShareSize
) {
643 // this string reaches the end of the buffer - extend it
644 expandCapacity(rep
.get(), newCapacityWithOverflowCheck(thisOffset
, length
));
645 UChar
* d
= rep
->data();
647 for (int i
= 0; i
< tSize
; ++i
)
648 d
[thisSize
+ i
] = static_cast<unsigned char>(t
[i
]); // use unsigned char to zero-extend instead of sign-extend
649 rep
= UString::Rep::create(rep
, 0, length
);
652 // this is shared with someone using more capacity, gotta make a whole new string
653 size_t newCapacity
= expandedSize(length
, 0);
654 UChar
* d
= allocChars(newCapacity
);
656 rep
= &UString::Rep::null();
658 copyChars(d
, rep
->data(), thisSize
);
659 for (int i
= 0; i
< tSize
; ++i
)
660 d
[thisSize
+ i
] = static_cast<unsigned char>(t
[i
]); // use unsigned char to zero-extend instead of sign-extend
661 rep
= UString::Rep::create(d
, length
);
662 rep
->baseString()->capacity
= newCapacity
;
666 rep
->checkConsistency();
668 return rep
.release();
671 PassRefPtr
<UString::Rep
> concatenate(UString::Rep
* a
, UString::Rep
* b
)
673 a
->checkConsistency();
674 b
->checkConsistency();
676 int aSize
= a
->size();
677 int aOffset
= a
->offset
;
678 int bSize
= b
->size();
679 int bOffset
= b
->offset
;
680 int length
= aSize
+ bSize
;
691 UString::BaseString
* aBase
= a
->baseString();
692 if (bSize
== 1 && aOffset
+ aSize
== aBase
->usedCapacity
&& aOffset
+ length
<= aBase
->capacity
) {
693 // b is a single character (common fast case)
694 aBase
->usedCapacity
= aOffset
+ length
;
695 a
->data()[aSize
] = b
->data()[0];
696 return UString::Rep::create(a
, 0, length
);
699 UString::BaseString
* bBase
= b
->baseString();
700 if (aOffset
+ aSize
== aBase
->usedCapacity
&& aSize
>= minShareSize
&& 4 * aSize
>= bSize
701 && (-bOffset
!= bBase
->usedPreCapacity
|| aSize
>= bSize
)) {
702 // - a reaches the end of its buffer so it qualifies for shared append
703 // - also, it's at least a quarter the length of b - appending to a much shorter
704 // string does more harm than good
705 // - however, if b qualifies for prepend and is longer than a, we'd rather prepend
708 x
.expandCapacity(newCapacityWithOverflowCheck(aOffset
, length
));
709 if (!a
->data() || !x
.data())
711 copyChars(a
->data() + aSize
, b
->data(), bSize
);
712 PassRefPtr
<UString::Rep
> result
= UString::Rep::create(a
, 0, length
);
714 a
->checkConsistency();
715 b
->checkConsistency();
716 result
->checkConsistency();
721 if (-bOffset
== bBase
->usedPreCapacity
&& bSize
>= minShareSize
&& 4 * bSize
>= aSize
) {
722 // - b reaches the beginning of its buffer so it qualifies for shared prepend
723 // - also, it's at least a quarter the length of a - prepending to a much shorter
724 // string does more harm than good
726 y
.expandPreCapacity(-bOffset
+ aSize
);
727 if (!b
->data() || !y
.data())
729 copyChars(b
->data() - aSize
, a
->data(), aSize
);
730 PassRefPtr
<UString::Rep
> result
= UString::Rep::create(b
, -aSize
, length
);
732 a
->checkConsistency();
733 b
->checkConsistency();
734 result
->checkConsistency();
739 // a does not qualify for append, and b does not qualify for prepend, gotta make a whole new string
740 size_t newCapacity
= expandedSize(length
, 0);
741 UChar
* d
= allocChars(newCapacity
);
744 copyChars(d
, a
->data(), aSize
);
745 copyChars(d
+ aSize
, b
->data(), bSize
);
746 PassRefPtr
<UString::Rep
> result
= UString::Rep::create(d
, length
);
747 result
->baseString()->capacity
= newCapacity
;
749 a
->checkConsistency();
750 b
->checkConsistency();
751 result
->checkConsistency();
756 PassRefPtr
<UString::Rep
> concatenate(UString::Rep
* rep
, int i
)
758 UChar buf
[1 + sizeof(i
) * 3];
759 UChar
* end
= buf
+ sizeof(buf
) / sizeof(UChar
);
764 else if (i
== INT_MIN
) {
765 char minBuf
[1 + sizeof(i
) * 3];
766 sprintf(minBuf
, "%d", INT_MIN
);
767 return concatenate(rep
, minBuf
);
769 bool negative
= false;
775 *--p
= static_cast<unsigned short>((i
% 10) + '0');
782 return concatenate(rep
, p
, static_cast<int>(end
- p
));
786 PassRefPtr
<UString::Rep
> concatenate(UString::Rep
* rep
, double d
)
788 // avoid ever printing -NaN, in JS conceptually there is only one NaN value
790 return concatenate(rep
, "NaN");
792 if (d
== 0.0) // stringify -0 as 0
799 char* result
= WTF::dtoa(d
, 0, &decimalPoint
, &sign
, NULL
);
800 int length
= static_cast<int>(strlen(result
));
806 if (decimalPoint
<= 0 && decimalPoint
> -6) {
809 for (int j
= decimalPoint
; j
< 0; j
++)
811 strcpy(buf
+ i
, result
);
812 } else if (decimalPoint
<= 21 && decimalPoint
> 0) {
813 if (length
<= decimalPoint
) {
814 strcpy(buf
+ i
, result
);
816 for (int j
= 0; j
< decimalPoint
- length
; j
++)
820 strncpy(buf
+ i
, result
, decimalPoint
);
823 strcpy(buf
+ i
, result
+ decimalPoint
);
825 } else if (result
[0] < '0' || result
[0] > '9')
826 strcpy(buf
+ i
, result
);
828 buf
[i
++] = result
[0];
831 strcpy(buf
+ i
, result
+ 1);
836 buf
[i
++] = (decimalPoint
>= 0) ? '+' : '-';
837 // decimalPoint can't be more than 3 digits decimal given the
838 // nature of float representation
839 int exponential
= decimalPoint
- 1;
841 exponential
= -exponential
;
842 if (exponential
>= 100)
843 buf
[i
++] = static_cast<char>('0' + exponential
/ 100);
844 if (exponential
>= 10)
845 buf
[i
++] = static_cast<char>('0' + (exponential
% 100) / 10);
846 buf
[i
++] = static_cast<char>('0' + exponential
% 10);
850 WTF::freedtoa(result
);
852 return concatenate(rep
, buf
);
855 UString
UString::from(int i
)
857 UChar buf
[1 + sizeof(i
) * 3];
858 UChar
* end
= buf
+ sizeof(buf
) / sizeof(UChar
);
863 else if (i
== INT_MIN
) {
864 char minBuf
[1 + sizeof(i
) * 3];
865 snprintf(minBuf
, 1 + sizeof(i
) * 3, "%d", INT_MIN
);
866 return UString(minBuf
);
868 bool negative
= false;
874 *--p
= static_cast<unsigned short>((i
% 10) + '0');
881 return UString(p
, static_cast<int>(end
- p
));
884 UString
UString::from(unsigned int u
)
886 UChar buf
[sizeof(u
) * 3];
887 UChar
* end
= buf
+ sizeof(buf
) / sizeof(UChar
);
894 *--p
= static_cast<unsigned short>((u
% 10) + '0');
899 return UString(p
, static_cast<int>(end
- p
));
902 UString
UString::from(long l
)
904 UChar buf
[1 + sizeof(l
) * 3];
905 UChar
* end
= buf
+ sizeof(buf
) / sizeof(UChar
);
910 else if (l
== LONG_MIN
) {
911 char minBuf
[1 + sizeof(l
) * 3];
912 snprintf(minBuf
, 1 + sizeof(l
) * 3, "%ld", LONG_MIN
);
913 return UString(minBuf
);
915 bool negative
= false;
921 *--p
= static_cast<unsigned short>((l
% 10) + '0');
928 return UString(p
, static_cast<int>(end
- p
));
931 UString
UString::from(double d
)
933 // avoid ever printing -NaN, in JS conceptually there is only one NaN value
941 char* result
= WTF::dtoa(d
, 0, &decimalPoint
, &sign
, NULL
);
942 int length
= static_cast<int>(strlen(result
));
948 if (decimalPoint
<= 0 && decimalPoint
> -6) {
951 for (int j
= decimalPoint
; j
< 0; j
++)
953 strlcpy(buf
+ i
, result
, sizeof(buf
) - i
);
954 } else if (decimalPoint
<= 21 && decimalPoint
> 0) {
955 if (length
<= decimalPoint
) {
956 strlcpy(buf
+ i
, result
, sizeof(buf
) - i
);
958 for (int j
= 0; j
< decimalPoint
- length
; j
++)
962 int len
= (decimalPoint
<= static_cast<int>(sizeof(buf
)) - i
? decimalPoint
: sizeof(buf
) - i
);
963 strncpy(buf
+ i
, result
, len
);
966 strlcpy(buf
+ i
, result
+ decimalPoint
, sizeof(buf
) - i
);
968 } else if (result
[0] < '0' || result
[0] > '9')
969 strlcpy(buf
+ i
, result
, sizeof(buf
) - i
);
971 buf
[i
++] = result
[0];
974 strlcpy(buf
+ i
, result
+ 1, sizeof(buf
) - i
);
979 buf
[i
++] = (decimalPoint
>= 0) ? '+' : '-';
980 // decimalPoint can't be more than 3 digits decimal given the
981 // nature of float representation
982 int exponential
= decimalPoint
- 1;
984 exponential
= -exponential
;
985 if (exponential
>= 100)
986 buf
[i
++] = static_cast<char>('0' + exponential
/ 100);
987 if (exponential
>= 10)
988 buf
[i
++] = static_cast<char>('0' + (exponential
% 100) / 10);
989 buf
[i
++] = static_cast<char>('0' + exponential
% 10);
991 ASSERT(i
<= static_cast<int>(sizeof(buf
)));
994 WTF::freedtoa(result
);
999 UString
UString::spliceSubstringsWithSeparators(const Range
* substringRanges
, int rangeCount
, const UString
* separators
, int separatorCount
) const
1001 m_rep
->checkConsistency();
1003 if (rangeCount
== 1 && separatorCount
== 0) {
1004 int thisSize
= size();
1005 int position
= substringRanges
[0].position
;
1006 int length
= substringRanges
[0].length
;
1007 if (position
<= 0 && length
>= thisSize
)
1009 return UString::Rep::create(m_rep
, max(0, position
), min(thisSize
, length
));
1012 int totalLength
= 0;
1013 for (int i
= 0; i
< rangeCount
; i
++)
1014 totalLength
+= substringRanges
[i
].length
;
1015 for (int i
= 0; i
< separatorCount
; i
++)
1016 totalLength
+= separators
[i
].size();
1018 if (totalLength
== 0)
1021 UChar
* buffer
= allocChars(totalLength
);
1025 int maxCount
= max(rangeCount
, separatorCount
);
1027 for (int i
= 0; i
< maxCount
; i
++) {
1028 if (i
< rangeCount
) {
1029 copyChars(buffer
+ bufferPos
, data() + substringRanges
[i
].position
, substringRanges
[i
].length
);
1030 bufferPos
+= substringRanges
[i
].length
;
1032 if (i
< separatorCount
) {
1033 copyChars(buffer
+ bufferPos
, separators
[i
].data(), separators
[i
].size());
1034 bufferPos
+= separators
[i
].size();
1038 return UString::Rep::create(buffer
, totalLength
);
1041 UString
& UString::append(const UString
&t
)
1043 m_rep
->checkConsistency();
1044 t
.rep()->checkConsistency();
1046 int thisSize
= size();
1047 int thisOffset
= m_rep
->offset
;
1048 int tSize
= t
.size();
1049 int length
= thisSize
+ tSize
;
1050 BaseString
* base
= m_rep
->baseString();
1053 if (thisSize
== 0) {
1056 } else if (tSize
== 0) {
1058 } else if (m_rep
== base
&& m_rep
->rc
== 1) {
1059 // this is direct and has refcount of 1 (so we can just alter it directly)
1060 expandCapacity(newCapacityWithOverflowCheck(thisOffset
, length
));
1062 copyChars(m_rep
->data() + thisSize
, t
.data(), tSize
);
1063 m_rep
->len
= length
;
1066 } else if (thisOffset
+ thisSize
== base
->usedCapacity
&& thisSize
>= minShareSize
) {
1067 // this reaches the end of the buffer - extend it if it's long enough to append to
1068 expandCapacity(newCapacityWithOverflowCheck(thisOffset
, length
));
1070 copyChars(m_rep
->data() + thisSize
, t
.data(), tSize
);
1071 m_rep
= Rep::create(m_rep
, 0, length
);
1074 // this is shared with someone using more capacity, gotta make a whole new string
1075 size_t newCapacity
= expandedSize(length
, 0);
1076 UChar
* d
= allocChars(newCapacity
);
1080 copyChars(d
, data(), thisSize
);
1081 copyChars(d
+ thisSize
, t
.data(), tSize
);
1082 m_rep
= Rep::create(d
, length
);
1083 m_rep
->baseString()->capacity
= newCapacity
;
1087 m_rep
->checkConsistency();
1088 t
.rep()->checkConsistency();
1093 UString
& UString::append(const UChar
* tData
, int tSize
)
1095 m_rep
= concatenate(m_rep
.release(), tData
, tSize
);
1099 UString
& UString::append(const char* t
)
1101 m_rep
= concatenate(m_rep
.release(), t
);
1105 UString
& UString::append(UChar c
)
1107 m_rep
->checkConsistency();
1109 int thisOffset
= m_rep
->offset
;
1110 int length
= size();
1111 BaseString
* base
= m_rep
->baseString();
1115 // this is empty - must make a new m_rep because we don't want to pollute the shared empty one
1116 size_t newCapacity
= expandedSize(1, 0);
1117 UChar
* d
= allocChars(newCapacity
);
1122 m_rep
= Rep::create(d
, 1);
1123 m_rep
->baseString()->capacity
= newCapacity
;
1125 } else if (m_rep
== base
&& m_rep
->rc
== 1) {
1126 // this is direct and has refcount of 1 (so we can just alter it directly)
1127 expandCapacity(newCapacityWithOverflowCheck(thisOffset
, length
, true));
1128 UChar
* d
= m_rep
->data();
1131 m_rep
->len
= length
+ 1;
1134 } else if (thisOffset
+ length
== base
->usedCapacity
&& length
>= minShareSize
) {
1135 // this reaches the end of the string - extend it and share
1136 expandCapacity(newCapacityWithOverflowCheck(thisOffset
, length
, true));
1137 UChar
* d
= m_rep
->data();
1140 m_rep
= Rep::create(m_rep
, 0, length
+ 1);
1143 // this is shared with someone using more capacity, gotta make a whole new string
1144 size_t newCapacity
= expandedSize(length
+ 1, 0);
1145 UChar
* d
= allocChars(newCapacity
);
1149 copyChars(d
, data(), length
);
1151 m_rep
= Rep::create(d
, length
+ 1);
1152 m_rep
->baseString()->capacity
= newCapacity
;
1156 m_rep
->checkConsistency();
1161 bool UString::getCString(CStringBuffer
& buffer
) const
1163 int length
= size();
1164 int neededSize
= length
+ 1;
1165 buffer
.resize(neededSize
);
1166 char* buf
= buffer
.data();
1169 const UChar
* p
= data();
1171 const UChar
* limit
= p
+ length
;
1172 while (p
!= limit
) {
1175 *q
= static_cast<char>(c
);
1181 return !(ored
& 0xFF00);
1184 char* UString::ascii() const
1186 int length
= size();
1187 int neededSize
= length
+ 1;
1188 delete[] statBuffer
;
1189 statBuffer
= new char[neededSize
];
1191 const UChar
* p
= data();
1192 char* q
= statBuffer
;
1193 const UChar
* limit
= p
+ length
;
1194 while (p
!= limit
) {
1195 *q
= static_cast<char>(p
[0]);
1204 UString
& UString::operator=(const char* c
)
1207 m_rep
= &Rep::null();
1212 m_rep
= &Rep::empty();
1216 int l
= static_cast<int>(strlen(c
));
1218 BaseString
* base
= m_rep
->baseString();
1219 if (m_rep
->rc
== 1 && l
<= base
->capacity
&& m_rep
== base
&& m_rep
->offset
== 0 && base
->preCapacity
== 0) {
1229 m_rep
= Rep::create(d
, l
);
1231 for (int i
= 0; i
< l
; i
++)
1232 d
[i
] = static_cast<unsigned char>(c
[i
]); // use unsigned char to zero-extend instead of sign-extend
1237 bool UString::is8Bit() const
1239 const UChar
* u
= data();
1240 const UChar
* limit
= u
+ size();
1250 UChar
UString::operator[](int pos
) const
1257 double UString::toDouble(bool tolerateTrailingJunk
, bool tolerateEmptyString
) const
1260 UChar c
= data()[0];
1261 if (isASCIIDigit(c
))
1263 if (isASCIISpace(c
) && tolerateEmptyString
)
1268 // FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk
1269 // after the number, so this is too strict a check.
1273 const char* c
= s
.data();
1275 // skip leading white space
1276 while (isASCIISpace(*c
))
1281 return tolerateEmptyString
? 0.0 : NaN
;
1286 if (*c
== '0' && (*(c
+ 1) == 'x' || *(c
+ 1) == 'X')) {
1287 const char* firstDigitPosition
= c
+ 2;
1291 if (*c
>= '0' && *c
<= '9')
1292 d
= d
* 16.0 + *c
- '0';
1293 else if ((*c
>= 'A' && *c
<= 'F') || (*c
>= 'a' && *c
<= 'f'))
1294 d
= d
* 16.0 + (*c
& 0xdf) - 'A' + 10.0;
1299 if (d
>= mantissaOverflowLowerBound
)
1300 d
= parseIntOverflow(firstDigitPosition
, c
- firstDigitPosition
, 16);
1304 d
= WTF::strtod(c
, &end
);
1305 if ((d
!= 0.0 || end
!= c
) && d
!= Inf
&& d
!= -Inf
) {
1312 else if (*c
== '-') {
1317 // We used strtod() to do the conversion. However, strtod() handles
1318 // infinite values slightly differently than JavaScript in that it
1319 // converts the string "inf" with any capitalization to infinity,
1320 // whereas the ECMA spec requires that it be converted to NaN.
1322 if (c
[0] == 'I' && c
[1] == 'n' && c
[2] == 'f' && c
[3] == 'i' && c
[4] == 'n' && c
[5] == 'i' && c
[6] == 't' && c
[7] == 'y') {
1325 } else if ((d
== Inf
|| d
== -Inf
) && *c
!= 'I' && *c
!= 'i')
1332 // allow trailing white space
1333 while (isASCIISpace(*c
))
1335 // don't allow anything after - unless tolerant=true
1336 if (!tolerateTrailingJunk
&& *c
!= '\0')
1342 double UString::toDouble(bool tolerateTrailingJunk
) const
1344 return toDouble(tolerateTrailingJunk
, true);
1347 double UString::toDouble() const
1349 return toDouble(false, true);
1352 uint32_t UString::toUInt32(bool* ok
) const
1354 double d
= toDouble();
1357 if (d
!= static_cast<uint32_t>(d
)) {
1365 return static_cast<uint32_t>(d
);
1368 uint32_t UString::toUInt32(bool* ok
, bool tolerateEmptyString
) const
1370 double d
= toDouble(false, tolerateEmptyString
);
1373 if (d
!= static_cast<uint32_t>(d
)) {
1381 return static_cast<uint32_t>(d
);
1384 uint32_t UString::toStrictUInt32(bool* ok
) const
1389 // Empty string is not OK.
1390 int len
= m_rep
->len
;
1393 const UChar
* p
= m_rep
->data();
1394 unsigned short c
= p
[0];
1396 // If the first digit is 0, only 0 itself is OK.
1403 // Convert to UInt32, checking for overflow.
1406 // Process character, turning it into a digit.
1407 if (c
< '0' || c
> '9')
1409 const unsigned d
= c
- '0';
1411 // Multiply by 10, checking for overflow out of 32 bits.
1412 if (i
> 0xFFFFFFFFU
/ 10)
1416 // Add in the digit, checking for overflow out of 32 bits.
1417 const unsigned max
= 0xFFFFFFFFU
- d
;
1422 // Handle end of string.
1429 // Get next character.
1434 int UString::find(const UString
& f
, int pos
) const
1443 const UChar
* end
= data() + size();
1444 for (const UChar
* c
= data() + pos
; c
< end
; c
++) {
1446 return static_cast<int>(c
- data());
1456 const UChar
* end
= data() + sz
- fsz
;
1457 int fsizeminusone
= (fsz
- 1) * sizeof(UChar
);
1458 const UChar
* fdata
= f
.data();
1459 unsigned short fchar
= fdata
[0];
1461 for (const UChar
* c
= data() + pos
; c
<= end
; c
++) {
1462 if (c
[0] == fchar
&& !memcmp(c
+ 1, fdata
, fsizeminusone
))
1463 return static_cast<int>(c
- data());
1469 int UString::find(UChar ch
, int pos
) const
1473 const UChar
* end
= data() + size();
1474 for (const UChar
* c
= data() + pos
; c
< end
; c
++) {
1476 return static_cast<int>(c
- data());
1482 int UString::rfind(const UString
& f
, int pos
) const
1494 int fsizeminusone
= (fsz
- 1) * sizeof(UChar
);
1495 const UChar
* fdata
= f
.data();
1496 for (const UChar
* c
= data() + pos
; c
>= data(); c
--) {
1497 if (*c
== *fdata
&& !memcmp(c
+ 1, fdata
+ 1, fsizeminusone
))
1498 return static_cast<int>(c
- data());
1504 int UString::rfind(UChar ch
, int pos
) const
1508 if (pos
+ 1 >= size())
1510 for (const UChar
* c
= data() + pos
; c
>= data(); c
--) {
1512 return static_cast<int>(c
- data());
1518 UString
UString::substr(int pos
, int len
) const
1531 if (pos
== 0 && len
== s
)
1534 return UString(Rep::create(m_rep
, pos
, len
));
1537 bool operator==(const UString
& s1
, const UString
& s2
)
1539 int size
= s1
.size();
1544 return s2
.size() == 1 && s1
.data()[0] == s2
.data()[0];
1546 return s2
.size() == size
&& memcmp(s1
.data(), s2
.data(), size
* sizeof(UChar
)) == 0;
1550 bool operator==(const UString
& s1
, const char *s2
)
1553 return s1
.isEmpty();
1555 const UChar
* u
= s1
.data();
1556 const UChar
* uend
= u
+ s1
.size();
1557 while (u
!= uend
&& *s2
) {
1558 if (u
[0] != (unsigned char)*s2
)
1564 return u
== uend
&& *s2
== 0;
1567 bool operator<(const UString
& s1
, const UString
& s2
)
1569 const int l1
= s1
.size();
1570 const int l2
= s2
.size();
1571 const int lmin
= l1
< l2
? l1
: l2
;
1572 const UChar
* c1
= s1
.data();
1573 const UChar
* c2
= s2
.data();
1575 while (l
< lmin
&& *c1
== *c2
) {
1581 return (c1
[0] < c2
[0]);
1586 bool operator>(const UString
& s1
, const UString
& s2
)
1588 const int l1
= s1
.size();
1589 const int l2
= s2
.size();
1590 const int lmin
= l1
< l2
? l1
: l2
;
1591 const UChar
* c1
= s1
.data();
1592 const UChar
* c2
= s2
.data();
1594 while (l
< lmin
&& *c1
== *c2
) {
1600 return (c1
[0] > c2
[0]);
1605 int compare(const UString
& s1
, const UString
& s2
)
1607 const int l1
= s1
.size();
1608 const int l2
= s2
.size();
1609 const int lmin
= l1
< l2
? l1
: l2
;
1610 const UChar
* c1
= s1
.data();
1611 const UChar
* c2
= s2
.data();
1613 while (l
< lmin
&& *c1
== *c2
) {
1620 return (c1
[0] > c2
[0]) ? 1 : -1;
1625 return (l1
> l2
) ? 1 : -1;
1628 bool equal(const UString::Rep
* r
, const UString::Rep
* b
)
1630 int length
= r
->len
;
1631 if (length
!= b
->len
)
1633 const UChar
* d
= r
->data();
1634 const UChar
* s
= b
->data();
1635 for (int i
= 0; i
!= length
; ++i
) {
1642 CString
UString::UTF8String(bool strict
) const
1644 // Allocate a buffer big enough to hold all the characters.
1645 const int length
= size();
1646 Vector
<char, 1024> buffer(length
* 3);
1648 // Convert to runs of 8-bit characters.
1649 char* p
= buffer
.data();
1650 const UChar
* d
= reinterpret_cast<const UChar
*>(&data()[0]);
1651 ConversionResult result
= convertUTF16ToUTF8(&d
, d
+ length
, &p
, p
+ buffer
.size(), strict
);
1652 if (result
!= conversionOK
)
1655 return CString(buffer
.data(), p
- buffer
.data());
1658 // For use in error handling code paths -- having this not be inlined helps avoid PIC branches to fetch the global on Mac OS X.
1659 NEVER_INLINE
void UString::makeNull()
1661 m_rep
= &Rep::null();
1664 // For use in error handling code paths -- having this not be inlined helps avoid PIC branches to fetch the global on Mac OS X.
1665 NEVER_INLINE
UString::Rep
* UString::nullRep()
1667 return &Rep::null();