2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5 * Copyright (C) 2009 Google Inc. All rights reserved.
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
27 #include "JSGlobalObjectFunctions.h"
28 #include "Collector.h"
30 #include "Identifier.h"
31 #include "Operations.h"
38 #include <wtf/ASCIICType.h>
39 #include <wtf/Assertions.h>
40 #include <wtf/MathExtras.h>
41 #include <wtf/Vector.h>
42 #include <wtf/unicode/UTF8.h>
52 using namespace WTF::Unicode
;
55 // This can be tuned differently per platform by putting platform #ifs right here.
56 // If you don't define this macro at all, then copyChars will just call directly
58 #define USTRING_COPY_CHARS_INLINE_CUTOFF 20
62 extern const double NaN
;
63 extern const double Inf
;
65 // This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings.
66 static const int minLengthToShare
= 10;
// Sentinel size value: the largest value a size_t can hold. expandedSize()
// returns it when a combined or expanded size cannot be computed safely.
static inline size_t overflowIndicator()
{
    const size_t largestSize = std::numeric_limits<size_t>::max();
    return largestSize;
}
69 static inline size_t maxUChars() { return std::numeric_limits
<size_t>::max() / sizeof(UChar
); }
71 static inline UChar
* allocChars(size_t length
)
74 if (length
> maxUChars())
76 return static_cast<UChar
*>(tryFastMalloc(sizeof(UChar
) * length
));
79 static inline UChar
* reallocChars(UChar
* buffer
, size_t length
)
82 if (length
> maxUChars())
84 return static_cast<UChar
*>(tryFastRealloc(buffer
, sizeof(UChar
) * length
));
87 static inline void copyChars(UChar
* destination
, const UChar
* source
, unsigned numCharacters
)
89 #ifdef USTRING_COPY_CHARS_INLINE_CUTOFF
90 if (numCharacters
<= USTRING_COPY_CHARS_INLINE_CUTOFF
) {
91 for (unsigned i
= 0; i
< numCharacters
; ++i
)
92 destination
[i
] = source
[i
];
96 memcpy(destination
, source
, numCharacters
* sizeof(UChar
));
99 COMPILE_ASSERT(sizeof(UChar
) == 2, uchar_is_2_bytes
);
101 CString::CString(const char* c
)
102 : m_length(strlen(c
))
103 , m_data(new char[m_length
+ 1])
105 memcpy(m_data
, c
, m_length
+ 1);
108 CString::CString(const char* c
, size_t length
)
110 , m_data(new char[length
+ 1])
112 memcpy(m_data
, c
, m_length
);
113 m_data
[m_length
] = 0;
116 CString::CString(const CString
& b
)
118 m_length
= b
.m_length
;
120 m_data
= new char[m_length
+ 1];
121 memcpy(m_data
, b
.m_data
, m_length
+ 1);
131 CString
CString::adopt(char* c
, size_t length
)
139 CString
& CString::append(const CString
& t
)
142 n
= new char[m_length
+ t
.m_length
+ 1];
144 memcpy(n
, m_data
, m_length
);
146 memcpy(n
+ m_length
, t
.m_data
, t
.m_length
);
147 m_length
+= t
.m_length
;
156 CString
& CString::operator=(const char* c
)
160 m_length
= strlen(c
);
161 m_data
= new char[m_length
+ 1];
162 memcpy(m_data
, c
, m_length
+ 1);
167 CString
& CString::operator=(const CString
& str
)
174 m_length
= str
.m_length
;
176 m_data
= new char[m_length
+ 1];
177 memcpy(m_data
, str
.m_data
, m_length
+ 1);
184 bool operator==(const CString
& c1
, const CString
& c2
)
186 size_t len
= c1
.size();
187 return len
== c2
.size() && (len
== 0 || memcmp(c1
.c_str(), c2
.c_str(), len
) == 0);
190 // These static strings are immutable, except for rc, whose initial value is chosen to
191 // reduce the possibility of it becoming zero due to ref/deref not being thread-safe.
192 static UChar sharedEmptyChar
;
193 UString::BaseString
* UString::Rep::nullBaseString
;
194 UString::BaseString
* UString::Rep::emptyBaseString
;
195 UString
* UString::nullUString
;
197 static void initializeStaticBaseString(UString::BaseString
& base
)
199 base
.rc
= INT_MAX
/ 2;
200 base
.m_identifierTableAndFlags
.setFlag(UString::Rep::StaticFlag
);
201 base
.checkConsistency();
204 void initializeUString()
206 UString::Rep::nullBaseString
= new UString::BaseString(0, 0);
207 initializeStaticBaseString(*UString::Rep::nullBaseString
);
209 UString::Rep::emptyBaseString
= new UString::BaseString(&sharedEmptyChar
, 0);
210 initializeStaticBaseString(*UString::Rep::emptyBaseString
);
212 UString::nullUString
= new UString
;
215 static char* statBuffer
= 0; // Only used for debugging via UString::ascii().
217 PassRefPtr
<UString::Rep
> UString::Rep::createCopying(const UChar
* d
, int l
)
219 UChar
* copyD
= static_cast<UChar
*>(fastMalloc(l
* sizeof(UChar
)));
220 copyChars(copyD
, d
, l
);
221 return create(copyD
, l
);
224 PassRefPtr
<UString::Rep
> UString::Rep::createFromUTF8(const char* string
)
227 return &UString::Rep::null();
229 size_t length
= strlen(string
);
230 Vector
<UChar
, 1024> buffer(length
);
231 UChar
* p
= buffer
.data();
232 if (conversionOK
!= convertUTF8ToUTF16(&string
, string
+ length
, &p
, p
+ length
))
233 return &UString::Rep::null();
235 return UString::Rep::createCopying(buffer
.data(), p
- buffer
.data());
238 PassRefPtr
<UString::Rep
> UString::Rep::create(UChar
* string
, int length
, PassRefPtr
<UString::SharedUChar
> sharedBuffer
)
240 PassRefPtr
<UString::Rep
> rep
= create(string
, length
);
241 rep
->baseString()->setSharedBuffer(sharedBuffer
);
242 rep
->checkConsistency();
246 UString::SharedUChar
* UString::Rep::sharedBuffer()
248 UString::BaseString
* base
= baseString();
249 if (len
< minLengthToShare
)
252 return base
->sharedBuffer();
255 void UString::Rep::destroy()
259 // Static null and empty strings can never be destroyed, but we cannot rely on
260 // reference counting, because ref/deref are not thread-safe.
262 if (identifierTable())
263 Identifier::remove(this);
265 UString::BaseString
* base
= baseString();
268 m_sharedBuffer
->deref();
278 // Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
279 // or anything like that.
280 const unsigned PHI
= 0x9e3779b9U
;
282 // Paul Hsieh's SuperFastHash
283 // http://www.azillionmonkeys.com/qed/hash.html
284 unsigned UString::Rep::computeHash(const UChar
* s
, int len
)
296 tmp
= (s
[1] << 11) ^ hash
;
297 hash
= (hash
<< 16) ^ tmp
;
309 // Force "avalanching" of final 127 bits
316 // this avoids ever returning a hash code of 0, since that is used to
317 // signal "hash not computed yet", using a value that is likely to be
318 // effectively the same as 0 when the low bits are masked
325 // Paul Hsieh's SuperFastHash
326 // http://www.azillionmonkeys.com/qed/hash.html
327 unsigned UString::Rep::computeHash(const char* s
, int l
)
329 // This hash is designed to work on 16-bit chunks at a time. But since the normal case
330 // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
331 // were 16-bit chunks, which should give matching results
341 hash
+= static_cast<unsigned char>(s
[0]);
342 tmp
= (static_cast<unsigned char>(s
[1]) << 11) ^ hash
;
343 hash
= (hash
<< 16) ^ tmp
;
350 hash
+= static_cast<unsigned char>(s
[0]);
355 // Force "avalanching" of final 127 bits
362 // this avoids ever returning a hash code of 0, since that is used to
363 // signal "hash not computed yet", using a value that is likely to be
364 // effectively the same as 0 when the low bits are masked
372 void UString::Rep::checkConsistency() const
374 const UString::BaseString
* base
= baseString();
376 // There is no recursion for base strings.
377 ASSERT(base
== base
->baseString());
380 // There are only two static strings: null and empty.
383 // Static strings cannot get in identifier tables, because they are globally shared.
384 ASSERT(!identifierTable());
387 // The string fits in buffer.
388 ASSERT(base
->usedPreCapacity
<= base
->preCapacity
);
389 ASSERT(base
->usedCapacity
<= base
->capacity
);
390 ASSERT(-offset
<= base
->usedPreCapacity
);
391 ASSERT(offset
+ len
<= base
->usedCapacity
);
395 UString::SharedUChar
* UString::BaseString::sharedBuffer()
398 setSharedBuffer(SharedUChar::create(new OwnFastMallocPtr
<UChar
>(buf
)));
399 return m_sharedBuffer
;
402 void UString::BaseString::setSharedBuffer(PassRefPtr
<UString::SharedUChar
> sharedBuffer
)
404 // The manual steps below are because m_sharedBuffer can't be a RefPtr. m_sharedBuffer
405 // is in a union with another variable to avoid making BaseString any larger.
407 m_sharedBuffer
->deref();
408 m_sharedBuffer
= sharedBuffer
.releaseRef();
411 bool UString::BaseString::slowIsBufferReadOnly()
413 // The buffer may not be modified as soon as the underlying data has been shared with another class.
414 if (m_sharedBuffer
->isShared())
417 // At this point, we know that the underlying buffer isn't shared outside of this base class,
418 // so get rid of m_sharedBuffer.
419 OwnPtr
<OwnFastMallocPtr
<UChar
> > mallocPtr(m_sharedBuffer
->release());
420 UChar
* unsharedBuf
= const_cast<UChar
*>(mallocPtr
->release());
422 preCapacity
+= (buf
- unsharedBuf
);
427 // Put these early so they can be inlined.
428 static inline size_t expandedSize(size_t capacitySize
, size_t precapacitySize
)
430 // Combine capacitySize & precapacitySize to produce a single size to allocate,
431 // check that doing so does not result in overflow.
432 size_t size
= capacitySize
+ precapacitySize
;
433 if (size
< capacitySize
)
434 return overflowIndicator();
436 // Small Strings (up to 4 pages):
437 // Expand the allocation size to 112.5% of the amount requested. This is largely sticking
438 // to our previous policy, however 112.5% is cheaper to calculate.
440 size_t expandedSize
= ((size
+ (size
>> 3)) | 15) + 1;
441 // Given the limited range within which we calculate the expansion in this
442 // fashion the above calculation should never overflow.
443 ASSERT(expandedSize
>= size
);
444 ASSERT(expandedSize
< maxUChars());
448 // Medium Strings (up to 128 pages):
449 // For strings covering multiple pages, over-allocation is less of a concern - any unused
450 // space will not be paged in if it is not used, so this is purely a VM overhead. For
451 // these strings allocate 2x the requested size.
452 if (size
< 0x80000) {
453 size_t expandedSize
= ((size
+ size
) | 0xfff) + 1;
454 // Given the limited range within which we calculate the expansion in this
455 // fashion the above calculation should never overflow.
456 ASSERT(expandedSize
>= size
);
457 ASSERT(expandedSize
< maxUChars());
461 // Large Strings (to infinity and beyond!):
462 // Revert to our 112.5% policy - probably best to limit the amount of unused VM we allow
463 // any individual string to be responsible for.
464 size_t expandedSize
= ((size
+ (size
>> 3)) | 0xfff) + 1;
466 // Check for overflow - any result that is at least as large as requested (but
467 // still below the limit) is okay.
468 if ((expandedSize
>= size
) && (expandedSize
< maxUChars()))
470 return overflowIndicator();
473 static inline bool expandCapacity(UString::Rep
* rep
, int requiredLength
)
475 rep
->checkConsistency();
476 ASSERT(!rep
->baseString()->isBufferReadOnly());
478 UString::BaseString
* base
= rep
->baseString();
480 if (requiredLength
> base
->capacity
) {
481 size_t newCapacity
= expandedSize(requiredLength
, base
->preCapacity
);
482 UChar
* oldBuf
= base
->buf
;
483 base
->buf
= reallocChars(base
->buf
, newCapacity
);
488 base
->capacity
= newCapacity
- base
->preCapacity
;
490 if (requiredLength
> base
->usedCapacity
)
491 base
->usedCapacity
= requiredLength
;
493 rep
->checkConsistency();
497 bool UString::Rep::reserveCapacity(int capacity
)
499 // If this is an empty string there is no point 'growing' it - just allocate a new one.
500 // If the BaseString is shared with another string that is using more capacity than this
501 // string is, then growing the buffer won't help.
502 // If the BaseString's buffer is readonly, then it isn't allowed to grow.
503 UString::BaseString
* base
= baseString();
504 if (!base
->buf
|| !base
->capacity
|| (offset
+ len
) != base
->usedCapacity
|| base
->isBufferReadOnly())
507 // If there is already sufficient capacity, no need to grow!
508 if (capacity
<= base
->capacity
)
513 size_t newCapacity
= expandedSize(capacity
, base
->preCapacity
);
514 UChar
* oldBuf
= base
->buf
;
515 base
->buf
= reallocChars(base
->buf
, newCapacity
);
520 base
->capacity
= newCapacity
- base
->preCapacity
;
526 void UString::expandCapacity(int requiredLength
)
528 if (!JSC::expandCapacity(m_rep
.get(), requiredLength
))
532 void UString::expandPreCapacity(int requiredPreCap
)
534 m_rep
->checkConsistency();
535 ASSERT(!m_rep
->baseString()->isBufferReadOnly());
537 BaseString
* base
= m_rep
->baseString();
539 if (requiredPreCap
> base
->preCapacity
) {
540 size_t newCapacity
= expandedSize(requiredPreCap
, base
->capacity
);
541 int delta
= newCapacity
- base
->capacity
- base
->preCapacity
;
543 UChar
* newBuf
= allocChars(newCapacity
);
548 copyChars(newBuf
+ delta
, base
->buf
, base
->capacity
+ base
->preCapacity
);
552 base
->preCapacity
= newCapacity
- base
->capacity
;
554 if (requiredPreCap
> base
->usedPreCapacity
)
555 base
->usedPreCapacity
= requiredPreCap
;
557 m_rep
->checkConsistency();
560 static PassRefPtr
<UString::Rep
> createRep(const char* c
)
563 return &UString::Rep::null();
566 return &UString::Rep::empty();
568 size_t length
= strlen(c
);
569 UChar
* d
= allocChars(length
);
571 return &UString::Rep::null();
573 for (size_t i
= 0; i
< length
; i
++)
574 d
[i
] = static_cast<unsigned char>(c
[i
]); // use unsigned char to zero-extend instead of sign-extend
575 return UString::Rep::create(d
, static_cast<int>(length
));
580 UString::UString(const char* c
)
581 : m_rep(createRep(c
))
585 UString::UString(const UChar
* c
, int length
)
588 m_rep
= &Rep::empty();
590 m_rep
= Rep::createCopying(c
, length
);
593 UString::UString(UChar
* c
, int length
, bool copy
)
596 m_rep
= &Rep::empty();
598 m_rep
= Rep::createCopying(c
, length
);
600 m_rep
= Rep::create(c
, length
);
603 UString::UString(const Vector
<UChar
>& buffer
)
606 m_rep
= &Rep::empty();
608 m_rep
= Rep::createCopying(buffer
.data(), buffer
.size());
611 static ALWAYS_INLINE
int newCapacityWithOverflowCheck(const int currentCapacity
, const int extendLength
, const bool plusOne
= false)
613 ASSERT_WITH_MESSAGE(extendLength
>= 0, "extendedLength = %d", extendLength
);
615 const int plusLength
= plusOne
? 1 : 0;
616 if (currentCapacity
> std::numeric_limits
<int>::max() - extendLength
- plusLength
)
619 return currentCapacity
+ extendLength
+ plusLength
;
622 static ALWAYS_INLINE PassRefPtr
<UString::Rep
> concatenate(PassRefPtr
<UString::Rep
> r
, const UChar
* tData
, int tSize
)
624 RefPtr
<UString::Rep
> rep
= r
;
626 rep
->checkConsistency();
628 int thisSize
= rep
->size();
629 int thisOffset
= rep
->offset
;
630 int length
= thisSize
+ tSize
;
631 UString::BaseString
* base
= rep
->baseString();
636 } else if (thisSize
== 0) {
638 rep
= UString::Rep::createCopying(tData
, tSize
);
639 } else if (rep
== base
&& !base
->isShared()) {
640 // this is direct and has refcount of 1 (so we can just alter it directly)
641 if (!expandCapacity(rep
.get(), newCapacityWithOverflowCheck(thisOffset
, length
)))
642 rep
= &UString::Rep::null();
644 copyChars(rep
->data() + thisSize
, tData
, tSize
);
648 } else if (thisOffset
+ thisSize
== base
->usedCapacity
&& thisSize
>= minShareSize
&& !base
->isBufferReadOnly()) {
649 // this reaches the end of the buffer - extend it if it's long enough to append to
650 if (!expandCapacity(rep
.get(), newCapacityWithOverflowCheck(thisOffset
, length
)))
651 rep
= &UString::Rep::null();
653 copyChars(rep
->data() + thisSize
, tData
, tSize
);
654 rep
= UString::Rep::create(rep
, 0, length
);
657 // This is shared in some way that prevents us from modifying base, so we must make a whole new string.
658 size_t newCapacity
= expandedSize(length
, 0);
659 UChar
* d
= allocChars(newCapacity
);
661 rep
= &UString::Rep::null();
663 copyChars(d
, rep
->data(), thisSize
);
664 copyChars(d
+ thisSize
, tData
, tSize
);
665 rep
= UString::Rep::create(d
, length
);
666 rep
->baseString()->capacity
= newCapacity
;
670 rep
->checkConsistency();
672 return rep
.release();
675 static ALWAYS_INLINE PassRefPtr
<UString::Rep
> concatenate(PassRefPtr
<UString::Rep
> r
, const char* t
)
677 RefPtr
<UString::Rep
> rep
= r
;
679 rep
->checkConsistency();
681 int thisSize
= rep
->size();
682 int thisOffset
= rep
->offset
;
683 int tSize
= static_cast<int>(strlen(t
));
684 int length
= thisSize
+ tSize
;
685 UString::BaseString
* base
= rep
->baseString();
691 } else if (tSize
== 0) {
692 // t is empty, we'll just return *this below.
693 } else if (rep
== base
&& !base
->isShared()) {
694 // this is direct and has refcount of 1 (so we can just alter it directly)
695 expandCapacity(rep
.get(), newCapacityWithOverflowCheck(thisOffset
, length
));
696 UChar
* d
= rep
->data();
698 for (int i
= 0; i
< tSize
; ++i
)
699 d
[thisSize
+ i
] = static_cast<unsigned char>(t
[i
]); // use unsigned char to zero-extend instead of sign-extend
703 } else if (thisOffset
+ thisSize
== base
->usedCapacity
&& thisSize
>= minShareSize
&& !base
->isBufferReadOnly()) {
704 // this string reaches the end of the buffer - extend it
705 expandCapacity(rep
.get(), newCapacityWithOverflowCheck(thisOffset
, length
));
706 UChar
* d
= rep
->data();
708 for (int i
= 0; i
< tSize
; ++i
)
709 d
[thisSize
+ i
] = static_cast<unsigned char>(t
[i
]); // use unsigned char to zero-extend instead of sign-extend
710 rep
= UString::Rep::create(rep
, 0, length
);
713 // This is shared in some way that prevents us from modifying base, so we must make a whole new string.
714 size_t newCapacity
= expandedSize(length
, 0);
715 UChar
* d
= allocChars(newCapacity
);
717 rep
= &UString::Rep::null();
719 copyChars(d
, rep
->data(), thisSize
);
720 for (int i
= 0; i
< tSize
; ++i
)
721 d
[thisSize
+ i
] = static_cast<unsigned char>(t
[i
]); // use unsigned char to zero-extend instead of sign-extend
722 rep
= UString::Rep::create(d
, length
);
723 rep
->baseString()->capacity
= newCapacity
;
727 rep
->checkConsistency();
729 return rep
.release();
732 PassRefPtr
<UString::Rep
> concatenate(UString::Rep
* a
, UString::Rep
* b
)
734 a
->checkConsistency();
735 b
->checkConsistency();
737 int aSize
= a
->size();
738 int bSize
= b
->size();
739 int aOffset
= a
->offset
;
743 UString::BaseString
* aBase
= a
->baseString();
744 if (bSize
== 1 && aOffset
+ aSize
== aBase
->usedCapacity
&& aOffset
+ aSize
< aBase
->capacity
&& !aBase
->isBufferReadOnly()) {
745 // b is a single character (common fast case)
746 ++aBase
->usedCapacity
;
747 a
->data()[aSize
] = b
->data()[0];
748 return UString::Rep::create(a
, 0, aSize
+ 1);
758 int bOffset
= b
->offset
;
759 int length
= aSize
+ bSize
;
761 UString::BaseString
* bBase
= b
->baseString();
762 if (aOffset
+ aSize
== aBase
->usedCapacity
&& aSize
>= minShareSize
&& 4 * aSize
>= bSize
763 && (-bOffset
!= bBase
->usedPreCapacity
|| aSize
>= bSize
) && !aBase
->isBufferReadOnly()) {
764 // - a reaches the end of its buffer so it qualifies for shared append
765 // - also, it's at least a quarter the length of b - appending to a much shorter
766 // string does more harm than good
767 // - however, if b qualifies for prepend and is longer than a, we'd rather prepend
770 x
.expandCapacity(newCapacityWithOverflowCheck(aOffset
, length
));
771 if (!a
->data() || !x
.data())
773 copyChars(a
->data() + aSize
, b
->data(), bSize
);
774 PassRefPtr
<UString::Rep
> result
= UString::Rep::create(a
, 0, length
);
776 a
->checkConsistency();
777 b
->checkConsistency();
778 result
->checkConsistency();
783 if (-bOffset
== bBase
->usedPreCapacity
&& bSize
>= minShareSize
&& 4 * bSize
>= aSize
&& !bBase
->isBufferReadOnly()) {
784 // - b reaches the beginning of its buffer so it qualifies for shared prepend
785 // - also, it's at least a quarter the length of a - prepending to a much shorter
786 // string does more harm than good
788 y
.expandPreCapacity(-bOffset
+ aSize
);
789 if (!b
->data() || !y
.data())
791 copyChars(b
->data() - aSize
, a
->data(), aSize
);
792 PassRefPtr
<UString::Rep
> result
= UString::Rep::create(b
, -aSize
, length
);
794 a
->checkConsistency();
795 b
->checkConsistency();
796 result
->checkConsistency();
801 // a does not qualify for append, and b does not qualify for prepend, gotta make a whole new string
802 size_t newCapacity
= expandedSize(length
, 0);
803 UChar
* d
= allocChars(newCapacity
);
806 copyChars(d
, a
->data(), aSize
);
807 copyChars(d
+ aSize
, b
->data(), bSize
);
808 PassRefPtr
<UString::Rep
> result
= UString::Rep::create(d
, length
);
809 result
->baseString()->capacity
= newCapacity
;
811 a
->checkConsistency();
812 b
->checkConsistency();
813 result
->checkConsistency();
818 PassRefPtr
<UString::Rep
> concatenate(UString::Rep
* rep
, int i
)
820 UChar buf
[1 + sizeof(i
) * 3];
821 UChar
* end
= buf
+ sizeof(buf
) / sizeof(UChar
);
826 else if (i
== INT_MIN
) {
827 char minBuf
[1 + sizeof(i
) * 3];
828 sprintf(minBuf
, "%d", INT_MIN
);
829 return concatenate(rep
, minBuf
);
831 bool negative
= false;
837 *--p
= static_cast<unsigned short>((i
% 10) + '0');
844 return concatenate(rep
, p
, static_cast<int>(end
- p
));
848 PassRefPtr
<UString::Rep
> concatenate(UString::Rep
* rep
, double d
)
850 // avoid ever printing -NaN, in JS conceptually there is only one NaN value
852 return concatenate(rep
, "NaN");
854 if (d
== 0.0) // stringify -0 as 0
862 WTF::dtoa(result
, d
, 0, &decimalPoint
, &sign
, NULL
);
863 int length
= static_cast<int>(strlen(result
));
869 if (decimalPoint
<= 0 && decimalPoint
> -6) {
872 for (int j
= decimalPoint
; j
< 0; j
++)
874 strcpy(buf
+ i
, result
);
875 } else if (decimalPoint
<= 21 && decimalPoint
> 0) {
876 if (length
<= decimalPoint
) {
877 strcpy(buf
+ i
, result
);
879 for (int j
= 0; j
< decimalPoint
- length
; j
++)
883 strncpy(buf
+ i
, result
, decimalPoint
);
886 strcpy(buf
+ i
, result
+ decimalPoint
);
888 } else if (result
[0] < '0' || result
[0] > '9')
889 strcpy(buf
+ i
, result
);
891 buf
[i
++] = result
[0];
894 strcpy(buf
+ i
, result
+ 1);
899 buf
[i
++] = (decimalPoint
>= 0) ? '+' : '-';
900 // decimalPoint can't be more than 3 digits decimal given the
901 // nature of float representation
902 int exponential
= decimalPoint
- 1;
904 exponential
= -exponential
;
905 if (exponential
>= 100)
906 buf
[i
++] = static_cast<char>('0' + exponential
/ 100);
907 if (exponential
>= 10)
908 buf
[i
++] = static_cast<char>('0' + (exponential
% 100) / 10);
909 buf
[i
++] = static_cast<char>('0' + exponential
% 10);
913 return concatenate(rep
, buf
);
916 UString
UString::from(int i
)
918 UChar buf
[1 + sizeof(i
) * 3];
919 UChar
* end
= buf
+ sizeof(buf
) / sizeof(UChar
);
924 else if (i
== INT_MIN
) {
925 char minBuf
[1 + sizeof(i
) * 3];
926 snprintf(minBuf
, 1 + sizeof(i
) * 3, "%d", INT_MIN
);
927 return UString(minBuf
);
929 bool negative
= false;
935 *--p
= static_cast<unsigned short>((i
% 10) + '0');
942 return UString(p
, static_cast<int>(end
- p
));
945 UString
UString::from(unsigned int u
)
947 UChar buf
[sizeof(u
) * 3];
948 UChar
* end
= buf
+ sizeof(buf
) / sizeof(UChar
);
955 *--p
= static_cast<unsigned short>((u
% 10) + '0');
960 return UString(p
, static_cast<int>(end
- p
));
963 UString
UString::from(long l
)
965 UChar buf
[1 + sizeof(l
) * 3];
966 UChar
* end
= buf
+ sizeof(buf
) / sizeof(UChar
);
971 else if (l
== LONG_MIN
) {
972 char minBuf
[1 + sizeof(l
) * 3];
973 snprintf(minBuf
, 1 + sizeof(l
) * 3, "%ld", LONG_MIN
);
974 return UString(minBuf
);
976 bool negative
= false;
982 *--p
= static_cast<unsigned short>((l
% 10) + '0');
989 return UString(p
, static_cast<int>(end
- p
));
992 UString
UString::from(double d
)
994 // avoid ever printing -NaN, in JS conceptually there is only one NaN value
1003 WTF::dtoa(result
, d
, 0, &decimalPoint
, &sign
, NULL
);
1004 int length
= static_cast<int>(strlen(result
));
1010 if (decimalPoint
<= 0 && decimalPoint
> -6) {
1013 for (int j
= decimalPoint
; j
< 0; j
++)
1015 strlcpy(buf
+ i
, result
, sizeof(buf
) - i
);
1016 } else if (decimalPoint
<= 21 && decimalPoint
> 0) {
1017 if (length
<= decimalPoint
) {
1018 strlcpy(buf
+ i
, result
, sizeof(buf
) - i
);
1020 for (int j
= 0; j
< decimalPoint
- length
; j
++)
1024 int len
= (decimalPoint
<= static_cast<int>(sizeof(buf
)) - i
? decimalPoint
: sizeof(buf
) - i
);
1025 strncpy(buf
+ i
, result
, len
);
1028 strlcpy(buf
+ i
, result
+ decimalPoint
, sizeof(buf
) - i
);
1030 } else if (result
[0] < '0' || result
[0] > '9')
1031 strlcpy(buf
+ i
, result
, sizeof(buf
) - i
);
1033 buf
[i
++] = result
[0];
1036 strlcpy(buf
+ i
, result
+ 1, sizeof(buf
) - i
);
1041 buf
[i
++] = (decimalPoint
>= 0) ? '+' : '-';
1042 // decimalPoint can't be more than 3 digits decimal given the
1043 // nature of float representation
1044 int exponential
= decimalPoint
- 1;
1045 if (exponential
< 0)
1046 exponential
= -exponential
;
1047 if (exponential
>= 100)
1048 buf
[i
++] = static_cast<char>('0' + exponential
/ 100);
1049 if (exponential
>= 10)
1050 buf
[i
++] = static_cast<char>('0' + (exponential
% 100) / 10);
1051 buf
[i
++] = static_cast<char>('0' + exponential
% 10);
1053 ASSERT(i
<= static_cast<int>(sizeof(buf
)));
1056 return UString(buf
);
1059 UString
UString::spliceSubstringsWithSeparators(const Range
* substringRanges
, int rangeCount
, const UString
* separators
, int separatorCount
) const
1061 m_rep
->checkConsistency();
1063 if (rangeCount
== 1 && separatorCount
== 0) {
1064 int thisSize
= size();
1065 int position
= substringRanges
[0].position
;
1066 int length
= substringRanges
[0].length
;
1067 if (position
<= 0 && length
>= thisSize
)
1069 return UString::Rep::create(m_rep
, max(0, position
), min(thisSize
, length
));
1072 int totalLength
= 0;
1073 for (int i
= 0; i
< rangeCount
; i
++)
1074 totalLength
+= substringRanges
[i
].length
;
1075 for (int i
= 0; i
< separatorCount
; i
++)
1076 totalLength
+= separators
[i
].size();
1078 if (totalLength
== 0)
1081 UChar
* buffer
= allocChars(totalLength
);
1085 int maxCount
= max(rangeCount
, separatorCount
);
1087 for (int i
= 0; i
< maxCount
; i
++) {
1088 if (i
< rangeCount
) {
1089 copyChars(buffer
+ bufferPos
, data() + substringRanges
[i
].position
, substringRanges
[i
].length
);
1090 bufferPos
+= substringRanges
[i
].length
;
1092 if (i
< separatorCount
) {
1093 copyChars(buffer
+ bufferPos
, separators
[i
].data(), separators
[i
].size());
1094 bufferPos
+= separators
[i
].size();
1098 return UString::Rep::create(buffer
, totalLength
);
1101 UString
UString::replaceRange(int rangeStart
, int rangeLength
, const UString
& replacement
) const
1103 m_rep
->checkConsistency();
1105 int replacementLength
= replacement
.size();
1106 int totalLength
= size() - rangeLength
+ replacementLength
;
1107 if (totalLength
== 0)
1110 UChar
* buffer
= allocChars(totalLength
);
1114 copyChars(buffer
, data(), rangeStart
);
1115 copyChars(buffer
+ rangeStart
, replacement
.data(), replacementLength
);
1116 int rangeEnd
= rangeStart
+ rangeLength
;
1117 copyChars(buffer
+ rangeStart
+ replacementLength
, data() + rangeEnd
, size() - rangeEnd
);
1119 return UString::Rep::create(buffer
, totalLength
);
1123 UString
& UString::append(const UString
&t
)
1125 m_rep
->checkConsistency();
1126 t
.rep()->checkConsistency();
1128 int thisSize
= size();
1129 int thisOffset
= m_rep
->offset
;
1130 int tSize
= t
.size();
1131 int length
= thisSize
+ tSize
;
1132 BaseString
* base
= m_rep
->baseString();
1135 if (thisSize
== 0) {
1138 } else if (tSize
== 0) {
1140 } else if (m_rep
== base
&& !base
->isShared()) {
1141 // this is direct and has refcount of 1 (so we can just alter it directly)
1142 expandCapacity(newCapacityWithOverflowCheck(thisOffset
, length
));
1144 copyChars(m_rep
->data() + thisSize
, t
.data(), tSize
);
1145 m_rep
->len
= length
;
1148 } else if (thisOffset
+ thisSize
== base
->usedCapacity
&& thisSize
>= minShareSize
&& !base
->isBufferReadOnly()) {
1149 // this reaches the end of the buffer - extend it if it's long enough to append to
1150 expandCapacity(newCapacityWithOverflowCheck(thisOffset
, length
));
1152 copyChars(m_rep
->data() + thisSize
, t
.data(), tSize
);
1153 m_rep
= Rep::create(m_rep
, 0, length
);
1156 // This is shared in some way that prevents us from modifying base, so we must make a whole new string.
1157 size_t newCapacity
= expandedSize(length
, 0);
1158 UChar
* d
= allocChars(newCapacity
);
1162 copyChars(d
, data(), thisSize
);
1163 copyChars(d
+ thisSize
, t
.data(), tSize
);
1164 m_rep
= Rep::create(d
, length
);
1165 m_rep
->baseString()->capacity
= newCapacity
;
1169 m_rep
->checkConsistency();
1170 t
.rep()->checkConsistency();
1175 UString
& UString::append(const UChar
* tData
, int tSize
)
1177 m_rep
= concatenate(m_rep
.release(), tData
, tSize
);
1181 UString
& UString::appendNumeric(int i
)
1183 m_rep
= concatenate(rep(), i
);
1187 UString
& UString::appendNumeric(double d
)
1189 m_rep
= concatenate(rep(), d
);
1193 UString
& UString::append(const char* t
)
1195 m_rep
= concatenate(m_rep
.release(), t
);
1199 UString
& UString::append(UChar c
)
1201 m_rep
->checkConsistency();
1203 int thisOffset
= m_rep
->offset
;
1204 int length
= size();
1205 BaseString
* base
= m_rep
->baseString();
1209 // this is empty - must make a new m_rep because we don't want to pollute the shared empty one
1210 size_t newCapacity
= expandedSize(1, 0);
1211 UChar
* d
= allocChars(newCapacity
);
1216 m_rep
= Rep::create(d
, 1);
1217 m_rep
->baseString()->capacity
= newCapacity
;
1219 } else if (m_rep
== base
&& !base
->isShared()) {
1220 // this is direct and has refcount of 1 (so we can just alter it directly)
1221 expandCapacity(newCapacityWithOverflowCheck(thisOffset
, length
, true));
1222 UChar
* d
= m_rep
->data();
1225 m_rep
->len
= length
+ 1;
1228 } else if (thisOffset
+ length
== base
->usedCapacity
&& length
>= minShareSize
&& !base
->isBufferReadOnly()) {
1229 // this reaches the end of the string - extend it and share
1230 expandCapacity(newCapacityWithOverflowCheck(thisOffset
, length
, true));
1231 UChar
* d
= m_rep
->data();
1234 m_rep
= Rep::create(m_rep
, 0, length
+ 1);
1237 // This is shared in some way that prevents us from modifying base, so we must make a whole new string.
1238 size_t newCapacity
= expandedSize(length
+ 1, 0);
1239 UChar
* d
= allocChars(newCapacity
);
1243 copyChars(d
, data(), length
);
1245 m_rep
= Rep::create(d
, length
+ 1);
1246 m_rep
->baseString()->capacity
= newCapacity
;
1250 m_rep
->checkConsistency();
1255 bool UString::getCString(CStringBuffer
& buffer
) const
1257 int length
= size();
1258 int neededSize
= length
+ 1;
1259 buffer
.resize(neededSize
);
1260 char* buf
= buffer
.data();
1263 const UChar
* p
= data();
1265 const UChar
* limit
= p
+ length
;
1266 while (p
!= limit
) {
1269 *q
= static_cast<char>(c
);
1275 return !(ored
& 0xFF00);
1278 char* UString::ascii() const
1280 int length
= size();
1281 int neededSize
= length
+ 1;
1282 delete[] statBuffer
;
1283 statBuffer
= new char[neededSize
];
1285 const UChar
* p
= data();
1286 char* q
= statBuffer
;
1287 const UChar
* limit
= p
+ length
;
1288 while (p
!= limit
) {
1289 *q
= static_cast<char>(p
[0]);
// Assigns the contents of the C string `c` to this string.
// Visible behaviour:
//  - m_rep can be set to Rep::null() or Rep::empty(); the guarding conditions
//    are not visible (NOTE(review): presumably a null pointer and an empty
//    string respectively - confirm).
//  - Otherwise the length is strlen(c). One branch is taken when the base
//    string is unshared, has capacity for `l` characters, and m_rep is the base
//    itself at offset 0 with no preCapacity; the visible Rep::create(d, l)
//    belongs to a later path.
//  - Finally each char of `c` is widened into d[i].
1298 UString
& UString::operator=(const char* c
)
1301 m_rep
= &Rep::null();
1306 m_rep
= &Rep::empty();
1310 int l
= static_cast<int>(strlen(c
));
1312 BaseString
* base
= m_rep
->baseString();
// NOTE(review): this branch presumably reuses the existing buffer in place;
// its body is not visible here.
1313 if (!base
->isShared() && l
<= base
->capacity
&& m_rep
== base
&& m_rep
->offset
== 0 && base
->preCapacity
== 0) {
1323 m_rep
= Rep::create(d
, l
);
1325 for (int i
= 0; i
< l
; i
++)
1326 d
[i
] = static_cast<unsigned char>(c
[i
]); // use unsigned char to zero-extend instead of sign-extend
// Sets up a scan over the range [data(), data() + size()).
// NOTE(review): the loop body is not visible; going by the name, the function
// presumably reports whether every code unit is <= 0xFF - confirm.
1331 bool UString::is8Bit() const
1333 const UChar
* u
= data();
1334 const UChar
* limit
= u
+ size();
// Indexed character access: takes a position and returns a UChar by value.
// NOTE(review): the body is not visible here, so how an out-of-range `pos` is
// handled must be confirmed against the full source.
1344 UChar
UString::operator[](int pos
) const
// Parses this string as a number and returns it as a double.
//
// Visible stages:
//  1. A check on the first character alone (digit / whitespace) before the
//     general path.
//  2. The text is then handled as a narrow C string `c`; leading ASCII
//     whitespace is skipped.
//  3. A "0x"/"0X" prefix selects hexadecimal accumulation; once the value
//     reaches mantissaOverflowLowerBound it is recomputed with
//     parseIntOverflow() over the digits seen so far.
//  4. Otherwise WTF::strtod() does the conversion, followed by special
//     handling of a leading '-', the literal "Infinity", and strtod()'s
//     acceptance of "inf".
//  5. Trailing ASCII whitespace is skipped and, unless tolerateTrailingJunk is
//     set, anything left before the terminating '\0' is checked for.
//
// tolerateTrailingJunk: when false, characters remaining after the number and
//     trailing whitespace trigger the final check (NOTE(review): presumably
//     the result is NaN in that case - the statement is not visible).
// tolerateEmptyString: selects 0.0 instead of NaN at the visible
//     `return tolerateEmptyString ? 0.0 : NaN` (NOTE(review): presumably
//     reached when nothing but whitespace was found - confirm).
1351 double UString::toDouble(bool tolerateTrailingJunk
, bool tolerateEmptyString
) const
1354 UChar c
= data()[0];
1355 if (isASCIIDigit(c
))
1357 if (isASCIISpace(c
) && tolerateEmptyString
)
1362 // FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk
1363 // after the number, so this is too strict a check.
1367 const char* c
= s
.data();
1369 // skip leading white space
1370 while (isASCIISpace(*c
))
1375 return tolerateEmptyString
? 0.0 : NaN
;
1380 if (*c
== '0' && (*(c
+ 1) == 'x' || *(c
+ 1) == 'X')) {
// Remember where the hex digits start so the overflow path can re-parse them.
1381 const char* firstDigitPosition
= c
+ 2;
1385 if (*c
>= '0' && *c
<= '9')
1386 d
= d
* 16.0 + *c
- '0';
1387 else if ((*c
>= 'A' && *c
<= 'F') || (*c
>= 'a' && *c
<= 'f'))
// Masking with 0xdf clears bit 0x20, folding 'a'..'f' onto 'A'..'F'.
1388 d
= d
* 16.0 + (*c
& 0xdf) - 'A' + 10.0;
1393 if (d
>= mantissaOverflowLowerBound
)
1394 d
= parseIntOverflow(firstDigitPosition
, c
- firstDigitPosition
, 16);
1398 d
= WTF::strtod(c
, &end
);
// strtod() consumed something (or produced a non-zero value) and the result
// is finite.
1399 if ((d
!= 0.0 || end
!= c
) && d
!= Inf
&& d
!= -Inf
) {
1406 else if (*c
== '-') {
1411 // We used strtod() to do the conversion. However, strtod() handles
1412 // infinite values slightly differently than JavaScript in that it
1413 // converts the string "inf" with any capitalization to infinity,
1414 // whereas the ECMA spec requires that it be converted to NaN.
1416 if (c
[0] == 'I' && c
[1] == 'n' && c
[2] == 'f' && c
[3] == 'i' && c
[4] == 'n' && c
[5] == 'i' && c
[6] == 't' && c
[7] == 'y') {
1419 } else if ((d
== Inf
|| d
== -Inf
) && *c
!= 'I' && *c
!= 'i')
1426 // allow trailing white space
1427 while (isASCIISpace(*c
))
1429 // don't allow anything after - unless tolerant=true
1430 if (!tolerateTrailingJunk
&& *c
!= '\0')
1436 double UString::toDouble(bool tolerateTrailingJunk
) const
1438 return toDouble(tolerateTrailingJunk
, true);
1441 double UString::toDouble() const
1443 return toDouble(false, true);
// Converts this string to an unsigned 32-bit integer by way of toDouble().
// The double is compared with itself cast to uint32_t to detect values that
// are not exactly representable, and the function returns
// static_cast<uint32_t>(d).
// NOTE(review): `ok` presumably receives whether that round trip succeeded,
// and `d` is presumably reset inside the failing branch; neither line is
// visible here - confirm.
// NOTE(review): converting a double that is NaN or outside the range of
// uint32_t to an integer is undefined behaviour in C++; confirm that the
// comparison below cannot be reached with such a value, or range-check first.
1446 uint32_t UString::toUInt32(bool* ok
) const
1448 double d
= toDouble();
1451 if (d
!= static_cast<uint32_t>(d
)) {
1459 return static_cast<uint32_t>(d
);
// Same conversion as toUInt32(bool*), except that the caller decides whether an
// empty string is tolerated; trailing junk is never tolerated (first argument
// to toDouble() is false).
// NOTE(review): `ok` presumably receives whether the double survived the round
// trip through uint32_t; the assignment is not visible here - confirm.
// NOTE(review): casting a NaN or out-of-range double to uint32_t is undefined
// behaviour in C++; confirm the value is range-checked before the comparison.
1462 uint32_t UString::toUInt32(bool* ok
, bool tolerateEmptyString
) const
1464 double d
= toDouble(false, tolerateEmptyString
);
1467 if (d
!= static_cast<uint32_t>(d
)) {
1475 return static_cast<uint32_t>(d
);
// Strict decimal parse of this string into an unsigned 32-bit integer.
// As the step comments below describe, the parse rejects an empty string,
// allows a leading '0' only for the string "0" itself, accepts only the
// characters '0'..'9', and checks for overflow out of 32 bits both when
// multiplying by 10 and when adding each digit.
// NOTE(review): `ok` presumably reports success or failure, and the value
// returned on failure is not visible here - confirm.
1478 uint32_t UString::toStrictUInt32(bool* ok
) const
1483 // Empty string is not OK.
1484 int len
= m_rep
->len
;
1487 const UChar
* p
= m_rep
->data();
1488 unsigned short c
= p
[0];
1490 // If the first digit is 0, only 0 itself is OK.
1497 // Convert to UInt32, checking for overflow.
1500 // Process character, turning it into a digit.
1501 if (c
< '0' || c
> '9')
1503 const unsigned d
= c
- '0';
1505 // Multiply by 10, checking for overflow out of 32 bits.
// i * 10 would exceed 0xFFFFFFFF exactly when i > 0xFFFFFFFF / 10.
1506 if (i
> 0xFFFFFFFFU
/ 10)
1510 // Add in the digit, checking for overflow out of 32 bits.
// `max` is the largest accumulator value to which d can still be added safely.
1511 const unsigned max
= 0xFFFFFFFFU
- d
;
1516 // Handle end of string.
1523 // Get next character.
// Forward search for the string `f`, starting at index `pos`.
// On a match the result is the index of the matching position (c - data()).
// Two scans are visible: a plain character-by-character scan up to
// data() + size(), and a general scan that stops at data() + sz - fsz so that
// a full-length comparison never runs past the end of this string.
// NOTE(review): the first scan is presumably the single-character fast path,
// and the not-found result is presumably -1; neither is visible here.
1528 int UString::find(const UString
& f
, int pos
) const
1537 const UChar
* end
= data() + size();
1538 for (const UChar
* c
= data() + pos
; c
< end
; c
++) {
1540 return static_cast<int>(c
- data());
// Last position at which a match of length fsz could still start.
1550 const UChar
* end
= data() + sz
- fsz
;
// Size in bytes (memcmp works in bytes) of the pattern minus its first character.
1551 int fsizeminusone
= (fsz
- 1) * sizeof(UChar
);
1552 const UChar
* fdata
= f
.data();
1553 unsigned short fchar
= fdata
[0];
1555 for (const UChar
* c
= data() + pos
; c
<= end
; c
++) {
// Cheap first-character test before the memcmp over the remainder.
// NOTE(review): comparing c + 1 against fdata is only correct if fdata was
// advanced past the first character on a line not shown here (source line
// 1554 is absent from this listing) - confirm.
1556 if (c
[0] == fchar
&& !memcmp(c
+ 1, fdata
, fsizeminusone
))
1557 return static_cast<int>(c
- data());
// Forward search for the single character `ch`, scanning from data() + pos up
// to data() + size(). On a match the result is the index c - data().
// NOTE(review): the comparison inside the loop, any clamping of a negative
// `pos`, and the not-found result (presumably -1) are not visible here.
1563 int UString::find(UChar ch
, int pos
) const
1567 const UChar
* end
= data() + size();
1568 for (const UChar
* c
= data() + pos
; c
< end
; c
++) {
1570 return static_cast<int>(c
- data());
// Backward search for the string `f`: candidates are tried from data() + pos
// down to data(), and the index of the first match found (c - data()) is
// returned.
// NOTE(review): the memcmp reads fsizeminusone bytes starting at c + 1, which
// stays in bounds only if `pos` was clamped to size() - fsz on lines not
// visible here - confirm. The not-found result is presumably -1.
// NOTE(review): when no match is found the loop decrements `c` below data()
// before the `c >= data()` test fails; forming a pointer before the start of
// an array is formally undefined behaviour in C++.
1576 int UString::rfind(const UString
& f
, int pos
) const
// Size in bytes of the pattern minus its first character (memcmp counts bytes).
1588 int fsizeminusone
= (fsz
- 1) * sizeof(UChar
);
1589 const UChar
* fdata
= f
.data();
1590 for (const UChar
* c
= data() + pos
; c
>= data(); c
--) {
// Compare the first character directly, then the remainder with memcmp.
1591 if (*c
== *fdata
&& !memcmp(c
+ 1, fdata
+ 1, fsizeminusone
))
1592 return static_cast<int>(c
- data());
// Backward search for the single character `ch`, scanning from data() + pos
// down to data(). On a match the result is the index c - data().
// NOTE(review): the `pos + 1 >= size()` test presumably clamps `pos` to the
// last valid index; the clamping statement, the comparison inside the loop and
// the not-found result (presumably -1) are not visible here.
// NOTE(review): as the loop ends, `c` is decremented below data(), which is
// formally undefined behaviour for pointer arithmetic in C++.
1598 int UString::rfind(UChar ch
, int pos
) const
1602 if (pos
+ 1 >= size())
1604 for (const UChar
* c
= data() + pos
; c
>= data(); c
--) {
1606 return static_cast<int>(c
- data());
// Returns the substring of `len` characters starting at `pos`.
// The result is built with Rep::create(m_rep, pos, len), i.e. a new Rep
// described by an offset and a length relative to this string's rep.
// NOTE(review): the `pos == 0 && len == s` test presumably returns *this for a
// whole-string request, and the range clamping of `pos`/`len` is not visible
// here - confirm.
1612 UString
UString::substr(int pos
, int len
) const
1625 if (pos
== 0 && len
== s
)
1628 return UString(Rep::create(m_rep
, pos
, len
));
// Compares a UString with a C string for equality.
// Each UChar is compared against the corresponding char of `s2` read as an
// unsigned char, so chars with the high bit set compare as 128..255 rather
// than as negative values. The strings are equal only if both are exhausted
// at the same time.
// NOTE(review): the early `return s1.isEmpty()` is presumably the null-`s2`
// case; its guard is not visible here - confirm.
1631 bool operator==(const UString
& s1
, const char *s2
)
1634 return s1
.isEmpty();
1636 const UChar
* u
= s1
.data();
1637 const UChar
* uend
= u
+ s1
.size();
1638 while (u
!= uend
&& *s2
) {
1639 if (u
[0] != (unsigned char)*s2
)
// Equal only when both strings ended together.
1645 return u
== uend
&& *s2
== 0;
// Less-than ordering for UStrings, by UChar value.
// Walks both strings while the characters match, for at most lmin (the shorter
// length) positions; one visible return compares the characters at the current
// position.
// NOTE(review): that return presumably applies when a mismatch was found
// within lmin, with the lengths deciding otherwise; those lines are not
// visible here - confirm.
1648 bool operator<(const UString
& s1
, const UString
& s2
)
1650 const int l1
= s1
.size();
1651 const int l2
= s2
.size();
1652 const int lmin
= l1
< l2
? l1
: l2
;
1653 const UChar
* c1
= s1
.data();
1654 const UChar
* c2
= s2
.data();
1656 while (l
< lmin
&& *c1
== *c2
) {
1662 return (c1
[0] < c2
[0]);
// Greater-than ordering for UStrings, by UChar value.
// Walks both strings while the characters match, for at most lmin (the shorter
// length) positions; one visible return compares the characters at the current
// position.
// NOTE(review): that return presumably applies when a mismatch was found
// within lmin, with the lengths deciding otherwise; those lines are not
// visible here - confirm.
1667 bool operator>(const UString
& s1
, const UString
& s2
)
1669 const int l1
= s1
.size();
1670 const int l2
= s2
.size();
1671 const int lmin
= l1
< l2
? l1
: l2
;
1672 const UChar
* c1
= s1
.data();
1673 const UChar
* c2
= s2
.data();
1675 while (l
< lmin
&& *c1
== *c2
) {
1681 return (c1
[0] > c2
[0]);
// Three-way comparison of two UStrings by UChar value.
// Walks both strings while the characters match, for at most lmin (the shorter
// length) positions. The visible returns yield 1 or -1, from either the
// characters at the current position or the two lengths.
// NOTE(review): presumably 0 is returned when the strings are identical, and
// the length comparison is used only when one string is a prefix of the other;
// those guards are not visible here - confirm.
1686 int compare(const UString
& s1
, const UString
& s2
)
1688 const int l1
= s1
.size();
1689 const int l2
= s2
.size();
1690 const int lmin
= l1
< l2
? l1
: l2
;
1691 const UChar
* c1
= s1
.data();
1692 const UChar
* c2
= s2
.data();
1694 while (l
< lmin
&& *c1
== *c2
) {
1701 return (c1
[0] > c2
[0]) ? 1 : -1;
1706 return (l1
> l2
) ? 1 : -1;
// Compares the character data of two Reps.
// The lengths are compared first, then a loop visits each of the `length`
// positions in both buffers.
// NOTE(review): the length mismatch and the first differing character
// presumably both return false, with true returned otherwise; the return
// statements are not visible here - confirm.
1709 bool equal(const UString::Rep
* r
, const UString::Rep
* b
)
1711 int length
= r
->len
;
1712 if (length
!= b
->len
)
1714 const UChar
* d
= r
->data();
1715 const UChar
* s
= b
->data();
1716 for (int i
= 0; i
!= length
; ++i
) {
// Converts this string from UTF-16 to UTF-8 and returns it as a CString.
// `strict` is forwarded unchanged to convertUTF16ToUTF8().
// NOTE(review): the action taken when the conversion does not return
// conversionOK is not visible here (presumably an empty/null CString is
// returned) - confirm.
1723 CString
UString::UTF8String(bool strict
) const
1725 // Allocate a buffer big enough to hold all the characters.
1726 const int length
= size();
// Three bytes per UTF-16 code unit is a safe upper bound: a code unit outside
// the surrogate range needs at most 3 UTF-8 bytes, and a surrogate pair
// (2 code units) needs 4.
1727 Vector
<char, 1024> buffer(length
* 3);
1729 // Convert to runs of 8-bit characters.
1730 char* p
= buffer
.data();
1731 const UChar
* d
= reinterpret_cast<const UChar
*>(&data()[0]);
// `d` and `p` are passed by address so the converter can advance them; `p`
// ends up one past the last byte written.
1732 ConversionResult result
= convertUTF16ToUTF8(&d
, d
+ length
, &p
, p
+ buffer
.size(), strict
);
1733 if (result
!= conversionOK
)
// The output length is the distance `p` was advanced from the buffer start.
1736 return CString(buffer
.data(), p
- buffer
.data());
1739 // For use in error handling code paths -- having this not be inlined helps avoid PIC branches to fetch the global on Mac OS X.
1740 NEVER_INLINE
void UString::makeNull()
1742 m_rep
= &Rep::null();
// Returns the address of the Rep::null() instance.
1745 // For use in error handling code paths -- having this not be inlined helps avoid PIC branches to fetch the global on Mac OS X.
1746 NEVER_INLINE
UString::Rep
* UString::nullRep()
1748 return &Rep::null();