1 // -*- c-basic-offset: 2 -*-
3 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
4 * Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved.
5 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
28 #include "collector.h"
31 #include "identifier.h"
32 #include "operations.h"
39 #include <wtf/Assertions.h>
40 #include <wtf/ASCIICType.h>
41 #include <wtf/MathExtras.h>
42 #include <wtf/Vector.h>
43 #include <wtf/unicode/UTF8.h>
53 using namespace WTF::Unicode
;
58 extern const double NaN
;
59 extern const double Inf
;
// Sentinel used by the size computations in this file to report that a
// requested size cannot be handled. It is the largest size_t value, so it
// always exceeds the maxUChars() limit that allocChars() and reallocChars()
// compare against.
//
// The return type is a plain size_t: a top-level const on a scalar returned
// by value is ignored by the compiler and only triggers warnings.
static inline size_t overflowIndicator()
{
  return std::numeric_limits<size_t>::max();
}
62 static inline const size_t maxUChars() { return std::numeric_limits
<size_t>::max() / sizeof(UChar
); }
64 static inline UChar
* allocChars(size_t length
)
67 if (length
> maxUChars())
69 return static_cast<UChar
*>(fastMalloc(sizeof(UChar
) * length
));
72 static inline UChar
* reallocChars(UChar
* buffer
, size_t length
)
75 if (length
> maxUChars())
77 return static_cast<UChar
*>(fastRealloc(buffer
, sizeof(UChar
) * length
));
80 COMPILE_ASSERT(sizeof(UChar
) == 2, uchar_is_2_bytes
)
82 CString::CString(const char *c
)
85 data
= new char[length
+1];
86 memcpy(data
, c
, length
+ 1);
89 CString::CString(const char *c
, size_t len
)
92 data
= new char[len
+1];
97 CString::CString(const CString
&b
)
101 data
= new char[length
+1];
102 memcpy(data
, b
.data
, length
+ 1);
113 CString
&CString::append(const CString
&t
)
116 n
= new char[length
+t
.length
+1];
118 memcpy(n
, data
, length
);
120 memcpy(n
+length
, t
.data
, t
.length
);
130 CString
&CString::operator=(const char *c
)
135 data
= new char[length
+1];
136 memcpy(data
, c
, length
+ 1);
141 CString
&CString::operator=(const CString
&str
)
150 data
= new char[length
+ 1];
151 memcpy(data
, str
.data
, length
+ 1);
159 bool operator==(const CString
& c1
, const CString
& c2
)
161 size_t len
= c1
.size();
162 return len
== c2
.size() && (len
== 0 || memcmp(c1
.c_str(), c2
.c_str(), len
) == 0);
165 // Hack here to avoid a global with a constructor; point to an unsigned short instead of a UChar.
// Storage whose address is reinterpret_cast to UChar* in the initializer of
// UString::Rep::empty below.
166 static unsigned short almostUChar
;
// Statically initialized shared Rep instances. Each initializer stores the
// Rep's own address in its sixth slot.
// NOTE(review): presumably that slot is the baseString field, and the third
// slot (1) the reference count -- confirm against the Rep declaration, which
// is not visible in this file.
167 UString::Rep
UString::Rep::null
= { 0, 0, 1, 0, 0, &UString::Rep::null
, 0, 0, 0, 0, 0, 0 };
// Same layout as Rep::null, except that one slot holds the address of
// almostUChar instead of 0.
168 UString::Rep
UString::Rep::empty
= { 0, 0, 1, 0, 0, &UString::Rep::empty
, 0, reinterpret_cast<UChar
*>(&almostUChar
), 0, 0, 0, 0 };
// Lower bound applied by UString::ascii() to the size it requests for
// statBuffer.
169 const int normalStatBufferSize
= 4096;
// Buffer that UString::ascii() writes into; ascii() deletes and reallocates it
// whenever the needed size differs from statBufferSize.
170 static char *statBuffer
= 0; // FIXME: This buffer is never deallocated.
// Size, in chars, of the current statBuffer allocation (0 before the first
// allocation).
171 static int statBufferSize
= 0;
173 PassRefPtr
<UString::Rep
> UString::Rep::createCopying(const UChar
*d
, int l
)
175 ASSERT(JSLock::lockCount() > 0);
177 int sizeInBytes
= l
* sizeof(UChar
);
178 UChar
*copyD
= static_cast<UChar
*>(fastMalloc(sizeInBytes
));
179 memcpy(copyD
, d
, sizeInBytes
);
181 return create(copyD
, l
);
184 PassRefPtr
<UString::Rep
> UString::Rep::create(UChar
*d
, int l
)
186 ASSERT(JSLock::lockCount() > 0);
199 r
->usedPreCapacity
= 0;
202 // steal the single reference this Rep was created with
206 PassRefPtr
<UString::Rep
> UString::Rep::create(PassRefPtr
<Rep
> base
, int offset
, int length
)
208 ASSERT(JSLock::lockCount() > 0);
211 int baseOffset
= base
->offset
;
213 base
= base
->baseString
;
215 ASSERT(-(offset
+ baseOffset
) <= base
->usedPreCapacity
);
216 ASSERT(offset
+ baseOffset
+ length
<= base
->usedCapacity
);
219 r
->offset
= baseOffset
+ offset
;
224 r
->baseString
= base
.releaseRef();
229 r
->usedPreCapacity
= 0;
232 // steal the single reference this Rep was created with
236 void UString::Rep::destroy()
238 ASSERT(JSLock::lockCount() > 0);
241 Identifier::remove(this);
242 if (baseString
!= this) {
250 // Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
251 // or anything like that.
// 0x9e3779b9 is 2^32 divided by the golden ratio, truncated to 32 bits.
// NOTE(review): presumably the initial hash value for the computeHash
// overloads below -- the lines that use it are not visible here; confirm.
252 const unsigned PHI
= 0x9e3779b9U
;
254 // Paul Hsieh's SuperFastHash
255 // http://www.azillionmonkeys.com/qed/hash.html
256 unsigned UString::Rep::computeHash(const UChar
*s
, int len
)
268 tmp
= (s
[1].uc
<< 11) ^ hash
;
269 hash
= (hash
<< 16) ^ tmp
;
281 // Force "avalanching" of final 127 bits
288 // this avoids ever returning a hash code of 0, since that is used to
289 // signal "hash not computed yet", using a value that is likely to be
290 // effectively the same as 0 when the low bits are masked
297 // Paul Hsieh's SuperFastHash
298 // http://www.azillionmonkeys.com/qed/hash.html
299 unsigned UString::Rep::computeHash(const char *s
)
301 // This hash is designed to work on 16-bit chunks at a time. But since the normal case
302 // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
303 // were 16-bit chunks, which should give matching results
307 size_t l
= strlen(s
);
314 hash
+= (unsigned char)s
[0];
315 tmp
= ((unsigned char)s
[1] << 11) ^ hash
;
316 hash
= (hash
<< 16) ^ tmp
;
323 hash
+= (unsigned char)s
[0];
328 // Force "avalanching" of final 127 bits
335 // this avoids ever returning a hash code of 0, since that is used to
336 // signal "hash not computed yet", using a value that is likely to be
337 // effectively the same as 0 when the low bits are masked
344 // put these early so they can be inlined
345 inline size_t UString::expandedSize(size_t size
, size_t otherSize
) const
347 // Do the size calculation in two parts, returning overflowIndicator if
348 // we overflow the maximum value that we can handle.
350 if (size
> maxUChars())
351 return overflowIndicator();
353 size_t expandedSize
= ((size
+ 10) / 10 * 11) + 1;
354 if (maxUChars() - expandedSize
< otherSize
)
355 return overflowIndicator();
357 return expandedSize
+ otherSize
;
360 inline int UString::usedCapacity() const
362 return m_rep
->baseString
->usedCapacity
;
365 inline int UString::usedPreCapacity() const
367 return m_rep
->baseString
->usedPreCapacity
;
370 void UString::expandCapacity(int requiredLength
)
372 Rep
* r
= m_rep
->baseString
;
374 if (requiredLength
> r
->capacity
) {
375 size_t newCapacity
= expandedSize(requiredLength
, r
->preCapacity
);
376 UChar
* oldBuf
= r
->buf
;
377 r
->buf
= reallocChars(r
->buf
, newCapacity
);
383 r
->capacity
= newCapacity
- r
->preCapacity
;
385 if (requiredLength
> r
->usedCapacity
) {
386 r
->usedCapacity
= requiredLength
;
390 void UString::expandPreCapacity(int requiredPreCap
)
392 Rep
* r
= m_rep
->baseString
;
394 if (requiredPreCap
> r
->preCapacity
) {
395 size_t newCapacity
= expandedSize(requiredPreCap
, r
->capacity
);
396 int delta
= newCapacity
- r
->capacity
- r
->preCapacity
;
398 UChar
* newBuf
= allocChars(newCapacity
);
403 memcpy(newBuf
+ delta
, r
->buf
, (r
->capacity
+ r
->preCapacity
) * sizeof(UChar
));
407 r
->preCapacity
= newCapacity
- r
->capacity
;
409 if (requiredPreCap
> r
->usedPreCapacity
) {
410 r
->usedPreCapacity
= requiredPreCap
;
414 UString::UString(const char *c
)
426 size_t length
= strlen(c
);
427 UChar
*d
= allocChars(length
);
431 for (size_t i
= 0; i
< length
; i
++)
433 m_rep
= Rep::create(d
, static_cast<int>(length
));
437 UString::UString(const UChar
*c
, int length
)
442 m_rep
= Rep::createCopying(c
, length
);
445 UString::UString(UChar
*c
, int length
, bool copy
)
450 m_rep
= Rep::createCopying(c
, length
);
452 m_rep
= Rep::create(c
, length
);
455 UString::UString(const Vector
<UChar
>& buffer
)
460 m_rep
= Rep::createCopying(buffer
.data(), buffer
.size());
464 UString::UString(const UString
&a
, const UString
&b
)
466 int aSize
= a
.size();
467 int aOffset
= a
.m_rep
->offset
;
468 int bSize
= b
.size();
469 int bOffset
= b
.m_rep
->offset
;
470 int length
= aSize
+ bSize
;
477 } else if (bSize
== 0) {
480 } else if (aOffset
+ aSize
== a
.usedCapacity() && aSize
>= minShareSize
&& 4 * aSize
>= bSize
&&
481 (-bOffset
!= b
.usedPreCapacity() || aSize
>= bSize
)) {
482 // - a reaches the end of its buffer so it qualifies for shared append
483 // - also, it's at least a quarter the length of b - appending to a much shorter
484 // string does more harm than good
485 // - however, if b qualifies for prepend and is longer than a, we'd rather prepend
487 x
.expandCapacity(aOffset
+ length
);
488 if (a
.data() && x
.data()) {
489 memcpy(const_cast<UChar
*>(a
.data() + aSize
), b
.data(), bSize
* sizeof(UChar
));
490 m_rep
= Rep::create(a
.m_rep
, 0, length
);
493 } else if (-bOffset
== b
.usedPreCapacity() && bSize
>= minShareSize
&& 4 * bSize
>= aSize
) {
494 // - b reaches the beginning of its buffer so it qualifies for shared prepend
495 // - also, it's at least a quarter the length of a - prepending to a much shorter
496 // string does more harm than good
498 y
.expandPreCapacity(-bOffset
+ aSize
);
499 if (b
.data() && y
.data()) {
500 memcpy(const_cast<UChar
*>(b
.data() - aSize
), a
.data(), aSize
* sizeof(UChar
));
501 m_rep
= Rep::create(b
.m_rep
, -aSize
, length
);
505 // a does not qualify for append, and b does not qualify for prepend, gotta make a whole new string
506 size_t newCapacity
= expandedSize(length
, 0);
507 UChar
* d
= allocChars(newCapacity
);
511 memcpy(d
, a
.data(), aSize
* sizeof(UChar
));
512 memcpy(d
+ aSize
, b
.data(), bSize
* sizeof(UChar
));
513 m_rep
= Rep::create(d
, length
);
514 m_rep
->capacity
= newCapacity
;
519 const UString
& UString::null()
521 static UString
* n
= new UString
;
525 UString
UString::from(int i
)
527 UChar buf
[1 + sizeof(i
) * 3];
528 UChar
*end
= buf
+ sizeof(buf
) / sizeof(UChar
);
533 } else if (i
== INT_MIN
) {
534 char minBuf
[1 + sizeof(i
) * 3];
535 snprintf(minBuf
, 1 + sizeof(i
) * 3, "%d", INT_MIN
);
536 return UString(minBuf
);
538 bool negative
= false;
544 *--p
= (unsigned short)((i
% 10) + '0');
552 return UString(p
, static_cast<int>(end
- p
));
555 UString
UString::from(unsigned int u
)
557 UChar buf
[sizeof(u
) * 3];
558 UChar
*end
= buf
+ sizeof(buf
) / sizeof(UChar
);
565 *--p
= (unsigned short)((u
% 10) + '0');
570 return UString(p
, static_cast<int>(end
- p
));
573 UString
UString::from(long l
)
575 UChar buf
[1 + sizeof(l
) * 3];
576 UChar
*end
= buf
+ sizeof(buf
) / sizeof(UChar
);
581 } else if (l
== LONG_MIN
) {
582 char minBuf
[1 + sizeof(l
) * 3];
583 snprintf(minBuf
, 1 + sizeof(l
) * 3, "%ld", LONG_MIN
);
584 return UString(minBuf
);
586 bool negative
= false;
592 *--p
= (unsigned short)((l
% 10) + '0');
600 return UString(p
, static_cast<int>(end
- p
));
603 UString
UString::from(double d
)
605 // avoid ever printing -NaN, in JS conceptually there is only one NaN value
614 char *result
= kjs_dtoa(d
, 0, 0, &decimalPoint
, &sign
, NULL
);
615 int length
= static_cast<int>(strlen(result
));
622 if (decimalPoint
<= 0 && decimalPoint
> -6) {
625 for (int j
= decimalPoint
; j
< 0; j
++) {
628 strlcpy(buf
+ i
, result
, buflength
- i
);
629 } else if (decimalPoint
<= 21 && decimalPoint
> 0) {
630 if (length
<= decimalPoint
) {
631 strlcpy(buf
+ i
, result
, buflength
- i
);
633 for (int j
= 0; j
< decimalPoint
- length
; j
++) {
638 int len
= (decimalPoint
<= buflength
- i
? decimalPoint
: buflength
- i
);
639 strncpy(buf
+ i
, result
, len
);
642 strlcpy(buf
+ i
, result
+ decimalPoint
, buflength
- i
);
644 } else if (result
[0] < '0' || result
[0] > '9') {
645 strlcpy(buf
+ i
, result
, buflength
- i
);
647 buf
[i
++] = result
[0];
650 strlcpy(buf
+ i
, result
+ 1, buflength
- i
);
655 buf
[i
++] = (decimalPoint
>= 0) ? '+' : '-';
656 // decimalPoint can't be more than 3 digits decimal given the
657 // nature of float representation
658 int exponential
= decimalPoint
- 1;
660 exponential
= -exponential
;
661 if (exponential
>= 100)
662 buf
[i
++] = static_cast<char>('0' + exponential
/ 100);
663 if (exponential
>= 10)
664 buf
[i
++] = static_cast<char>('0' + (exponential
% 100) / 10);
665 buf
[i
++] = static_cast<char>('0' + exponential
% 10);
667 assert(i
<= buflength
);
670 kjs_freedtoa(result
);
675 UString
UString::spliceSubstringsWithSeparators(const Range
* substringRanges
, int rangeCount
, const UString
* separators
, int separatorCount
) const
677 if (rangeCount
== 1 && separatorCount
== 0) {
678 int thisSize
= size();
679 int position
= substringRanges
[0].position
;
680 int length
= substringRanges
[0].length
;
681 if (position
<= 0 && length
>= thisSize
)
683 return UString::Rep::create(m_rep
, max(0, position
), min(thisSize
, length
));
687 for (int i
= 0; i
< rangeCount
; i
++)
688 totalLength
+= substringRanges
[i
].length
;
689 for (int i
= 0; i
< separatorCount
; i
++)
690 totalLength
+= separators
[i
].size();
692 if (totalLength
== 0)
695 UChar
* buffer
= allocChars(totalLength
);
699 int maxCount
= max(rangeCount
, separatorCount
);
701 for (int i
= 0; i
< maxCount
; i
++) {
702 if (i
< rangeCount
) {
703 memcpy(buffer
+ bufferPos
, data() + substringRanges
[i
].position
, substringRanges
[i
].length
* sizeof(UChar
));
704 bufferPos
+= substringRanges
[i
].length
;
706 if (i
< separatorCount
) {
707 memcpy(buffer
+ bufferPos
, separators
[i
].data(), separators
[i
].size() * sizeof(UChar
));
708 bufferPos
+= separators
[i
].size();
712 return UString::Rep::create(buffer
, totalLength
);
715 UString
&UString::append(const UString
&t
)
717 int thisSize
= size();
718 int thisOffset
= m_rep
->offset
;
719 int tSize
= t
.size();
720 int length
= thisSize
+ tSize
;
726 } else if (tSize
== 0) {
728 } else if (m_rep
->baseIsSelf() && m_rep
->rc
== 1) {
729 // this is direct and has refcount of 1 (so we can just alter it directly)
730 expandCapacity(thisOffset
+ length
);
732 memcpy(const_cast<UChar
*>(data() + thisSize
), t
.data(), tSize
* sizeof(UChar
));
736 } else if (thisOffset
+ thisSize
== usedCapacity() && thisSize
>= minShareSize
) {
737 // this reaches the end of the buffer - extend it if it's long enough to append to
738 expandCapacity(thisOffset
+ length
);
740 memcpy(const_cast<UChar
*>(data() + thisSize
), t
.data(), tSize
* sizeof(UChar
));
741 m_rep
= Rep::create(m_rep
, 0, length
);
744 // this is shared with someone using more capacity, gotta make a whole new string
745 size_t newCapacity
= expandedSize(length
, 0);
746 UChar
* d
= allocChars(newCapacity
);
750 memcpy(d
, data(), thisSize
* sizeof(UChar
));
751 memcpy(const_cast<UChar
*>(d
+ thisSize
), t
.data(), tSize
* sizeof(UChar
));
752 m_rep
= Rep::create(d
, length
);
753 m_rep
->capacity
= newCapacity
;
760 UString
&UString::append(const char *t
)
762 int thisSize
= size();
763 int thisOffset
= m_rep
->offset
;
764 int tSize
= static_cast<int>(strlen(t
));
765 int length
= thisSize
+ tSize
;
771 } else if (tSize
== 0) {
772 // t is empty, we'll just return *this below.
773 } else if (m_rep
->baseIsSelf() && m_rep
->rc
== 1) {
774 // this is direct and has refcount of 1 (so we can just alter it directly)
775 expandCapacity(thisOffset
+ length
);
776 UChar
*d
= const_cast<UChar
*>(data());
778 for (int i
= 0; i
< tSize
; ++i
)
779 d
[thisSize
+ i
] = t
[i
];
783 } else if (thisOffset
+ thisSize
== usedCapacity() && thisSize
>= minShareSize
) {
784 // this string reaches the end of the buffer - extend it
785 expandCapacity(thisOffset
+ length
);
786 UChar
*d
= const_cast<UChar
*>(data());
788 for (int i
= 0; i
< tSize
; ++i
)
789 d
[thisSize
+ i
] = t
[i
];
790 m_rep
= Rep::create(m_rep
, 0, length
);
793 // this is shared with someone using more capacity, gotta make a whole new string
794 size_t newCapacity
= expandedSize(length
, 0);
795 UChar
* d
= allocChars(newCapacity
);
799 memcpy(d
, data(), thisSize
* sizeof(UChar
));
800 for (int i
= 0; i
< tSize
; ++i
)
801 d
[thisSize
+ i
] = t
[i
];
802 m_rep
= Rep::create(d
, length
);
803 m_rep
->capacity
= newCapacity
;
810 UString
&UString::append(unsigned short c
)
812 int thisOffset
= m_rep
->offset
;
817 // this is empty - must make a new m_rep because we don't want to pollute the shared empty one
818 size_t newCapacity
= expandedSize(1, 0);
819 UChar
* d
= allocChars(newCapacity
);
824 m_rep
= Rep::create(d
, 1);
825 m_rep
->capacity
= newCapacity
;
827 } else if (m_rep
->baseIsSelf() && m_rep
->rc
== 1) {
828 // this is direct and has refcount of 1 (so we can just alter it directly)
829 expandCapacity(thisOffset
+ length
+ 1);
830 UChar
*d
= const_cast<UChar
*>(data());
833 m_rep
->len
= length
+ 1;
836 } else if (thisOffset
+ length
== usedCapacity() && length
>= minShareSize
) {
837 // this reaches the end of the string - extend it and share
838 expandCapacity(thisOffset
+ length
+ 1);
839 UChar
*d
= const_cast<UChar
*>(data());
842 m_rep
= Rep::create(m_rep
, 0, length
+ 1);
845 // this is shared with someone using more capacity, gotta make a whole new string
846 size_t newCapacity
= expandedSize(length
+ 1, 0);
847 UChar
* d
= allocChars(newCapacity
);
851 memcpy(d
, data(), length
* sizeof(UChar
));
853 m_rep
= Rep::create(d
, length
+ 1);
854 m_rep
->capacity
= newCapacity
;
861 CString
UString::cstring() const
866 char *UString::ascii() const
868 // Never make the buffer smaller than normalStatBufferSize.
869 // Thus we almost never need to reallocate.
871 int neededSize
= length
+ 1;
872 if (neededSize
< normalStatBufferSize
) {
873 neededSize
= normalStatBufferSize
;
875 if (neededSize
!= statBufferSize
) {
876 delete [] statBuffer
;
877 statBuffer
= new char [neededSize
];
878 statBufferSize
= neededSize
;
881 const UChar
*p
= data();
882 char *q
= statBuffer
;
883 const UChar
*limit
= p
+ length
;
885 *q
= static_cast<char>(p
->uc
);
894 UString
&UString::operator=(const char *c
)
906 int l
= static_cast<int>(strlen(c
));
908 if (m_rep
->rc
== 1 && l
<= m_rep
->capacity
&& m_rep
->baseIsSelf() && m_rep
->offset
== 0 && m_rep
->preCapacity
== 0) {
918 m_rep
= Rep::create(d
, l
);
920 for (int i
= 0; i
< l
; i
++)
926 bool UString::is8Bit() const
928 const UChar
*u
= data();
929 const UChar
*limit
= u
+ size();
939 const UChar
UString::operator[](int pos
) const
946 double UString::toDouble(bool tolerateTrailingJunk
, bool tolerateEmptyString
) const
950 // FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk
951 // after the number, so is8Bit is too strict a check.
955 const char *c
= ascii();
957 // skip leading white space
958 while (isASCIISpace(*c
))
963 return tolerateEmptyString
? 0.0 : NaN
;
966 if (*c
== '0' && (*(c
+1) == 'x' || *(c
+1) == 'X')) {
967 const char* firstDigitPosition
= c
+ 2;
971 if (*c
>= '0' && *c
<= '9')
972 d
= d
* 16.0 + *c
- '0';
973 else if ((*c
>= 'A' && *c
<= 'F') || (*c
>= 'a' && *c
<= 'f'))
974 d
= d
* 16.0 + (*c
& 0xdf) - 'A' + 10.0;
979 if (d
>= mantissaOverflowLowerBound
)
980 d
= parseIntOverflow(firstDigitPosition
, c
- firstDigitPosition
, 16);
984 d
= kjs_strtod(c
, &end
);
985 if ((d
!= 0.0 || end
!= c
) && d
!= Inf
&& d
!= -Inf
) {
992 else if (*c
== '-') {
997 // We used strtod() to do the conversion. However, strtod() handles
998 // infinite values slightly differently than JavaScript in that it
999 // converts the string "inf" with any capitalization to infinity,
1000 // whereas the ECMA spec requires that it be converted to NaN.
1002 if (c
[0] == 'I' && c
[1] == 'n' && c
[2] == 'f' && c
[3] == 'i' && c
[4] == 'n' && c
[5] == 'i' && c
[6] == 't' && c
[7] == 'y') {
1005 } else if ((d
== Inf
|| d
== -Inf
) && *c
!= 'I' && *c
!= 'i')
1012 // allow trailing white space
1013 while (isASCIISpace(*c
))
1015 // reject any remaining characters unless tolerateTrailingJunk is true
1016 if (!tolerateTrailingJunk
&& *c
!= '\0')
1022 double UString::toDouble(bool tolerateTrailingJunk
) const
1024 return toDouble(tolerateTrailingJunk
, true);
1027 double UString::toDouble() const
1029 return toDouble(false, true);
1032 uint32_t UString::toUInt32(bool *ok
) const
1034 double d
= toDouble();
1037 if (d
!= static_cast<uint32_t>(d
)) {
1045 return static_cast<uint32_t>(d
);
1048 uint32_t UString::toUInt32(bool *ok
, bool tolerateEmptyString
) const
1050 double d
= toDouble(false, tolerateEmptyString
);
1053 if (d
!= static_cast<uint32_t>(d
)) {
1061 return static_cast<uint32_t>(d
);
1064 uint32_t UString::toStrictUInt32(bool *ok
) const
1069 // Empty string is not OK.
1070 int len
= m_rep
->len
;
1073 const UChar
*p
= m_rep
->data();
1074 unsigned short c
= p
->unicode();
1076 // If the first digit is 0, only 0 itself is OK.
1083 // Convert to UInt32, checking for overflow.
1086 // Process character, turning it into a digit.
1087 if (c
< '0' || c
> '9')
1089 const unsigned d
= c
- '0';
1091 // Multiply by 10, checking for overflow out of 32 bits.
1092 if (i
> 0xFFFFFFFFU
/ 10)
1096 // Add in the digit, checking for overflow out of 32 bits.
1097 const unsigned max
= 0xFFFFFFFFU
- d
;
1102 // Handle end of string.
1109 // Get next character.
1110 c
= (++p
)->unicode();
1114 int UString::find(const UString
&f
, int pos
) const
1124 const UChar
*end
= data() + sz
- fsz
;
1125 int fsizeminusone
= (fsz
- 1) * sizeof(UChar
);
1126 const UChar
*fdata
= f
.data();
1127 unsigned short fchar
= fdata
->uc
;
1129 for (const UChar
*c
= data() + pos
; c
<= end
; c
++)
1130 if (c
->uc
== fchar
&& !memcmp(c
+ 1, fdata
, fsizeminusone
))
1131 return static_cast<int>(c
- data());
1136 int UString::find(UChar ch
, int pos
) const
1140 const UChar
*end
= data() + size();
1141 for (const UChar
*c
= data() + pos
; c
< end
; c
++)
1143 return static_cast<int>(c
- data());
1148 int UString::rfind(const UString
&f
, int pos
) const
1160 int fsizeminusone
= (fsz
- 1) * sizeof(UChar
);
1161 const UChar
*fdata
= f
.data();
1162 for (const UChar
*c
= data() + pos
; c
>= data(); c
--) {
1163 if (*c
== *fdata
&& !memcmp(c
+ 1, fdata
+ 1, fsizeminusone
))
1164 return static_cast<int>(c
- data());
1170 int UString::rfind(UChar ch
, int pos
) const
1174 if (pos
+ 1 >= size())
1176 for (const UChar
*c
= data() + pos
; c
>= data(); c
--) {
1178 return static_cast<int>(c
-data());
1184 UString
UString::substr(int pos
, int len
) const
1197 if (pos
== 0 && len
== s
)
1200 return UString(Rep::create(m_rep
, pos
, len
));
1203 bool operator==(const UString
& s1
, const UString
& s2
)
1205 if (s1
.m_rep
->len
!= s2
.m_rep
->len
)
1208 return (memcmp(s1
.m_rep
->data(), s2
.m_rep
->data(),
1209 s1
.m_rep
->len
* sizeof(UChar
)) == 0);
1212 bool operator==(const UString
& s1
, const char *s2
)
1215 return s1
.isEmpty();
1218 const UChar
*u
= s1
.data();
1219 const UChar
*uend
= u
+ s1
.size();
1220 while (u
!= uend
&& *s2
) {
1221 if (u
->uc
!= (unsigned char)*s2
)
1227 return u
== uend
&& *s2
== 0;
1230 bool operator<(const UString
& s1
, const UString
& s2
)
1232 const int l1
= s1
.size();
1233 const int l2
= s2
.size();
1234 const int lmin
= l1
< l2
? l1
: l2
;
1235 const UChar
*c1
= s1
.data();
1236 const UChar
*c2
= s2
.data();
1238 while (l
< lmin
&& *c1
== *c2
) {
1244 return (c1
->uc
< c2
->uc
);
1249 int compare(const UString
& s1
, const UString
& s2
)
1251 const int l1
= s1
.size();
1252 const int l2
= s2
.size();
1253 const int lmin
= l1
< l2
? l1
: l2
;
1254 const UChar
*c1
= s1
.data();
1255 const UChar
*c2
= s2
.data();
1257 while (l
< lmin
&& *c1
== *c2
) {
1264 return (c1
->uc
> c2
->uc
) ? 1 : -1;
1269 return (l1
> l2
) ? 1 : -1;
1272 CString
UString::UTF8String(bool strict
) const
1274 // Allocate a buffer big enough to hold all the characters.
1275 const int length
= size();
1276 Vector
<char, 1024> buffer(length
* 3);
1278 // Convert to runs of 8-bit characters.
1279 char* p
= buffer
.data();
1280 const ::UChar
* d
= reinterpret_cast<const ::UChar
*>(&data()->uc
);
1281 ConversionResult result
= convertUTF16ToUTF8(&d
, d
+ length
, &p
, p
+ buffer
.size(), strict
);
1282 if (result
!= conversionOK
)
1285 return CString(buffer
.data(), p
- buffer
.data());