2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5 * Copyright (C) 2009 Google Inc. All rights reserved.
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
27 #include "JSGlobalObjectFunctions.h"
28 #include "Collector.h"
30 #include "Identifier.h"
31 #include "Operations.h"
39 #include <wtf/ASCIICType.h>
40 #include <wtf/Assertions.h>
41 #include <wtf/MathExtras.h>
42 #include <wtf/StringExtras.h>
43 #include <wtf/Vector.h>
44 #include <wtf/unicode/UTF8.h>
45 #include <wtf/StringExtras.h>
// Makes the names in WTF::Unicode usable without qualification in this file.
52 using namespace WTF::Unicode
;
// NaN and Inf are double constants declared extern here (their definitions
// are not in the visible part of this file). The toDouble() code in this file
// returns NaN and compares parsed values against Inf / -Inf.
57 extern const double NaN
;
58 extern const double Inf
;
60 // The null string is immutable, except for refCount.
// Definition of UString::s_nullUString, a pointer to a UString.
// initializeUString() stores a newly allocated UString in it.
61 UString
* UString::s_nullUString
;
// Sets up UString's shared state: allocates a UString with new and stores the
// pointer in UString::s_nullUString.
// NOTE(review): the statement that the comment about UStringImpl::empty()
// refers to is not visible in this listing.
63 void initializeUString()
65 // UStringImpl::empty() does not construct its static string in a threadsafe fashion,
66 // so ensure it has been initialized from here.
69 UString::s_nullUString
= new UString
;
// Constructs a UString from the C string c; the representation (m_rep) is
// obtained from Rep::create(c).
// NOTE(review): no length is passed, so c is presumably NUL-terminated --
// confirm against Rep::create.
72 UString::UString(const char* c
)
73 : m_rep(Rep::create(c
))
// Constructs a UString from the char buffer c and an explicit length; the
// representation (m_rep) is obtained from Rep::create(c, length).
77 UString::UString(const char* c
, unsigned length
)
78 : m_rep(Rep::create(c
, length
))
// Constructs a UString from the UChar buffer c and an explicit length; the
// representation (m_rep) is obtained from Rep::create(c, length).
82 UString::UString(const UChar
* c
, unsigned length
)
83 : m_rep(Rep::create(c
, length
))
// Builds a UString holding the decimal text of the int i.
// NOTE(review): several lines of this body (including the declaration of p and
// the loop around the digit store) are not visible in this listing; the
// comments below describe only the statements that are shown.
87 UString
UString::from(int i
)
// Scratch buffer: three UChars per byte of i, plus one more element
// (presumably room for a sign -- confirm).
89 UChar buf
[1 + sizeof(i
) * 3];
// end is the one-past-the-last-element pointer of buf.
90 UChar
* end
= buf
+ sizeof(buf
) / sizeof(UChar
);
// INT_MIN takes its own path: it is formatted with snprintf("%d") into a char
// buffer and returned immediately.
// NOTE(review): presumably because INT_MIN cannot be negated within int -- confirm.
95 else if (i
== INT_MIN
) {
96 char minBuf
[1 + sizeof(i
) * 3];
97 snprintf(minBuf
, sizeof(minBuf
), "%d", INT_MIN
);
98 return UString(minBuf
);
100 bool negative
= false;
// Store the lowest decimal digit of i, as a character, in the slot just before
// p (p is pre-decremented), so digits are written right to left.
106 *--p
= static_cast<unsigned short>((i
% 10) + '0');
// The result is the UChar range [p, end).
113 return UString(p
, static_cast<unsigned>(end
- p
));
// Builds a UString holding the decimal text of the long long i.
// NOTE(review): several lines of this body (including the declaration of p and
// the loop around the digit store) are not visible in this listing; the
// comments below describe only the statements that are shown.
116 UString
UString::from(long long i
)
// Scratch buffer: three UChars per byte of i, plus one more element
// (presumably room for a sign -- confirm).
118 UChar buf
[1 + sizeof(i
) * 3];
// end is the one-past-the-last-element pointer of buf.
119 UChar
* end
= buf
+ sizeof(buf
) / sizeof(UChar
);
// The minimum long long takes its own path: it is formatted with
// snprintf("%lld") into a char buffer and returned immediately.
// NOTE(review): presumably because that value cannot be negated within
// long long -- confirm.
124 else if (i
== std::numeric_limits
<long long>::min()) {
125 char minBuf
[1 + sizeof(i
) * 3];
126 snprintf(minBuf
, sizeof(minBuf
), "%lld", std::numeric_limits
<long long>::min());
127 return UString(minBuf
);
129 bool negative
= false;
// Store the lowest decimal digit of i, as a character, in the slot just before
// p (p is pre-decremented), so digits are written right to left.
135 *--p
= static_cast<unsigned short>((i
% 10) + '0');
// The result is the UChar range [p, end).
142 return UString(p
, static_cast<unsigned>(end
- p
));
// Builds a UString holding the decimal text of the unsigned value u.
// NOTE(review): several lines of this body (including the declaration of p and
// the loop around the digit store) are not visible in this listing.
145 UString
UString::from(unsigned u
)
// Scratch buffer: three UChars per byte of u. Unlike the signed overloads in
// this file there is no extra "+ 1" element.
147 UChar buf
[sizeof(u
) * 3];
// end is the one-past-the-last-element pointer of buf.
148 UChar
* end
= buf
+ sizeof(buf
) / sizeof(UChar
);
// Store the lowest decimal digit of u, as a character, in the slot just before
// p (p is pre-decremented), so digits are written right to left.
155 *--p
= static_cast<unsigned short>((u
% 10) + '0');
// The result is the UChar range [p, end).
160 return UString(p
, static_cast<unsigned>(end
- p
));
// Builds a UString holding the decimal text of the long l.
// NOTE(review): several lines of this body (including the declaration of p and
// the loop around the digit store) are not visible in this listing; the
// comments below describe only the statements that are shown.
163 UString
UString::from(long l
)
// Scratch buffer: three UChars per byte of l, plus one more element
// (presumably room for a sign -- confirm).
165 UChar buf
[1 + sizeof(l
) * 3];
// end is the one-past-the-last-element pointer of buf.
166 UChar
* end
= buf
+ sizeof(buf
) / sizeof(UChar
);
// LONG_MIN takes its own path: it is formatted with snprintf("%ld") into a
// char buffer and returned immediately.
// NOTE(review): presumably because LONG_MIN cannot be negated within long -- confirm.
171 else if (l
== LONG_MIN
) {
172 char minBuf
[1 + sizeof(l
) * 3];
173 snprintf(minBuf
, sizeof(minBuf
), "%ld", LONG_MIN
);
174 return UString(minBuf
);
176 bool negative
= false;
// Store the lowest decimal digit of l, as a character, in the slot just before
// p (p is pre-decremented), so digits are written right to left.
182 *--p
= static_cast<unsigned short>((l
% 10) + '0');
// The result is the UChar range [p, end).
// NOTE(review): the int, long long and unsigned overloads wrap this pointer
// difference in static_cast<unsigned>; here it is passed without a cast.
189 return UString(p
, end
- p
);
// Builds a UString for the double d. The formatting is delegated to
// doubleToStringInJavaScriptFormat, which is handed buffer and the address of
// length; the UString is then constructed from buffer and length.
// NOTE(review): the declarations of buffer and length are not visible in this
// listing.
192 UString
UString::from(double d
)
196 doubleToStringInJavaScriptFormat(d
, buffer
, &length
);
197 return UString(buffer
, length
);
// Produces a char version of this string in a heap buffer that is shared by
// all calls to this function.
// NOTE(review): the copy loop and the return statement are not fully visible
// in this listing; presumably the returned pointer is asciiBuffer -- confirm.
200 char* UString::ascii() const
// Function-local static: a single buffer pointer is shared across every call
// and every UString. No locking is visible here.
202 static char* asciiBuffer
= 0;
204 unsigned length
= size();
// One char more than the string length (presumably for a terminating NUL --
// the store of the terminator is not visible here).
205 unsigned neededSize
= length
+ 1;
// The buffer from the previous call is freed and a new one allocated on every
// call, so a pointer obtained from an earlier call must not be used after
// calling ascii() again.
206 delete[] asciiBuffer
;
207 asciiBuffer
= new char[neededSize
];
// p walks the source UChars up to limit; q is the write position in the buffer.
209 const UChar
* p
= data();
210 char* q
= asciiBuffer
;
211 const UChar
* limit
= p
+ length
;
// Each UChar is narrowed to char with static_cast.
213 *q
= static_cast<char>(p
[0]);
// Examines the characters in the range [data(), data() + size()).
// NOTE(review): the loop and the return statements are not visible in this
// listing; going by the name, this presumably reports whether every UChar fits
// in 8 bits -- confirm.
222 bool UString::is8Bit() const
224 const UChar
* u
= data();
225 const UChar
* limit
= u
+ size();
// Index operator: takes a position pos and yields a UChar.
// NOTE(review): the body is not visible in this listing, so how an
// out-of-range pos is treated cannot be stated from here.
235 UChar
UString::operator[](unsigned pos
) const
// Parses this string as a number and returns it as a double.
//   tolerateTrailingJunk - when false, any character left over after the
//                          number and trailing white space is rejected by the
//                          final check of this function.
//   tolerateEmptyString  - chooses between 0.0 and NaN in the early return
//                          that follows the leading-white-space skip.
// NOTE(review): many lines of this body are not visible in this listing
// (declarations of d and end, loop headers, several branches); the comments
// below describe only the statements that are shown.
242 double UString::toDouble(bool tolerateTrailingJunk
, bool tolerateEmptyString
) const
// NOTE(review): the c tested here is declared on a line not visible in this
// listing; it is a different variable from the const char* c declared below.
248 if (isASCIISpace(c
) && tolerateEmptyString
)
253 // FIXME: If tolerateTrailingJunk is true, then we want to tolerate junk
254 // after the number, even if it contains invalid UTF-16 sequences. So we
255 // shouldn't use the UTF8String function, which returns null when it
256 // encounters invalid UTF-16. Further, we have no need to convert the
257 // non-ASCII characters to UTF-8, so the UTF8String does quite a bit of
// [the end of this FIXME is cut off in this listing -- presumably "unnecessary work."]
// The parse below runs over the UTF-8 conversion of this string.
259 CString s
= UTF8String();
262 const char* c
= s
.data();
264 // skip leading white space
265 while (isASCIISpace(*c
))
// Early return: 0.0 when empty input is tolerated, NaN otherwise.
// NOTE(review): the condition guarding this return is not visible here.
270 return tolerateEmptyString
? 0.0 : NaN
;
// Hexadecimal literal: a "0x" or "0X" prefix; the digits start two chars in.
275 if (*c
== '0' && (*(c
+ 1) == 'x' || *(c
+ 1) == 'X')) {
276 const char* firstDigitPosition
= c
+ 2;
// Accumulate one hex digit into d: '0'-'9' directly ...
280 if (*c
>= '0' && *c
<= '9')
281 d
= d
* 16.0 + *c
- '0';
// ... and 'A'-'F' / 'a'-'f' via (*c & 0xdf), which clears the ASCII lower-case
// bit so both cases are measured from 'A'.
282 else if ((*c
>= 'A' && *c
<= 'F') || (*c
>= 'a' && *c
<= 'f'))
283 d
= d
* 16.0 + (*c
& 0xdf) - 'A' + 10.0;
// When the accumulated value reaches mantissaOverflowLowerBound, it is
// recomputed by parseIntOverflow over the digit run [firstDigitPosition, c)
// in base 16 (presumably because the running double is no longer exact --
// confirm).
288 if (d
>= mantissaOverflowLowerBound
)
289 d
= parseIntOverflow(firstDigitPosition
, c
- firstDigitPosition
, 16);
// Non-hex input goes through WTF::strtod; end is passed by address and is
// compared with c below to tell whether any characters were consumed.
293 d
= WTF::strtod(c
, &end
);
// Taken when the result is finite and either non-zero or at least one
// character was consumed.
294 if ((d
!= 0.0 || end
!= c
) && d
!= Inf
&& d
!= -Inf
) {
301 else if (*c
== '-') {
306 // We used strtod() to do the conversion. However, strtod() handles
307 // infinite values slightly differently than JavaScript in that it
308 // converts the string "inf" with any capitalization to infinity,
309 // whereas the ECMA spec requires that it be converted to NaN.
// Exact, case-sensitive match of the eight characters "Infinity".
311 if (c
[0] == 'I' && c
[1] == 'n' && c
[2] == 'f' && c
[3] == 'i' && c
[4] == 'n' && c
[5] == 'i' && c
[6] == 't' && c
[7] == 'y') {
// strtod produced an infinity although the text at c does not begin with
// 'I' or 'i'.
// NOTE(review): the statement executed in this branch is not visible here.
314 } else if ((d
== Inf
|| d
== -Inf
) && *c
!= 'I' && *c
!= 'i')
321 // allow trailing white space
322 while (isASCIISpace(*c
))
324 // don't allow anything after - unless tolerant=true
325 // FIXME: If string contains a U+0000 character, then this check is incorrect.
326 if (!tolerateTrailingJunk
&& *c
!= '\0')
// Convenience overload: forwards to toDouble(tolerateTrailingJunk, true),
// i.e. with tolerateEmptyString fixed to true.
332 double UString::toDouble(bool tolerateTrailingJunk
) const
334 return toDouble(tolerateTrailingJunk
, true);
// Convenience overload: forwards to toDouble(false, true), i.e. trailing junk
// is not tolerated and tolerateEmptyString is true.
337 double UString::toDouble() const
339 return toDouble(false, true);
// Converts this string to a uint32_t by first parsing it with toDouble().
// NOTE(review): the statements that write through ok, and the body of the
// mismatch branch, are not visible in this listing.
342 uint32_t UString::toUInt32(bool* ok
) const
344 double d
= toDouble();
// True when d does not survive a round trip through uint32_t unchanged.
347 if (d
!= static_cast<uint32_t>(d
)) {
355 return static_cast<uint32_t>(d
);
// Converts this string to a uint32_t by first parsing it with
// toDouble(false, tolerateEmptyString): trailing junk is not tolerated and
// the empty-string policy is chosen by the caller.
// NOTE(review): the statements that write through ok, and the body of the
// mismatch branch, are not visible in this listing.
358 uint32_t UString::toUInt32(bool* ok
, bool tolerateEmptyString
) const
360 double d
= toDouble(false, tolerateEmptyString
);
// True when d does not survive a round trip through uint32_t unchanged.
363 if (d
!= static_cast<uint32_t>(d
)) {
371 return static_cast<uint32_t>(d
);
// Parses this string as an unsigned 32-bit decimal integer, reading the
// characters directly from m_rep and checking for overflow at each step.
// NOTE(review): most statements of this body (the loop, the returns and the
// writes through ok) are not visible in this listing; the existing step
// comments below outline the algorithm.
374 uint32_t UString::toStrictUInt32(bool* ok
) const
379 // Empty string is not OK.
380 unsigned len
= m_rep
->length();
383 const UChar
* p
= m_rep
->characters();
// c holds the character currently being examined, starting with the first.
384 unsigned short c
= p
[0];
386 // If the first digit is 0, only 0 itself is OK.
393 // Convert to UInt32, checking for overflow.
396 // Process character, turning it into a digit.
// Anything outside '0'..'9' is not a decimal digit.
397 if (c
< '0' || c
> '9')
399 const unsigned d
= c
- '0';
401 // Multiply by 10, checking for overflow out of 32 bits.
// If i already exceeds 0xFFFFFFFF / 10, multiplying it by 10 would not fit.
402 if (i
> 0xFFFFFFFFU
/ 10)
406 // Add in the digit, checking for overflow out of 32 bits.
// max is the largest value to which d can still be added without passing
// 0xFFFFFFFF.
407 const unsigned max
= 0xFFFFFFFFU
- d
;
412 // Handle end of string.
419 // Get next character.
// Searches forward for the string f, starting at index pos, and returns the
// index of a match as an offset from data().
// NOTE(review): the branch conditions, the early returns and the not-found
// return are not visible in this listing.
424 unsigned UString::find(const UString
& f
, unsigned pos
) const
426 unsigned fsz
= f
.size();
// First visible loop: scans every position from pos up to the end of the
// string.
// NOTE(review): the condition selecting this path and the per-character test
// are not visible here.
430 const UChar
* end
= data() + size();
431 for (const UChar
* c
= data() + pos
; c
< end
; c
++) {
433 return static_cast<unsigned>(c
- data());
438 unsigned sz
= size();
// General case: end is the last start position at which f can still fit.
443 const UChar
* end
= data() + sz
- fsz
;
// Byte count of f without its first character, as needed by memcmp.
444 unsigned fsizeminusone
= (fsz
- 1) * sizeof(UChar
);
445 const UChar
* fdata
= f
.data();
446 unsigned short fchar
= fdata
[0];
// Compare the first character directly, then the remainder with memcmp.
// NOTE(review): memcmp compares c + 1 against fdata, so fdata must already
// point at the second character of f for this to line up; the line between
// the fchar assignment and this loop is not visible in this listing --
// confirm that it advances fdata.
448 for (const UChar
* c
= data() + pos
; c
<= end
; c
++) {
449 if (c
[0] == fchar
&& !memcmp(c
+ 1, fdata
, fsizeminusone
))
450 return static_cast<unsigned>(c
- data());
// Searches forward for the character ch, starting at index pos, and returns
// the index of a match as an offset from data().
// NOTE(review): the comparison inside the loop and the not-found return are
// not visible in this listing.
456 unsigned UString::find(UChar ch
, unsigned pos
) const
458 const UChar
* end
= data() + size();
459 for (const UChar
* c
= data() + pos
; c
< end
; c
++) {
461 return static_cast<unsigned>(c
- data());
// Searches backward for the string f, starting at index pos and moving toward
// the start, and returns the index of a match as an offset from data().
// NOTE(review): the bounds checks / clamping of pos and the not-found return
// are not visible in this listing.
467 unsigned UString::rfind(const UString
& f
, unsigned pos
) const
469 unsigned sz
= size();
470 unsigned fsz
= f
.size();
// Byte count of f without its first character, as needed by memcmp.
477 unsigned fsizeminusone
= (fsz
- 1) * sizeof(UChar
);
478 const UChar
* fdata
= f
.data();
// NOTE(review): when no match is found this loop finishes by decrementing c
// to one element before data(); forming a pointer before the start of an
// array is not defined by the C++ standard. An index-based loop would avoid it.
479 for (const UChar
* c
= data() + pos
; c
>= data(); c
--) {
// Compare the first character directly, then the remainder with memcmp.
480 if (*c
== *fdata
&& !memcmp(c
+ 1, fdata
+ 1, fsizeminusone
))
481 return static_cast<unsigned>(c
- data());
// Searches backward for the character ch, starting at index pos and moving
// toward the start, and returns the index of a match as an offset from data().
// NOTE(review): the empty-string handling, the comparison inside the loop and
// the not-found return are not visible in this listing.
487 unsigned UString::rfind(UChar ch
, unsigned pos
) const
// True when pos is at or beyond the last valid index.
// NOTE(review): the statement this guards is not visible here; presumably it
// clamps pos to the last index -- confirm.
491 if (pos
+ 1 >= size())
// NOTE(review): when no match is found this loop finishes by decrementing c
// to one element before data(); forming a pointer before the start of an
// array is not defined by the C++ standard. An index-based loop would avoid it.
493 for (const UChar
* c
= data() + pos
; c
>= data(); c
--) {
495 return static_cast<unsigned>(c
- data());
// Returns the substring that starts at index pos and spans len characters.
// NOTE(review): the declaration of s, the range checks and the statements
// under the conditions below are not visible in this listing.
501 UString
UString::substr(unsigned pos
, unsigned len
) const
// Number of characters available from pos to s (presumably s is size(), and
// len is capped to this limit -- confirm).
507 unsigned limit
= s
- pos
;
// The request covers the whole string.
// NOTE(review): the statement taken in this case is not visible; presumably
// it returns *this without creating a new Rep -- confirm.
511 if (pos
== 0 && len
== s
)
// Otherwise a new Rep is created from m_rep, pos and len.
514 return UString(Rep::create(m_rep
, pos
, len
));
// Compares the UString s1 with the C string s2 character by character.
// NOTE(review): the handling of a null s2 (if any), the mismatch return and
// the pointer increments are not visible in this listing.
517 bool operator==(const UString
& s1
, const char *s2
)
522 const UChar
* u
= s1
.data();
523 const UChar
* uend
= u
+ s1
.size();
// Walk both strings until either s1 is exhausted or s2 reaches its NUL.
524 while (u
!= uend
&& *s2
) {
// The char is converted to unsigned char before being compared with the
// UChar, so char values with the high bit set compare as 128..255 rather than
// as negative numbers.
525 if (u
[0] != (unsigned char)*s2
)
// Equal only if both strings ended at the same point.
531 return u
== uend
&& *s2
== 0;
// Ordering of two UStrings by comparing their UChar values position by
// position.
// NOTE(review): the declaration of l, the loop body and the return used when
// one string is a prefix of the other are not visible in this listing.
534 bool operator<(const UString
& s1
, const UString
& s2
)
536 const unsigned l1
= s1
.size();
537 const unsigned l2
= s2
.size();
// Only the first lmin characters exist in both strings.
538 const unsigned lmin
= l1
< l2
? l1
: l2
;
539 const UChar
* c1
= s1
.data();
540 const UChar
* c2
= s2
.data();
// Advance while still inside the common length and the characters match.
542 while (l
< lmin
&& *c1
== *c2
) {
// Decided by the UChar values at the current position.
548 return (c1
[0] < c2
[0]);
// Ordering of two UStrings by comparing their UChar values position by
// position.
// NOTE(review): the declaration of l, the loop body and the return used when
// one string is a prefix of the other are not visible in this listing.
553 bool operator>(const UString
& s1
, const UString
& s2
)
555 const unsigned l1
= s1
.size();
556 const unsigned l2
= s2
.size();
// Only the first lmin characters exist in both strings.
557 const unsigned lmin
= l1
< l2
? l1
: l2
;
558 const UChar
* c1
= s1
.data();
559 const UChar
* c2
= s2
.data();
// Advance while still inside the common length and the characters match.
561 while (l
< lmin
&& *c1
== *c2
) {
// Decided by the UChar values at the current position.
567 return (c1
[0] > c2
[0]);
// Three-way comparison of two UStrings by UChar value. The visible returns
// yield 1 or -1.
// NOTE(review): the declaration of l, the loop body, the conditions guarding
// the returns and the equal-strings result (presumably 0) are not visible in
// this listing.
572 int compare(const UString
& s1
, const UString
& s2
)
574 const unsigned l1
= s1
.size();
575 const unsigned l2
= s2
.size();
// Only the first lmin characters exist in both strings.
576 const unsigned lmin
= l1
< l2
? l1
: l2
;
577 const UChar
* c1
= s1
.data();
578 const UChar
* c2
= s2
.data();
// Advance while still inside the common length and the characters match.
580 while (l
< lmin
&& *c1
== *c2
) {
// Result decided by the UChar values at the current position.
587 return (c1
[0] > c2
[0]) ? 1 : -1;
// Result decided by the lengths: 1 if s1 is longer, otherwise -1.
592 return (l1
> l2
) ? 1 : -1;
// Converts this string to UTF-8 and returns the bytes as a CString.
//   strict - passed through unchanged to convertUTF16ToUTF8.
// NOTE(review): the statements executed when the length check or the
// conversion fails are not visible in this listing.
595 CString
UString::UTF8String(bool strict
) const
597 // Allocate a buffer big enough to hold all the characters.
598 const unsigned length
= size();
// Guard for the multiplication below: beyond this length, length * 3 would
// overflow unsigned.
599 if (length
> numeric_limits
<unsigned>::max() / 3)
// Three bytes of output space per UChar of input.
601 Vector
<char, 1024> buffer(length
* 3);
603 // Convert to runs of 8-bit characters.
// p is the write cursor in buffer and d the read cursor in the source; both
// are passed by address to the converter, and p is used afterwards to compute
// the number of bytes written.
604 char* p
= buffer
.data();
605 const UChar
* d
= reinterpret_cast<const UChar
*>(&data()[0]);
606 ConversionResult result
= convertUTF16ToUTF8(&d
, d
+ length
, &p
, p
+ buffer
.size(), strict
);
607 if (result
!= conversionOK
)
// The CString takes the bytes from the start of buffer up to p.
610 return CString(buffer
.data(), p
- buffer
.data());