2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5 * Copyright (C) 2009 Google Inc. All rights reserved.
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
27 #include "JSGlobalObjectFunctions.h"
29 #include "Identifier.h"
30 #include "Operations.h"
36 #include <wtf/ASCIICType.h>
37 #include <wtf/Assertions.h>
38 #include <wtf/DecimalNumber.h>
39 #include <wtf/MathExtras.h>
40 #include <wtf/StringExtras.h>
41 #include <wtf/Vector.h>
42 #include <wtf/unicode/UTF8.h>
// Bring the names declared in WTF::Unicode into scope.
49 using namespace WTF::Unicode
;
// NaN and Inf are only declared here (extern); their definitions are not part
// of this listing.
54 extern const double NaN
;
55 extern const double Inf
;
// Compile-time check that a UString occupies exactly as much storage as one
// pointer; the second argument is the identifier attached to the assertion.
57 COMPILE_ASSERT(sizeof(UString
) == sizeof(void*), UString_should_stay_small
);
59 // Construct a string with UTF-16 data.
60 UString::UString(const UChar
* characters
, unsigned length
)
61 : m_impl(characters
? StringImpl::create(characters
, length
) : 0)
65 // Construct a string with UTF-16 data, from a null-terminated source.
// No length is passed in: the loop condition below keeps scanning while the
// code unit at index `length` is not the zero terminator, and the resulting
// `length` is what gets handed to StringImpl::create().
// NOTE(review): this listing omits several lines of the body (the braces, the
// declaration of `length`, the loop body and anything preceding the loop) -
// confirm against the complete file.
66 UString::UString(const UChar
* characters
)
72 while (characters
[length
] != UChar(0))
75 m_impl
= StringImpl::create(characters
, length
);
78 // Construct a string with latin1 data.
79 UString::UString(const char* characters
, unsigned length
)
80 : m_impl(characters
? StringImpl::create(characters
, length
) : 0)
84 // Construct a string with latin1 data, from a null-terminated source.
85 UString::UString(const char* characters
)
86 : m_impl(characters
? StringImpl::create(characters
) : 0)
// Returns the decimal representation of the signed integer `i` as a UString.
// Digits are stored through a pointer that walks backwards (`*--p`), and the
// result is built from the range between `p` and `end`.
// NOTE(review): this listing omits several lines of the body (the declaration
// of `p`, the branch that precedes the `else if`, the loop around the digit
// store, whatever consumes `negative`, and the closing braces) - confirm
// against the complete file.
90 UString
UString::number(int i
)
// Three decimal digits per byte is an upper bound on the digit count; the
// extra slot is presumably reserved for a '-' sign - TODO confirm.
92 UChar buf
[1 + sizeof(i
) * 3];
// `end` points one past the last element of `buf`.
93 UChar
* end
= buf
+ WTF_ARRAY_LENGTH(buf
);
// INT_MIN is formatted by snprintf into a char buffer and returned directly
// (presumably because it cannot be negated without overflow - confirm).
98 else if (i
== INT_MIN
) {
99 char minBuf
[1 + sizeof(i
) * 3];
100 snprintf(minBuf
, sizeof(minBuf
), "%d", INT_MIN
);
101 return UString(minBuf
);
103 bool negative
= false;
// Store the lowest decimal digit of `i` as a character, moving `p` back by one.
109 *--p
= static_cast<unsigned short>((i
% 10) + '0');
// The string spans [p, end); the pointer difference is cast to unsigned for
// the length parameter.
116 return UString(p
, static_cast<unsigned>(end
- p
));
// Returns the decimal representation of the signed 64-bit integer `i` as a
// UString. Digits are stored through a pointer that walks backwards (`*--p`),
// and the result is built from the range between `p` and `end`.
// NOTE(review): this listing omits several lines of the body (the declaration
// of `p`, the branch that precedes the `else if`, the loop around the digit
// store, whatever consumes `negative`, and the closing braces) - confirm
// against the complete file.
119 UString
UString::number(long long i
)
// Three decimal digits per byte is an upper bound on the digit count; the
// extra slot is presumably reserved for a '-' sign - TODO confirm.
121 UChar buf
[1 + sizeof(i
) * 3];
// `end` points one past the last element of `buf`.
122 UChar
* end
= buf
+ WTF_ARRAY_LENGTH(buf
);
// The minimum long long value is formatted by snprintf and returned directly
// (presumably because it cannot be negated without overflow - confirm).
127 else if (i
== std::numeric_limits
<long long>::min()) {
128 char minBuf
[1 + sizeof(i
) * 3];
// NOTE(review): two alternative snprintf calls appear back to back. They are
// presumably selected by a preprocessor conditional that this listing omits
// ("%I64d" looks like the Microsoft C runtime spelling of a 64-bit
// conversion, "%lld" the standard one) - confirm.
130 snprintf(minBuf
, sizeof(minBuf
), "%I64d", std::numeric_limits
<long long>::min());
132 snprintf(minBuf
, sizeof(minBuf
), "%lld", std::numeric_limits
<long long>::min());
134 return UString(minBuf
);
136 bool negative
= false;
// Store the lowest decimal digit of `i` as a character, moving `p` back by one.
142 *--p
= static_cast<unsigned short>((i
% 10) + '0');
// The string spans [p, end); the pointer difference is cast to unsigned for
// the length parameter.
149 return UString(p
, static_cast<unsigned>(end
- p
));
// Returns the decimal representation of the unsigned integer `u` as a
// UString. Digits are stored through a pointer that walks backwards (`*--p`),
// and the result is built from the range between `p` and `end`.
// NOTE(review): this listing omits several lines of the body (the declaration
// of `p`, the loop around the digit store, any handling that precedes it, and
// the closing braces) - confirm against the complete file.
152 UString
UString::number(unsigned u
)
// Three decimal digits per byte is an upper bound on the digit count; unlike
// the signed overloads there is no extra slot.
154 UChar buf
[sizeof(u
) * 3];
// `end` points one past the last element of `buf`.
155 UChar
* end
= buf
+ WTF_ARRAY_LENGTH(buf
);
// Store the lowest decimal digit of `u` as a character, moving `p` back by one.
162 *--p
= static_cast<unsigned short>((u
% 10) + '0');
// The string spans [p, end); the pointer difference is cast to unsigned for
// the length parameter.
167 return UString(p
, static_cast<unsigned>(end
- p
));
// Returns the decimal representation of the signed long `l` as a UString.
// Digits are stored through a pointer that walks backwards (`*--p`), and the
// result is built from the range between `p` and `end`.
// NOTE(review): this listing omits several lines of the body (the declaration
// of `p`, the branch that precedes the `else if`, the loop around the digit
// store, whatever consumes `negative`, and the closing braces) - confirm
// against the complete file.
170 UString
UString::number(long l
)
// Three decimal digits per byte is an upper bound on the digit count; the
// extra slot is presumably reserved for a '-' sign - TODO confirm.
172 UChar buf
[1 + sizeof(l
) * 3];
// `end` points one past the last element of `buf`.
173 UChar
* end
= buf
+ WTF_ARRAY_LENGTH(buf
);
// LONG_MIN is formatted by snprintf into a char buffer and returned directly
// (presumably because it cannot be negated without overflow - confirm).
178 else if (l
== LONG_MIN
) {
179 char minBuf
[1 + sizeof(l
) * 3];
180 snprintf(minBuf
, sizeof(minBuf
), "%ld", LONG_MIN
);
181 return UString(minBuf
);
183 bool negative
= false;
// Store the lowest decimal digit of `l` as a character, moving `p` back by one.
189 *--p
= static_cast<unsigned short>((l
% 10) + '0');
// NOTE(review): the int, long long and unsigned overloads wrap this pointer
// difference in static_cast<unsigned>(...); here it is passed unconverted.
// Consider adding the cast for consistency.
196 return UString(p
, end
- p
);
199 UString
UString::number(double d
)
201 NumberToStringBuffer buffer
;
202 unsigned length
= numberToString(d
, buffer
);
203 return UString(buffer
, length
);
// Returns the part of this string that starts at `offset` and spans at most
// `length` code units. Both values are clamped to the string's bounds, and
// the result is built with StringImpl::create(m_impl, offset, length).
// NOTE(review): the name suggests the result shares this string's character
// storage rather than copying it; that depends on StringImpl::create(), which
// is not visible here - confirm.
206 UString
UString::substringSharingImpl(unsigned offset
, unsigned length
) const
208 // FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UChar).
210 unsigned stringLength
= this->length();
// Clamp the start so that it never lies beyond the end of the string ...
211 offset
= min(offset
, stringLength
);
// ... and clamp the span so that it never runs past the end either.
212 length
= min(length
, stringLength
- offset
);
// Whole-string request (offset 0 and full length).
// NOTE(review): the statement guarded by this `if` is missing from this
// listing - confirm against the complete file.
214 if (!offset
&& length
== stringLength
)
216 return UString(StringImpl::create(m_impl
, offset
, length
));
// Compares a UString with a null-terminated C string, one UTF-16 code unit
// against one byte at a time. Each byte of `s2` is converted to unsigned char
// before the comparison, so bytes 0x80..0xFF compare as 128..255 rather than
// as negative values.
// NOTE(review): this listing omits several lines of the body (the braces,
// anything preceding the pointer setup, and the statements inside the loop
// that handle a mismatch and advance `u` and `s2`) - confirm against the
// complete file.
219 bool operator==(const UString
& s1
, const char *s2
)
224 const UChar
* u
= s1
.characters();
// `uend` points one past the last code unit of s1.
225 const UChar
* uend
= u
+ s1
.length();
// Continue while neither side has been exhausted.
226 while (u
!= uend
&& *s2
) {
227 if (u
[0] != (unsigned char)*s2
)
// Equal only if both strings ended at the same position.
233 return u
== uend
&& *s2
== 0;
// Less-than comparison of two UStrings by UTF-16 code unit value: the loop
// condition skips the common prefix (bounded by the shorter length), and the
// visible return compares the code units at the current positions.
// NOTE(review): this listing omits several lines of the body (the braces, the
// declaration of `l`, the loop body that advances `c1`, `c2` and `l`, the
// condition guarding the visible return, and the result used when one string
// is a prefix of the other) - confirm against the complete file.
236 bool operator<(const UString
& s1
, const UString
& s2
)
238 const unsigned l1
= s1
.length();
239 const unsigned l2
= s2
.length();
// Length of the shorter string: the comparison loop must not read past it.
240 const unsigned lmin
= l1
< l2
? l1
: l2
;
241 const UChar
* c1
= s1
.characters();
242 const UChar
* c2
= s2
.characters();
// Continue while inside both strings and the current code units are equal.
244 while (l
< lmin
&& *c1
== *c2
) {
250 return (c1
[0] < c2
[0]);
// Greater-than comparison of two UStrings by UTF-16 code unit value: the loop
// condition skips the common prefix (bounded by the shorter length), and the
// visible return compares the code units at the current positions.
// NOTE(review): this listing omits several lines of the body (the braces, the
// declaration of `l`, the loop body that advances `c1`, `c2` and `l`, the
// condition guarding the visible return, and the result used when one string
// is a prefix of the other) - confirm against the complete file.
255 bool operator>(const UString
& s1
, const UString
& s2
)
257 const unsigned l1
= s1
.length();
258 const unsigned l2
= s2
.length();
// Length of the shorter string: the comparison loop must not read past it.
259 const unsigned lmin
= l1
< l2
? l1
: l2
;
260 const UChar
* c1
= s1
.characters();
261 const UChar
* c2
= s2
.characters();
// Continue while inside both strings and the current code units are equal.
263 while (l
< lmin
&& *c1
== *c2
) {
269 return (c1
[0] > c2
[0]);
274 CString
UString::ascii() const
276 // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
277 // preserved, characters outside of this range are converted to '?'.
279 unsigned length
= this->length();
280 const UChar
* characters
= this->characters();
282 char* characterBuffer
;
283 CString result
= CString::newUninitialized(length
, characterBuffer
);
285 for (unsigned i
= 0; i
< length
; ++i
) {
286 UChar ch
= characters
[i
];
287 characterBuffer
[i
] = ch
&& (ch
< 0x20 || ch
>= 0x7f) ? '?' : ch
;
293 CString
UString::latin1() const
295 // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
296 // preserved, characters outside of this range are converted to '?'.
298 unsigned length
= this->length();
299 const UChar
* characters
= this->characters();
301 char* characterBuffer
;
302 CString result
= CString::newUninitialized(length
, characterBuffer
);
304 for (unsigned i
= 0; i
< length
; ++i
) {
305 UChar ch
= characters
[i
];
306 characterBuffer
[i
] = ch
> 0xff ? '?' : ch
;
312 // Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available.
313 static inline void putUTF8Triple(char*& buffer
, UChar ch
)
315 ASSERT(ch
>= 0x0800);
316 *buffer
++ = static_cast<char>(((ch
>> 12) & 0x0F) | 0xE0);
317 *buffer
++ = static_cast<char>(((ch
>> 6) & 0x3F) | 0x80);
318 *buffer
++ = static_cast<char>((ch
& 0x3F) | 0x80);
// Converts this string's UTF-16 code units to UTF-8 and returns the bytes as
// a CString. `strict` is forwarded unchanged to convertUTF16ToUTF8().
// NOTE(review): this listing omits several lines of the body (the braces and
// the statements guarded by the `length` overflow check, by the sourceIllegal
// check, and at the start of the sourceExhausted branch) - confirm against
// the complete file.
321 CString
UString::utf8(bool strict
) const
323 unsigned length
= this->length();
324 const UChar
* characters
= this->characters();
326 // Allocate a buffer big enough to hold all the characters
327 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
328 // Optimization ideas, if we find this function is hot:
329 // * We could speculatively create a CStringBuffer to contain 'length'
330 // characters, and resize if necessary (i.e. if the buffer contains
331 // non-ascii characters). (Alternatively, scan the buffer first for
332 // ascii characters, so we know this will be sufficient).
333 // * We could allocate a CStringBuffer with an appropriate size to
334 // have a good chance of being able to write the string into the
335 // buffer without reallocating (say, 1.5 x length).
// Guard for the `length * 3` computed below: above this bound the
// multiplication would overflow an unsigned value.
336 if (length
> numeric_limits
<unsigned>::max() / 3)
338 Vector
<char, 1024> bufferVector(length
* 3);
// `characters` and `buffer` are passed by address, so the conversion advances
// both; afterwards `buffer` marks the end of the bytes written.
340 char* buffer
= bufferVector
.data();
341 ConversionResult result
= convertUTF16ToUTF8(&characters
, characters
+ length
, &buffer
, buffer
+ bufferVector
.size(), strict
);
342 ASSERT(result
!= targetExhausted
); // (length * 3) should be sufficient for any conversion
344 // Only produced from strict conversion.
345 if (result
== sourceIllegal
)
348 // Check for an unconverted high surrogate.
349 if (result
== sourceExhausted
) {
352 // This should be one unpaired high surrogate. Treat it the same
353 // way as an unpaired high surrogate would have been handled in
354 // the middle of a string with non-strict conversion - which is
355 // to say, simply encode it to UTF-8.
// Exactly one code unit must remain unconverted ...
356 ASSERT((characters
+ 1) == (this->characters() + length
));
// ... and it must lie in the high-surrogate range.
357 ASSERT((*characters
>= 0xD800) && (*characters
<= 0xDBFF));
358 // There should be room left, since one UChar hasn't been converted.
// NOTE(review): both sides of this comparison are offsets from the already
// advanced `buffer`, so it only checks bufferVector.size() >= 3 and cannot
// detect a lack of remaining room. The upper bound was presumably meant to be
// bufferVector.data() + bufferVector.size() - confirm and fix.
359 ASSERT((buffer
+ 3) <= (buffer
+ bufferVector
.size()));
360 putUTF8Triple(buffer
, *characters
);
// The result length is the distance between the start of the vector's
// storage and the final write position.
363 return CString(bufferVector
.data(), buffer
- bufferVector
.data());