]> git.saurik.com Git - apple/javascriptcore.git/blame - runtime/UString.cpp
JavaScriptCore-1097.3.3.tar.gz
[apple/javascriptcore.git] / runtime / UString.cpp
CommitLineData
9dae56ea
A
1/*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
ba379fdc 3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
9dae56ea 4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
ba379fdc 5 * Copyright (C) 2009 Google Inc. All rights reserved.
9dae56ea
A
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24#include "config.h"
25#include "UString.h"
26
27#include "JSGlobalObjectFunctions.h"
14957cd0 28#include "Heap.h"
9dae56ea
A
29#include "Identifier.h"
30#include "Operations.h"
31#include <ctype.h>
9dae56ea 32#include <limits.h>
f9bf01c6 33#include <limits>
9dae56ea
A
34#include <stdio.h>
35#include <stdlib.h>
36#include <wtf/ASCIICType.h>
37#include <wtf/Assertions.h>
38#include <wtf/MathExtras.h>
f9bf01c6 39#include <wtf/StringExtras.h>
9dae56ea 40#include <wtf/Vector.h>
6fe7ccc8 41#include <wtf/dtoa.h>
9dae56ea
A
42#include <wtf/unicode/UTF8.h>
43
9dae56ea
A
44#if HAVE(STRINGS_H)
45#include <strings.h>
46#endif
47
48using namespace WTF;
49using namespace WTF::Unicode;
50using namespace std;
51
9dae56ea 52namespace JSC {
4e4e5a6f 53
14957cd0 54COMPILE_ASSERT(sizeof(UString) == sizeof(void*), UString_should_stay_small);
9dae56ea 55
14957cd0
A
56// Construct a string with UTF-16 data.
57UString::UString(const UChar* characters, unsigned length)
58 : m_impl(characters ? StringImpl::create(characters, length) : 0)
9dae56ea 59{
9dae56ea
A
60}
61
14957cd0
A
62// Construct a string with UTF-16 data, from a null-terminated source.
63UString::UString(const UChar* characters)
9dae56ea 64{
14957cd0
A
65 if (!characters)
66 return;
67
68 int length = 0;
69 while (characters[length] != UChar(0))
70 ++length;
71
72 m_impl = StringImpl::create(characters, length);
9dae56ea
A
73}
74
14957cd0 75// Construct a string with latin1 data.
6fe7ccc8 76UString::UString(const LChar* characters, unsigned length)
14957cd0 77 : m_impl(characters ? StringImpl::create(characters, length) : 0)
9dae56ea 78{
9dae56ea
A
79}
80
6fe7ccc8
A
81UString::UString(const char* characters, unsigned length)
82 : m_impl(characters ? StringImpl::create(reinterpret_cast<const LChar*>(characters), length) : 0)
83{
84}
85
14957cd0 86// Construct a string with latin1 data, from a null-terminated source.
6fe7ccc8 87UString::UString(const LChar* characters)
14957cd0 88 : m_impl(characters ? StringImpl::create(characters) : 0)
9dae56ea 89{
9dae56ea
A
90}
91
6fe7ccc8
A
92UString::UString(const char* characters)
93 : m_impl(characters ? StringImpl::create(reinterpret_cast<const LChar*>(characters)) : 0)
94{
95}
96
14957cd0 97UString UString::number(int i)
9dae56ea 98{
6fe7ccc8
A
99 LChar buf[1 + sizeof(i) * 3];
100 LChar* end = buf + WTF_ARRAY_LENGTH(buf);
101 LChar* p = end;
4e4e5a6f 102
9dae56ea
A
103 if (i == 0)
104 *--p = '0';
105 else if (i == INT_MIN) {
106 char minBuf[1 + sizeof(i) * 3];
fb8617cd 107 snprintf(minBuf, sizeof(minBuf), "%d", INT_MIN);
f9bf01c6 108 return UString(minBuf);
9dae56ea
A
109 } else {
110 bool negative = false;
111 if (i < 0) {
112 negative = true;
113 i = -i;
114 }
115 while (i) {
116 *--p = static_cast<unsigned short>((i % 10) + '0');
117 i /= 10;
118 }
119 if (negative)
120 *--p = '-';
121 }
122
4e4e5a6f 123 return UString(p, static_cast<unsigned>(end - p));
9dae56ea
A
124}
125
14957cd0 126UString UString::number(long long i)
9dae56ea 127{
6fe7ccc8
A
128 LChar buf[1 + sizeof(i) * 3];
129 LChar* end = buf + WTF_ARRAY_LENGTH(buf);
130 LChar* p = end;
f9bf01c6 131
9dae56ea
A
132 if (i == 0)
133 *--p = '0';
f9bf01c6 134 else if (i == std::numeric_limits<long long>::min()) {
9dae56ea 135 char minBuf[1 + sizeof(i) * 3];
14957cd0
A
136#if OS(WINDOWS)
137 snprintf(minBuf, sizeof(minBuf), "%I64d", std::numeric_limits<long long>::min());
138#else
f9bf01c6 139 snprintf(minBuf, sizeof(minBuf), "%lld", std::numeric_limits<long long>::min());
14957cd0 140#endif
9dae56ea
A
141 return UString(minBuf);
142 } else {
143 bool negative = false;
144 if (i < 0) {
145 negative = true;
146 i = -i;
147 }
148 while (i) {
149 *--p = static_cast<unsigned short>((i % 10) + '0');
150 i /= 10;
151 }
152 if (negative)
153 *--p = '-';
154 }
155
4e4e5a6f 156 return UString(p, static_cast<unsigned>(end - p));
9dae56ea
A
157}
158
14957cd0 159UString UString::number(unsigned u)
9dae56ea 160{
6fe7ccc8
A
161 LChar buf[sizeof(u) * 3];
162 LChar* end = buf + WTF_ARRAY_LENGTH(buf);
163 LChar* p = end;
4e4e5a6f 164
9dae56ea
A
165 if (u == 0)
166 *--p = '0';
167 else {
168 while (u) {
169 *--p = static_cast<unsigned short>((u % 10) + '0');
170 u /= 10;
171 }
172 }
4e4e5a6f
A
173
174 return UString(p, static_cast<unsigned>(end - p));
9dae56ea
A
175}
176
14957cd0 177UString UString::number(long l)
9dae56ea 178{
6fe7ccc8
A
179 LChar buf[1 + sizeof(l) * 3];
180 LChar* end = buf + WTF_ARRAY_LENGTH(buf);
181 LChar* p = end;
9dae56ea
A
182
183 if (l == 0)
184 *--p = '0';
185 else if (l == LONG_MIN) {
186 char minBuf[1 + sizeof(l) * 3];
fb8617cd 187 snprintf(minBuf, sizeof(minBuf), "%ld", LONG_MIN);
9dae56ea
A
188 return UString(minBuf);
189 } else {
190 bool negative = false;
191 if (l < 0) {
192 negative = true;
193 l = -l;
194 }
195 while (l) {
196 *--p = static_cast<unsigned short>((l % 10) + '0');
197 l /= 10;
198 }
199 if (negative)
200 *--p = '-';
201 }
202
4e4e5a6f 203 return UString(p, end - p);
9dae56ea
A
204}
205
14957cd0 206UString UString::number(double d)
9dae56ea 207{
14957cd0 208 NumberToStringBuffer buffer;
6fe7ccc8 209 return UString(numberToString(d, buffer));
9dae56ea
A
210}
211
14957cd0 212UString UString::substringSharingImpl(unsigned offset, unsigned length) const
9dae56ea 213{
14957cd0 214 // FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UChar).
9dae56ea 215
14957cd0
A
216 unsigned stringLength = this->length();
217 offset = min(offset, stringLength);
218 length = min(length, stringLength - offset);
9dae56ea 219
14957cd0 220 if (!offset && length == stringLength)
9dae56ea 221 return *this;
14957cd0 222 return UString(StringImpl::create(m_impl, offset, length));
9dae56ea
A
223}
224
9dae56ea
A
225bool operator==(const UString& s1, const char *s2)
226{
6fe7ccc8
A
227 if (s1.isEmpty())
228 return !s2;
9dae56ea 229
6fe7ccc8
A
230 return equal(s1.impl(), s2);
231}
232
233// This method assumes that all simple checks have been performed by
234// the inlined operator==() in the header file.
235bool equalSlowCase(const UString& s1, const UString& s2)
236{
237 StringImpl* rep1 = s1.impl();
238 StringImpl* rep2 = s2.impl();
239 unsigned size1 = rep1->length();
240
241 // At this point we know
242 // (a) that the strings are the same length and
243 // (b) that they are greater than zero length.
244 bool s1Is8Bit = rep1->is8Bit();
245 bool s2Is8Bit = rep2->is8Bit();
246
247 if (s1Is8Bit) {
248 const LChar* d1 = rep1->characters8();
249 if (s2Is8Bit) {
250 const LChar* d2 = rep2->characters8();
251
252 if (d1 == d2) // Check to see if the data pointers are the same.
253 return true;
254
255 // Do quick checks for sizes 1 and 2.
256 switch (size1) {
257 case 1:
258 return d1[0] == d2[0];
259 case 2:
260 return (d1[0] == d2[0]) & (d1[1] == d2[1]);
261 default:
262 return (!memcmp(d1, d2, size1 * sizeof(LChar)));
263 }
264 }
265
266 const UChar* d2 = rep2->characters16();
267
268 for (unsigned i = 0; i < size1; i++) {
269 if (d1[i] != d2[i])
270 return false;
271 }
272 return true;
273 }
274
275 if (s2Is8Bit) {
276 const UChar* d1 = rep1->characters16();
277 const LChar* d2 = rep2->characters8();
278
279 for (unsigned i = 0; i < size1; i++) {
280 if (d1[i] != d2[i])
281 return false;
282 }
283 return true;
284
285 }
286
287 const UChar* d1 = rep1->characters16();
288 const UChar* d2 = rep2->characters16();
289
290 if (d1 == d2) // Check to see if the data pointers are the same.
291 return true;
292
293 // Do quick checks for sizes 1 and 2.
294 switch (size1) {
295 case 1:
296 return d1[0] == d2[0];
297 case 2:
298 return (d1[0] == d2[0]) & (d1[1] == d2[1]);
299 default:
300 return (!memcmp(d1, d2, size1 * sizeof(UChar)));
301 }
9dae56ea
A
302}
303
304bool operator<(const UString& s1, const UString& s2)
305{
14957cd0
A
306 const unsigned l1 = s1.length();
307 const unsigned l2 = s2.length();
4e4e5a6f 308 const unsigned lmin = l1 < l2 ? l1 : l2;
6fe7ccc8
A
309 if (s1.is8Bit() && s2.is8Bit()) {
310 const LChar* c1 = s1.characters8();
311 const LChar* c2 = s2.characters8();
312 unsigned length = 0;
313 while (length < lmin && *c1 == *c2) {
314 c1++;
315 c2++;
316 length++;
317 }
318 if (length < lmin)
319 return (c1[0] < c2[0]);
320
321 return (l1 < l2);
322 }
14957cd0
A
323 const UChar* c1 = s1.characters();
324 const UChar* c2 = s2.characters();
6fe7ccc8
A
325 unsigned length = 0;
326 while (length < lmin && *c1 == *c2) {
9dae56ea
A
327 c1++;
328 c2++;
6fe7ccc8 329 length++;
9dae56ea 330 }
6fe7ccc8 331 if (length < lmin)
9dae56ea
A
332 return (c1[0] < c2[0]);
333
334 return (l1 < l2);
335}
336
337bool operator>(const UString& s1, const UString& s2)
338{
14957cd0
A
339 const unsigned l1 = s1.length();
340 const unsigned l2 = s2.length();
4e4e5a6f 341 const unsigned lmin = l1 < l2 ? l1 : l2;
14957cd0
A
342 const UChar* c1 = s1.characters();
343 const UChar* c2 = s2.characters();
4e4e5a6f 344 unsigned l = 0;
9dae56ea
A
345 while (l < lmin && *c1 == *c2) {
346 c1++;
347 c2++;
348 l++;
349 }
350 if (l < lmin)
351 return (c1[0] > c2[0]);
352
353 return (l1 > l2);
354}
355
14957cd0 356CString UString::ascii() const
9dae56ea 357{
14957cd0
A
358 // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
359 // preserved, characters outside of this range are converted to '?'.
360
361 unsigned length = this->length();
6fe7ccc8
A
362
363 if (this->is8Bit()) {
364 const LChar* characters = this->characters8();
365
366 char* characterBuffer;
367 CString result = CString::newUninitialized(length, characterBuffer);
368
369 for (unsigned i = 0; i < length; ++i) {
370 LChar ch = characters[i];
371 characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch;
372 }
373
374 return result;
375 }
376
377 const UChar* characters = this->characters16();
14957cd0
A
378
379 char* characterBuffer;
380 CString result = CString::newUninitialized(length, characterBuffer);
381
382 for (unsigned i = 0; i < length; ++i) {
383 UChar ch = characters[i];
384 characterBuffer[i] = ch && (ch < 0x20 || ch >= 0x7f) ? '?' : ch;
9dae56ea
A
385 }
386
14957cd0
A
387 return result;
388}
9dae56ea 389
14957cd0
A
390CString UString::latin1() const
391{
392 // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
393 // preserved, characters outside of this range are converted to '?'.
394
395 unsigned length = this->length();
396 const UChar* characters = this->characters();
9dae56ea 397
14957cd0
A
398 char* characterBuffer;
399 CString result = CString::newUninitialized(length, characterBuffer);
400
401 for (unsigned i = 0; i < length; ++i) {
402 UChar ch = characters[i];
403 characterBuffer[i] = ch > 0xff ? '?' : ch;
404 }
405
406 return result;
9dae56ea
A
407}
408
14957cd0
A
409// Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available.
410static inline void putUTF8Triple(char*& buffer, UChar ch)
9dae56ea 411{
14957cd0
A
412 ASSERT(ch >= 0x0800);
413 *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);
414 *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80);
415 *buffer++ = static_cast<char>((ch & 0x3F) | 0x80);
416}
417
418CString UString::utf8(bool strict) const
419{
420 unsigned length = this->length();
6fe7ccc8
A
421
422 if (!length)
423 return CString("", 0);
14957cd0
A
424
425 // Allocate a buffer big enough to hold all the characters
426 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
427 // Optimization ideas, if we find this function is hot:
428 // * We could speculatively create a CStringBuffer to contain 'length'
429 // characters, and resize if necessary (i.e. if the buffer contains
430 // non-ascii characters). (Alternatively, scan the buffer first for
431 // ascii characters, so we know this will be sufficient).
432 // * We could allocate a CStringBuffer with an appropriate size to
433 // have a good chance of being able to write the string into the
434 // buffer without reallocing (say, 1.5 x length).
b80e6193 435 if (length > numeric_limits<unsigned>::max() / 3)
9dae56ea 436 return CString();
14957cd0 437
6fe7ccc8 438 Vector<char, 1024> bufferVector(length * 3);
14957cd0 439 char* buffer = bufferVector.data();
14957cd0 440
6fe7ccc8
A
441 if (is8Bit()) {
442 const LChar* characters = this->characters8();
14957cd0 443
6fe7ccc8
A
444 ConversionResult result = convertLatin1ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size());
445 ASSERT_UNUSED(result, result != targetExhausted); // (length * 3) should be sufficient for any conversion
446 } else {
447 const UChar* characters = this->characters16();
448
449 ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict);
450 ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion
451
452 // Only produced from strict conversion.
453 if (result == sourceIllegal)
14957cd0 454 return CString();
6fe7ccc8
A
455
456 // Check for an unconverted high surrogate.
457 if (result == sourceExhausted) {
458 if (strict)
459 return CString();
460 // This should be one unpaired high surrogate. Treat it the same
461 // was as an unpaired high surrogate would have been handled in
462 // the middle of a string with non-strict conversion - which is
463 // to say, simply encode it to UTF-8.
464 ASSERT((characters + 1) == (this->characters() + length));
465 ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF));
466 // There should be room left, since one UChar hasn't been converted.
467 ASSERT((buffer + 3) <= (buffer + bufferVector.size()));
468 putUTF8Triple(buffer, *characters);
469 }
14957cd0 470 }
9dae56ea 471
14957cd0 472 return CString(bufferVector.data(), buffer - bufferVector.data());
9dae56ea
A
473}
474
9dae56ea 475} // namespace JSC