]> git.saurik.com Git - apple/javascriptcore.git/blob - runtime/UString.cpp
JavaScriptCore-721.26.tar.gz
[apple/javascriptcore.git] / runtime / UString.cpp
1 /*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5 * Copyright (C) 2009 Google Inc. All rights reserved.
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24 #include "config.h"
25 #include "UString.h"
26
27 #include "JSGlobalObjectFunctions.h"
28 #include "Collector.h"
29 #include "dtoa.h"
30 #include "Identifier.h"
31 #include "Operations.h"
32 #include <ctype.h>
33 #include <limits.h>
34 #include <limits>
35 #include <math.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <wtf/ASCIICType.h>
40 #include <wtf/Assertions.h>
41 #include <wtf/MathExtras.h>
42 #include <wtf/StringExtras.h>
43 #include <wtf/Vector.h>
44 #include <wtf/unicode/UTF8.h>
45 #include <wtf/StringExtras.h>
46
47 #if HAVE(STRINGS_H)
48 #include <strings.h>
49 #endif
50
51 using namespace WTF;
52 using namespace WTF::Unicode;
53 using namespace std;
54
55 namespace JSC {
56
57 extern const double NaN;
58 extern const double Inf;
59
60 // The null string is immutable, except for refCount.
61 UString* UString::s_nullUString;
62
63 void initializeUString()
64 {
65 // UStringImpl::empty() does not construct its static string in a threadsafe fashion,
66 // so ensure it has been initialized from here.
67 UStringImpl::empty();
68
69 UString::s_nullUString = new UString;
70 }
71
72 UString::UString(const char* c)
73 : m_rep(Rep::create(c))
74 {
75 }
76
77 UString::UString(const char* c, unsigned length)
78 : m_rep(Rep::create(c, length))
79 {
80 }
81
82 UString::UString(const UChar* c, unsigned length)
83 : m_rep(Rep::create(c, length))
84 {
85 }
86
87 UString UString::from(int i)
88 {
89 UChar buf[1 + sizeof(i) * 3];
90 UChar* end = buf + sizeof(buf) / sizeof(UChar);
91 UChar* p = end;
92
93 if (i == 0)
94 *--p = '0';
95 else if (i == INT_MIN) {
96 char minBuf[1 + sizeof(i) * 3];
97 snprintf(minBuf, sizeof(minBuf), "%d", INT_MIN);
98 return UString(minBuf);
99 } else {
100 bool negative = false;
101 if (i < 0) {
102 negative = true;
103 i = -i;
104 }
105 while (i) {
106 *--p = static_cast<unsigned short>((i % 10) + '0');
107 i /= 10;
108 }
109 if (negative)
110 *--p = '-';
111 }
112
113 return UString(p, static_cast<unsigned>(end - p));
114 }
115
116 UString UString::from(long long i)
117 {
118 UChar buf[1 + sizeof(i) * 3];
119 UChar* end = buf + sizeof(buf) / sizeof(UChar);
120 UChar* p = end;
121
122 if (i == 0)
123 *--p = '0';
124 else if (i == std::numeric_limits<long long>::min()) {
125 char minBuf[1 + sizeof(i) * 3];
126 snprintf(minBuf, sizeof(minBuf), "%lld", std::numeric_limits<long long>::min());
127 return UString(minBuf);
128 } else {
129 bool negative = false;
130 if (i < 0) {
131 negative = true;
132 i = -i;
133 }
134 while (i) {
135 *--p = static_cast<unsigned short>((i % 10) + '0');
136 i /= 10;
137 }
138 if (negative)
139 *--p = '-';
140 }
141
142 return UString(p, static_cast<unsigned>(end - p));
143 }
144
145 UString UString::from(unsigned u)
146 {
147 UChar buf[sizeof(u) * 3];
148 UChar* end = buf + sizeof(buf) / sizeof(UChar);
149 UChar* p = end;
150
151 if (u == 0)
152 *--p = '0';
153 else {
154 while (u) {
155 *--p = static_cast<unsigned short>((u % 10) + '0');
156 u /= 10;
157 }
158 }
159
160 return UString(p, static_cast<unsigned>(end - p));
161 }
162
163 UString UString::from(long l)
164 {
165 UChar buf[1 + sizeof(l) * 3];
166 UChar* end = buf + sizeof(buf) / sizeof(UChar);
167 UChar* p = end;
168
169 if (l == 0)
170 *--p = '0';
171 else if (l == LONG_MIN) {
172 char minBuf[1 + sizeof(l) * 3];
173 snprintf(minBuf, sizeof(minBuf), "%ld", LONG_MIN);
174 return UString(minBuf);
175 } else {
176 bool negative = false;
177 if (l < 0) {
178 negative = true;
179 l = -l;
180 }
181 while (l) {
182 *--p = static_cast<unsigned short>((l % 10) + '0');
183 l /= 10;
184 }
185 if (negative)
186 *--p = '-';
187 }
188
189 return UString(p, end - p);
190 }
191
192 UString UString::from(double d)
193 {
194 DtoaBuffer buffer;
195 unsigned length;
196 doubleToStringInJavaScriptFormat(d, buffer, &length);
197 return UString(buffer, length);
198 }
199
200 char* UString::ascii() const
201 {
202 static char* asciiBuffer = 0;
203
204 unsigned length = size();
205 unsigned neededSize = length + 1;
206 delete[] asciiBuffer;
207 asciiBuffer = new char[neededSize];
208
209 const UChar* p = data();
210 char* q = asciiBuffer;
211 const UChar* limit = p + length;
212 while (p != limit) {
213 *q = static_cast<char>(p[0]);
214 ++p;
215 ++q;
216 }
217 *q = '\0';
218
219 return asciiBuffer;
220 }
221
222 bool UString::is8Bit() const
223 {
224 const UChar* u = data();
225 const UChar* limit = u + size();
226 while (u < limit) {
227 if (u[0] > 0xFF)
228 return false;
229 ++u;
230 }
231
232 return true;
233 }
234
235 UChar UString::operator[](unsigned pos) const
236 {
237 if (pos >= size())
238 return '\0';
239 return data()[pos];
240 }
241
242 double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
243 {
244 if (size() == 1) {
245 UChar c = data()[0];
246 if (isASCIIDigit(c))
247 return c - '0';
248 if (isASCIISpace(c) && tolerateEmptyString)
249 return 0;
250 return NaN;
251 }
252
253 // FIXME: If tolerateTrailingJunk is true, then we want to tolerate junk
254 // after the number, even if it contains invalid UTF-16 sequences. So we
255 // shouldn't use the UTF8String function, which returns null when it
256 // encounters invalid UTF-16. Further, we have no need to convert the
257 // non-ASCII characters to UTF-8, so the UTF8String does quite a bit of
258 // unnecessary work.
259 CString s = UTF8String();
260 if (s.isNull())
261 return NaN;
262 const char* c = s.data();
263
264 // skip leading white space
265 while (isASCIISpace(*c))
266 c++;
267
268 // empty string ?
269 if (*c == '\0')
270 return tolerateEmptyString ? 0.0 : NaN;
271
272 double d;
273
274 // hex number ?
275 if (*c == '0' && (*(c + 1) == 'x' || *(c + 1) == 'X')) {
276 const char* firstDigitPosition = c + 2;
277 c++;
278 d = 0.0;
279 while (*(++c)) {
280 if (*c >= '0' && *c <= '9')
281 d = d * 16.0 + *c - '0';
282 else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f'))
283 d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
284 else
285 break;
286 }
287
288 if (d >= mantissaOverflowLowerBound)
289 d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
290 } else {
291 // regular number ?
292 char* end;
293 d = WTF::strtod(c, &end);
294 if ((d != 0.0 || end != c) && d != Inf && d != -Inf) {
295 c = end;
296 } else {
297 double sign = 1.0;
298
299 if (*c == '+')
300 c++;
301 else if (*c == '-') {
302 sign = -1.0;
303 c++;
304 }
305
306 // We used strtod() to do the conversion. However, strtod() handles
307 // infinite values slightly differently than JavaScript in that it
308 // converts the string "inf" with any capitalization to infinity,
309 // whereas the ECMA spec requires that it be converted to NaN.
310
311 if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
312 d = sign * Inf;
313 c += 8;
314 } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i')
315 c = end;
316 else
317 return NaN;
318 }
319 }
320
321 // allow trailing white space
322 while (isASCIISpace(*c))
323 c++;
324 // don't allow anything after - unless tolerant=true
325 // FIXME: If string contains a U+0000 character, then this check is incorrect.
326 if (!tolerateTrailingJunk && *c != '\0')
327 d = NaN;
328
329 return d;
330 }
331
332 double UString::toDouble(bool tolerateTrailingJunk) const
333 {
334 return toDouble(tolerateTrailingJunk, true);
335 }
336
337 double UString::toDouble() const
338 {
339 return toDouble(false, true);
340 }
341
342 uint32_t UString::toUInt32(bool* ok) const
343 {
344 double d = toDouble();
345 bool b = true;
346
347 if (d != static_cast<uint32_t>(d)) {
348 b = false;
349 d = 0;
350 }
351
352 if (ok)
353 *ok = b;
354
355 return static_cast<uint32_t>(d);
356 }
357
358 uint32_t UString::toUInt32(bool* ok, bool tolerateEmptyString) const
359 {
360 double d = toDouble(false, tolerateEmptyString);
361 bool b = true;
362
363 if (d != static_cast<uint32_t>(d)) {
364 b = false;
365 d = 0;
366 }
367
368 if (ok)
369 *ok = b;
370
371 return static_cast<uint32_t>(d);
372 }
373
374 uint32_t UString::toStrictUInt32(bool* ok) const
375 {
376 if (ok)
377 *ok = false;
378
379 // Empty string is not OK.
380 unsigned len = m_rep->length();
381 if (len == 0)
382 return 0;
383 const UChar* p = m_rep->characters();
384 unsigned short c = p[0];
385
386 // If the first digit is 0, only 0 itself is OK.
387 if (c == '0') {
388 if (len == 1 && ok)
389 *ok = true;
390 return 0;
391 }
392
393 // Convert to UInt32, checking for overflow.
394 uint32_t i = 0;
395 while (1) {
396 // Process character, turning it into a digit.
397 if (c < '0' || c > '9')
398 return 0;
399 const unsigned d = c - '0';
400
401 // Multiply by 10, checking for overflow out of 32 bits.
402 if (i > 0xFFFFFFFFU / 10)
403 return 0;
404 i *= 10;
405
406 // Add in the digit, checking for overflow out of 32 bits.
407 const unsigned max = 0xFFFFFFFFU - d;
408 if (i > max)
409 return 0;
410 i += d;
411
412 // Handle end of string.
413 if (--len == 0) {
414 if (ok)
415 *ok = true;
416 return i;
417 }
418
419 // Get next character.
420 c = *(++p);
421 }
422 }
423
424 unsigned UString::find(const UString& f, unsigned pos) const
425 {
426 unsigned fsz = f.size();
427
428 if (fsz == 1) {
429 UChar ch = f[0];
430 const UChar* end = data() + size();
431 for (const UChar* c = data() + pos; c < end; c++) {
432 if (*c == ch)
433 return static_cast<unsigned>(c - data());
434 }
435 return NotFound;
436 }
437
438 unsigned sz = size();
439 if (sz < fsz)
440 return NotFound;
441 if (fsz == 0)
442 return pos;
443 const UChar* end = data() + sz - fsz;
444 unsigned fsizeminusone = (fsz - 1) * sizeof(UChar);
445 const UChar* fdata = f.data();
446 unsigned short fchar = fdata[0];
447 ++fdata;
448 for (const UChar* c = data() + pos; c <= end; c++) {
449 if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone))
450 return static_cast<unsigned>(c - data());
451 }
452
453 return NotFound;
454 }
455
456 unsigned UString::find(UChar ch, unsigned pos) const
457 {
458 const UChar* end = data() + size();
459 for (const UChar* c = data() + pos; c < end; c++) {
460 if (*c == ch)
461 return static_cast<unsigned>(c - data());
462 }
463
464 return NotFound;
465 }
466
467 unsigned UString::rfind(const UString& f, unsigned pos) const
468 {
469 unsigned sz = size();
470 unsigned fsz = f.size();
471 if (sz < fsz)
472 return NotFound;
473 if (pos > sz - fsz)
474 pos = sz - fsz;
475 if (fsz == 0)
476 return pos;
477 unsigned fsizeminusone = (fsz - 1) * sizeof(UChar);
478 const UChar* fdata = f.data();
479 for (const UChar* c = data() + pos; c >= data(); c--) {
480 if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
481 return static_cast<unsigned>(c - data());
482 }
483
484 return NotFound;
485 }
486
487 unsigned UString::rfind(UChar ch, unsigned pos) const
488 {
489 if (isEmpty())
490 return NotFound;
491 if (pos + 1 >= size())
492 pos = size() - 1;
493 for (const UChar* c = data() + pos; c >= data(); c--) {
494 if (*c == ch)
495 return static_cast<unsigned>(c - data());
496 }
497
498 return NotFound;
499 }
500
501 UString UString::substr(unsigned pos, unsigned len) const
502 {
503 unsigned s = size();
504
505 if (pos >= s)
506 pos = s;
507 unsigned limit = s - pos;
508 if (len > limit)
509 len = limit;
510
511 if (pos == 0 && len == s)
512 return *this;
513
514 return UString(Rep::create(m_rep, pos, len));
515 }
516
517 bool operator==(const UString& s1, const char *s2)
518 {
519 if (s2 == 0)
520 return s1.isEmpty();
521
522 const UChar* u = s1.data();
523 const UChar* uend = u + s1.size();
524 while (u != uend && *s2) {
525 if (u[0] != (unsigned char)*s2)
526 return false;
527 s2++;
528 u++;
529 }
530
531 return u == uend && *s2 == 0;
532 }
533
534 bool operator<(const UString& s1, const UString& s2)
535 {
536 const unsigned l1 = s1.size();
537 const unsigned l2 = s2.size();
538 const unsigned lmin = l1 < l2 ? l1 : l2;
539 const UChar* c1 = s1.data();
540 const UChar* c2 = s2.data();
541 unsigned l = 0;
542 while (l < lmin && *c1 == *c2) {
543 c1++;
544 c2++;
545 l++;
546 }
547 if (l < lmin)
548 return (c1[0] < c2[0]);
549
550 return (l1 < l2);
551 }
552
553 bool operator>(const UString& s1, const UString& s2)
554 {
555 const unsigned l1 = s1.size();
556 const unsigned l2 = s2.size();
557 const unsigned lmin = l1 < l2 ? l1 : l2;
558 const UChar* c1 = s1.data();
559 const UChar* c2 = s2.data();
560 unsigned l = 0;
561 while (l < lmin && *c1 == *c2) {
562 c1++;
563 c2++;
564 l++;
565 }
566 if (l < lmin)
567 return (c1[0] > c2[0]);
568
569 return (l1 > l2);
570 }
571
572 int compare(const UString& s1, const UString& s2)
573 {
574 const unsigned l1 = s1.size();
575 const unsigned l2 = s2.size();
576 const unsigned lmin = l1 < l2 ? l1 : l2;
577 const UChar* c1 = s1.data();
578 const UChar* c2 = s2.data();
579 unsigned l = 0;
580 while (l < lmin && *c1 == *c2) {
581 c1++;
582 c2++;
583 l++;
584 }
585
586 if (l < lmin)
587 return (c1[0] > c2[0]) ? 1 : -1;
588
589 if (l1 == l2)
590 return 0;
591
592 return (l1 > l2) ? 1 : -1;
593 }
594
595 CString UString::UTF8String(bool strict) const
596 {
597 // Allocate a buffer big enough to hold all the characters.
598 const unsigned length = size();
599 if (length > numeric_limits<unsigned>::max() / 3)
600 return CString();
601 Vector<char, 1024> buffer(length * 3);
602
603 // Convert to runs of 8-bit characters.
604 char* p = buffer.data();
605 const UChar* d = reinterpret_cast<const UChar*>(&data()[0]);
606 ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict);
607 if (result != conversionOK)
608 return CString();
609
610 return CString(buffer.data(), p - buffer.data());
611 }
612
613 } // namespace JSC