]> git.saurik.com Git - apple/javascriptcore.git/blob - runtime/UString.cpp
JavaScriptCore-554.1.tar.gz
[apple/javascriptcore.git] / runtime / UString.cpp
1 /*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5 * Copyright (C) 2009 Google Inc. All rights reserved.
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24 #include "config.h"
25 #include "UString.h"
26
27 #include "JSGlobalObjectFunctions.h"
28 #include "Collector.h"
29 #include "dtoa.h"
30 #include "Identifier.h"
31 #include "Operations.h"
32 #include <ctype.h>
33 #include <float.h>
34 #include <limits.h>
35 #include <math.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <wtf/ASCIICType.h>
39 #include <wtf/Assertions.h>
40 #include <wtf/MathExtras.h>
41 #include <wtf/Vector.h>
42 #include <wtf/unicode/UTF8.h>
43
44 #if HAVE(STRING_H)
45 #include <string.h>
46 #endif
47 #if HAVE(STRINGS_H)
48 #include <strings.h>
49 #endif
50
51 using namespace WTF;
52 using namespace WTF::Unicode;
53 using namespace std;
54
55 // This can be tuned differently per platform by putting platform #ifs right here.
56 // If you don't define this macro at all, then copyChars will just call directly
57 // to memcpy.
58 #define USTRING_COPY_CHARS_INLINE_CUTOFF 20
59
60 namespace JSC {
61
62 extern const double NaN;
63 extern const double Inf;
64
65 // This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings.
66 static const int minLengthToShare = 10;
67
// Sentinel size value used to report that a size computation overflowed.
static inline size_t overflowIndicator()
{
    const size_t sentinel = std::numeric_limits<size_t>::max();
    return sentinel;
}
69 static inline size_t maxUChars() { return std::numeric_limits<size_t>::max() / sizeof(UChar); }
70
71 static inline UChar* allocChars(size_t length)
72 {
73 ASSERT(length);
74 if (length > maxUChars())
75 return 0;
76 return static_cast<UChar*>(tryFastMalloc(sizeof(UChar) * length));
77 }
78
79 static inline UChar* reallocChars(UChar* buffer, size_t length)
80 {
81 ASSERT(length);
82 if (length > maxUChars())
83 return 0;
84 return static_cast<UChar*>(tryFastRealloc(buffer, sizeof(UChar) * length));
85 }
86
87 static inline void copyChars(UChar* destination, const UChar* source, unsigned numCharacters)
88 {
89 #ifdef USTRING_COPY_CHARS_INLINE_CUTOFF
90 if (numCharacters <= USTRING_COPY_CHARS_INLINE_CUTOFF) {
91 for (unsigned i = 0; i < numCharacters; ++i)
92 destination[i] = source[i];
93 return;
94 }
95 #endif
96 memcpy(destination, source, numCharacters * sizeof(UChar));
97 }
98
99 COMPILE_ASSERT(sizeof(UChar) == 2, uchar_is_2_bytes);
100
101 CString::CString(const char* c)
102 : m_length(strlen(c))
103 , m_data(new char[m_length + 1])
104 {
105 memcpy(m_data, c, m_length + 1);
106 }
107
108 CString::CString(const char* c, size_t length)
109 : m_length(length)
110 , m_data(new char[length + 1])
111 {
112 memcpy(m_data, c, m_length);
113 m_data[m_length] = 0;
114 }
115
116 CString::CString(const CString& b)
117 {
118 m_length = b.m_length;
119 if (b.m_data) {
120 m_data = new char[m_length + 1];
121 memcpy(m_data, b.m_data, m_length + 1);
122 } else
123 m_data = 0;
124 }
125
126 CString::~CString()
127 {
128 delete [] m_data;
129 }
130
131 CString CString::adopt(char* c, size_t length)
132 {
133 CString s;
134 s.m_data = c;
135 s.m_length = length;
136 return s;
137 }
138
139 CString& CString::append(const CString& t)
140 {
141 char* n;
142 n = new char[m_length + t.m_length + 1];
143 if (m_length)
144 memcpy(n, m_data, m_length);
145 if (t.m_length)
146 memcpy(n + m_length, t.m_data, t.m_length);
147 m_length += t.m_length;
148 n[m_length] = 0;
149
150 delete [] m_data;
151 m_data = n;
152
153 return *this;
154 }
155
156 CString& CString::operator=(const char* c)
157 {
158 if (m_data)
159 delete [] m_data;
160 m_length = strlen(c);
161 m_data = new char[m_length + 1];
162 memcpy(m_data, c, m_length + 1);
163
164 return *this;
165 }
166
167 CString& CString::operator=(const CString& str)
168 {
169 if (this == &str)
170 return *this;
171
172 if (m_data)
173 delete [] m_data;
174 m_length = str.m_length;
175 if (str.m_data) {
176 m_data = new char[m_length + 1];
177 memcpy(m_data, str.m_data, m_length + 1);
178 } else
179 m_data = 0;
180
181 return *this;
182 }
183
184 bool operator==(const CString& c1, const CString& c2)
185 {
186 size_t len = c1.size();
187 return len == c2.size() && (len == 0 || memcmp(c1.c_str(), c2.c_str(), len) == 0);
188 }
189
190 // These static strings are immutable, except for rc, whose initial value is chosen to
191 // reduce the possibility of it becoming zero due to ref/deref not being thread-safe.
192 static UChar sharedEmptyChar;
193 UString::BaseString* UString::Rep::nullBaseString;
194 UString::BaseString* UString::Rep::emptyBaseString;
195 UString* UString::nullUString;
196
197 static void initializeStaticBaseString(UString::BaseString& base)
198 {
199 base.rc = INT_MAX / 2;
200 base.m_identifierTableAndFlags.setFlag(UString::Rep::StaticFlag);
201 base.checkConsistency();
202 }
203
204 void initializeUString()
205 {
206 UString::Rep::nullBaseString = new UString::BaseString(0, 0);
207 initializeStaticBaseString(*UString::Rep::nullBaseString);
208
209 UString::Rep::emptyBaseString = new UString::BaseString(&sharedEmptyChar, 0);
210 initializeStaticBaseString(*UString::Rep::emptyBaseString);
211
212 UString::nullUString = new UString;
213 }
214
215 static char* statBuffer = 0; // Only used for debugging via UString::ascii().
216
217 PassRefPtr<UString::Rep> UString::Rep::createCopying(const UChar* d, int l)
218 {
219 UChar* copyD = static_cast<UChar*>(fastMalloc(l * sizeof(UChar)));
220 copyChars(copyD, d, l);
221 return create(copyD, l);
222 }
223
224 PassRefPtr<UString::Rep> UString::Rep::createFromUTF8(const char* string)
225 {
226 if (!string)
227 return &UString::Rep::null();
228
229 size_t length = strlen(string);
230 Vector<UChar, 1024> buffer(length);
231 UChar* p = buffer.data();
232 if (conversionOK != convertUTF8ToUTF16(&string, string + length, &p, p + length))
233 return &UString::Rep::null();
234
235 return UString::Rep::createCopying(buffer.data(), p - buffer.data());
236 }
237
238 PassRefPtr<UString::Rep> UString::Rep::create(UChar* string, int length, PassRefPtr<UString::SharedUChar> sharedBuffer)
239 {
240 PassRefPtr<UString::Rep> rep = create(string, length);
241 rep->baseString()->setSharedBuffer(sharedBuffer);
242 rep->checkConsistency();
243 return rep;
244 }
245
246 UString::SharedUChar* UString::Rep::sharedBuffer()
247 {
248 UString::BaseString* base = baseString();
249 if (len < minLengthToShare)
250 return 0;
251
252 return base->sharedBuffer();
253 }
254
255 void UString::Rep::destroy()
256 {
257 checkConsistency();
258
259 // Static null and empty strings can never be destroyed, but we cannot rely on
260 // reference counting, because ref/deref are not thread-safe.
261 if (!isStatic()) {
262 if (identifierTable())
263 Identifier::remove(this);
264
265 UString::BaseString* base = baseString();
266 if (base == this) {
267 if (m_sharedBuffer)
268 m_sharedBuffer->deref();
269 else
270 fastFree(base->buf);
271 } else
272 base->deref();
273
274 delete this;
275 }
276 }
277
278 // Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
279 // or anything like that.
280 const unsigned PHI = 0x9e3779b9U;
281
282 // Paul Hsieh's SuperFastHash
283 // http://www.azillionmonkeys.com/qed/hash.html
284 unsigned UString::Rep::computeHash(const UChar* s, int len)
285 {
286 unsigned l = len;
287 uint32_t hash = PHI;
288 uint32_t tmp;
289
290 int rem = l & 1;
291 l >>= 1;
292
293 // Main loop
294 for (; l > 0; l--) {
295 hash += s[0];
296 tmp = (s[1] << 11) ^ hash;
297 hash = (hash << 16) ^ tmp;
298 s += 2;
299 hash += hash >> 11;
300 }
301
302 // Handle end case
303 if (rem) {
304 hash += s[0];
305 hash ^= hash << 11;
306 hash += hash >> 17;
307 }
308
309 // Force "avalanching" of final 127 bits
310 hash ^= hash << 3;
311 hash += hash >> 5;
312 hash ^= hash << 2;
313 hash += hash >> 15;
314 hash ^= hash << 10;
315
316 // this avoids ever returning a hash code of 0, since that is used to
317 // signal "hash not computed yet", using a value that is likely to be
318 // effectively the same as 0 when the low bits are masked
319 if (hash == 0)
320 hash = 0x80000000;
321
322 return hash;
323 }
324
325 // Paul Hsieh's SuperFastHash
326 // http://www.azillionmonkeys.com/qed/hash.html
327 unsigned UString::Rep::computeHash(const char* s, int l)
328 {
329 // This hash is designed to work on 16-bit chunks at a time. But since the normal case
330 // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
331 // were 16-bit chunks, which should give matching results
332
333 uint32_t hash = PHI;
334 uint32_t tmp;
335
336 size_t rem = l & 1;
337 l >>= 1;
338
339 // Main loop
340 for (; l > 0; l--) {
341 hash += static_cast<unsigned char>(s[0]);
342 tmp = (static_cast<unsigned char>(s[1]) << 11) ^ hash;
343 hash = (hash << 16) ^ tmp;
344 s += 2;
345 hash += hash >> 11;
346 }
347
348 // Handle end case
349 if (rem) {
350 hash += static_cast<unsigned char>(s[0]);
351 hash ^= hash << 11;
352 hash += hash >> 17;
353 }
354
355 // Force "avalanching" of final 127 bits
356 hash ^= hash << 3;
357 hash += hash >> 5;
358 hash ^= hash << 2;
359 hash += hash >> 15;
360 hash ^= hash << 10;
361
362 // this avoids ever returning a hash code of 0, since that is used to
363 // signal "hash not computed yet", using a value that is likely to be
364 // effectively the same as 0 when the low bits are masked
365 if (hash == 0)
366 hash = 0x80000000;
367
368 return hash;
369 }
370
371 #ifndef NDEBUG
372 void UString::Rep::checkConsistency() const
373 {
374 const UString::BaseString* base = baseString();
375
376 // There is no recursion for base strings.
377 ASSERT(base == base->baseString());
378
379 if (isStatic()) {
380 // There are only two static strings: null and empty.
381 ASSERT(!len);
382
383 // Static strings cannot get in identifier tables, because they are globally shared.
384 ASSERT(!identifierTable());
385 }
386
387 // The string fits in buffer.
388 ASSERT(base->usedPreCapacity <= base->preCapacity);
389 ASSERT(base->usedCapacity <= base->capacity);
390 ASSERT(-offset <= base->usedPreCapacity);
391 ASSERT(offset + len <= base->usedCapacity);
392 }
393 #endif
394
395 UString::SharedUChar* UString::BaseString::sharedBuffer()
396 {
397 if (!m_sharedBuffer)
398 setSharedBuffer(SharedUChar::create(new OwnFastMallocPtr<UChar>(buf)));
399 return m_sharedBuffer;
400 }
401
402 void UString::BaseString::setSharedBuffer(PassRefPtr<UString::SharedUChar> sharedBuffer)
403 {
404 // The manual steps below are because m_sharedBuffer can't be a RefPtr. m_sharedBuffer
405 // is in a union with another variable to avoid making BaseString any larger.
406 if (m_sharedBuffer)
407 m_sharedBuffer->deref();
408 m_sharedBuffer = sharedBuffer.releaseRef();
409 }
410
411 bool UString::BaseString::slowIsBufferReadOnly()
412 {
413 // The buffer may not be modified as soon as the underlying data has been shared with another class.
414 if (m_sharedBuffer->isShared())
415 return true;
416
417 // At this point, we know it that the underlying buffer isn't shared outside of this base class,
418 // so get rid of m_sharedBuffer.
419 OwnPtr<OwnFastMallocPtr<UChar> > mallocPtr(m_sharedBuffer->release());
420 UChar* unsharedBuf = const_cast<UChar*>(mallocPtr->release());
421 setSharedBuffer(0);
422 preCapacity += (buf - unsharedBuf);
423 buf = unsharedBuf;
424 return false;
425 }
426
427 // Put these early so they can be inlined.
428 static inline size_t expandedSize(size_t capacitySize, size_t precapacitySize)
429 {
430 // Combine capacitySize & precapacitySize to produce a single size to allocate,
431 // check that doing so does not result in overflow.
432 size_t size = capacitySize + precapacitySize;
433 if (size < capacitySize)
434 return overflowIndicator();
435
436 // Small Strings (up to 4 pages):
437 // Expand the allocation size to 112.5% of the amount requested. This is largely sicking
438 // to our previous policy, however 112.5% is cheaper to calculate.
439 if (size < 0x4000) {
440 size_t expandedSize = ((size + (size >> 3)) | 15) + 1;
441 // Given the limited range within which we calculate the expansion in this
442 // fashion the above calculation should never overflow.
443 ASSERT(expandedSize >= size);
444 ASSERT(expandedSize < maxUChars());
445 return expandedSize;
446 }
447
448 // Medium Strings (up to 128 pages):
449 // For pages covering multiple pages over-allocation is less of a concern - any unused
450 // space will not be paged in if it is not used, so this is purely a VM overhead. For
451 // these strings allocate 2x the requested size.
452 if (size < 0x80000) {
453 size_t expandedSize = ((size + size) | 0xfff) + 1;
454 // Given the limited range within which we calculate the expansion in this
455 // fashion the above calculation should never overflow.
456 ASSERT(expandedSize >= size);
457 ASSERT(expandedSize < maxUChars());
458 return expandedSize;
459 }
460
461 // Large Strings (to infinity and beyond!):
462 // Revert to our 112.5% policy - probably best to limit the amount of unused VM we allow
463 // any individual string be responsible for.
464 size_t expandedSize = ((size + (size >> 3)) | 0xfff) + 1;
465
466 // Check for overflow - any result that is at least as large as requested (but
467 // still below the limit) is okay.
468 if ((expandedSize >= size) && (expandedSize < maxUChars()))
469 return expandedSize;
470 return overflowIndicator();
471 }
472
473 static inline bool expandCapacity(UString::Rep* rep, int requiredLength)
474 {
475 rep->checkConsistency();
476 ASSERT(!rep->baseString()->isBufferReadOnly());
477
478 UString::BaseString* base = rep->baseString();
479
480 if (requiredLength > base->capacity) {
481 size_t newCapacity = expandedSize(requiredLength, base->preCapacity);
482 UChar* oldBuf = base->buf;
483 base->buf = reallocChars(base->buf, newCapacity);
484 if (!base->buf) {
485 base->buf = oldBuf;
486 return false;
487 }
488 base->capacity = newCapacity - base->preCapacity;
489 }
490 if (requiredLength > base->usedCapacity)
491 base->usedCapacity = requiredLength;
492
493 rep->checkConsistency();
494 return true;
495 }
496
497 bool UString::Rep::reserveCapacity(int capacity)
498 {
499 // If this is an empty string there is no point 'growing' it - just allocate a new one.
500 // If the BaseString is shared with another string that is using more capacity than this
501 // string is, then growing the buffer won't help.
502 // If the BaseString's buffer is readonly, then it isn't allowed to grow.
503 UString::BaseString* base = baseString();
504 if (!base->buf || !base->capacity || (offset + len) != base->usedCapacity || base->isBufferReadOnly())
505 return false;
506
507 // If there is already sufficient capacity, no need to grow!
508 if (capacity <= base->capacity)
509 return true;
510
511 checkConsistency();
512
513 size_t newCapacity = expandedSize(capacity, base->preCapacity);
514 UChar* oldBuf = base->buf;
515 base->buf = reallocChars(base->buf, newCapacity);
516 if (!base->buf) {
517 base->buf = oldBuf;
518 return false;
519 }
520 base->capacity = newCapacity - base->preCapacity;
521
522 checkConsistency();
523 return true;
524 }
525
526 void UString::expandCapacity(int requiredLength)
527 {
528 if (!JSC::expandCapacity(m_rep.get(), requiredLength))
529 makeNull();
530 }
531
532 void UString::expandPreCapacity(int requiredPreCap)
533 {
534 m_rep->checkConsistency();
535 ASSERT(!m_rep->baseString()->isBufferReadOnly());
536
537 BaseString* base = m_rep->baseString();
538
539 if (requiredPreCap > base->preCapacity) {
540 size_t newCapacity = expandedSize(requiredPreCap, base->capacity);
541 int delta = newCapacity - base->capacity - base->preCapacity;
542
543 UChar* newBuf = allocChars(newCapacity);
544 if (!newBuf) {
545 makeNull();
546 return;
547 }
548 copyChars(newBuf + delta, base->buf, base->capacity + base->preCapacity);
549 fastFree(base->buf);
550 base->buf = newBuf;
551
552 base->preCapacity = newCapacity - base->capacity;
553 }
554 if (requiredPreCap > base->usedPreCapacity)
555 base->usedPreCapacity = requiredPreCap;
556
557 m_rep->checkConsistency();
558 }
559
560 static PassRefPtr<UString::Rep> createRep(const char* c)
561 {
562 if (!c)
563 return &UString::Rep::null();
564
565 if (!c[0])
566 return &UString::Rep::empty();
567
568 size_t length = strlen(c);
569 UChar* d = allocChars(length);
570 if (!d)
571 return &UString::Rep::null();
572 else {
573 for (size_t i = 0; i < length; i++)
574 d[i] = static_cast<unsigned char>(c[i]); // use unsigned char to zero-extend instead of sign-extend
575 return UString::Rep::create(d, static_cast<int>(length));
576 }
577
578 }
579
580 UString::UString(const char* c)
581 : m_rep(createRep(c))
582 {
583 }
584
585 UString::UString(const UChar* c, int length)
586 {
587 if (length == 0)
588 m_rep = &Rep::empty();
589 else
590 m_rep = Rep::createCopying(c, length);
591 }
592
593 UString::UString(UChar* c, int length, bool copy)
594 {
595 if (length == 0)
596 m_rep = &Rep::empty();
597 else if (copy)
598 m_rep = Rep::createCopying(c, length);
599 else
600 m_rep = Rep::create(c, length);
601 }
602
603 UString::UString(const Vector<UChar>& buffer)
604 {
605 if (!buffer.size())
606 m_rep = &Rep::empty();
607 else
608 m_rep = Rep::createCopying(buffer.data(), buffer.size());
609 }
610
611 static ALWAYS_INLINE int newCapacityWithOverflowCheck(const int currentCapacity, const int extendLength, const bool plusOne = false)
612 {
613 ASSERT_WITH_MESSAGE(extendLength >= 0, "extendedLength = %d", extendLength);
614
615 const int plusLength = plusOne ? 1 : 0;
616 if (currentCapacity > std::numeric_limits<int>::max() - extendLength - plusLength)
617 CRASH();
618
619 return currentCapacity + extendLength + plusLength;
620 }
621
622 static ALWAYS_INLINE PassRefPtr<UString::Rep> concatenate(PassRefPtr<UString::Rep> r, const UChar* tData, int tSize)
623 {
624 RefPtr<UString::Rep> rep = r;
625
626 rep->checkConsistency();
627
628 int thisSize = rep->size();
629 int thisOffset = rep->offset;
630 int length = thisSize + tSize;
631 UString::BaseString* base = rep->baseString();
632
633 // possible cases:
634 if (tSize == 0) {
635 // t is empty
636 } else if (thisSize == 0) {
637 // this is empty
638 rep = UString::Rep::createCopying(tData, tSize);
639 } else if (rep == base && !base->isShared()) {
640 // this is direct and has refcount of 1 (so we can just alter it directly)
641 if (!expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length)))
642 rep = &UString::Rep::null();
643 if (rep->data()) {
644 copyChars(rep->data() + thisSize, tData, tSize);
645 rep->len = length;
646 rep->_hash = 0;
647 }
648 } else if (thisOffset + thisSize == base->usedCapacity && thisSize >= minShareSize && !base->isBufferReadOnly()) {
649 // this reaches the end of the buffer - extend it if it's long enough to append to
650 if (!expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length)))
651 rep = &UString::Rep::null();
652 if (rep->data()) {
653 copyChars(rep->data() + thisSize, tData, tSize);
654 rep = UString::Rep::create(rep, 0, length);
655 }
656 } else {
657 // This is shared in some way that prevents us from modifying base, so we must make a whole new string.
658 size_t newCapacity = expandedSize(length, 0);
659 UChar* d = allocChars(newCapacity);
660 if (!d)
661 rep = &UString::Rep::null();
662 else {
663 copyChars(d, rep->data(), thisSize);
664 copyChars(d + thisSize, tData, tSize);
665 rep = UString::Rep::create(d, length);
666 rep->baseString()->capacity = newCapacity;
667 }
668 }
669
670 rep->checkConsistency();
671
672 return rep.release();
673 }
674
675 static ALWAYS_INLINE PassRefPtr<UString::Rep> concatenate(PassRefPtr<UString::Rep> r, const char* t)
676 {
677 RefPtr<UString::Rep> rep = r;
678
679 rep->checkConsistency();
680
681 int thisSize = rep->size();
682 int thisOffset = rep->offset;
683 int tSize = static_cast<int>(strlen(t));
684 int length = thisSize + tSize;
685 UString::BaseString* base = rep->baseString();
686
687 // possible cases:
688 if (thisSize == 0) {
689 // this is empty
690 rep = createRep(t);
691 } else if (tSize == 0) {
692 // t is empty, we'll just return *this below.
693 } else if (rep == base && !base->isShared()) {
694 // this is direct and has refcount of 1 (so we can just alter it directly)
695 expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length));
696 UChar* d = rep->data();
697 if (d) {
698 for (int i = 0; i < tSize; ++i)
699 d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
700 rep->len = length;
701 rep->_hash = 0;
702 }
703 } else if (thisOffset + thisSize == base->usedCapacity && thisSize >= minShareSize && !base->isBufferReadOnly()) {
704 // this string reaches the end of the buffer - extend it
705 expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length));
706 UChar* d = rep->data();
707 if (d) {
708 for (int i = 0; i < tSize; ++i)
709 d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
710 rep = UString::Rep::create(rep, 0, length);
711 }
712 } else {
713 // This is shared in some way that prevents us from modifying base, so we must make a whole new string.
714 size_t newCapacity = expandedSize(length, 0);
715 UChar* d = allocChars(newCapacity);
716 if (!d)
717 rep = &UString::Rep::null();
718 else {
719 copyChars(d, rep->data(), thisSize);
720 for (int i = 0; i < tSize; ++i)
721 d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
722 rep = UString::Rep::create(d, length);
723 rep->baseString()->capacity = newCapacity;
724 }
725 }
726
727 rep->checkConsistency();
728
729 return rep.release();
730 }
731
732 PassRefPtr<UString::Rep> concatenate(UString::Rep* a, UString::Rep* b)
733 {
734 a->checkConsistency();
735 b->checkConsistency();
736
737 int aSize = a->size();
738 int bSize = b->size();
739 int aOffset = a->offset;
740
741 // possible cases:
742
743 UString::BaseString* aBase = a->baseString();
744 if (bSize == 1 && aOffset + aSize == aBase->usedCapacity && aOffset + aSize < aBase->capacity && !aBase->isBufferReadOnly()) {
745 // b is a single character (common fast case)
746 ++aBase->usedCapacity;
747 a->data()[aSize] = b->data()[0];
748 return UString::Rep::create(a, 0, aSize + 1);
749 }
750
751 // a is empty
752 if (aSize == 0)
753 return b;
754 // b is empty
755 if (bSize == 0)
756 return a;
757
758 int bOffset = b->offset;
759 int length = aSize + bSize;
760
761 UString::BaseString* bBase = b->baseString();
762 if (aOffset + aSize == aBase->usedCapacity && aSize >= minShareSize && 4 * aSize >= bSize
763 && (-bOffset != bBase->usedPreCapacity || aSize >= bSize) && !aBase->isBufferReadOnly()) {
764 // - a reaches the end of its buffer so it qualifies for shared append
765 // - also, it's at least a quarter the length of b - appending to a much shorter
766 // string does more harm than good
767 // - however, if b qualifies for prepend and is longer than a, we'd rather prepend
768
769 UString x(a);
770 x.expandCapacity(newCapacityWithOverflowCheck(aOffset, length));
771 if (!a->data() || !x.data())
772 return 0;
773 copyChars(a->data() + aSize, b->data(), bSize);
774 PassRefPtr<UString::Rep> result = UString::Rep::create(a, 0, length);
775
776 a->checkConsistency();
777 b->checkConsistency();
778 result->checkConsistency();
779
780 return result;
781 }
782
783 if (-bOffset == bBase->usedPreCapacity && bSize >= minShareSize && 4 * bSize >= aSize && !bBase->isBufferReadOnly()) {
784 // - b reaches the beginning of its buffer so it qualifies for shared prepend
785 // - also, it's at least a quarter the length of a - prepending to a much shorter
786 // string does more harm than good
787 UString y(b);
788 y.expandPreCapacity(-bOffset + aSize);
789 if (!b->data() || !y.data())
790 return 0;
791 copyChars(b->data() - aSize, a->data(), aSize);
792 PassRefPtr<UString::Rep> result = UString::Rep::create(b, -aSize, length);
793
794 a->checkConsistency();
795 b->checkConsistency();
796 result->checkConsistency();
797
798 return result;
799 }
800
801 // a does not qualify for append, and b does not qualify for prepend, gotta make a whole new string
802 size_t newCapacity = expandedSize(length, 0);
803 UChar* d = allocChars(newCapacity);
804 if (!d)
805 return 0;
806 copyChars(d, a->data(), aSize);
807 copyChars(d + aSize, b->data(), bSize);
808 PassRefPtr<UString::Rep> result = UString::Rep::create(d, length);
809 result->baseString()->capacity = newCapacity;
810
811 a->checkConsistency();
812 b->checkConsistency();
813 result->checkConsistency();
814
815 return result;
816 }
817
818 PassRefPtr<UString::Rep> concatenate(UString::Rep* rep, int i)
819 {
820 UChar buf[1 + sizeof(i) * 3];
821 UChar* end = buf + sizeof(buf) / sizeof(UChar);
822 UChar* p = end;
823
824 if (i == 0)
825 *--p = '0';
826 else if (i == INT_MIN) {
827 char minBuf[1 + sizeof(i) * 3];
828 sprintf(minBuf, "%d", INT_MIN);
829 return concatenate(rep, minBuf);
830 } else {
831 bool negative = false;
832 if (i < 0) {
833 negative = true;
834 i = -i;
835 }
836 while (i) {
837 *--p = static_cast<unsigned short>((i % 10) + '0');
838 i /= 10;
839 }
840 if (negative)
841 *--p = '-';
842 }
843
844 return concatenate(rep, p, static_cast<int>(end - p));
845
846 }
847
848 PassRefPtr<UString::Rep> concatenate(UString::Rep* rep, double d)
849 {
850 // avoid ever printing -NaN, in JS conceptually there is only one NaN value
851 if (isnan(d))
852 return concatenate(rep, "NaN");
853
854 if (d == 0.0) // stringify -0 as 0
855 d = 0.0;
856
857 char buf[80];
858 int decimalPoint;
859 int sign;
860
861 char result[80];
862 WTF::dtoa(result, d, 0, &decimalPoint, &sign, NULL);
863 int length = static_cast<int>(strlen(result));
864
865 int i = 0;
866 if (sign)
867 buf[i++] = '-';
868
869 if (decimalPoint <= 0 && decimalPoint > -6) {
870 buf[i++] = '0';
871 buf[i++] = '.';
872 for (int j = decimalPoint; j < 0; j++)
873 buf[i++] = '0';
874 strcpy(buf + i, result);
875 } else if (decimalPoint <= 21 && decimalPoint > 0) {
876 if (length <= decimalPoint) {
877 strcpy(buf + i, result);
878 i += length;
879 for (int j = 0; j < decimalPoint - length; j++)
880 buf[i++] = '0';
881 buf[i] = '\0';
882 } else {
883 strncpy(buf + i, result, decimalPoint);
884 i += decimalPoint;
885 buf[i++] = '.';
886 strcpy(buf + i, result + decimalPoint);
887 }
888 } else if (result[0] < '0' || result[0] > '9')
889 strcpy(buf + i, result);
890 else {
891 buf[i++] = result[0];
892 if (length > 1) {
893 buf[i++] = '.';
894 strcpy(buf + i, result + 1);
895 i += length - 1;
896 }
897
898 buf[i++] = 'e';
899 buf[i++] = (decimalPoint >= 0) ? '+' : '-';
900 // decimalPoint can't be more than 3 digits decimal given the
901 // nature of float representation
902 int exponential = decimalPoint - 1;
903 if (exponential < 0)
904 exponential = -exponential;
905 if (exponential >= 100)
906 buf[i++] = static_cast<char>('0' + exponential / 100);
907 if (exponential >= 10)
908 buf[i++] = static_cast<char>('0' + (exponential % 100) / 10);
909 buf[i++] = static_cast<char>('0' + exponential % 10);
910 buf[i++] = '\0';
911 }
912
913 return concatenate(rep, buf);
914 }
915
916 UString UString::from(int i)
917 {
918 UChar buf[1 + sizeof(i) * 3];
919 UChar* end = buf + sizeof(buf) / sizeof(UChar);
920 UChar* p = end;
921
922 if (i == 0)
923 *--p = '0';
924 else if (i == INT_MIN) {
925 char minBuf[1 + sizeof(i) * 3];
926 snprintf(minBuf, 1 + sizeof(i) * 3, "%d", INT_MIN);
927 return UString(minBuf);
928 } else {
929 bool negative = false;
930 if (i < 0) {
931 negative = true;
932 i = -i;
933 }
934 while (i) {
935 *--p = static_cast<unsigned short>((i % 10) + '0');
936 i /= 10;
937 }
938 if (negative)
939 *--p = '-';
940 }
941
942 return UString(p, static_cast<int>(end - p));
943 }
944
945 UString UString::from(unsigned int u)
946 {
947 UChar buf[sizeof(u) * 3];
948 UChar* end = buf + sizeof(buf) / sizeof(UChar);
949 UChar* p = end;
950
951 if (u == 0)
952 *--p = '0';
953 else {
954 while (u) {
955 *--p = static_cast<unsigned short>((u % 10) + '0');
956 u /= 10;
957 }
958 }
959
960 return UString(p, static_cast<int>(end - p));
961 }
962
963 UString UString::from(long l)
964 {
965 UChar buf[1 + sizeof(l) * 3];
966 UChar* end = buf + sizeof(buf) / sizeof(UChar);
967 UChar* p = end;
968
969 if (l == 0)
970 *--p = '0';
971 else if (l == LONG_MIN) {
972 char minBuf[1 + sizeof(l) * 3];
973 snprintf(minBuf, 1 + sizeof(l) * 3, "%ld", LONG_MIN);
974 return UString(minBuf);
975 } else {
976 bool negative = false;
977 if (l < 0) {
978 negative = true;
979 l = -l;
980 }
981 while (l) {
982 *--p = static_cast<unsigned short>((l % 10) + '0');
983 l /= 10;
984 }
985 if (negative)
986 *--p = '-';
987 }
988
989 return UString(p, static_cast<int>(end - p));
990 }
991
992 UString UString::from(double d)
993 {
994 // avoid ever printing -NaN, in JS conceptually there is only one NaN value
995 if (isnan(d))
996 return "NaN";
997
998 char buf[80];
999 int decimalPoint;
1000 int sign;
1001
1002 char result[80];
1003 WTF::dtoa(result, d, 0, &decimalPoint, &sign, NULL);
1004 int length = static_cast<int>(strlen(result));
1005
1006 int i = 0;
1007 if (sign)
1008 buf[i++] = '-';
1009
1010 if (decimalPoint <= 0 && decimalPoint > -6) {
1011 buf[i++] = '0';
1012 buf[i++] = '.';
1013 for (int j = decimalPoint; j < 0; j++)
1014 buf[i++] = '0';
1015 strlcpy(buf + i, result, sizeof(buf) - i);
1016 } else if (decimalPoint <= 21 && decimalPoint > 0) {
1017 if (length <= decimalPoint) {
1018 strlcpy(buf + i, result, sizeof(buf) - i);
1019 i += length;
1020 for (int j = 0; j < decimalPoint - length; j++)
1021 buf[i++] = '0';
1022 buf[i] = '\0';
1023 } else {
1024 int len = (decimalPoint <= static_cast<int>(sizeof(buf)) - i ? decimalPoint : sizeof(buf) - i);
1025 strncpy(buf + i, result, len);
1026 i += len;
1027 buf[i++] = '.';
1028 strlcpy(buf + i, result + decimalPoint, sizeof(buf) - i);
1029 }
1030 } else if (result[0] < '0' || result[0] > '9')
1031 strlcpy(buf + i, result, sizeof(buf) - i);
1032 else {
1033 buf[i++] = result[0];
1034 if (length > 1) {
1035 buf[i++] = '.';
1036 strlcpy(buf + i, result + 1, sizeof(buf) - i);
1037 i += length - 1;
1038 }
1039
1040 buf[i++] = 'e';
1041 buf[i++] = (decimalPoint >= 0) ? '+' : '-';
1042 // decimalPoint can't be more than 3 digits decimal given the
1043 // nature of float representation
1044 int exponential = decimalPoint - 1;
1045 if (exponential < 0)
1046 exponential = -exponential;
1047 if (exponential >= 100)
1048 buf[i++] = static_cast<char>('0' + exponential / 100);
1049 if (exponential >= 10)
1050 buf[i++] = static_cast<char>('0' + (exponential % 100) / 10);
1051 buf[i++] = static_cast<char>('0' + exponential % 10);
1052 buf[i++] = '\0';
1053 ASSERT(i <= static_cast<int>(sizeof(buf)));
1054 }
1055
1056 return UString(buf);
1057 }
1058
1059 UString UString::spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const
1060 {
1061 m_rep->checkConsistency();
1062
1063 if (rangeCount == 1 && separatorCount == 0) {
1064 int thisSize = size();
1065 int position = substringRanges[0].position;
1066 int length = substringRanges[0].length;
1067 if (position <= 0 && length >= thisSize)
1068 return *this;
1069 return UString::Rep::create(m_rep, max(0, position), min(thisSize, length));
1070 }
1071
1072 int totalLength = 0;
1073 for (int i = 0; i < rangeCount; i++)
1074 totalLength += substringRanges[i].length;
1075 for (int i = 0; i < separatorCount; i++)
1076 totalLength += separators[i].size();
1077
1078 if (totalLength == 0)
1079 return "";
1080
1081 UChar* buffer = allocChars(totalLength);
1082 if (!buffer)
1083 return null();
1084
1085 int maxCount = max(rangeCount, separatorCount);
1086 int bufferPos = 0;
1087 for (int i = 0; i < maxCount; i++) {
1088 if (i < rangeCount) {
1089 copyChars(buffer + bufferPos, data() + substringRanges[i].position, substringRanges[i].length);
1090 bufferPos += substringRanges[i].length;
1091 }
1092 if (i < separatorCount) {
1093 copyChars(buffer + bufferPos, separators[i].data(), separators[i].size());
1094 bufferPos += separators[i].size();
1095 }
1096 }
1097
1098 return UString::Rep::create(buffer, totalLength);
1099 }
1100
1101 UString UString::replaceRange(int rangeStart, int rangeLength, const UString& replacement) const
1102 {
1103 m_rep->checkConsistency();
1104
1105 int replacementLength = replacement.size();
1106 int totalLength = size() - rangeLength + replacementLength;
1107 if (totalLength == 0)
1108 return "";
1109
1110 UChar* buffer = allocChars(totalLength);
1111 if (!buffer)
1112 return null();
1113
1114 copyChars(buffer, data(), rangeStart);
1115 copyChars(buffer + rangeStart, replacement.data(), replacementLength);
1116 int rangeEnd = rangeStart + rangeLength;
1117 copyChars(buffer + rangeStart + replacementLength, data() + rangeEnd, size() - rangeEnd);
1118
1119 return UString::Rep::create(buffer, totalLength);
1120 }
1121
1122
1123 UString& UString::append(const UString &t)
1124 {
1125 m_rep->checkConsistency();
1126 t.rep()->checkConsistency();
1127
1128 int thisSize = size();
1129 int thisOffset = m_rep->offset;
1130 int tSize = t.size();
1131 int length = thisSize + tSize;
1132 BaseString* base = m_rep->baseString();
1133
1134 // possible cases:
1135 if (thisSize == 0) {
1136 // this is empty
1137 *this = t;
1138 } else if (tSize == 0) {
1139 // t is empty
1140 } else if (m_rep == base && !base->isShared()) {
1141 // this is direct and has refcount of 1 (so we can just alter it directly)
1142 expandCapacity(newCapacityWithOverflowCheck(thisOffset, length));
1143 if (data()) {
1144 copyChars(m_rep->data() + thisSize, t.data(), tSize);
1145 m_rep->len = length;
1146 m_rep->_hash = 0;
1147 }
1148 } else if (thisOffset + thisSize == base->usedCapacity && thisSize >= minShareSize && !base->isBufferReadOnly()) {
1149 // this reaches the end of the buffer - extend it if it's long enough to append to
1150 expandCapacity(newCapacityWithOverflowCheck(thisOffset, length));
1151 if (data()) {
1152 copyChars(m_rep->data() + thisSize, t.data(), tSize);
1153 m_rep = Rep::create(m_rep, 0, length);
1154 }
1155 } else {
1156 // This is shared in some way that prevents us from modifying base, so we must make a whole new string.
1157 size_t newCapacity = expandedSize(length, 0);
1158 UChar* d = allocChars(newCapacity);
1159 if (!d)
1160 makeNull();
1161 else {
1162 copyChars(d, data(), thisSize);
1163 copyChars(d + thisSize, t.data(), tSize);
1164 m_rep = Rep::create(d, length);
1165 m_rep->baseString()->capacity = newCapacity;
1166 }
1167 }
1168
1169 m_rep->checkConsistency();
1170 t.rep()->checkConsistency();
1171
1172 return *this;
1173 }
1174
// Replaces m_rep with the result of concatenate(), which is handed the released
// current rep together with tData and tSize, then returns *this.
// NOTE(review): concatenate() is defined outside this view; presumably it appends
// tSize UChars starting at tData -- confirm.
1175 UString& UString::append(const UChar* tData, int tSize)
1176 {
1177 m_rep = concatenate(m_rep.release(), tData, tSize);
1178 return *this;
1179 }
1180
// Replaces m_rep with the result of concatenate(rep(), i) and returns *this.
// NOTE(review): concatenate() is defined outside this view; presumably it appends
// the textual form of the integer i -- confirm.
1181 UString& UString::appendNumeric(int i)
1182 {
1183 m_rep = concatenate(rep(), i);
1184 return *this;
1185 }
1186
// Replaces m_rep with the result of concatenate(rep(), d) and returns *this.
// NOTE(review): concatenate() is defined outside this view; presumably it appends
// the textual form of the double d -- confirm.
1187 UString& UString::appendNumeric(double d)
1188 {
1189 m_rep = concatenate(rep(), d);
1190 return *this;
1191 }
1192
// Replaces m_rep with the result of concatenate(), which is handed the released
// current rep together with the C string t, then returns *this.
// NOTE(review): concatenate() is defined outside this view; presumably it appends
// the characters of the NUL-terminated string t -- confirm.
1193 UString& UString::append(const char* t)
1194 {
1195 m_rep = concatenate(m_rep.release(), t);
1196 return *this;
1197 }
1198
1199 UString& UString::append(UChar c)
1200 {
1201 m_rep->checkConsistency();
1202
1203 int thisOffset = m_rep->offset;
1204 int length = size();
1205 BaseString* base = m_rep->baseString();
1206
1207 // possible cases:
1208 if (length == 0) {
1209 // this is empty - must make a new m_rep because we don't want to pollute the shared empty one
1210 size_t newCapacity = expandedSize(1, 0);
1211 UChar* d = allocChars(newCapacity);
1212 if (!d)
1213 makeNull();
1214 else {
1215 d[0] = c;
1216 m_rep = Rep::create(d, 1);
1217 m_rep->baseString()->capacity = newCapacity;
1218 }
1219 } else if (m_rep == base && !base->isShared()) {
1220 // this is direct and has refcount of 1 (so we can just alter it directly)
1221 expandCapacity(newCapacityWithOverflowCheck(thisOffset, length, true));
1222 UChar* d = m_rep->data();
1223 if (d) {
1224 d[length] = c;
1225 m_rep->len = length + 1;
1226 m_rep->_hash = 0;
1227 }
1228 } else if (thisOffset + length == base->usedCapacity && length >= minShareSize && !base->isBufferReadOnly()) {
1229 // this reaches the end of the string - extend it and share
1230 expandCapacity(newCapacityWithOverflowCheck(thisOffset, length, true));
1231 UChar* d = m_rep->data();
1232 if (d) {
1233 d[length] = c;
1234 m_rep = Rep::create(m_rep, 0, length + 1);
1235 }
1236 } else {
1237 // This is shared in some way that prevents us from modifying base, so we must make a whole new string.
1238 size_t newCapacity = expandedSize(length + 1, 0);
1239 UChar* d = allocChars(newCapacity);
1240 if (!d)
1241 makeNull();
1242 else {
1243 copyChars(d, data(), length);
1244 d[length] = c;
1245 m_rep = Rep::create(d, length + 1);
1246 m_rep->baseString()->capacity = newCapacity;
1247 }
1248 }
1249
1250 m_rep->checkConsistency();
1251
1252 return *this;
1253 }
1254
1255 bool UString::getCString(CStringBuffer& buffer) const
1256 {
1257 int length = size();
1258 int neededSize = length + 1;
1259 buffer.resize(neededSize);
1260 char* buf = buffer.data();
1261
1262 UChar ored = 0;
1263 const UChar* p = data();
1264 char* q = buf;
1265 const UChar* limit = p + length;
1266 while (p != limit) {
1267 UChar c = p[0];
1268 ored |= c;
1269 *q = static_cast<char>(c);
1270 ++p;
1271 ++q;
1272 }
1273 *q = '\0';
1274
1275 return !(ored & 0xFF00);
1276 }
1277
1278 char* UString::ascii() const
1279 {
1280 int length = size();
1281 int neededSize = length + 1;
1282 delete[] statBuffer;
1283 statBuffer = new char[neededSize];
1284
1285 const UChar* p = data();
1286 char* q = statBuffer;
1287 const UChar* limit = p + length;
1288 while (p != limit) {
1289 *q = static_cast<char>(p[0]);
1290 ++p;
1291 ++q;
1292 }
1293 *q = '\0';
1294
1295 return statBuffer;
1296 }
1297
1298 UString& UString::operator=(const char* c)
1299 {
1300 if (!c) {
1301 m_rep = &Rep::null();
1302 return *this;
1303 }
1304
1305 if (!c[0]) {
1306 m_rep = &Rep::empty();
1307 return *this;
1308 }
1309
1310 int l = static_cast<int>(strlen(c));
1311 UChar* d;
1312 BaseString* base = m_rep->baseString();
1313 if (!base->isShared() && l <= base->capacity && m_rep == base && m_rep->offset == 0 && base->preCapacity == 0) {
1314 d = base->buf;
1315 m_rep->_hash = 0;
1316 m_rep->len = l;
1317 } else {
1318 d = allocChars(l);
1319 if (!d) {
1320 makeNull();
1321 return *this;
1322 }
1323 m_rep = Rep::create(d, l);
1324 }
1325 for (int i = 0; i < l; i++)
1326 d[i] = static_cast<unsigned char>(c[i]); // use unsigned char to zero-extend instead of sign-extend
1327
1328 return *this;
1329 }
1330
1331 bool UString::is8Bit() const
1332 {
1333 const UChar* u = data();
1334 const UChar* limit = u + size();
1335 while (u < limit) {
1336 if (u[0] > 0xFF)
1337 return false;
1338 ++u;
1339 }
1340
1341 return true;
1342 }
1343
1344 UChar UString::operator[](int pos) const
1345 {
1346 if (pos >= size())
1347 return '\0';
1348 return data()[pos];
1349 }
1350
1351 double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
1352 {
1353 if (size() == 1) {
1354 UChar c = data()[0];
1355 if (isASCIIDigit(c))
1356 return c - '0';
1357 if (isASCIISpace(c) && tolerateEmptyString)
1358 return 0;
1359 return NaN;
1360 }
1361
1362 // FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk
1363 // after the number, so this is too strict a check.
1364 CStringBuffer s;
1365 if (!getCString(s))
1366 return NaN;
1367 const char* c = s.data();
1368
1369 // skip leading white space
1370 while (isASCIISpace(*c))
1371 c++;
1372
1373 // empty string ?
1374 if (*c == '\0')
1375 return tolerateEmptyString ? 0.0 : NaN;
1376
1377 double d;
1378
1379 // hex number ?
1380 if (*c == '0' && (*(c + 1) == 'x' || *(c + 1) == 'X')) {
1381 const char* firstDigitPosition = c + 2;
1382 c++;
1383 d = 0.0;
1384 while (*(++c)) {
1385 if (*c >= '0' && *c <= '9')
1386 d = d * 16.0 + *c - '0';
1387 else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f'))
1388 d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
1389 else
1390 break;
1391 }
1392
1393 if (d >= mantissaOverflowLowerBound)
1394 d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
1395 } else {
1396 // regular number ?
1397 char* end;
1398 d = WTF::strtod(c, &end);
1399 if ((d != 0.0 || end != c) && d != Inf && d != -Inf) {
1400 c = end;
1401 } else {
1402 double sign = 1.0;
1403
1404 if (*c == '+')
1405 c++;
1406 else if (*c == '-') {
1407 sign = -1.0;
1408 c++;
1409 }
1410
1411 // We used strtod() to do the conversion. However, strtod() handles
1412 // infinite values slightly differently than JavaScript in that it
1413 // converts the string "inf" with any capitalization to infinity,
1414 // whereas the ECMA spec requires that it be converted to NaN.
1415
1416 if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
1417 d = sign * Inf;
1418 c += 8;
1419 } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i')
1420 c = end;
1421 else
1422 return NaN;
1423 }
1424 }
1425
1426 // allow trailing white space
1427 while (isASCIISpace(*c))
1428 c++;
1429 // don't allow anything after - unless tolerant=true
1430 if (!tolerateTrailingJunk && *c != '\0')
1431 d = NaN;
1432
1433 return d;
1434 }
1435
// Convenience overload: forwards to the two-argument toDouble() with
// tolerateEmptyString fixed to true.
1436 double UString::toDouble(bool tolerateTrailingJunk) const
1437 {
1438 return toDouble(tolerateTrailingJunk, true);
1439 }
1440
// Convenience overload: forwards to the two-argument toDouble() with
// tolerateTrailingJunk = false and tolerateEmptyString = true.
1441 double UString::toDouble() const
1442 {
1443 return toDouble(false, true);
1444 }
1445
1446 uint32_t UString::toUInt32(bool* ok) const
1447 {
1448 double d = toDouble();
1449 bool b = true;
1450
1451 if (d != static_cast<uint32_t>(d)) {
1452 b = false;
1453 d = 0;
1454 }
1455
1456 if (ok)
1457 *ok = b;
1458
1459 return static_cast<uint32_t>(d);
1460 }
1461
1462 uint32_t UString::toUInt32(bool* ok, bool tolerateEmptyString) const
1463 {
1464 double d = toDouble(false, tolerateEmptyString);
1465 bool b = true;
1466
1467 if (d != static_cast<uint32_t>(d)) {
1468 b = false;
1469 d = 0;
1470 }
1471
1472 if (ok)
1473 *ok = b;
1474
1475 return static_cast<uint32_t>(d);
1476 }
1477
1478 uint32_t UString::toStrictUInt32(bool* ok) const
1479 {
1480 if (ok)
1481 *ok = false;
1482
1483 // Empty string is not OK.
1484 int len = m_rep->len;
1485 if (len == 0)
1486 return 0;
1487 const UChar* p = m_rep->data();
1488 unsigned short c = p[0];
1489
1490 // If the first digit is 0, only 0 itself is OK.
1491 if (c == '0') {
1492 if (len == 1 && ok)
1493 *ok = true;
1494 return 0;
1495 }
1496
1497 // Convert to UInt32, checking for overflow.
1498 uint32_t i = 0;
1499 while (1) {
1500 // Process character, turning it into a digit.
1501 if (c < '0' || c > '9')
1502 return 0;
1503 const unsigned d = c - '0';
1504
1505 // Multiply by 10, checking for overflow out of 32 bits.
1506 if (i > 0xFFFFFFFFU / 10)
1507 return 0;
1508 i *= 10;
1509
1510 // Add in the digit, checking for overflow out of 32 bits.
1511 const unsigned max = 0xFFFFFFFFU - d;
1512 if (i > max)
1513 return 0;
1514 i += d;
1515
1516 // Handle end of string.
1517 if (--len == 0) {
1518 if (ok)
1519 *ok = true;
1520 return i;
1521 }
1522
1523 // Get next character.
1524 c = *(++p);
1525 }
1526 }
1527
1528 int UString::find(const UString& f, int pos) const
1529 {
1530 int fsz = f.size();
1531
1532 if (pos < 0)
1533 pos = 0;
1534
1535 if (fsz == 1) {
1536 UChar ch = f[0];
1537 const UChar* end = data() + size();
1538 for (const UChar* c = data() + pos; c < end; c++) {
1539 if (*c == ch)
1540 return static_cast<int>(c - data());
1541 }
1542 return -1;
1543 }
1544
1545 int sz = size();
1546 if (sz < fsz)
1547 return -1;
1548 if (fsz == 0)
1549 return pos;
1550 const UChar* end = data() + sz - fsz;
1551 int fsizeminusone = (fsz - 1) * sizeof(UChar);
1552 const UChar* fdata = f.data();
1553 unsigned short fchar = fdata[0];
1554 ++fdata;
1555 for (const UChar* c = data() + pos; c <= end; c++) {
1556 if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone))
1557 return static_cast<int>(c - data());
1558 }
1559
1560 return -1;
1561 }
1562
1563 int UString::find(UChar ch, int pos) const
1564 {
1565 if (pos < 0)
1566 pos = 0;
1567 const UChar* end = data() + size();
1568 for (const UChar* c = data() + pos; c < end; c++) {
1569 if (*c == ch)
1570 return static_cast<int>(c - data());
1571 }
1572
1573 return -1;
1574 }
1575
1576 int UString::rfind(const UString& f, int pos) const
1577 {
1578 int sz = size();
1579 int fsz = f.size();
1580 if (sz < fsz)
1581 return -1;
1582 if (pos < 0)
1583 pos = 0;
1584 if (pos > sz - fsz)
1585 pos = sz - fsz;
1586 if (fsz == 0)
1587 return pos;
1588 int fsizeminusone = (fsz - 1) * sizeof(UChar);
1589 const UChar* fdata = f.data();
1590 for (const UChar* c = data() + pos; c >= data(); c--) {
1591 if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
1592 return static_cast<int>(c - data());
1593 }
1594
1595 return -1;
1596 }
1597
1598 int UString::rfind(UChar ch, int pos) const
1599 {
1600 if (isEmpty())
1601 return -1;
1602 if (pos + 1 >= size())
1603 pos = size() - 1;
1604 for (const UChar* c = data() + pos; c >= data(); c--) {
1605 if (*c == ch)
1606 return static_cast<int>(c - data());
1607 }
1608
1609 return -1;
1610 }
1611
1612 UString UString::substr(int pos, int len) const
1613 {
1614 int s = size();
1615
1616 if (pos < 0)
1617 pos = 0;
1618 else if (pos >= s)
1619 pos = s;
1620 if (len < 0)
1621 len = s;
1622 if (pos + len >= s)
1623 len = s - pos;
1624
1625 if (pos == 0 && len == s)
1626 return *this;
1627
1628 return UString(Rep::create(m_rep, pos, len));
1629 }
1630
1631 bool operator==(const UString& s1, const char *s2)
1632 {
1633 if (s2 == 0)
1634 return s1.isEmpty();
1635
1636 const UChar* u = s1.data();
1637 const UChar* uend = u + s1.size();
1638 while (u != uend && *s2) {
1639 if (u[0] != (unsigned char)*s2)
1640 return false;
1641 s2++;
1642 u++;
1643 }
1644
1645 return u == uend && *s2 == 0;
1646 }
1647
1648 bool operator<(const UString& s1, const UString& s2)
1649 {
1650 const int l1 = s1.size();
1651 const int l2 = s2.size();
1652 const int lmin = l1 < l2 ? l1 : l2;
1653 const UChar* c1 = s1.data();
1654 const UChar* c2 = s2.data();
1655 int l = 0;
1656 while (l < lmin && *c1 == *c2) {
1657 c1++;
1658 c2++;
1659 l++;
1660 }
1661 if (l < lmin)
1662 return (c1[0] < c2[0]);
1663
1664 return (l1 < l2);
1665 }
1666
1667 bool operator>(const UString& s1, const UString& s2)
1668 {
1669 const int l1 = s1.size();
1670 const int l2 = s2.size();
1671 const int lmin = l1 < l2 ? l1 : l2;
1672 const UChar* c1 = s1.data();
1673 const UChar* c2 = s2.data();
1674 int l = 0;
1675 while (l < lmin && *c1 == *c2) {
1676 c1++;
1677 c2++;
1678 l++;
1679 }
1680 if (l < lmin)
1681 return (c1[0] > c2[0]);
1682
1683 return (l1 > l2);
1684 }
1685
1686 int compare(const UString& s1, const UString& s2)
1687 {
1688 const int l1 = s1.size();
1689 const int l2 = s2.size();
1690 const int lmin = l1 < l2 ? l1 : l2;
1691 const UChar* c1 = s1.data();
1692 const UChar* c2 = s2.data();
1693 int l = 0;
1694 while (l < lmin && *c1 == *c2) {
1695 c1++;
1696 c2++;
1697 l++;
1698 }
1699
1700 if (l < lmin)
1701 return (c1[0] > c2[0]) ? 1 : -1;
1702
1703 if (l1 == l2)
1704 return 0;
1705
1706 return (l1 > l2) ? 1 : -1;
1707 }
1708
1709 bool equal(const UString::Rep* r, const UString::Rep* b)
1710 {
1711 int length = r->len;
1712 if (length != b->len)
1713 return false;
1714 const UChar* d = r->data();
1715 const UChar* s = b->data();
1716 for (int i = 0; i != length; ++i) {
1717 if (d[i] != s[i])
1718 return false;
1719 }
1720 return true;
1721 }
1722
1723 CString UString::UTF8String(bool strict) const
1724 {
1725 // Allocate a buffer big enough to hold all the characters.
1726 const int length = size();
1727 Vector<char, 1024> buffer(length * 3);
1728
1729 // Convert to runs of 8-bit characters.
1730 char* p = buffer.data();
1731 const UChar* d = reinterpret_cast<const UChar*>(&data()[0]);
1732 ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict);
1733 if (result != conversionOK)
1734 return CString();
1735
1736 return CString(buffer.data(), p - buffer.data());
1737 }
1738
1739 // For use in error handling code paths -- having this not be inlined helps avoid PIC branches to fetch the global on Mac OS X.
// Points m_rep at the rep returned by Rep::null(), replacing whatever rep this string
// referred to before.
1740 NEVER_INLINE void UString::makeNull()
1741 {
1742 m_rep = &Rep::null();
1743 }
1744
1745 // For use in error handling code paths -- having this not be inlined helps avoid PIC branches to fetch the global on Mac OS X.
// Returns the address of the rep that Rep::null() refers to.
1746 NEVER_INLINE UString::Rep* UString::nullRep()
1747 {
1748 return &Rep::null();
1749 }
1750
1751 } // namespace JSC