]> git.saurik.com Git - apple/javascriptcore.git/blob - runtime/UString.cpp
JavaScriptCore-525.tar.gz
[apple/javascriptcore.git] / runtime / UString.cpp
1 /*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5 * Copyright (c) 2009, Google Inc. All rights reserved.
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24 #include "config.h"
25 #include "UString.h"
26
27 #include "JSGlobalObjectFunctions.h"
28 #include "Collector.h"
29 #include "dtoa.h"
30 #include "Identifier.h"
31 #include "Operations.h"
32 #include <ctype.h>
33 #include <float.h>
34 #include <limits.h>
35 #include <math.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <wtf/ASCIICType.h>
39 #include <wtf/Assertions.h>
40 #include <wtf/MathExtras.h>
41 #include <wtf/Vector.h>
42 #include <wtf/unicode/UTF8.h>
43
44 #if HAVE(STRING_H)
45 #include <string.h>
46 #endif
47 #if HAVE(STRINGS_H)
48 #include <strings.h>
49 #endif
50
51 using namespace WTF;
52 using namespace WTF::Unicode;
53 using namespace std;
54
55 // This can be tuned differently per platform by putting platform #ifs right here.
56 // If you don't define this macro at all, then copyChars will just call directly
57 // to memcpy.
58 #define USTRING_COPY_CHARS_INLINE_CUTOFF 20
59
60 namespace JSC {
61
62 extern const double NaN;
63 extern const double Inf;
64
// Sentinel handed back by the size helpers in this file when a requested
// size cannot be represented.
static inline size_t overflowIndicator()
{
    const size_t largestSize = std::numeric_limits<size_t>::max();
    return largestSize;
}
66 static inline size_t maxUChars() { return std::numeric_limits<size_t>::max() / sizeof(UChar); }
67
68 static inline UChar* allocChars(size_t length)
69 {
70 ASSERT(length);
71 if (length > maxUChars())
72 return 0;
73 return static_cast<UChar*>(tryFastMalloc(sizeof(UChar) * length));
74 }
75
76 static inline UChar* reallocChars(UChar* buffer, size_t length)
77 {
78 ASSERT(length);
79 if (length > maxUChars())
80 return 0;
81 return static_cast<UChar*>(tryFastRealloc(buffer, sizeof(UChar) * length));
82 }
83
84 static inline void copyChars(UChar* destination, const UChar* source, unsigned numCharacters)
85 {
86 #ifdef USTRING_COPY_CHARS_INLINE_CUTOFF
87 if (numCharacters <= USTRING_COPY_CHARS_INLINE_CUTOFF) {
88 for (unsigned i = 0; i < numCharacters; ++i)
89 destination[i] = source[i];
90 return;
91 }
92 #endif
93 memcpy(destination, source, numCharacters * sizeof(UChar));
94 }
95
96 COMPILE_ASSERT(sizeof(UChar) == 2, uchar_is_2_bytes)
97
98 CString::CString(const char* c)
99 : m_length(strlen(c))
100 , m_data(new char[m_length + 1])
101 {
102 memcpy(m_data, c, m_length + 1);
103 }
104
105 CString::CString(const char* c, size_t length)
106 : m_length(length)
107 , m_data(new char[length + 1])
108 {
109 memcpy(m_data, c, m_length);
110 m_data[m_length] = 0;
111 }
112
113 CString::CString(const CString& b)
114 {
115 m_length = b.m_length;
116 if (b.m_data) {
117 m_data = new char[m_length + 1];
118 memcpy(m_data, b.m_data, m_length + 1);
119 } else
120 m_data = 0;
121 }
122
123 CString::~CString()
124 {
125 delete [] m_data;
126 }
127
128 CString CString::adopt(char* c, size_t length)
129 {
130 CString s;
131 s.m_data = c;
132 s.m_length = length;
133 return s;
134 }
135
136 CString& CString::append(const CString& t)
137 {
138 char* n;
139 n = new char[m_length + t.m_length + 1];
140 if (m_length)
141 memcpy(n, m_data, m_length);
142 if (t.m_length)
143 memcpy(n + m_length, t.m_data, t.m_length);
144 m_length += t.m_length;
145 n[m_length] = 0;
146
147 delete [] m_data;
148 m_data = n;
149
150 return *this;
151 }
152
153 CString& CString::operator=(const char* c)
154 {
155 if (m_data)
156 delete [] m_data;
157 m_length = strlen(c);
158 m_data = new char[m_length + 1];
159 memcpy(m_data, c, m_length + 1);
160
161 return *this;
162 }
163
164 CString& CString::operator=(const CString& str)
165 {
166 if (this == &str)
167 return *this;
168
169 if (m_data)
170 delete [] m_data;
171 m_length = str.m_length;
172 if (str.m_data) {
173 m_data = new char[m_length + 1];
174 memcpy(m_data, str.m_data, m_length + 1);
175 } else
176 m_data = 0;
177
178 return *this;
179 }
180
181 bool operator==(const CString& c1, const CString& c2)
182 {
183 size_t len = c1.size();
184 return len == c2.size() && (len == 0 || memcmp(c1.c_str(), c2.c_str(), len) == 0);
185 }
186
187 // These static strings are immutable, except for rc, whose initial value is chosen to
188 // reduce the possibility of it becoming zero due to ref/deref not being thread-safe.
189 static UChar sharedEmptyChar;
190 UString::BaseString* UString::Rep::nullBaseString;
191 UString::BaseString* UString::Rep::emptyBaseString;
192 UString* UString::nullUString;
193
194 static void initializeStaticBaseString(int len, UChar* buf, UString::BaseString& base)
195 {
196 base.offset = 0;
197 base.len = len;
198 base.rc = INT_MAX / 2;
199 base._hash = 0;
200 base.m_identifierTableAndFlags.setFlag(UString::Rep::StaticFlag);
201 base.m_baseString = 0;
202 base.buf = buf;
203 base.preCapacity = 0;
204 base.usedPreCapacity = 0;
205 base.capacity = 0;
206 base.usedCapacity = 0;
207 base.reportedCost = 0;
208 base.checkConsistency();
209 }
210
211 void initializeUString()
212 {
213 UString::Rep::nullBaseString = new UString::BaseString;
214 initializeStaticBaseString(0, 0, *UString::Rep::nullBaseString);
215
216 UString::Rep::emptyBaseString = new UString::BaseString;
217 initializeStaticBaseString(0, &sharedEmptyChar, *UString::Rep::emptyBaseString);
218
219 UString::nullUString = new UString;
220 }
221
// Scratch buffer for UString::ascii(), which exists for debugging only.
static char* statBuffer = 0;
223
224 PassRefPtr<UString::Rep> UString::Rep::createCopying(const UChar* d, int l)
225 {
226 UChar* copyD = static_cast<UChar*>(fastMalloc(l * sizeof(UChar)));
227 copyChars(copyD, d, l);
228 return create(copyD, l);
229 }
230
231 PassRefPtr<UString::Rep> UString::Rep::create(UChar* d, int l)
232 {
233 BaseString* r = new BaseString;
234 r->offset = 0;
235 r->len = l;
236 r->rc = 1;
237 r->_hash = 0;
238 r->m_baseString = 0;
239 r->reportedCost = 0;
240 r->buf = d;
241 r->usedCapacity = l;
242 r->capacity = l;
243 r->usedPreCapacity = 0;
244 r->preCapacity = 0;
245
246 r->checkConsistency();
247
248 // steal the single reference this Rep was created with
249 return adoptRef(r);
250 }
251
252 PassRefPtr<UString::Rep> UString::Rep::create(PassRefPtr<Rep> rep, int offset, int length)
253 {
254 ASSERT(rep);
255 rep->checkConsistency();
256
257 int repOffset = rep->offset;
258
259 PassRefPtr<BaseString> base = rep->baseString();
260
261 ASSERT(-(offset + repOffset) <= base->usedPreCapacity);
262 ASSERT(offset + repOffset + length <= base->usedCapacity);
263
264 Rep* r = new Rep;
265 r->offset = repOffset + offset;
266 r->len = length;
267 r->rc = 1;
268 r->_hash = 0;
269 r->setBaseString(base);
270
271 r->checkConsistency();
272
273 // steal the single reference this Rep was created with
274 return adoptRef(r);
275 }
276
277 PassRefPtr<UString::Rep> UString::Rep::createFromUTF8(const char* string)
278 {
279 if (!string)
280 return &UString::Rep::null();
281
282 size_t length = strlen(string);
283 Vector<UChar, 1024> buffer(length);
284 UChar* p = buffer.data();
285 if (conversionOK != convertUTF8ToUTF16(&string, string + length, &p, p + length))
286 return &UString::Rep::null();
287
288 return UString::Rep::createCopying(buffer.data(), p - buffer.data());
289 }
290
291 void UString::Rep::destroy()
292 {
293 checkConsistency();
294
295 // Static null and empty strings can never be destroyed, but we cannot rely on
296 // reference counting, because ref/deref are not thread-safe.
297 if (!isStatic()) {
298 if (identifierTable())
299 Identifier::remove(this);
300 UString::BaseString* base = baseString();
301 if (base == this)
302 fastFree(base->buf);
303 else
304 base->deref();
305
306 delete this;
307 }
308 }
309
// Golden ratio. An arbitrary non-zero seed for the hash functions below, so
// that all-zero input does not map to an all-zero hash (or anything like that).
const unsigned PHI = 0x9E3779B9U;
313
314 // Paul Hsieh's SuperFastHash
315 // http://www.azillionmonkeys.com/qed/hash.html
316 unsigned UString::Rep::computeHash(const UChar* s, int len)
317 {
318 unsigned l = len;
319 uint32_t hash = PHI;
320 uint32_t tmp;
321
322 int rem = l & 1;
323 l >>= 1;
324
325 // Main loop
326 for (; l > 0; l--) {
327 hash += s[0];
328 tmp = (s[1] << 11) ^ hash;
329 hash = (hash << 16) ^ tmp;
330 s += 2;
331 hash += hash >> 11;
332 }
333
334 // Handle end case
335 if (rem) {
336 hash += s[0];
337 hash ^= hash << 11;
338 hash += hash >> 17;
339 }
340
341 // Force "avalanching" of final 127 bits
342 hash ^= hash << 3;
343 hash += hash >> 5;
344 hash ^= hash << 2;
345 hash += hash >> 15;
346 hash ^= hash << 10;
347
348 // this avoids ever returning a hash code of 0, since that is used to
349 // signal "hash not computed yet", using a value that is likely to be
350 // effectively the same as 0 when the low bits are masked
351 if (hash == 0)
352 hash = 0x80000000;
353
354 return hash;
355 }
356
357 // Paul Hsieh's SuperFastHash
358 // http://www.azillionmonkeys.com/qed/hash.html
359 unsigned UString::Rep::computeHash(const char* s, int l)
360 {
361 // This hash is designed to work on 16-bit chunks at a time. But since the normal case
362 // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
363 // were 16-bit chunks, which should give matching results
364
365 uint32_t hash = PHI;
366 uint32_t tmp;
367
368 size_t rem = l & 1;
369 l >>= 1;
370
371 // Main loop
372 for (; l > 0; l--) {
373 hash += static_cast<unsigned char>(s[0]);
374 tmp = (static_cast<unsigned char>(s[1]) << 11) ^ hash;
375 hash = (hash << 16) ^ tmp;
376 s += 2;
377 hash += hash >> 11;
378 }
379
380 // Handle end case
381 if (rem) {
382 hash += static_cast<unsigned char>(s[0]);
383 hash ^= hash << 11;
384 hash += hash >> 17;
385 }
386
387 // Force "avalanching" of final 127 bits
388 hash ^= hash << 3;
389 hash += hash >> 5;
390 hash ^= hash << 2;
391 hash += hash >> 15;
392 hash ^= hash << 10;
393
394 // this avoids ever returning a hash code of 0, since that is used to
395 // signal "hash not computed yet", using a value that is likely to be
396 // effectively the same as 0 when the low bits are masked
397 if (hash == 0)
398 hash = 0x80000000;
399
400 return hash;
401 }
402
403 #ifndef NDEBUG
404 void UString::Rep::checkConsistency() const
405 {
406 const UString::BaseString* base = baseString();
407
408 // There is no recursion for base strings.
409 ASSERT(base == base->baseString());
410
411 if (isStatic()) {
412 // There are only two static strings: null and empty.
413 ASSERT(!len);
414
415 // Static strings cannot get in identifier tables, because they are globally shared.
416 ASSERT(!identifierTable());
417 }
418
419 // The string fits in buffer.
420 ASSERT(base->usedPreCapacity <= base->preCapacity);
421 ASSERT(base->usedCapacity <= base->capacity);
422 ASSERT(-offset <= base->usedPreCapacity);
423 ASSERT(offset + len <= base->usedCapacity);
424 }
425 #endif
426
427 // put these early so they can be inlined
428 static inline size_t expandedSize(size_t size, size_t otherSize)
429 {
430 // Do the size calculation in two parts, returning overflowIndicator if
431 // we overflow the maximum value that we can handle.
432
433 if (size > maxUChars())
434 return overflowIndicator();
435
436 size_t expandedSize = ((size + 10) / 10 * 11) + 1;
437 if (maxUChars() - expandedSize < otherSize)
438 return overflowIndicator();
439
440 return expandedSize + otherSize;
441 }
442
443 static inline bool expandCapacity(UString::Rep* rep, int requiredLength)
444 {
445 rep->checkConsistency();
446
447 UString::BaseString* base = rep->baseString();
448
449 if (requiredLength > base->capacity) {
450 size_t newCapacity = expandedSize(requiredLength, base->preCapacity);
451 UChar* oldBuf = base->buf;
452 base->buf = reallocChars(base->buf, newCapacity);
453 if (!base->buf) {
454 base->buf = oldBuf;
455 return false;
456 }
457 base->capacity = newCapacity - base->preCapacity;
458 }
459 if (requiredLength > base->usedCapacity)
460 base->usedCapacity = requiredLength;
461
462 rep->checkConsistency();
463 return true;
464 }
465
466 void UString::expandCapacity(int requiredLength)
467 {
468 if (!JSC::expandCapacity(m_rep.get(), requiredLength))
469 makeNull();
470 }
471
472 void UString::expandPreCapacity(int requiredPreCap)
473 {
474 m_rep->checkConsistency();
475
476 BaseString* base = m_rep->baseString();
477
478 if (requiredPreCap > base->preCapacity) {
479 size_t newCapacity = expandedSize(requiredPreCap, base->capacity);
480 int delta = newCapacity - base->capacity - base->preCapacity;
481
482 UChar* newBuf = allocChars(newCapacity);
483 if (!newBuf) {
484 makeNull();
485 return;
486 }
487 copyChars(newBuf + delta, base->buf, base->capacity + base->preCapacity);
488 fastFree(base->buf);
489 base->buf = newBuf;
490
491 base->preCapacity = newCapacity - base->capacity;
492 }
493 if (requiredPreCap > base->usedPreCapacity)
494 base->usedPreCapacity = requiredPreCap;
495
496 m_rep->checkConsistency();
497 }
498
499 static PassRefPtr<UString::Rep> createRep(const char* c)
500 {
501 if (!c)
502 return &UString::Rep::null();
503
504 if (!c[0])
505 return &UString::Rep::empty();
506
507 size_t length = strlen(c);
508 UChar* d = allocChars(length);
509 if (!d)
510 return &UString::Rep::null();
511 else {
512 for (size_t i = 0; i < length; i++)
513 d[i] = static_cast<unsigned char>(c[i]); // use unsigned char to zero-extend instead of sign-extend
514 return UString::Rep::create(d, static_cast<int>(length));
515 }
516
517 }
518
519 UString::UString(const char* c)
520 : m_rep(createRep(c))
521 {
522 }
523
524 UString::UString(const UChar* c, int length)
525 {
526 if (length == 0)
527 m_rep = &Rep::empty();
528 else
529 m_rep = Rep::createCopying(c, length);
530 }
531
532 UString::UString(UChar* c, int length, bool copy)
533 {
534 if (length == 0)
535 m_rep = &Rep::empty();
536 else if (copy)
537 m_rep = Rep::createCopying(c, length);
538 else
539 m_rep = Rep::create(c, length);
540 }
541
542 UString::UString(const Vector<UChar>& buffer)
543 {
544 if (!buffer.size())
545 m_rep = &Rep::empty();
546 else
547 m_rep = Rep::createCopying(buffer.data(), buffer.size());
548 }
549
550 static ALWAYS_INLINE int newCapacityWithOverflowCheck(const int currentCapacity, const int extendLength, const bool plusOne = false)
551 {
552 ASSERT_WITH_MESSAGE(extendLength >= 0, "extendedLength = %d", extendLength);
553
554 const int plusLength = plusOne ? 1 : 0;
555 if (currentCapacity > std::numeric_limits<int>::max() - extendLength - plusLength)
556 CRASH();
557
558 return currentCapacity + extendLength + plusLength;
559 }
560
561 static ALWAYS_INLINE PassRefPtr<UString::Rep> concatenate(PassRefPtr<UString::Rep> r, const UChar* tData, int tSize)
562 {
563 RefPtr<UString::Rep> rep = r;
564
565 rep->checkConsistency();
566
567 int thisSize = rep->size();
568 int thisOffset = rep->offset;
569 int length = thisSize + tSize;
570 UString::BaseString* base = rep->baseString();
571
572 // possible cases:
573 if (tSize == 0) {
574 // t is empty
575 } else if (thisSize == 0) {
576 // this is empty
577 rep = UString::Rep::createCopying(tData, tSize);
578 } else if (rep == base && rep->rc == 1) {
579 // this is direct and has refcount of 1 (so we can just alter it directly)
580 if (!expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length)))
581 rep = &UString::Rep::null();
582 if (rep->data()) {
583 copyChars(rep->data() + thisSize, tData, tSize);
584 rep->len = length;
585 rep->_hash = 0;
586 }
587 } else if (thisOffset + thisSize == base->usedCapacity && thisSize >= minShareSize) {
588 // this reaches the end of the buffer - extend it if it's long enough to append to
589 if (!expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length)))
590 rep = &UString::Rep::null();
591 if (rep->data()) {
592 copyChars(rep->data() + thisSize, tData, tSize);
593 rep = UString::Rep::create(rep, 0, length);
594 }
595 } else {
596 // this is shared with someone using more capacity, gotta make a whole new string
597 size_t newCapacity = expandedSize(length, 0);
598 UChar* d = allocChars(newCapacity);
599 if (!d)
600 rep = &UString::Rep::null();
601 else {
602 copyChars(d, rep->data(), thisSize);
603 copyChars(d + thisSize, tData, tSize);
604 rep = UString::Rep::create(d, length);
605 rep->baseString()->capacity = newCapacity;
606 }
607 }
608
609 rep->checkConsistency();
610
611 return rep.release();
612 }
613
614 static ALWAYS_INLINE PassRefPtr<UString::Rep> concatenate(PassRefPtr<UString::Rep> r, const char* t)
615 {
616 RefPtr<UString::Rep> rep = r;
617
618 rep->checkConsistency();
619
620 int thisSize = rep->size();
621 int thisOffset = rep->offset;
622 int tSize = static_cast<int>(strlen(t));
623 int length = thisSize + tSize;
624 UString::BaseString* base = rep->baseString();
625
626 // possible cases:
627 if (thisSize == 0) {
628 // this is empty
629 rep = createRep(t);
630 } else if (tSize == 0) {
631 // t is empty, we'll just return *this below.
632 } else if (rep == base && rep->rc == 1) {
633 // this is direct and has refcount of 1 (so we can just alter it directly)
634 expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length));
635 UChar* d = rep->data();
636 if (d) {
637 for (int i = 0; i < tSize; ++i)
638 d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
639 rep->len = length;
640 rep->_hash = 0;
641 }
642 } else if (thisOffset + thisSize == base->usedCapacity && thisSize >= minShareSize) {
643 // this string reaches the end of the buffer - extend it
644 expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length));
645 UChar* d = rep->data();
646 if (d) {
647 for (int i = 0; i < tSize; ++i)
648 d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
649 rep = UString::Rep::create(rep, 0, length);
650 }
651 } else {
652 // this is shared with someone using more capacity, gotta make a whole new string
653 size_t newCapacity = expandedSize(length, 0);
654 UChar* d = allocChars(newCapacity);
655 if (!d)
656 rep = &UString::Rep::null();
657 else {
658 copyChars(d, rep->data(), thisSize);
659 for (int i = 0; i < tSize; ++i)
660 d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
661 rep = UString::Rep::create(d, length);
662 rep->baseString()->capacity = newCapacity;
663 }
664 }
665
666 rep->checkConsistency();
667
668 return rep.release();
669 }
670
671 PassRefPtr<UString::Rep> concatenate(UString::Rep* a, UString::Rep* b)
672 {
673 a->checkConsistency();
674 b->checkConsistency();
675
676 int aSize = a->size();
677 int aOffset = a->offset;
678 int bSize = b->size();
679 int bOffset = b->offset;
680 int length = aSize + bSize;
681
682 // possible cases:
683
684 // a is empty
685 if (aSize == 0)
686 return b;
687 // b is empty
688 if (bSize == 0)
689 return a;
690
691 UString::BaseString* aBase = a->baseString();
692 if (bSize == 1 && aOffset + aSize == aBase->usedCapacity && aOffset + length <= aBase->capacity) {
693 // b is a single character (common fast case)
694 aBase->usedCapacity = aOffset + length;
695 a->data()[aSize] = b->data()[0];
696 return UString::Rep::create(a, 0, length);
697 }
698
699 UString::BaseString* bBase = b->baseString();
700 if (aOffset + aSize == aBase->usedCapacity && aSize >= minShareSize && 4 * aSize >= bSize
701 && (-bOffset != bBase->usedPreCapacity || aSize >= bSize)) {
702 // - a reaches the end of its buffer so it qualifies for shared append
703 // - also, it's at least a quarter the length of b - appending to a much shorter
704 // string does more harm than good
705 // - however, if b qualifies for prepend and is longer than a, we'd rather prepend
706
707 UString x(a);
708 x.expandCapacity(newCapacityWithOverflowCheck(aOffset, length));
709 if (!a->data() || !x.data())
710 return 0;
711 copyChars(a->data() + aSize, b->data(), bSize);
712 PassRefPtr<UString::Rep> result = UString::Rep::create(a, 0, length);
713
714 a->checkConsistency();
715 b->checkConsistency();
716 result->checkConsistency();
717
718 return result;
719 }
720
721 if (-bOffset == bBase->usedPreCapacity && bSize >= minShareSize && 4 * bSize >= aSize) {
722 // - b reaches the beginning of its buffer so it qualifies for shared prepend
723 // - also, it's at least a quarter the length of a - prepending to a much shorter
724 // string does more harm than good
725 UString y(b);
726 y.expandPreCapacity(-bOffset + aSize);
727 if (!b->data() || !y.data())
728 return 0;
729 copyChars(b->data() - aSize, a->data(), aSize);
730 PassRefPtr<UString::Rep> result = UString::Rep::create(b, -aSize, length);
731
732 a->checkConsistency();
733 b->checkConsistency();
734 result->checkConsistency();
735
736 return result;
737 }
738
739 // a does not qualify for append, and b does not qualify for prepend, gotta make a whole new string
740 size_t newCapacity = expandedSize(length, 0);
741 UChar* d = allocChars(newCapacity);
742 if (!d)
743 return 0;
744 copyChars(d, a->data(), aSize);
745 copyChars(d + aSize, b->data(), bSize);
746 PassRefPtr<UString::Rep> result = UString::Rep::create(d, length);
747 result->baseString()->capacity = newCapacity;
748
749 a->checkConsistency();
750 b->checkConsistency();
751 result->checkConsistency();
752
753 return result;
754 }
755
756 PassRefPtr<UString::Rep> concatenate(UString::Rep* rep, int i)
757 {
758 UChar buf[1 + sizeof(i) * 3];
759 UChar* end = buf + sizeof(buf) / sizeof(UChar);
760 UChar* p = end;
761
762 if (i == 0)
763 *--p = '0';
764 else if (i == INT_MIN) {
765 char minBuf[1 + sizeof(i) * 3];
766 sprintf(minBuf, "%d", INT_MIN);
767 return concatenate(rep, minBuf);
768 } else {
769 bool negative = false;
770 if (i < 0) {
771 negative = true;
772 i = -i;
773 }
774 while (i) {
775 *--p = static_cast<unsigned short>((i % 10) + '0');
776 i /= 10;
777 }
778 if (negative)
779 *--p = '-';
780 }
781
782 return concatenate(rep, p, static_cast<int>(end - p));
783
784 }
785
786 PassRefPtr<UString::Rep> concatenate(UString::Rep* rep, double d)
787 {
788 // avoid ever printing -NaN, in JS conceptually there is only one NaN value
789 if (isnan(d))
790 return concatenate(rep, "NaN");
791
792 if (d == 0.0) // stringify -0 as 0
793 d = 0.0;
794
795 char buf[80];
796 int decimalPoint;
797 int sign;
798
799 char* result = WTF::dtoa(d, 0, &decimalPoint, &sign, NULL);
800 int length = static_cast<int>(strlen(result));
801
802 int i = 0;
803 if (sign)
804 buf[i++] = '-';
805
806 if (decimalPoint <= 0 && decimalPoint > -6) {
807 buf[i++] = '0';
808 buf[i++] = '.';
809 for (int j = decimalPoint; j < 0; j++)
810 buf[i++] = '0';
811 strcpy(buf + i, result);
812 } else if (decimalPoint <= 21 && decimalPoint > 0) {
813 if (length <= decimalPoint) {
814 strcpy(buf + i, result);
815 i += length;
816 for (int j = 0; j < decimalPoint - length; j++)
817 buf[i++] = '0';
818 buf[i] = '\0';
819 } else {
820 strncpy(buf + i, result, decimalPoint);
821 i += decimalPoint;
822 buf[i++] = '.';
823 strcpy(buf + i, result + decimalPoint);
824 }
825 } else if (result[0] < '0' || result[0] > '9')
826 strcpy(buf + i, result);
827 else {
828 buf[i++] = result[0];
829 if (length > 1) {
830 buf[i++] = '.';
831 strcpy(buf + i, result + 1);
832 i += length - 1;
833 }
834
835 buf[i++] = 'e';
836 buf[i++] = (decimalPoint >= 0) ? '+' : '-';
837 // decimalPoint can't be more than 3 digits decimal given the
838 // nature of float representation
839 int exponential = decimalPoint - 1;
840 if (exponential < 0)
841 exponential = -exponential;
842 if (exponential >= 100)
843 buf[i++] = static_cast<char>('0' + exponential / 100);
844 if (exponential >= 10)
845 buf[i++] = static_cast<char>('0' + (exponential % 100) / 10);
846 buf[i++] = static_cast<char>('0' + exponential % 10);
847 buf[i++] = '\0';
848 }
849
850 WTF::freedtoa(result);
851
852 return concatenate(rep, buf);
853 }
854
855 UString UString::from(int i)
856 {
857 UChar buf[1 + sizeof(i) * 3];
858 UChar* end = buf + sizeof(buf) / sizeof(UChar);
859 UChar* p = end;
860
861 if (i == 0)
862 *--p = '0';
863 else if (i == INT_MIN) {
864 char minBuf[1 + sizeof(i) * 3];
865 snprintf(minBuf, 1 + sizeof(i) * 3, "%d", INT_MIN);
866 return UString(minBuf);
867 } else {
868 bool negative = false;
869 if (i < 0) {
870 negative = true;
871 i = -i;
872 }
873 while (i) {
874 *--p = static_cast<unsigned short>((i % 10) + '0');
875 i /= 10;
876 }
877 if (negative)
878 *--p = '-';
879 }
880
881 return UString(p, static_cast<int>(end - p));
882 }
883
884 UString UString::from(unsigned int u)
885 {
886 UChar buf[sizeof(u) * 3];
887 UChar* end = buf + sizeof(buf) / sizeof(UChar);
888 UChar* p = end;
889
890 if (u == 0)
891 *--p = '0';
892 else {
893 while (u) {
894 *--p = static_cast<unsigned short>((u % 10) + '0');
895 u /= 10;
896 }
897 }
898
899 return UString(p, static_cast<int>(end - p));
900 }
901
902 UString UString::from(long l)
903 {
904 UChar buf[1 + sizeof(l) * 3];
905 UChar* end = buf + sizeof(buf) / sizeof(UChar);
906 UChar* p = end;
907
908 if (l == 0)
909 *--p = '0';
910 else if (l == LONG_MIN) {
911 char minBuf[1 + sizeof(l) * 3];
912 snprintf(minBuf, 1 + sizeof(l) * 3, "%ld", LONG_MIN);
913 return UString(minBuf);
914 } else {
915 bool negative = false;
916 if (l < 0) {
917 negative = true;
918 l = -l;
919 }
920 while (l) {
921 *--p = static_cast<unsigned short>((l % 10) + '0');
922 l /= 10;
923 }
924 if (negative)
925 *--p = '-';
926 }
927
928 return UString(p, static_cast<int>(end - p));
929 }
930
931 UString UString::from(double d)
932 {
933 // avoid ever printing -NaN, in JS conceptually there is only one NaN value
934 if (isnan(d))
935 return "NaN";
936
937 char buf[80];
938 int decimalPoint;
939 int sign;
940
941 char* result = WTF::dtoa(d, 0, &decimalPoint, &sign, NULL);
942 int length = static_cast<int>(strlen(result));
943
944 int i = 0;
945 if (sign)
946 buf[i++] = '-';
947
948 if (decimalPoint <= 0 && decimalPoint > -6) {
949 buf[i++] = '0';
950 buf[i++] = '.';
951 for (int j = decimalPoint; j < 0; j++)
952 buf[i++] = '0';
953 strlcpy(buf + i, result, sizeof(buf) - i);
954 } else if (decimalPoint <= 21 && decimalPoint > 0) {
955 if (length <= decimalPoint) {
956 strlcpy(buf + i, result, sizeof(buf) - i);
957 i += length;
958 for (int j = 0; j < decimalPoint - length; j++)
959 buf[i++] = '0';
960 buf[i] = '\0';
961 } else {
962 int len = (decimalPoint <= static_cast<int>(sizeof(buf)) - i ? decimalPoint : sizeof(buf) - i);
963 strncpy(buf + i, result, len);
964 i += len;
965 buf[i++] = '.';
966 strlcpy(buf + i, result + decimalPoint, sizeof(buf) - i);
967 }
968 } else if (result[0] < '0' || result[0] > '9')
969 strlcpy(buf + i, result, sizeof(buf) - i);
970 else {
971 buf[i++] = result[0];
972 if (length > 1) {
973 buf[i++] = '.';
974 strlcpy(buf + i, result + 1, sizeof(buf) - i);
975 i += length - 1;
976 }
977
978 buf[i++] = 'e';
979 buf[i++] = (decimalPoint >= 0) ? '+' : '-';
980 // decimalPoint can't be more than 3 digits decimal given the
981 // nature of float representation
982 int exponential = decimalPoint - 1;
983 if (exponential < 0)
984 exponential = -exponential;
985 if (exponential >= 100)
986 buf[i++] = static_cast<char>('0' + exponential / 100);
987 if (exponential >= 10)
988 buf[i++] = static_cast<char>('0' + (exponential % 100) / 10);
989 buf[i++] = static_cast<char>('0' + exponential % 10);
990 buf[i++] = '\0';
991 ASSERT(i <= static_cast<int>(sizeof(buf)));
992 }
993
994 WTF::freedtoa(result);
995
996 return UString(buf);
997 }
998
999 UString UString::spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const
1000 {
1001 m_rep->checkConsistency();
1002
1003 if (rangeCount == 1 && separatorCount == 0) {
1004 int thisSize = size();
1005 int position = substringRanges[0].position;
1006 int length = substringRanges[0].length;
1007 if (position <= 0 && length >= thisSize)
1008 return *this;
1009 return UString::Rep::create(m_rep, max(0, position), min(thisSize, length));
1010 }
1011
1012 int totalLength = 0;
1013 for (int i = 0; i < rangeCount; i++)
1014 totalLength += substringRanges[i].length;
1015 for (int i = 0; i < separatorCount; i++)
1016 totalLength += separators[i].size();
1017
1018 if (totalLength == 0)
1019 return "";
1020
1021 UChar* buffer = allocChars(totalLength);
1022 if (!buffer)
1023 return null();
1024
1025 int maxCount = max(rangeCount, separatorCount);
1026 int bufferPos = 0;
1027 for (int i = 0; i < maxCount; i++) {
1028 if (i < rangeCount) {
1029 copyChars(buffer + bufferPos, data() + substringRanges[i].position, substringRanges[i].length);
1030 bufferPos += substringRanges[i].length;
1031 }
1032 if (i < separatorCount) {
1033 copyChars(buffer + bufferPos, separators[i].data(), separators[i].size());
1034 bufferPos += separators[i].size();
1035 }
1036 }
1037
1038 return UString::Rep::create(buffer, totalLength);
1039 }
1040
1041 UString& UString::append(const UString &t)
1042 {
1043 m_rep->checkConsistency();
1044 t.rep()->checkConsistency();
1045
1046 int thisSize = size();
1047 int thisOffset = m_rep->offset;
1048 int tSize = t.size();
1049 int length = thisSize + tSize;
1050 BaseString* base = m_rep->baseString();
1051
1052 // possible cases:
1053 if (thisSize == 0) {
1054 // this is empty
1055 *this = t;
1056 } else if (tSize == 0) {
1057 // t is empty
1058 } else if (m_rep == base && m_rep->rc == 1) {
1059 // this is direct and has refcount of 1 (so we can just alter it directly)
1060 expandCapacity(newCapacityWithOverflowCheck(thisOffset, length));
1061 if (data()) {
1062 copyChars(m_rep->data() + thisSize, t.data(), tSize);
1063 m_rep->len = length;
1064 m_rep->_hash = 0;
1065 }
1066 } else if (thisOffset + thisSize == base->usedCapacity && thisSize >= minShareSize) {
1067 // this reaches the end of the buffer - extend it if it's long enough to append to
1068 expandCapacity(newCapacityWithOverflowCheck(thisOffset, length));
1069 if (data()) {
1070 copyChars(m_rep->data() + thisSize, t.data(), tSize);
1071 m_rep = Rep::create(m_rep, 0, length);
1072 }
1073 } else {
1074 // this is shared with someone using more capacity, gotta make a whole new string
1075 size_t newCapacity = expandedSize(length, 0);
1076 UChar* d = allocChars(newCapacity);
1077 if (!d)
1078 makeNull();
1079 else {
1080 copyChars(d, data(), thisSize);
1081 copyChars(d + thisSize, t.data(), tSize);
1082 m_rep = Rep::create(d, length);
1083 m_rep->baseString()->capacity = newCapacity;
1084 }
1085 }
1086
1087 m_rep->checkConsistency();
1088 t.rep()->checkConsistency();
1089
1090 return *this;
1091 }
1092
1093 UString& UString::append(const UChar* tData, int tSize)
1094 {
1095 m_rep = concatenate(m_rep.release(), tData, tSize);
1096 return *this;
1097 }
1098
1099 UString& UString::append(const char* t)
1100 {
1101 m_rep = concatenate(m_rep.release(), t);
1102 return *this;
1103 }
1104
1105 UString& UString::append(UChar c)
1106 {
1107 m_rep->checkConsistency();
1108
1109 int thisOffset = m_rep->offset;
1110 int length = size();
1111 BaseString* base = m_rep->baseString();
1112
1113 // possible cases:
1114 if (length == 0) {
1115 // this is empty - must make a new m_rep because we don't want to pollute the shared empty one
1116 size_t newCapacity = expandedSize(1, 0);
1117 UChar* d = allocChars(newCapacity);
1118 if (!d)
1119 makeNull();
1120 else {
1121 d[0] = c;
1122 m_rep = Rep::create(d, 1);
1123 m_rep->baseString()->capacity = newCapacity;
1124 }
1125 } else if (m_rep == base && m_rep->rc == 1) {
1126 // this is direct and has refcount of 1 (so we can just alter it directly)
1127 expandCapacity(newCapacityWithOverflowCheck(thisOffset, length, true));
1128 UChar* d = m_rep->data();
1129 if (d) {
1130 d[length] = c;
1131 m_rep->len = length + 1;
1132 m_rep->_hash = 0;
1133 }
1134 } else if (thisOffset + length == base->usedCapacity && length >= minShareSize) {
1135 // this reaches the end of the string - extend it and share
1136 expandCapacity(newCapacityWithOverflowCheck(thisOffset, length, true));
1137 UChar* d = m_rep->data();
1138 if (d) {
1139 d[length] = c;
1140 m_rep = Rep::create(m_rep, 0, length + 1);
1141 }
1142 } else {
1143 // this is shared with someone using more capacity, gotta make a whole new string
1144 size_t newCapacity = expandedSize(length + 1, 0);
1145 UChar* d = allocChars(newCapacity);
1146 if (!d)
1147 makeNull();
1148 else {
1149 copyChars(d, data(), length);
1150 d[length] = c;
1151 m_rep = Rep::create(d, length + 1);
1152 m_rep->baseString()->capacity = newCapacity;
1153 }
1154 }
1155
1156 m_rep->checkConsistency();
1157
1158 return *this;
1159 }
1160
1161 bool UString::getCString(CStringBuffer& buffer) const
1162 {
1163 int length = size();
1164 int neededSize = length + 1;
1165 buffer.resize(neededSize);
1166 char* buf = buffer.data();
1167
1168 UChar ored = 0;
1169 const UChar* p = data();
1170 char* q = buf;
1171 const UChar* limit = p + length;
1172 while (p != limit) {
1173 UChar c = p[0];
1174 ored |= c;
1175 *q = static_cast<char>(c);
1176 ++p;
1177 ++q;
1178 }
1179 *q = '\0';
1180
1181 return !(ored & 0xFF00);
1182 }
1183
1184 char* UString::ascii() const
1185 {
1186 int length = size();
1187 int neededSize = length + 1;
1188 delete[] statBuffer;
1189 statBuffer = new char[neededSize];
1190
1191 const UChar* p = data();
1192 char* q = statBuffer;
1193 const UChar* limit = p + length;
1194 while (p != limit) {
1195 *q = static_cast<char>(p[0]);
1196 ++p;
1197 ++q;
1198 }
1199 *q = '\0';
1200
1201 return statBuffer;
1202 }
1203
1204 UString& UString::operator=(const char* c)
1205 {
1206 if (!c) {
1207 m_rep = &Rep::null();
1208 return *this;
1209 }
1210
1211 if (!c[0]) {
1212 m_rep = &Rep::empty();
1213 return *this;
1214 }
1215
1216 int l = static_cast<int>(strlen(c));
1217 UChar* d;
1218 BaseString* base = m_rep->baseString();
1219 if (m_rep->rc == 1 && l <= base->capacity && m_rep == base && m_rep->offset == 0 && base->preCapacity == 0) {
1220 d = base->buf;
1221 m_rep->_hash = 0;
1222 m_rep->len = l;
1223 } else {
1224 d = allocChars(l);
1225 if (!d) {
1226 makeNull();
1227 return *this;
1228 }
1229 m_rep = Rep::create(d, l);
1230 }
1231 for (int i = 0; i < l; i++)
1232 d[i] = static_cast<unsigned char>(c[i]); // use unsigned char to zero-extend instead of sign-extend
1233
1234 return *this;
1235 }
1236
1237 bool UString::is8Bit() const
1238 {
1239 const UChar* u = data();
1240 const UChar* limit = u + size();
1241 while (u < limit) {
1242 if (u[0] > 0xFF)
1243 return false;
1244 ++u;
1245 }
1246
1247 return true;
1248 }
1249
1250 UChar UString::operator[](int pos) const
1251 {
1252 if (pos >= size())
1253 return '\0';
1254 return data()[pos];
1255 }
1256
1257 double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
1258 {
1259 if (size() == 1) {
1260 UChar c = data()[0];
1261 if (isASCIIDigit(c))
1262 return c - '0';
1263 if (isASCIISpace(c) && tolerateEmptyString)
1264 return 0;
1265 return NaN;
1266 }
1267
1268 // FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk
1269 // after the number, so this is too strict a check.
1270 CStringBuffer s;
1271 if (!getCString(s))
1272 return NaN;
1273 const char* c = s.data();
1274
1275 // skip leading white space
1276 while (isASCIISpace(*c))
1277 c++;
1278
1279 // empty string ?
1280 if (*c == '\0')
1281 return tolerateEmptyString ? 0.0 : NaN;
1282
1283 double d;
1284
1285 // hex number ?
1286 if (*c == '0' && (*(c + 1) == 'x' || *(c + 1) == 'X')) {
1287 const char* firstDigitPosition = c + 2;
1288 c++;
1289 d = 0.0;
1290 while (*(++c)) {
1291 if (*c >= '0' && *c <= '9')
1292 d = d * 16.0 + *c - '0';
1293 else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f'))
1294 d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
1295 else
1296 break;
1297 }
1298
1299 if (d >= mantissaOverflowLowerBound)
1300 d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
1301 } else {
1302 // regular number ?
1303 char* end;
1304 d = WTF::strtod(c, &end);
1305 if ((d != 0.0 || end != c) && d != Inf && d != -Inf) {
1306 c = end;
1307 } else {
1308 double sign = 1.0;
1309
1310 if (*c == '+')
1311 c++;
1312 else if (*c == '-') {
1313 sign = -1.0;
1314 c++;
1315 }
1316
1317 // We used strtod() to do the conversion. However, strtod() handles
1318 // infinite values slightly differently than JavaScript in that it
1319 // converts the string "inf" with any capitalization to infinity,
1320 // whereas the ECMA spec requires that it be converted to NaN.
1321
1322 if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
1323 d = sign * Inf;
1324 c += 8;
1325 } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i')
1326 c = end;
1327 else
1328 return NaN;
1329 }
1330 }
1331
1332 // allow trailing white space
1333 while (isASCIISpace(*c))
1334 c++;
1335 // don't allow anything after - unless tolerant=true
1336 if (!tolerateTrailingJunk && *c != '\0')
1337 d = NaN;
1338
1339 return d;
1340 }
1341
1342 double UString::toDouble(bool tolerateTrailingJunk) const
1343 {
1344 return toDouble(tolerateTrailingJunk, true);
1345 }
1346
1347 double UString::toDouble() const
1348 {
1349 return toDouble(false, true);
1350 }
1351
1352 uint32_t UString::toUInt32(bool* ok) const
1353 {
1354 double d = toDouble();
1355 bool b = true;
1356
1357 if (d != static_cast<uint32_t>(d)) {
1358 b = false;
1359 d = 0;
1360 }
1361
1362 if (ok)
1363 *ok = b;
1364
1365 return static_cast<uint32_t>(d);
1366 }
1367
1368 uint32_t UString::toUInt32(bool* ok, bool tolerateEmptyString) const
1369 {
1370 double d = toDouble(false, tolerateEmptyString);
1371 bool b = true;
1372
1373 if (d != static_cast<uint32_t>(d)) {
1374 b = false;
1375 d = 0;
1376 }
1377
1378 if (ok)
1379 *ok = b;
1380
1381 return static_cast<uint32_t>(d);
1382 }
1383
1384 uint32_t UString::toStrictUInt32(bool* ok) const
1385 {
1386 if (ok)
1387 *ok = false;
1388
1389 // Empty string is not OK.
1390 int len = m_rep->len;
1391 if (len == 0)
1392 return 0;
1393 const UChar* p = m_rep->data();
1394 unsigned short c = p[0];
1395
1396 // If the first digit is 0, only 0 itself is OK.
1397 if (c == '0') {
1398 if (len == 1 && ok)
1399 *ok = true;
1400 return 0;
1401 }
1402
1403 // Convert to UInt32, checking for overflow.
1404 uint32_t i = 0;
1405 while (1) {
1406 // Process character, turning it into a digit.
1407 if (c < '0' || c > '9')
1408 return 0;
1409 const unsigned d = c - '0';
1410
1411 // Multiply by 10, checking for overflow out of 32 bits.
1412 if (i > 0xFFFFFFFFU / 10)
1413 return 0;
1414 i *= 10;
1415
1416 // Add in the digit, checking for overflow out of 32 bits.
1417 const unsigned max = 0xFFFFFFFFU - d;
1418 if (i > max)
1419 return 0;
1420 i += d;
1421
1422 // Handle end of string.
1423 if (--len == 0) {
1424 if (ok)
1425 *ok = true;
1426 return i;
1427 }
1428
1429 // Get next character.
1430 c = *(++p);
1431 }
1432 }
1433
1434 int UString::find(const UString& f, int pos) const
1435 {
1436 int fsz = f.size();
1437
1438 if (pos < 0)
1439 pos = 0;
1440
1441 if (fsz == 1) {
1442 UChar ch = f[0];
1443 const UChar* end = data() + size();
1444 for (const UChar* c = data() + pos; c < end; c++) {
1445 if (*c == ch)
1446 return static_cast<int>(c - data());
1447 }
1448 return -1;
1449 }
1450
1451 int sz = size();
1452 if (sz < fsz)
1453 return -1;
1454 if (fsz == 0)
1455 return pos;
1456 const UChar* end = data() + sz - fsz;
1457 int fsizeminusone = (fsz - 1) * sizeof(UChar);
1458 const UChar* fdata = f.data();
1459 unsigned short fchar = fdata[0];
1460 ++fdata;
1461 for (const UChar* c = data() + pos; c <= end; c++) {
1462 if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone))
1463 return static_cast<int>(c - data());
1464 }
1465
1466 return -1;
1467 }
1468
1469 int UString::find(UChar ch, int pos) const
1470 {
1471 if (pos < 0)
1472 pos = 0;
1473 const UChar* end = data() + size();
1474 for (const UChar* c = data() + pos; c < end; c++) {
1475 if (*c == ch)
1476 return static_cast<int>(c - data());
1477 }
1478
1479 return -1;
1480 }
1481
1482 int UString::rfind(const UString& f, int pos) const
1483 {
1484 int sz = size();
1485 int fsz = f.size();
1486 if (sz < fsz)
1487 return -1;
1488 if (pos < 0)
1489 pos = 0;
1490 if (pos > sz - fsz)
1491 pos = sz - fsz;
1492 if (fsz == 0)
1493 return pos;
1494 int fsizeminusone = (fsz - 1) * sizeof(UChar);
1495 const UChar* fdata = f.data();
1496 for (const UChar* c = data() + pos; c >= data(); c--) {
1497 if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
1498 return static_cast<int>(c - data());
1499 }
1500
1501 return -1;
1502 }
1503
1504 int UString::rfind(UChar ch, int pos) const
1505 {
1506 if (isEmpty())
1507 return -1;
1508 if (pos + 1 >= size())
1509 pos = size() - 1;
1510 for (const UChar* c = data() + pos; c >= data(); c--) {
1511 if (*c == ch)
1512 return static_cast<int>(c - data());
1513 }
1514
1515 return -1;
1516 }
1517
1518 UString UString::substr(int pos, int len) const
1519 {
1520 int s = size();
1521
1522 if (pos < 0)
1523 pos = 0;
1524 else if (pos >= s)
1525 pos = s;
1526 if (len < 0)
1527 len = s;
1528 if (pos + len >= s)
1529 len = s - pos;
1530
1531 if (pos == 0 && len == s)
1532 return *this;
1533
1534 return UString(Rep::create(m_rep, pos, len));
1535 }
1536
1537 bool operator==(const UString& s1, const UString& s2)
1538 {
1539 int size = s1.size();
1540 switch (size) {
1541 case 0:
1542 return !s2.size();
1543 case 1:
1544 return s2.size() == 1 && s1.data()[0] == s2.data()[0];
1545 default:
1546 return s2.size() == size && memcmp(s1.data(), s2.data(), size * sizeof(UChar)) == 0;
1547 }
1548 }
1549
1550 bool operator==(const UString& s1, const char *s2)
1551 {
1552 if (s2 == 0)
1553 return s1.isEmpty();
1554
1555 const UChar* u = s1.data();
1556 const UChar* uend = u + s1.size();
1557 while (u != uend && *s2) {
1558 if (u[0] != (unsigned char)*s2)
1559 return false;
1560 s2++;
1561 u++;
1562 }
1563
1564 return u == uend && *s2 == 0;
1565 }
1566
1567 bool operator<(const UString& s1, const UString& s2)
1568 {
1569 const int l1 = s1.size();
1570 const int l2 = s2.size();
1571 const int lmin = l1 < l2 ? l1 : l2;
1572 const UChar* c1 = s1.data();
1573 const UChar* c2 = s2.data();
1574 int l = 0;
1575 while (l < lmin && *c1 == *c2) {
1576 c1++;
1577 c2++;
1578 l++;
1579 }
1580 if (l < lmin)
1581 return (c1[0] < c2[0]);
1582
1583 return (l1 < l2);
1584 }
1585
1586 bool operator>(const UString& s1, const UString& s2)
1587 {
1588 const int l1 = s1.size();
1589 const int l2 = s2.size();
1590 const int lmin = l1 < l2 ? l1 : l2;
1591 const UChar* c1 = s1.data();
1592 const UChar* c2 = s2.data();
1593 int l = 0;
1594 while (l < lmin && *c1 == *c2) {
1595 c1++;
1596 c2++;
1597 l++;
1598 }
1599 if (l < lmin)
1600 return (c1[0] > c2[0]);
1601
1602 return (l1 > l2);
1603 }
1604
1605 int compare(const UString& s1, const UString& s2)
1606 {
1607 const int l1 = s1.size();
1608 const int l2 = s2.size();
1609 const int lmin = l1 < l2 ? l1 : l2;
1610 const UChar* c1 = s1.data();
1611 const UChar* c2 = s2.data();
1612 int l = 0;
1613 while (l < lmin && *c1 == *c2) {
1614 c1++;
1615 c2++;
1616 l++;
1617 }
1618
1619 if (l < lmin)
1620 return (c1[0] > c2[0]) ? 1 : -1;
1621
1622 if (l1 == l2)
1623 return 0;
1624
1625 return (l1 > l2) ? 1 : -1;
1626 }
1627
1628 bool equal(const UString::Rep* r, const UString::Rep* b)
1629 {
1630 int length = r->len;
1631 if (length != b->len)
1632 return false;
1633 const UChar* d = r->data();
1634 const UChar* s = b->data();
1635 for (int i = 0; i != length; ++i) {
1636 if (d[i] != s[i])
1637 return false;
1638 }
1639 return true;
1640 }
1641
1642 CString UString::UTF8String(bool strict) const
1643 {
1644 // Allocate a buffer big enough to hold all the characters.
1645 const int length = size();
1646 Vector<char, 1024> buffer(length * 3);
1647
1648 // Convert to runs of 8-bit characters.
1649 char* p = buffer.data();
1650 const UChar* d = reinterpret_cast<const UChar*>(&data()[0]);
1651 ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict);
1652 if (result != conversionOK)
1653 return CString();
1654
1655 return CString(buffer.data(), p - buffer.data());
1656 }
1657
// Points m_rep at the rep returned by Rep::null(), making this the null string.
1658 // For use in error handling code paths -- having this not be inlined helps avoid PIC branches to fetch the global on Mac OS X.
1659 NEVER_INLINE void UString::makeNull()
1660 {
1661 m_rep = &Rep::null();
1662 }
1663
// Returns the address of the rep returned by Rep::null().
1664 // For use in error handling code paths -- having this not be inlined helps avoid PIC branches to fetch the global on Mac OS X.
1665 NEVER_INLINE UString::Rep* UString::nullRep()
1666 {
1667 return &Rep::null();
1668 }
1669
1670 } // namespace JSC