runtime/UString.cpp

   1 /*
   2  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
   3  *  Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
   4  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
   5  *  Copyright (c) 2009, Google Inc. All rights reserved.
   6  *
   7  *  This library is free software; you can redistribute it and/or
   8  *  modify it under the terms of the GNU Library General Public
   9  *  License as published by the Free Software Foundation; either
  10  *  version 2 of the License, or (at your option) any later version.
  11  *
  12  *  This library is distributed in the hope that it will be useful,
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  *  Library General Public License for more details.
  16  *
  17  *  You should have received a copy of the GNU Library General Public License
  18  *  along with this library; see the file COPYING.LIB.  If not, write to
  19  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  20  *  Boston, MA 02110-1301, USA.
  21  *
  22  */
  23
  24 #include "config.h"
  25 #include "UString.h"
  26
  27 #include "JSGlobalObjectFunctions.h"
  28 #include "Collector.h"
  29 #include "dtoa.h"
  30 #include "Identifier.h"
  31 #include "Operations.h"
  32 #include <ctype.h>
  33 #include <float.h>
  34 #include <limits.h>
  35 #include <math.h>
  36 #include <stdio.h>
  37 #include <stdlib.h>
  38 #include <wtf/ASCIICType.h>
  39 #include <wtf/Assertions.h>
  40 #include <wtf/MathExtras.h>
  41 #include <wtf/Vector.h>
  42 #include <wtf/unicode/UTF8.h>
  43
  44 #if HAVE(STRING_H)
  45 #include <string.h>
  46 #endif
  47 #if HAVE(STRINGS_H)
  48 #include <strings.h>
  49 #endif
  50
  51 using namespace WTF;
  52 using namespace WTF::Unicode;
  53 using namespace std;
  54
  55 // This can be tuned differently per platform by putting platform #ifs right here.
  56 // If you don't define this macro at all, then copyChars will just call directly
  57 // to memcpy.
  58 #define USTRING_COPY_CHARS_INLINE_CUTOFF 20
  59
  60 namespace JSC {
  61
  62 extern const double NaN;
  63 extern const double Inf;
  64
  65 static inline size_t overflowIndicator() { return std::numeric_limits<size_t>::max(); }
  66 static inline size_t maxUChars() { return std::numeric_limits<size_t>::max() / sizeof(UChar); }
  67
  68 static inline UChar* allocChars(size_t length)
  69 {
  70     ASSERT(length);
  71     if (length > maxUChars())
  72         return 0;
  73     return static_cast<UChar*>(tryFastMalloc(sizeof(UChar) * length));
  74 }
  75
  76 static inline UChar* reallocChars(UChar* buffer, size_t length)
  77 {
  78     ASSERT(length);
  79     if (length > maxUChars())
  80         return 0;
  81     return static_cast<UChar*>(tryFastRealloc(buffer, sizeof(UChar) * length));
  82 }
  83
  84 static inline void copyChars(UChar* destination, const UChar* source, unsigned numCharacters)
  85 {
  86 #ifdef USTRING_COPY_CHARS_INLINE_CUTOFF
  87     if (numCharacters <= USTRING_COPY_CHARS_INLINE_CUTOFF) {
  88         for (unsigned i = 0; i < numCharacters; ++i)
  89             destination[i] = source[i];
  90         return;
  91     }
  92 #endif
  93     memcpy(destination, source, numCharacters * sizeof(UChar));
  94 }
  95
  96 COMPILE_ASSERT(sizeof(UChar) == 2, uchar_is_2_bytes)
  97
  98 CString::CString(const char* c)
  99     : m_length(strlen(c))
 100     , m_data(new char[m_length + 1])
 101 {
 102     memcpy(m_data, c, m_length + 1);
 103 }
 104
 105 CString::CString(const char* c, size_t length)
 106     : m_length(length)
 107     , m_data(new char[length + 1])
 108 {
 109     memcpy(m_data, c, m_length);
 110     m_data[m_length] = 0;
 111 }
 112
 113 CString::CString(const CString& b)
 114 {
 115     m_length = b.m_length;
 116     if (b.m_data) {
 117         m_data = new char[m_length + 1];
 118         memcpy(m_data, b.m_data, m_length + 1);
 119     } else
 120         m_data = 0;
 121 }
 122
 123 CString::~CString()
 124 {
 125     delete [] m_data;
 126 }
 127
 128 CString CString::adopt(char* c, size_t length)
 129 {
 130     CString s;
 131     s.m_data = c;
 132     s.m_length = length;
 133     return s;
 134 }
 135
 136 CString& CString::append(const CString& t)
 137 {
 138     char* n;
 139     n = new char[m_length + t.m_length + 1];
 140     if (m_length)
 141         memcpy(n, m_data, m_length);
 142     if (t.m_length)
 143         memcpy(n + m_length, t.m_data, t.m_length);
 144     m_length += t.m_length;
 145     n[m_length] = 0;
 146
 147     delete [] m_data;
 148     m_data = n;
 149
 150     return *this;
 151 }
 152
 153 CString& CString::operator=(const char* c)
 154 {
 155     if (m_data)
 156         delete [] m_data;
 157     m_length = strlen(c);
 158     m_data = new char[m_length + 1];
 159     memcpy(m_data, c, m_length + 1);
 160
 161     return *this;
 162 }
 163
 164 CString& CString::operator=(const CString& str)
 165 {
 166     if (this == &str)
 167         return *this;
 168
 169     if (m_data)
 170         delete [] m_data;
 171     m_length = str.m_length;
 172     if (str.m_data) {
 173         m_data = new char[m_length + 1];
 174         memcpy(m_data, str.m_data, m_length + 1);
 175     } else
 176         m_data = 0;
 177
 178     return *this;
 179 }
 180
 181 bool operator==(const CString& c1, const CString& c2)
 182 {
 183     size_t len = c1.size();
 184     return len == c2.size() && (len == 0 || memcmp(c1.c_str(), c2.c_str(), len) == 0);
 185 }
 186
// These static strings are immutable, except for rc, whose initial value is chosen to
// reduce the possibility of it becoming zero due to ref/deref not being thread-safe.
// All of them are created and filled in by initializeUString().

// One-character backing store that the shared empty string's buf points at.
static UChar sharedEmptyChar;
// Shared base string for the null string (initialized with a null buf).
UString::BaseString* UString::Rep::nullBaseString;
// Shared base string for the empty string (initialized with buf == &sharedEmptyChar).
UString::BaseString* UString::Rep::emptyBaseString;
// Default-constructed UString instance allocated by initializeUString().
UString* UString::nullUString;
 193
 194 static void initializeStaticBaseString(int len, UChar* buf, UString::BaseString& base)
 195 {
 196     base.offset = 0;
 197     base.len = len;
 198     base.rc = INT_MAX / 2;
 199     base._hash = 0;
 200     base.m_identifierTableAndFlags.setFlag(UString::Rep::StaticFlag);
 201     base.m_baseString = 0;
 202     base.buf = buf;
 203     base.preCapacity = 0;
 204     base.usedPreCapacity = 0;
 205     base.capacity = 0;
 206     base.usedCapacity = 0;
 207     base.reportedCost = 0;
 208     base.checkConsistency();
 209 }
 210
 211 void initializeUString()
 212 {
 213     UString::Rep::nullBaseString = new UString::BaseString;
 214     initializeStaticBaseString(0, 0, *UString::Rep::nullBaseString);
 215
 216     UString::Rep::emptyBaseString = new UString::BaseString;
 217     initializeStaticBaseString(0, &sharedEmptyChar, *UString::Rep::emptyBaseString);
 218
 219     UString::nullUString = new UString;
 220 }
 221
 222 static char* statBuffer = 0; // Only used for debugging via UString::ascii().
 223
 224 PassRefPtr<UString::Rep> UString::Rep::createCopying(const UChar* d, int l)
 225 {
 226     UChar* copyD = static_cast<UChar*>(fastMalloc(l * sizeof(UChar)));
 227     copyChars(copyD, d, l);
 228     return create(copyD, l);
 229 }
 230
 231 PassRefPtr<UString::Rep> UString::Rep::create(UChar* d, int l)
 232 {
 233     BaseString* r = new BaseString;
 234     r->offset = 0;
 235     r->len = l;
 236     r->rc = 1;
 237     r->_hash = 0;
 238     r->m_baseString = 0;
 239     r->reportedCost = 0;
 240     r->buf = d;
 241     r->usedCapacity = l;
 242     r->capacity = l;
 243     r->usedPreCapacity = 0;
 244     r->preCapacity = 0;
 245
 246     r->checkConsistency();
 247
 248     // steal the single reference this Rep was created with
 249     return adoptRef(r);
 250 }
 251
 252 PassRefPtr<UString::Rep> UString::Rep::create(PassRefPtr<Rep> rep, int offset, int length)
 253 {
 254     ASSERT(rep);
 255     rep->checkConsistency();
 256
 257     int repOffset = rep->offset;
 258
 259     PassRefPtr<BaseString> base = rep->baseString();
 260
 261     ASSERT(-(offset + repOffset) <= base->usedPreCapacity);
 262     ASSERT(offset + repOffset + length <= base->usedCapacity);
 263
 264     Rep* r = new Rep;
 265     r->offset = repOffset + offset;
 266     r->len = length;
 267     r->rc = 1;
 268     r->_hash = 0;
 269     r->setBaseString(base);
 270
 271     r->checkConsistency();
 272
 273     // steal the single reference this Rep was created with
 274     return adoptRef(r);
 275 }
 276
 277 PassRefPtr<UString::Rep> UString::Rep::createFromUTF8(const char* string)
 278 {
 279     if (!string)
 280         return &UString::Rep::null();
 281
 282     size_t length = strlen(string);
 283     Vector<UChar, 1024> buffer(length);
 284     UChar* p = buffer.data();
 285     if (conversionOK != convertUTF8ToUTF16(&string, string + length, &p, p + length))
 286         return &UString::Rep::null();
 287
 288     return UString::Rep::createCopying(buffer.data(), p - buffer.data());
 289 }
 290
 291 void UString::Rep::destroy()
 292 {
 293     checkConsistency();
 294
 295     // Static null and empty strings can never be destroyed, but we cannot rely on
 296     // reference counting, because ref/deref are not thread-safe.
 297     if (!isStatic()) {
 298         if (identifierTable())
 299             Identifier::remove(this);
 300         UString::BaseString* base = baseString();
 301         if (base == this)
 302             fastFree(base->buf);
 303         else
 304             base->deref();
 305
 306         delete this;
 307     }
 308 }
 309
// Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
// or anything like that. Both computeHash overloads below seed their hash
// with this value.
const unsigned PHI = 0x9e3779b9U;
 313
 314 // Paul Hsieh's SuperFastHash
 315 // http://www.azillionmonkeys.com/qed/hash.html
 316 unsigned UString::Rep::computeHash(const UChar* s, int len)
 317 {
 318     unsigned l = len;
 319     uint32_t hash = PHI;
 320     uint32_t tmp;
 321
 322     int rem = l & 1;
 323     l >>= 1;
 324
 325     // Main loop
 326     for (; l > 0; l--) {
 327         hash += s[0];
 328         tmp = (s[1] << 11) ^ hash;
 329         hash = (hash << 16) ^ tmp;
 330         s += 2;
 331         hash += hash >> 11;
 332     }
 333
 334     // Handle end case
 335     if (rem) {
 336         hash += s[0];
 337         hash ^= hash << 11;
 338         hash += hash >> 17;
 339     }
 340
 341     // Force "avalanching" of final 127 bits
 342     hash ^= hash << 3;
 343     hash += hash >> 5;
 344     hash ^= hash << 2;
 345     hash += hash >> 15;
 346     hash ^= hash << 10;
 347
 348     // this avoids ever returning a hash code of 0, since that is used to
 349     // signal "hash not computed yet", using a value that is likely to be
 350     // effectively the same as 0 when the low bits are masked
 351     if (hash == 0)
 352         hash = 0x80000000;
 353
 354     return hash;
 355 }
 356
 357 // Paul Hsieh's SuperFastHash
 358 // http://www.azillionmonkeys.com/qed/hash.html
 359 unsigned UString::Rep::computeHash(const char* s, int l)
 360 {
 361     // This hash is designed to work on 16-bit chunks at a time. But since the normal case
 362     // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
 363     // were 16-bit chunks, which should give matching results
 364
 365     uint32_t hash = PHI;
 366     uint32_t tmp;
 367
 368     size_t rem = l & 1;
 369     l >>= 1;
 370
 371     // Main loop
 372     for (; l > 0; l--) {
 373         hash += static_cast<unsigned char>(s[0]);
 374         tmp = (static_cast<unsigned char>(s[1]) << 11) ^ hash;
 375         hash = (hash << 16) ^ tmp;
 376         s += 2;
 377         hash += hash >> 11;
 378     }
 379
 380     // Handle end case
 381     if (rem) {
 382         hash += static_cast<unsigned char>(s[0]);
 383         hash ^= hash << 11;
 384         hash += hash >> 17;
 385     }
 386
 387     // Force "avalanching" of final 127 bits
 388     hash ^= hash << 3;
 389     hash += hash >> 5;
 390     hash ^= hash << 2;
 391     hash += hash >> 15;
 392     hash ^= hash << 10;
 393
 394     // this avoids ever returning a hash code of 0, since that is used to
 395     // signal "hash not computed yet", using a value that is likely to be
 396     // effectively the same as 0 when the low bits are masked
 397     if (hash == 0)
 398         hash = 0x80000000;
 399
 400     return hash;
 401 }
 402
 403 #ifndef NDEBUG
 404 void UString::Rep::checkConsistency() const
 405 {
 406     const UString::BaseString* base = baseString();
 407
 408     // There is no recursion for base strings.
 409     ASSERT(base == base->baseString());
 410
 411     if (isStatic()) {
 412         // There are only two static strings: null and empty.
 413         ASSERT(!len);
 414
 415         // Static strings cannot get in identifier tables, because they are globally shared.
 416         ASSERT(!identifierTable());
 417     }
 418
 419     // The string fits in buffer.
 420     ASSERT(base->usedPreCapacity <= base->preCapacity);
 421     ASSERT(base->usedCapacity <= base->capacity);
 422     ASSERT(-offset <= base->usedPreCapacity);
 423     ASSERT(offset + len <= base->usedCapacity);
 424 }
 425 #endif
 426
 427 // put these early so they can be inlined
 428 static inline size_t expandedSize(size_t size, size_t otherSize)
 429 {
 430     // Do the size calculation in two parts, returning overflowIndicator if
 431     // we overflow the maximum value that we can handle.
 432
 433     if (size > maxUChars())
 434         return overflowIndicator();
 435
 436     size_t expandedSize = ((size + 10) / 10 * 11) + 1;
 437     if (maxUChars() - expandedSize < otherSize)
 438         return overflowIndicator();
 439
 440     return expandedSize + otherSize;
 441 }
 442
 443 static inline bool expandCapacity(UString::Rep* rep, int requiredLength)
 444 {
 445     rep->checkConsistency();
 446
 447     UString::BaseString* base = rep->baseString();
 448
 449     if (requiredLength > base->capacity) {
 450         size_t newCapacity = expandedSize(requiredLength, base->preCapacity);
 451         UChar* oldBuf = base->buf;
 452         base->buf = reallocChars(base->buf, newCapacity);
 453         if (!base->buf) {
 454             base->buf = oldBuf;
 455             return false;
 456         }
 457         base->capacity = newCapacity - base->preCapacity;
 458     }
 459     if (requiredLength > base->usedCapacity)
 460         base->usedCapacity = requiredLength;
 461
 462     rep->checkConsistency();
 463     return true;
 464 }
 465
 466 void UString::expandCapacity(int requiredLength)
 467 {
 468     if (!JSC::expandCapacity(m_rep.get(), requiredLength))
 469         makeNull();
 470 }
 471
 472 void UString::expandPreCapacity(int requiredPreCap)
 473 {
 474     m_rep->checkConsistency();
 475
 476     BaseString* base = m_rep->baseString();
 477
 478     if (requiredPreCap > base->preCapacity) {
 479         size_t newCapacity = expandedSize(requiredPreCap, base->capacity);
 480         int delta = newCapacity - base->capacity - base->preCapacity;
 481
 482         UChar* newBuf = allocChars(newCapacity);
 483         if (!newBuf) {
 484             makeNull();
 485             return;
 486         }
 487         copyChars(newBuf + delta, base->buf, base->capacity + base->preCapacity);
 488         fastFree(base->buf);
 489         base->buf = newBuf;
 490
 491         base->preCapacity = newCapacity - base->capacity;
 492     }
 493     if (requiredPreCap > base->usedPreCapacity)
 494         base->usedPreCapacity = requiredPreCap;
 495
 496     m_rep->checkConsistency();
 497 }
 498
 499 static PassRefPtr<UString::Rep> createRep(const char* c)
 500 {
 501     if (!c)
 502         return &UString::Rep::null();
 503
 504     if (!c[0])
 505         return &UString::Rep::empty();
 506
 507     size_t length = strlen(c);
 508     UChar* d = allocChars(length);
 509     if (!d)
 510         return &UString::Rep::null();
 511     else {
 512         for (size_t i = 0; i < length; i++)
 513             d[i] = static_cast<unsigned char>(c[i]); // use unsigned char to zero-extend instead of sign-extend
 514         return UString::Rep::create(d, static_cast<int>(length));
 515     }
 516
 517 }
 518
 519 UString::UString(const char* c)
 520     : m_rep(createRep(c))
 521 {
 522 }
 523
 524 UString::UString(const UChar* c, int length)
 525 {
 526     if (length == 0)
 527         m_rep = &Rep::empty();
 528     else
 529         m_rep = Rep::createCopying(c, length);
 530 }
 531
 532 UString::UString(UChar* c, int length, bool copy)
 533 {
 534     if (length == 0)
 535         m_rep = &Rep::empty();
 536     else if (copy)
 537         m_rep = Rep::createCopying(c, length);
 538     else
 539         m_rep = Rep::create(c, length);
 540 }
 541
 542 UString::UString(const Vector<UChar>& buffer)
 543 {
 544     if (!buffer.size())
 545         m_rep = &Rep::empty();
 546     else
 547         m_rep = Rep::createCopying(buffer.data(), buffer.size());
 548 }
 549
 550 static ALWAYS_INLINE int newCapacityWithOverflowCheck(const int currentCapacity, const int extendLength, const bool plusOne = false)
 551 {
 552     ASSERT_WITH_MESSAGE(extendLength >= 0, "extendedLength = %d", extendLength);
 553
 554     const int plusLength = plusOne ? 1 : 0;
 555     if (currentCapacity > std::numeric_limits<int>::max() - extendLength - plusLength)
 556         CRASH();
 557
 558     return currentCapacity + extendLength + plusLength;
 559 }
 560
 561 static ALWAYS_INLINE PassRefPtr<UString::Rep> concatenate(PassRefPtr<UString::Rep> r, const UChar* tData, int tSize)
 562 {
 563     RefPtr<UString::Rep> rep = r;
 564
 565     rep->checkConsistency();
 566
 567     int thisSize = rep->size();
 568     int thisOffset = rep->offset;
 569     int length = thisSize + tSize;
 570     UString::BaseString* base = rep->baseString();
 571
 572     // possible cases:
 573     if (tSize == 0) {
 574         // t is empty
 575     } else if (thisSize == 0) {
 576         // this is empty
 577         rep = UString::Rep::createCopying(tData, tSize);
 578     } else if (rep == base && rep->rc == 1) {
 579         // this is direct and has refcount of 1 (so we can just alter it directly)
 580         if (!expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length)))
 581             rep = &UString::Rep::null();
 582         if (rep->data()) {
 583             copyChars(rep->data() + thisSize, tData, tSize);
 584             rep->len = length;
 585             rep->_hash = 0;
 586         }
 587     } else if (thisOffset + thisSize == base->usedCapacity && thisSize >= minShareSize) {
 588         // this reaches the end of the buffer - extend it if it's long enough to append to
 589         if (!expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length)))
 590             rep = &UString::Rep::null();
 591         if (rep->data()) {
 592             copyChars(rep->data() + thisSize, tData, tSize);
 593             rep = UString::Rep::create(rep, 0, length);
 594         }
 595     } else {
 596         // this is shared with someone using more capacity, gotta make a whole new string
 597         size_t newCapacity = expandedSize(length, 0);
 598         UChar* d = allocChars(newCapacity);
 599         if (!d)
 600             rep = &UString::Rep::null();
 601         else {
 602             copyChars(d, rep->data(), thisSize);
 603             copyChars(d + thisSize, tData, tSize);
 604             rep = UString::Rep::create(d, length);
 605             rep->baseString()->capacity = newCapacity;
 606         }
 607     }
 608
 609     rep->checkConsistency();
 610
 611     return rep.release();
 612 }
 613
 614 static ALWAYS_INLINE PassRefPtr<UString::Rep> concatenate(PassRefPtr<UString::Rep> r, const char* t)
 615 {
 616     RefPtr<UString::Rep> rep = r;
 617
 618     rep->checkConsistency();
 619
 620     int thisSize = rep->size();
 621     int thisOffset = rep->offset;
 622     int tSize = static_cast<int>(strlen(t));
 623     int length = thisSize + tSize;
 624     UString::BaseString* base = rep->baseString();
 625
 626     // possible cases:
 627     if (thisSize == 0) {
 628         // this is empty
 629         rep = createRep(t);
 630     } else if (tSize == 0) {
 631         // t is empty, we'll just return *this below.
 632     } else if (rep == base && rep->rc == 1) {
 633         // this is direct and has refcount of 1 (so we can just alter it directly)
 634         expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length));
 635         UChar* d = rep->data();
 636         if (d) {
 637             for (int i = 0; i < tSize; ++i)
 638                 d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
 639             rep->len = length;
 640             rep->_hash = 0;
 641         }
 642     } else if (thisOffset + thisSize == base->usedCapacity && thisSize >= minShareSize) {
 643         // this string reaches the end of the buffer - extend it
 644         expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length));
 645         UChar* d = rep->data();
 646         if (d) {
 647             for (int i = 0; i < tSize; ++i)
 648                 d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
 649             rep = UString::Rep::create(rep, 0, length);
 650         }
 651     } else {
 652         // this is shared with someone using more capacity, gotta make a whole new string
 653         size_t newCapacity = expandedSize(length, 0);
 654         UChar* d = allocChars(newCapacity);
 655         if (!d)
 656             rep = &UString::Rep::null();
 657         else {
 658             copyChars(d, rep->data(), thisSize);
 659             for (int i = 0; i < tSize; ++i)
 660                 d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
 661             rep = UString::Rep::create(d, length);
 662             rep->baseString()->capacity = newCapacity;
 663         }
 664     }
 665
 666     rep->checkConsistency();
 667
 668     return rep.release();
 669 }
 670
 671 PassRefPtr<UString::Rep> concatenate(UString::Rep* a, UString::Rep* b)
 672 {
 673     a->checkConsistency();
 674     b->checkConsistency();
 675
 676     int aSize = a->size();
 677     int aOffset = a->offset;
 678     int bSize = b->size();
 679     int bOffset = b->offset;
 680     int length = aSize + bSize;
 681
 682     // possible cases:
 683
 684     // a is empty
 685     if (aSize == 0)
 686         return b;
 687     // b is empty
 688     if (bSize == 0)
 689         return a;
 690
 691     UString::BaseString* aBase = a->baseString();
 692     if (bSize == 1 && aOffset + aSize == aBase->usedCapacity && aOffset + length <= aBase->capacity) {
 693         // b is a single character (common fast case)
 694         aBase->usedCapacity = aOffset + length;
 695         a->data()[aSize] = b->data()[0];
 696         return UString::Rep::create(a, 0, length);
 697     }
 698
 699     UString::BaseString* bBase = b->baseString();
 700     if (aOffset + aSize == aBase->usedCapacity && aSize >= minShareSize && 4 * aSize >= bSize
 701         && (-bOffset != bBase->usedPreCapacity || aSize >= bSize)) {
 702         // - a reaches the end of its buffer so it qualifies for shared append
 703         // - also, it's at least a quarter the length of b - appending to a much shorter
 704         //   string does more harm than good
 705         // - however, if b qualifies for prepend and is longer than a, we'd rather prepend
 706
 707         UString x(a);
 708         x.expandCapacity(newCapacityWithOverflowCheck(aOffset, length));
 709         if (!a->data() || !x.data())
 710             return 0;
 711         copyChars(a->data() + aSize, b->data(), bSize);
 712         PassRefPtr<UString::Rep> result = UString::Rep::create(a, 0, length);
 713
 714         a->checkConsistency();
 715         b->checkConsistency();
 716         result->checkConsistency();
 717
 718         return result;
 719     }
 720
 721     if (-bOffset == bBase->usedPreCapacity && bSize >= minShareSize && 4 * bSize >= aSize) {
 722         // - b reaches the beginning of its buffer so it qualifies for shared prepend
 723         // - also, it's at least a quarter the length of a - prepending to a much shorter
 724         //   string does more harm than good
 725         UString y(b);
 726         y.expandPreCapacity(-bOffset + aSize);
 727         if (!b->data() || !y.data())
 728             return 0;
 729         copyChars(b->data() - aSize, a->data(), aSize);
 730         PassRefPtr<UString::Rep> result = UString::Rep::create(b, -aSize, length);
 731
 732         a->checkConsistency();
 733         b->checkConsistency();
 734         result->checkConsistency();
 735
 736         return result;
 737     }
 738
 739     // a does not qualify for append, and b does not qualify for prepend, gotta make a whole new string
 740     size_t newCapacity = expandedSize(length, 0);
 741     UChar* d = allocChars(newCapacity);
 742     if (!d)
 743         return 0;
 744     copyChars(d, a->data(), aSize);
 745     copyChars(d + aSize, b->data(), bSize);
 746     PassRefPtr<UString::Rep> result = UString::Rep::create(d, length);
 747     result->baseString()->capacity = newCapacity;
 748
 749     a->checkConsistency();
 750     b->checkConsistency();
 751     result->checkConsistency();
 752
 753     return result;
 754 }
 755
 756 PassRefPtr<UString::Rep> concatenate(UString::Rep* rep, int i)
 757 {
 758     UChar buf[1 + sizeof(i) * 3];
 759     UChar* end = buf + sizeof(buf) / sizeof(UChar);
 760     UChar* p = end;
 761
 762     if (i == 0)
 763         *--p = '0';
 764     else if (i == INT_MIN) {
 765         char minBuf[1 + sizeof(i) * 3];
 766         sprintf(minBuf, "%d", INT_MIN);
 767         return concatenate(rep, minBuf);
 768     } else {
 769         bool negative = false;
 770         if (i < 0) {
 771             negative = true;
 772             i = -i;
 773         }
 774         while (i) {
 775             *--p = static_cast<unsigned short>((i % 10) + '0');
 776             i /= 10;
 777         }
 778         if (negative)
 779             *--p = '-';
 780     }
 781
 782     return concatenate(rep, p, static_cast<int>(end - p));
 783
 784 }
 785
 786 PassRefPtr<UString::Rep> concatenate(UString::Rep* rep, double d)
 787 {
 788     // avoid ever printing -NaN, in JS conceptually there is only one NaN value
 789     if (isnan(d))
 790         return concatenate(rep, "NaN");
 791
 792     if (d == 0.0) // stringify -0 as 0
 793         d = 0.0;
 794
 795     char buf[80];
 796     int decimalPoint;
 797     int sign;
 798
 799     char* result = WTF::dtoa(d, 0, &decimalPoint, &sign, NULL);
 800     int length = static_cast<int>(strlen(result));
 801
 802     int i = 0;
 803     if (sign)
 804         buf[i++] = '-';
 805
 806     if (decimalPoint <= 0 && decimalPoint > -6) {
 807         buf[i++] = '0';
 808         buf[i++] = '.';
 809         for (int j = decimalPoint; j < 0; j++)
 810             buf[i++] = '0';
 811         strcpy(buf + i, result);
 812     } else if (decimalPoint <= 21 && decimalPoint > 0) {
 813         if (length <= decimalPoint) {
 814             strcpy(buf + i, result);
 815             i += length;
 816             for (int j = 0; j < decimalPoint - length; j++)
 817                 buf[i++] = '0';
 818             buf[i] = '\0';
 819         } else {
 820             strncpy(buf + i, result, decimalPoint);
 821             i += decimalPoint;
 822             buf[i++] = '.';
 823             strcpy(buf + i, result + decimalPoint);
 824         }
 825     } else if (result[0] < '0' || result[0] > '9')
 826         strcpy(buf + i, result);
 827     else {
 828         buf[i++] = result[0];
 829         if (length > 1) {
 830             buf[i++] = '.';
 831             strcpy(buf + i, result + 1);
 832             i += length - 1;
 833         }
 834
 835         buf[i++] = 'e';
 836         buf[i++] = (decimalPoint >= 0) ? '+' : '-';
 837         // decimalPoint can't be more than 3 digits decimal given the
 838         // nature of float representation
 839         int exponential = decimalPoint - 1;
 840         if (exponential < 0)
 841             exponential = -exponential;
 842         if (exponential >= 100)
 843             buf[i++] = static_cast<char>('0' + exponential / 100);
 844         if (exponential >= 10)
 845             buf[i++] = static_cast<char>('0' + (exponential % 100) / 10);
 846         buf[i++] = static_cast<char>('0' + exponential % 10);
 847         buf[i++] = '\0';
 848     }
 849
 850   WTF::freedtoa(result);
 851
 852   return concatenate(rep, buf);
 853 }
 854
 855 UString UString::from(int i)
 856 {
 857     UChar buf[1 + sizeof(i) * 3];
 858     UChar* end = buf + sizeof(buf) / sizeof(UChar);
 859     UChar* p = end;
 860
 861     if (i == 0)
 862         *--p = '0';
 863     else if (i == INT_MIN) {
 864         char minBuf[1 + sizeof(i) * 3];
 865         snprintf(minBuf, 1 + sizeof(i) * 3, "%d", INT_MIN);
 866         return UString(minBuf);
 867     } else {
 868         bool negative = false;
 869         if (i < 0) {
 870             negative = true;
 871             i = -i;
 872         }
 873         while (i) {
 874             *--p = static_cast<unsigned short>((i % 10) + '0');
 875             i /= 10;
 876         }
 877         if (negative)
 878             *--p = '-';
 879     }
 880
 881     return UString(p, static_cast<int>(end - p));
 882 }
 883
 884 UString UString::from(unsigned int u)
 885 {
 886     UChar buf[sizeof(u) * 3];
 887     UChar* end = buf + sizeof(buf) / sizeof(UChar);
 888     UChar* p = end;
 889
 890     if (u == 0)
 891         *--p = '0';
 892     else {
 893         while (u) {
 894             *--p = static_cast<unsigned short>((u % 10) + '0');
 895             u /= 10;
 896         }
 897     }
 898
 899     return UString(p, static_cast<int>(end - p));
 900 }
 901
 902 UString UString::from(long l)
 903 {
 904     UChar buf[1 + sizeof(l) * 3];
 905     UChar* end = buf + sizeof(buf) / sizeof(UChar);
 906     UChar* p = end;
 907
 908     if (l == 0)
 909         *--p = '0';
 910     else if (l == LONG_MIN) {
 911         char minBuf[1 + sizeof(l) * 3];
 912         snprintf(minBuf, 1 + sizeof(l) * 3, "%ld", LONG_MIN);
 913         return UString(minBuf);
 914     } else {
 915         bool negative = false;
 916         if (l < 0) {
 917             negative = true;
 918             l = -l;
 919         }
 920         while (l) {
 921             *--p = static_cast<unsigned short>((l % 10) + '0');
 922             l /= 10;
 923         }
 924         if (negative)
 925             *--p = '-';
 926     }
 927
 928     return UString(p, static_cast<int>(end - p));
 929 }
 930
 931 UString UString::from(double d)
 932 {
 933     // avoid ever printing -NaN, in JS conceptually there is only one NaN value
 934     if (isnan(d))
 935         return "NaN";
 936
 937     char buf[80];
 938     int decimalPoint;
 939     int sign;
 940
 941     char* result = WTF::dtoa(d, 0, &decimalPoint, &sign, NULL);
 942     int length = static_cast<int>(strlen(result));
 943
 944     int i = 0;
 945     if (sign)
 946         buf[i++] = '-';
 947
 948     if (decimalPoint <= 0 && decimalPoint > -6) {
 949         buf[i++] = '0';
 950         buf[i++] = '.';
 951         for (int j = decimalPoint; j < 0; j++)
 952             buf[i++] = '0';
 953         strlcpy(buf + i, result, sizeof(buf) - i);
 954     } else if (decimalPoint <= 21 && decimalPoint > 0) {
 955         if (length <= decimalPoint) {
 956             strlcpy(buf + i, result, sizeof(buf) - i);
 957             i += length;
 958             for (int j = 0; j < decimalPoint - length; j++)
 959                 buf[i++] = '0';
 960             buf[i] = '\0';
 961         } else {
 962             int len = (decimalPoint <= static_cast<int>(sizeof(buf)) - i ? decimalPoint : sizeof(buf) - i);
 963             strncpy(buf + i, result, len);
 964             i += len;
 965             buf[i++] = '.';
 966             strlcpy(buf + i, result + decimalPoint, sizeof(buf) - i);
 967         }
 968     } else if (result[0] < '0' || result[0] > '9')
 969         strlcpy(buf + i, result, sizeof(buf) - i);
 970     else {
 971         buf[i++] = result[0];
 972         if (length > 1) {
 973             buf[i++] = '.';
 974             strlcpy(buf + i, result + 1, sizeof(buf) - i);
 975             i += length - 1;
 976         }
 977
 978         buf[i++] = 'e';
 979         buf[i++] = (decimalPoint >= 0) ? '+' : '-';
 980         // decimalPoint can't be more than 3 digits decimal given the
 981         // nature of float representation
 982         int exponential = decimalPoint - 1;
 983         if (exponential < 0)
 984             exponential = -exponential;
 985         if (exponential >= 100)
 986             buf[i++] = static_cast<char>('0' + exponential / 100);
 987         if (exponential >= 10)
 988             buf[i++] = static_cast<char>('0' + (exponential % 100) / 10);
 989         buf[i++] = static_cast<char>('0' + exponential % 10);
 990         buf[i++] = '\0';
 991         ASSERT(i <= static_cast<int>(sizeof(buf)));
 992     }
 993
 994   WTF::freedtoa(result);
 995
 996   return UString(buf);
 997 }
 998
 999 UString UString::spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const
1000 {
1001     m_rep->checkConsistency();
1002
1003     if (rangeCount == 1 && separatorCount == 0) {
1004         int thisSize = size();
1005         int position = substringRanges[0].position;
1006         int length = substringRanges[0].length;
1007         if (position <= 0 && length >= thisSize)
1008             return *this;
1009         return UString::Rep::create(m_rep, max(0, position), min(thisSize, length));
1010     }
1011
1012     int totalLength = 0;
1013     for (int i = 0; i < rangeCount; i++)
1014         totalLength += substringRanges[i].length;
1015     for (int i = 0; i < separatorCount; i++)
1016         totalLength += separators[i].size();
1017
1018     if (totalLength == 0)
1019         return "";
1020
1021     UChar* buffer = allocChars(totalLength);
1022     if (!buffer)
1023         return null();
1024
1025     int maxCount = max(rangeCount, separatorCount);
1026     int bufferPos = 0;
1027     for (int i = 0; i < maxCount; i++) {
1028         if (i < rangeCount) {
1029             copyChars(buffer + bufferPos, data() + substringRanges[i].position, substringRanges[i].length);
1030             bufferPos += substringRanges[i].length;
1031         }
1032         if (i < separatorCount) {
1033             copyChars(buffer + bufferPos, separators[i].data(), separators[i].size());
1034             bufferPos += separators[i].size();
1035         }
1036     }
1037
1038     return UString::Rep::create(buffer, totalLength);
1039 }
1040
1041 UString& UString::append(const UString &t)
1042 {
1043     m_rep->checkConsistency();
1044     t.rep()->checkConsistency();
1045
1046     int thisSize = size();
1047     int thisOffset = m_rep->offset;
1048     int tSize = t.size();
1049     int length = thisSize + tSize;
1050     BaseString* base = m_rep->baseString();
1051
1052     // possible cases:
1053     if (thisSize == 0) {
1054         // this is empty
1055         *this = t;
1056     } else if (tSize == 0) {
1057         // t is empty
1058     } else if (m_rep == base && m_rep->rc == 1) {
1059         // this is direct and has refcount of 1 (so we can just alter it directly)
1060         expandCapacity(newCapacityWithOverflowCheck(thisOffset, length));
1061         if (data()) {
1062             copyChars(m_rep->data() + thisSize, t.data(), tSize);
1063             m_rep->len = length;
1064             m_rep->_hash = 0;
1065         }
1066     } else if (thisOffset + thisSize == base->usedCapacity && thisSize >= minShareSize) {
1067         // this reaches the end of the buffer - extend it if it's long enough to append to
1068         expandCapacity(newCapacityWithOverflowCheck(thisOffset, length));
1069         if (data()) {
1070             copyChars(m_rep->data() + thisSize, t.data(), tSize);
1071             m_rep = Rep::create(m_rep, 0, length);
1072         }
1073     } else {
1074         // this is shared with someone using more capacity, gotta make a whole new string
1075         size_t newCapacity = expandedSize(length, 0);
1076         UChar* d = allocChars(newCapacity);
1077         if (!d)
1078             makeNull();
1079         else {
1080             copyChars(d, data(), thisSize);
1081             copyChars(d + thisSize, t.data(), tSize);
1082             m_rep = Rep::create(d, length);
1083             m_rep->baseString()->capacity = newCapacity;
1084         }
1085     }
1086
1087     m_rep->checkConsistency();
1088     t.rep()->checkConsistency();
1089
1090     return *this;
1091 }
1092
// Appends the tSize UChars starting at tData.
// The current rep is released into concatenate(), and whatever rep that call
// returns becomes this string's rep. Returns *this to allow chaining.
UString& UString::append(const UChar* tData, int tSize)
{
    m_rep = concatenate(m_rep.release(), tData, tSize);
    return *this;
}
1098
// Appends the C string t.
// The current rep is released into concatenate(), and whatever rep that call
// returns becomes this string's rep. Returns *this to allow chaining.
UString& UString::append(const char* t)
{
    m_rep = concatenate(m_rep.release(), t);
    return *this;
}
1104
1105 UString& UString::append(UChar c)
1106 {
1107     m_rep->checkConsistency();
1108
1109     int thisOffset = m_rep->offset;
1110     int length = size();
1111     BaseString* base = m_rep->baseString();
1112
1113     // possible cases:
1114     if (length == 0) {
1115         // this is empty - must make a new m_rep because we don't want to pollute the shared empty one
1116         size_t newCapacity = expandedSize(1, 0);
1117         UChar* d = allocChars(newCapacity);
1118         if (!d)
1119             makeNull();
1120         else {
1121             d[0] = c;
1122             m_rep = Rep::create(d, 1);
1123             m_rep->baseString()->capacity = newCapacity;
1124         }
1125     } else if (m_rep == base && m_rep->rc == 1) {
1126         // this is direct and has refcount of 1 (so we can just alter it directly)
1127         expandCapacity(newCapacityWithOverflowCheck(thisOffset, length, true));
1128         UChar* d = m_rep->data();
1129         if (d) {
1130             d[length] = c;
1131             m_rep->len = length + 1;
1132             m_rep->_hash = 0;
1133         }
1134     } else if (thisOffset + length == base->usedCapacity && length >= minShareSize) {
1135         // this reaches the end of the string - extend it and share
1136         expandCapacity(newCapacityWithOverflowCheck(thisOffset, length, true));
1137         UChar* d = m_rep->data();
1138         if (d) {
1139             d[length] = c;
1140             m_rep = Rep::create(m_rep, 0, length + 1);
1141         }
1142     } else {
1143         // this is shared with someone using more capacity, gotta make a whole new string
1144         size_t newCapacity = expandedSize(length + 1, 0);
1145         UChar* d = allocChars(newCapacity);
1146         if (!d)
1147             makeNull();
1148         else {
1149             copyChars(d, data(), length);
1150             d[length] = c;
1151             m_rep = Rep::create(d, length + 1);
1152             m_rep->baseString()->capacity = newCapacity;
1153         }
1154     }
1155
1156     m_rep->checkConsistency();
1157
1158     return *this;
1159 }
1160
1161 bool UString::getCString(CStringBuffer& buffer) const
1162 {
1163     int length = size();
1164     int neededSize = length + 1;
1165     buffer.resize(neededSize);
1166     char* buf = buffer.data();
1167
1168     UChar ored = 0;
1169     const UChar* p = data();
1170     char* q = buf;
1171     const UChar* limit = p + length;
1172     while (p != limit) {
1173         UChar c = p[0];
1174         ored |= c;
1175         *q = static_cast<char>(c);
1176         ++p;
1177         ++q;
1178     }
1179     *q = '\0';
1180
1181     return !(ored & 0xFF00);
1182 }
1183
1184 char* UString::ascii() const
1185 {
1186     int length = size();
1187     int neededSize = length + 1;
1188     delete[] statBuffer;
1189     statBuffer = new char[neededSize];
1190
1191     const UChar* p = data();
1192     char* q = statBuffer;
1193     const UChar* limit = p + length;
1194     while (p != limit) {
1195         *q = static_cast<char>(p[0]);
1196         ++p;
1197         ++q;
1198     }
1199     *q = '\0';
1200
1201     return statBuffer;
1202 }
1203
1204 UString& UString::operator=(const char* c)
1205 {
1206     if (!c) {
1207         m_rep = &Rep::null();
1208         return *this;
1209     }
1210
1211     if (!c[0]) {
1212         m_rep = &Rep::empty();
1213         return *this;
1214     }
1215
1216     int l = static_cast<int>(strlen(c));
1217     UChar* d;
1218     BaseString* base = m_rep->baseString();
1219     if (m_rep->rc == 1 && l <= base->capacity && m_rep == base && m_rep->offset == 0 && base->preCapacity == 0) {
1220         d = base->buf;
1221         m_rep->_hash = 0;
1222         m_rep->len = l;
1223     } else {
1224         d = allocChars(l);
1225         if (!d) {
1226             makeNull();
1227             return *this;
1228         }
1229         m_rep = Rep::create(d, l);
1230     }
1231     for (int i = 0; i < l; i++)
1232         d[i] = static_cast<unsigned char>(c[i]); // use unsigned char to zero-extend instead of sign-extend
1233
1234     return *this;
1235 }
1236
1237 bool UString::is8Bit() const
1238 {
1239     const UChar* u = data();
1240     const UChar* limit = u + size();
1241     while (u < limit) {
1242         if (u[0] > 0xFF)
1243             return false;
1244         ++u;
1245     }
1246
1247     return true;
1248 }
1249
1250 UChar UString::operator[](int pos) const
1251 {
1252     if (pos >= size())
1253         return '\0';
1254     return data()[pos];
1255 }
1256
// Converts this string to a double.
//
// tolerateTrailingJunk: when false, anything other than white space after the
//     number makes the result NaN; when true, such trailing text is ignored.
// tolerateEmptyString: when true, an empty or all-white-space string converts
//     to 0; when false it converts to NaN.
//
// Accepts leading and trailing ASCII white space, hexadecimal literals
// introduced by "0x"/"0X", whatever WTF::strtod accepts, and the exact spelling
// "Infinity" with an optional sign. Strings containing a character above 0xFF
// always yield NaN (see the FIXME below).
double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
{
    // Fast path for single-character strings: a digit, a lone white space
    // character (treated like the empty string), or NaN.
    if (size() == 1) {
        UChar c = data()[0];
        if (isASCIIDigit(c))
            return c - '0';
        if (isASCIISpace(c) && tolerateEmptyString)
            return 0;
        return NaN;
    }

    // FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk
    // after the number, so this is too strict a check.
    CStringBuffer s;
    if (!getCString(s))
        return NaN;
    const char* c = s.data();

    // skip leading white space
    while (isASCIISpace(*c))
        c++;

    // empty string ?
    if (*c == '\0')
        return tolerateEmptyString ? 0.0 : NaN;

    double d;

    // hex number ?
    if (*c == '0' && (*(c + 1) == 'x' || *(c + 1) == 'X')) {
        const char* firstDigitPosition = c + 2;
        // Step onto the 'x'; the pre-increment in the loop then moves to the first digit.
        c++;
        d = 0.0;
        while (*(++c)) {
            if (*c >= '0' && *c <= '9')
                d = d * 16.0 + *c - '0';
            else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f'))
                // Clearing bit 0x20 folds lower-case letters onto upper-case ones.
                d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
            else
                break;
        }

        // NOTE(review): presumably the accumulated value is no longer exact at
        // this magnitude, so the digits are re-parsed by parseIntOverflow — confirm.
        if (d >= mantissaOverflowLowerBound)
            d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
    } else {
        // regular number ?
        char* end;
        d = WTF::strtod(c, &end);
        // Accept strtod's answer when it is finite and either non-zero or
        // backed by consumed characters (end != c).
        if ((d != 0.0 || end != c) && d != Inf && d != -Inf) {
            c = end;
        } else {
            double sign = 1.0;

            if (*c == '+')
                c++;
            else if (*c == '-') {
                sign = -1.0;
                c++;
            }

            // We used strtod() to do the conversion. However, strtod() handles
            // infinite values slightly differently than JavaScript in that it
            // converts the string "inf" with any capitalization to infinity,
            // whereas the ECMA spec requires that it be converted to NaN.

            // The chained comparisons stop at the first mismatch, so they never
            // read past the terminating NUL.
            if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
                d = sign * Inf;
                c += 8;
            } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i')
                // strtod returned an infinity for text that does not start
                // with 'I' or 'i'; keep that result.
                c = end;
            else
                return NaN;
        }
    }

    // allow trailing white space
    while (isASCIISpace(*c))
        c++;
    // don't allow anything after - unless tolerant=true
    if (!tolerateTrailingJunk && *c != '\0')
        d = NaN;

    return d;
}
1341
1342 double UString::toDouble(bool tolerateTrailingJunk) const
1343 {
1344     return toDouble(tolerateTrailingJunk, true);
1345 }
1346
1347 double UString::toDouble() const
1348 {
1349     return toDouble(false, true);
1350 }
1351
1352 uint32_t UString::toUInt32(bool* ok) const
1353 {
1354     double d = toDouble();
1355     bool b = true;
1356
1357     if (d != static_cast<uint32_t>(d)) {
1358         b = false;
1359         d = 0;
1360     }
1361
1362     if (ok)
1363         *ok = b;
1364
1365     return static_cast<uint32_t>(d);
1366 }
1367
1368 uint32_t UString::toUInt32(bool* ok, bool tolerateEmptyString) const
1369 {
1370     double d = toDouble(false, tolerateEmptyString);
1371     bool b = true;
1372
1373     if (d != static_cast<uint32_t>(d)) {
1374         b = false;
1375         d = 0;
1376     }
1377
1378     if (ok)
1379         *ok = b;
1380
1381     return static_cast<uint32_t>(d);
1382 }
1383
1384 uint32_t UString::toStrictUInt32(bool* ok) const
1385 {
1386     if (ok)
1387         *ok = false;
1388
1389     // Empty string is not OK.
1390     int len = m_rep->len;
1391     if (len == 0)
1392         return 0;
1393     const UChar* p = m_rep->data();
1394     unsigned short c = p[0];
1395
1396     // If the first digit is 0, only 0 itself is OK.
1397     if (c == '0') {
1398         if (len == 1 && ok)
1399             *ok = true;
1400         return 0;
1401     }
1402
1403     // Convert to UInt32, checking for overflow.
1404     uint32_t i = 0;
1405     while (1) {
1406         // Process character, turning it into a digit.
1407         if (c < '0' || c > '9')
1408             return 0;
1409         const unsigned d = c - '0';
1410
1411         // Multiply by 10, checking for overflow out of 32 bits.
1412         if (i > 0xFFFFFFFFU / 10)
1413             return 0;
1414         i *= 10;
1415
1416         // Add in the digit, checking for overflow out of 32 bits.
1417         const unsigned max = 0xFFFFFFFFU - d;
1418         if (i > max)
1419             return 0;
1420         i += d;
1421
1422         // Handle end of string.
1423         if (--len == 0) {
1424             if (ok)
1425                 *ok = true;
1426             return i;
1427         }
1428
1429         // Get next character.
1430         c = *(++p);
1431     }
1432 }
1433
1434 int UString::find(const UString& f, int pos) const
1435 {
1436     int fsz = f.size();
1437
1438     if (pos < 0)
1439         pos = 0;
1440
1441     if (fsz == 1) {
1442         UChar ch = f[0];
1443         const UChar* end = data() + size();
1444         for (const UChar* c = data() + pos; c < end; c++) {
1445             if (*c == ch)
1446                 return static_cast<int>(c - data());
1447         }
1448         return -1;
1449     }
1450
1451     int sz = size();
1452     if (sz < fsz)
1453         return -1;
1454     if (fsz == 0)
1455         return pos;
1456     const UChar* end = data() + sz - fsz;
1457     int fsizeminusone = (fsz - 1) * sizeof(UChar);
1458     const UChar* fdata = f.data();
1459     unsigned short fchar = fdata[0];
1460     ++fdata;
1461     for (const UChar* c = data() + pos; c <= end; c++) {
1462         if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone))
1463             return static_cast<int>(c - data());
1464     }
1465
1466     return -1;
1467 }
1468
1469 int UString::find(UChar ch, int pos) const
1470 {
1471     if (pos < 0)
1472         pos = 0;
1473     const UChar* end = data() + size();
1474     for (const UChar* c = data() + pos; c < end; c++) {
1475         if (*c == ch)
1476             return static_cast<int>(c - data());
1477     }
1478
1479     return -1;
1480 }
1481
1482 int UString::rfind(const UString& f, int pos) const
1483 {
1484     int sz = size();
1485     int fsz = f.size();
1486     if (sz < fsz)
1487         return -1;
1488     if (pos < 0)
1489         pos = 0;
1490     if (pos > sz - fsz)
1491         pos = sz - fsz;
1492     if (fsz == 0)
1493         return pos;
1494     int fsizeminusone = (fsz - 1) * sizeof(UChar);
1495     const UChar* fdata = f.data();
1496     for (const UChar* c = data() + pos; c >= data(); c--) {
1497         if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
1498             return static_cast<int>(c - data());
1499     }
1500
1501     return -1;
1502 }
1503
1504 int UString::rfind(UChar ch, int pos) const
1505 {
1506     if (isEmpty())
1507         return -1;
1508     if (pos + 1 >= size())
1509         pos = size() - 1;
1510     for (const UChar* c = data() + pos; c >= data(); c--) {
1511         if (*c == ch)
1512             return static_cast<int>(c - data());
1513     }
1514
1515     return -1;
1516 }
1517
1518 UString UString::substr(int pos, int len) const
1519 {
1520     int s = size();
1521
1522     if (pos < 0)
1523         pos = 0;
1524     else if (pos >= s)
1525         pos = s;
1526     if (len < 0)
1527         len = s;
1528     if (pos + len >= s)
1529         len = s - pos;
1530
1531     if (pos == 0 && len == s)
1532         return *this;
1533
1534     return UString(Rep::create(m_rep, pos, len));
1535 }
1536
1537 bool operator==(const UString& s1, const UString& s2)
1538 {
1539     int size = s1.size();
1540     switch (size) {
1541         case 0:
1542             return !s2.size();
1543         case 1:
1544             return s2.size() == 1 && s1.data()[0] == s2.data()[0];
1545         default:
1546             return s2.size() == size && memcmp(s1.data(), s2.data(), size * sizeof(UChar)) == 0;
1547     }
1548 }
1549
1550 bool operator==(const UString& s1, const char *s2)
1551 {
1552     if (s2 == 0)
1553         return s1.isEmpty();
1554
1555     const UChar* u = s1.data();
1556     const UChar* uend = u + s1.size();
1557     while (u != uend && *s2) {
1558         if (u[0] != (unsigned char)*s2)
1559             return false;
1560         s2++;
1561         u++;
1562     }
1563
1564     return u == uend && *s2 == 0;
1565 }
1566
1567 bool operator<(const UString& s1, const UString& s2)
1568 {
1569     const int l1 = s1.size();
1570     const int l2 = s2.size();
1571     const int lmin = l1 < l2 ? l1 : l2;
1572     const UChar* c1 = s1.data();
1573     const UChar* c2 = s2.data();
1574     int l = 0;
1575     while (l < lmin && *c1 == *c2) {
1576         c1++;
1577         c2++;
1578         l++;
1579     }
1580     if (l < lmin)
1581         return (c1[0] < c2[0]);
1582
1583     return (l1 < l2);
1584 }
1585
1586 bool operator>(const UString& s1, const UString& s2)
1587 {
1588     const int l1 = s1.size();
1589     const int l2 = s2.size();
1590     const int lmin = l1 < l2 ? l1 : l2;
1591     const UChar* c1 = s1.data();
1592     const UChar* c2 = s2.data();
1593     int l = 0;
1594     while (l < lmin && *c1 == *c2) {
1595         c1++;
1596         c2++;
1597         l++;
1598     }
1599     if (l < lmin)
1600         return (c1[0] > c2[0]);
1601
1602     return (l1 > l2);
1603 }
1604
1605 int compare(const UString& s1, const UString& s2)
1606 {
1607     const int l1 = s1.size();
1608     const int l2 = s2.size();
1609     const int lmin = l1 < l2 ? l1 : l2;
1610     const UChar* c1 = s1.data();
1611     const UChar* c2 = s2.data();
1612     int l = 0;
1613     while (l < lmin && *c1 == *c2) {
1614         c1++;
1615         c2++;
1616         l++;
1617     }
1618
1619     if (l < lmin)
1620         return (c1[0] > c2[0]) ? 1 : -1;
1621
1622     if (l1 == l2)
1623         return 0;
1624
1625     return (l1 > l2) ? 1 : -1;
1626 }
1627
1628 bool equal(const UString::Rep* r, const UString::Rep* b)
1629 {
1630     int length = r->len;
1631     if (length != b->len)
1632         return false;
1633     const UChar* d = r->data();
1634     const UChar* s = b->data();
1635     for (int i = 0; i != length; ++i) {
1636         if (d[i] != s[i])
1637             return false;
1638     }
1639     return true;
1640 }
1641
1642 CString UString::UTF8String(bool strict) const
1643 {
1644     // Allocate a buffer big enough to hold all the characters.
1645     const int length = size();
1646     Vector<char, 1024> buffer(length * 3);
1647
1648     // Convert to runs of 8-bit characters.
1649     char* p = buffer.data();
1650     const UChar* d = reinterpret_cast<const UChar*>(&data()[0]);
1651     ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict);
1652     if (result != conversionOK)
1653         return CString();
1654
1655     return CString(buffer.data(), p - buffer.data());
1656 }
1657
// Points this string at the rep returned by Rep::null(), making it the null string.
// For use in error handling code paths -- having this not be inlined helps avoid PIC branches to fetch the global on Mac OS X.
NEVER_INLINE void UString::makeNull()
{
    m_rep = &Rep::null();
}
1663
// Returns a pointer to the rep returned by Rep::null().
// For use in error handling code paths -- having this not be inlined helps avoid PIC branches to fetch the global on Mac OS X.
NEVER_INLINE UString::Rep* UString::nullRep()
{
    return &Rep::null();
}
1669
1670 } // namespace JSC