wtf/text/StringImpl.cpp

   1 /*
   2  * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
   3  *           (C) 1999 Antti Koivisto (koivisto@kde.org)
   4  *           (C) 2001 Dirk Mueller ( mueller@kde.org )
   5  * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
   6  * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
   7  *
   8  * This library is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Library General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2 of the License, or (at your option) any later version.
  12  *
  13  * This library is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Library General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Library General Public License
  19  * along with this library; see the file COPYING.LIB.  If not, write to
  20  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  21  * Boston, MA 02110-1301, USA.
  22  *
  23  */
  24
  25 #include "config.h"
  26 #include "StringImpl.h"
  27
  28 #include "AtomicString.h"
  29 #include "StringBuffer.h"
  30 #include "StringHash.h"
  31 #include <wtf/StdLibExtras.h>
  32 #include <wtf/WTFThreadData.h>
  33
  34 using namespace std;
  35
  36 namespace WTF {
  37
  38 using namespace Unicode;
  39
  40 static const unsigned minLengthToShare = 20;
  41
  42 COMPILE_ASSERT(sizeof(StringImpl) == 2 * sizeof(int) + 3 * sizeof(void*), StringImpl_should_stay_small);
  43
  44 StringImpl::~StringImpl()
  45 {
  46     ASSERT(!isStatic());
  47
  48     if (isAtomic())
  49         AtomicString::remove(this);
  50 #if USE(JSC)
  51     if (isIdentifier()) {
  52         if (!wtfThreadData().currentIdentifierTable()->remove(this))
  53             CRASH();
  54     }
  55 #endif
  56
  57     BufferOwnership ownership = bufferOwnership();
  58     if (ownership != BufferInternal) {
  59         if (ownership == BufferOwned) {
  60             ASSERT(!m_sharedBuffer);
  61             ASSERT(m_data);
  62             fastFree(const_cast<UChar*>(m_data));
  63         } else if (ownership == BufferSubstring) {
  64             ASSERT(m_substringBuffer);
  65             m_substringBuffer->deref();
  66         } else {
  67             ASSERT(ownership == BufferShared);
  68             ASSERT(m_sharedBuffer);
  69             m_sharedBuffer->deref();
  70         }
  71     }
  72 }
  73
  74 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data)
  75 {
  76     if (!length) {
  77         data = 0;
  78         return empty();
  79     }
  80
  81     // Allocate a single buffer large enough to contain the StringImpl
  82     // struct as well as the data which it contains. This removes one
  83     // heap allocation from this call.
  84     if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar)))
  85         CRASH();
  86     size_t size = sizeof(StringImpl) + length * sizeof(UChar);
  87     StringImpl* string = static_cast<StringImpl*>(fastMalloc(size));
  88
  89     data = reinterpret_cast<UChar*>(string + 1);
  90     return adoptRef(new (string) StringImpl(length));
  91 }
  92
  93 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length)
  94 {
  95     if (!characters || !length)
  96         return empty();
  97
  98     UChar* data;
  99     RefPtr<StringImpl> string = createUninitialized(length, data);
 100     memcpy(data, characters, length * sizeof(UChar));
 101     return string.release();
 102 }
 103
 104 PassRefPtr<StringImpl> StringImpl::create(const char* characters, unsigned length)
 105 {
 106     if (!characters || !length)
 107         return empty();
 108
 109     UChar* data;
 110     RefPtr<StringImpl> string = createUninitialized(length, data);
 111     for (unsigned i = 0; i != length; ++i) {
 112         unsigned char c = characters[i];
 113         data[i] = c;
 114     }
 115     return string.release();
 116 }
 117
 118 PassRefPtr<StringImpl> StringImpl::create(const char* string)
 119 {
 120     if (!string)
 121         return empty();
 122     size_t length = strlen(string);
 123     if (length > numeric_limits<unsigned>::max())
 124         CRASH();
 125     return create(string, length);
 126 }
 127
 128 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length, PassRefPtr<SharedUChar> sharedBuffer)
 129 {
 130     ASSERT(characters);
 131     ASSERT(minLengthToShare && length >= minLengthToShare);
 132     return adoptRef(new StringImpl(characters, length, sharedBuffer));
 133 }
 134
 135 SharedUChar* StringImpl::sharedBuffer()
 136 {
 137     if (m_length < minLengthToShare)
 138         return 0;
 139     // All static strings are smaller that the minimim length to share.
 140     ASSERT(!isStatic());
 141
 142     BufferOwnership ownership = bufferOwnership();
 143
 144     if (ownership == BufferInternal)
 145         return 0;
 146     if (ownership == BufferSubstring)
 147         return m_substringBuffer->sharedBuffer();
 148     if (ownership == BufferOwned) {
 149         ASSERT(!m_sharedBuffer);
 150         m_sharedBuffer = SharedUChar::create(new SharableUChar(m_data)).leakRef();
 151         m_refCountAndFlags = (m_refCountAndFlags & ~s_refCountMaskBufferOwnership) | BufferShared;
 152     }
 153
 154     ASSERT(bufferOwnership() == BufferShared);
 155     ASSERT(m_sharedBuffer);
 156     return m_sharedBuffer;
 157 }
 158
 159 bool StringImpl::containsOnlyWhitespace()
 160 {
 161     // FIXME: The definition of whitespace here includes a number of characters
 162     // that are not whitespace from the point of view of RenderText; I wonder if
 163     // that's a problem in practice.
 164     for (unsigned i = 0; i < m_length; i++)
 165         if (!isASCIISpace(m_data[i]))
 166             return false;
 167     return true;
 168 }
 169
 170 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length)
 171 {
 172     if (start >= m_length)
 173         return empty();
 174     unsigned maxLength = m_length - start;
 175     if (length >= maxLength) {
 176         if (!start)
 177             return this;
 178         length = maxLength;
 179     }
 180     return create(m_data + start, length);
 181 }
 182
 183 UChar32 StringImpl::characterStartingAt(unsigned i)
 184 {
 185     if (U16_IS_SINGLE(m_data[i]))
 186         return m_data[i];
 187     if (i + 1 < m_length && U16_IS_LEAD(m_data[i]) && U16_IS_TRAIL(m_data[i + 1]))
 188         return U16_GET_SUPPLEMENTARY(m_data[i], m_data[i + 1]);
 189     return 0;
 190 }
 191
 192 PassRefPtr<StringImpl> StringImpl::lower()
 193 {
 194     // Note: This is a hot function in the Dromaeo benchmark, specifically the
 195     // no-op code path up through the first 'return' statement.
 196
 197     // First scan the string for uppercase and non-ASCII characters:
 198     UChar ored = 0;
 199     bool noUpper = true;
 200     const UChar *end = m_data + m_length;
 201     for (const UChar* chp = m_data; chp != end; chp++) {
 202         if (UNLIKELY(isASCIIUpper(*chp)))
 203             noUpper = false;
 204         ored |= *chp;
 205     }
 206
 207     // Nothing to do if the string is all ASCII with no uppercase.
 208     if (noUpper && !(ored & ~0x7F))
 209         return this;
 210
 211     if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
 212         CRASH();
 213     int32_t length = m_length;
 214
 215     UChar* data;
 216     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
 217
 218     if (!(ored & ~0x7F)) {
 219         // Do a faster loop for the case where all the characters are ASCII.
 220         for (int i = 0; i < length; i++) {
 221             UChar c = m_data[i];
 222             data[i] = toASCIILower(c);
 223         }
 224         return newImpl;
 225     }
 226
 227     // Do a slower implementation for cases that include non-ASCII characters.
 228     bool error;
 229     int32_t realLength = Unicode::toLower(data, length, m_data, m_length, &error);
 230     if (!error && realLength == length)
 231         return newImpl;
 232     newImpl = createUninitialized(realLength, data);
 233     Unicode::toLower(data, realLength, m_data, m_length, &error);
 234     if (error)
 235         return this;
 236     return newImpl;
 237 }
 238
 239 PassRefPtr<StringImpl> StringImpl::upper()
 240 {
 241     // This function could be optimized for no-op cases the way lower() is,
 242     // but in empirical testing, few actual calls to upper() are no-ops, so
 243     // it wouldn't be worth the extra time for pre-scanning.
 244     UChar* data;
 245     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
 246
 247     if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
 248         CRASH();
 249     int32_t length = m_length;
 250
 251     // Do a faster loop for the case where all the characters are ASCII.
 252     UChar ored = 0;
 253     for (int i = 0; i < length; i++) {
 254         UChar c = m_data[i];
 255         ored |= c;
 256         data[i] = toASCIIUpper(c);
 257     }
 258     if (!(ored & ~0x7F))
 259         return newImpl.release();
 260
 261     // Do a slower implementation for cases that include non-ASCII characters.
 262     bool error;
 263     int32_t realLength = Unicode::toUpper(data, length, m_data, m_length, &error);
 264     if (!error && realLength == length)
 265         return newImpl;
 266     newImpl = createUninitialized(realLength, data);
 267     Unicode::toUpper(data, realLength, m_data, m_length, &error);
 268     if (error)
 269         return this;
 270     return newImpl.release();
 271 }
 272
 273 PassRefPtr<StringImpl> StringImpl::secure(UChar character, LastCharacterBehavior behavior)
 274 {
 275     if (!m_length)
 276         return this;
 277
 278     UChar* data;
 279     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
 280     unsigned lastCharacterIndex = m_length - 1;
 281     for (unsigned i = 0; i < lastCharacterIndex; ++i)
 282         data[i] = character;
 283     data[lastCharacterIndex] = (behavior == ObscureLastCharacter) ? character : m_data[lastCharacterIndex];
 284     return newImpl.release();
 285 }
 286
 287 PassRefPtr<StringImpl> StringImpl::foldCase()
 288 {
 289     UChar* data;
 290     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
 291
 292     if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
 293         CRASH();
 294     int32_t length = m_length;
 295
 296     // Do a faster loop for the case where all the characters are ASCII.
 297     UChar ored = 0;
 298     for (int32_t i = 0; i < length; i++) {
 299         UChar c = m_data[i];
 300         ored |= c;
 301         data[i] = toASCIILower(c);
 302     }
 303     if (!(ored & ~0x7F))
 304         return newImpl.release();
 305
 306     // Do a slower implementation for cases that include non-ASCII characters.
 307     bool error;
 308     int32_t realLength = Unicode::foldCase(data, length, m_data, m_length, &error);
 309     if (!error && realLength == length)
 310         return newImpl.release();
 311     newImpl = createUninitialized(realLength, data);
 312     Unicode::foldCase(data, realLength, m_data, m_length, &error);
 313     if (error)
 314         return this;
 315     return newImpl.release();
 316 }
 317
 318 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace()
 319 {
 320     if (!m_length)
 321         return empty();
 322
 323     unsigned start = 0;
 324     unsigned end = m_length - 1;
 325
 326     // skip white space from start
 327     while (start <= end && isSpaceOrNewline(m_data[start]))
 328         start++;
 329
 330     // only white space
 331     if (start > end)
 332         return empty();
 333
 334     // skip white space from end
 335     while (end && isSpaceOrNewline(m_data[end]))
 336         end--;
 337
 338     if (!start && end == m_length - 1)
 339         return this;
 340     return create(m_data + start, end + 1 - start);
 341 }
 342
 343 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch)
 344 {
 345     const UChar* from = m_data;
 346     const UChar* fromend = from + m_length;
 347
 348     // Assume the common case will not remove any characters
 349     while (from != fromend && !findMatch(*from))
 350         from++;
 351     if (from == fromend)
 352         return this;
 353
 354     StringBuffer data(m_length);
 355     UChar* to = data.characters();
 356     unsigned outc = from - m_data;
 357
 358     if (outc)
 359         memcpy(to, m_data, outc * sizeof(UChar));
 360
 361     while (true) {
 362         while (from != fromend && findMatch(*from))
 363             from++;
 364         while (from != fromend && !findMatch(*from))
 365             to[outc++] = *from++;
 366         if (from == fromend)
 367             break;
 368     }
 369
 370     data.shrink(outc);
 371
 372     return adopt(data);
 373 }
 374
 375 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace()
 376 {
 377     StringBuffer data(m_length);
 378
 379     const UChar* from = m_data;
 380     const UChar* fromend = from + m_length;
 381     int outc = 0;
 382     bool changedToSpace = false;
 383
 384     UChar* to = data.characters();
 385
 386     while (true) {
 387         while (from != fromend && isSpaceOrNewline(*from)) {
 388             if (*from != ' ')
 389                 changedToSpace = true;
 390             from++;
 391         }
 392         while (from != fromend && !isSpaceOrNewline(*from))
 393             to[outc++] = *from++;
 394         if (from != fromend)
 395             to[outc++] = ' ';
 396         else
 397             break;
 398     }
 399
 400     if (outc > 0 && to[outc - 1] == ' ')
 401         outc--;
 402
 403     if (static_cast<unsigned>(outc) == m_length && !changedToSpace)
 404         return this;
 405
 406     data.shrink(outc);
 407
 408     return adopt(data);
 409 }
 410
 411 int StringImpl::toIntStrict(bool* ok, int base)
 412 {
 413     return charactersToIntStrict(m_data, m_length, ok, base);
 414 }
 415
 416 unsigned StringImpl::toUIntStrict(bool* ok, int base)
 417 {
 418     return charactersToUIntStrict(m_data, m_length, ok, base);
 419 }
 420
 421 int64_t StringImpl::toInt64Strict(bool* ok, int base)
 422 {
 423     return charactersToInt64Strict(m_data, m_length, ok, base);
 424 }
 425
 426 uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
 427 {
 428     return charactersToUInt64Strict(m_data, m_length, ok, base);
 429 }
 430
 431 intptr_t StringImpl::toIntPtrStrict(bool* ok, int base)
 432 {
 433     return charactersToIntPtrStrict(m_data, m_length, ok, base);
 434 }
 435
 436 int StringImpl::toInt(bool* ok)
 437 {
 438     return charactersToInt(m_data, m_length, ok);
 439 }
 440
 441 unsigned StringImpl::toUInt(bool* ok)
 442 {
 443     return charactersToUInt(m_data, m_length, ok);
 444 }
 445
 446 int64_t StringImpl::toInt64(bool* ok)
 447 {
 448     return charactersToInt64(m_data, m_length, ok);
 449 }
 450
 451 uint64_t StringImpl::toUInt64(bool* ok)
 452 {
 453     return charactersToUInt64(m_data, m_length, ok);
 454 }
 455
 456 intptr_t StringImpl::toIntPtr(bool* ok)
 457 {
 458     return charactersToIntPtr(m_data, m_length, ok);
 459 }
 460
 461 double StringImpl::toDouble(bool* ok, bool* didReadNumber)
 462 {
 463     return charactersToDouble(m_data, m_length, ok, didReadNumber);
 464 }
 465
 466 float StringImpl::toFloat(bool* ok, bool* didReadNumber)
 467 {
 468     return charactersToFloat(m_data, m_length, ok, didReadNumber);
 469 }
 470
 471 static bool equal(const UChar* a, const char* b, int length)
 472 {
 473     ASSERT(length >= 0);
 474     while (length--) {
 475         unsigned char bc = *b++;
 476         if (*a++ != bc)
 477             return false;
 478     }
 479     return true;
 480 }
 481
 482 bool equalIgnoringCase(const UChar* a, const char* b, unsigned length)
 483 {
 484     while (length--) {
 485         unsigned char bc = *b++;
 486         if (foldCase(*a++) != foldCase(bc))
 487             return false;
 488     }
 489     return true;
 490 }
 491
 492 static inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length)
 493 {
 494     ASSERT(length >= 0);
 495     return umemcasecmp(a, b, length) == 0;
 496 }
 497
 498 int codePointCompare(const StringImpl* s1, const StringImpl* s2)
 499 {
 500     const unsigned l1 = s1 ? s1->length() : 0;
 501     const unsigned l2 = s2 ? s2->length() : 0;
 502     const unsigned lmin = l1 < l2 ? l1 : l2;
 503     const UChar* c1 = s1 ? s1->characters() : 0;
 504     const UChar* c2 = s2 ? s2->characters() : 0;
 505     unsigned pos = 0;
 506     while (pos < lmin && *c1 == *c2) {
 507         c1++;
 508         c2++;
 509         pos++;
 510     }
 511
 512     if (pos < lmin)
 513         return (c1[0] > c2[0]) ? 1 : -1;
 514
 515     if (l1 == l2)
 516         return 0;
 517
 518     return (l1 > l2) ? 1 : -1;
 519 }
 520
 521 size_t StringImpl::find(UChar c, unsigned start)
 522 {
 523     return WTF::find(m_data, m_length, c, start);
 524 }
 525
 526 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start)
 527 {
 528     return WTF::find(m_data, m_length, matchFunction, start);
 529 }
 530
 531 size_t StringImpl::find(const char* matchString, unsigned index)
 532 {
 533     // Check for null or empty string to match against
 534     if (!matchString)
 535         return notFound;
 536     size_t matchStringLength = strlen(matchString);
 537     if (matchStringLength > numeric_limits<unsigned>::max())
 538         CRASH();
 539     unsigned matchLength = matchStringLength;
 540     if (!matchLength)
 541         return min(index, length());
 542
 543     // Optimization 1: fast case for strings of length 1.
 544     if (matchLength == 1)
 545         return WTF::find(characters(), length(), *(const unsigned char*)matchString, index);
 546
 547     // Check index & matchLength are in range.
 548     if (index > length())
 549         return notFound;
 550     unsigned searchLength = length() - index;
 551     if (matchLength > searchLength)
 552         return notFound;
 553     // delta is the number of additional times to test; delta == 0 means test only once.
 554     unsigned delta = searchLength - matchLength;
 555
 556     const UChar* searchCharacters = characters() + index;
 557     const unsigned char* matchCharacters = (const unsigned char*)matchString;
 558
 559     // Optimization 2: keep a running hash of the strings,
 560     // only call memcmp if the hashes match.
 561     unsigned searchHash = 0;
 562     unsigned matchHash = 0;
 563     for (unsigned i = 0; i < matchLength; ++i) {
 564         searchHash += searchCharacters[i];
 565         matchHash += matchCharacters[i];
 566     }
 567
 568     unsigned i = 0;
 569     // keep looping until we match
 570     while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) {
 571         if (i == delta)
 572             return notFound;
 573         searchHash += searchCharacters[i + matchLength];
 574         searchHash -= searchCharacters[i];
 575         ++i;
 576     }
 577     return index + i;
 578 }
 579
 580 size_t StringImpl::findIgnoringCase(const char* matchString, unsigned index)
 581 {
 582     // Check for null or empty string to match against
 583     if (!matchString)
 584         return notFound;
 585     size_t matchStringLength = strlen(matchString);
 586     if (matchStringLength > numeric_limits<unsigned>::max())
 587         CRASH();
 588     unsigned matchLength = matchStringLength;
 589     if (!matchLength)
 590         return min(index, length());
 591
 592     // Check index & matchLength are in range.
 593     if (index > length())
 594         return notFound;
 595     unsigned searchLength = length() - index;
 596     if (matchLength > searchLength)
 597         return notFound;
 598     // delta is the number of additional times to test; delta == 0 means test only once.
 599     unsigned delta = searchLength - matchLength;
 600
 601     const UChar* searchCharacters = characters() + index;
 602
 603     unsigned i = 0;
 604     // keep looping until we match
 605     while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) {
 606         if (i == delta)
 607             return notFound;
 608         ++i;
 609     }
 610     return index + i;
 611 }
 612
 613 size_t StringImpl::find(StringImpl* matchString, unsigned index)
 614 {
 615     // Check for null or empty string to match against
 616     if (!matchString)
 617         return notFound;
 618     unsigned matchLength = matchString->length();
 619     if (!matchLength)
 620         return min(index, length());
 621
 622     // Optimization 1: fast case for strings of length 1.
 623     if (matchLength == 1)
 624         return WTF::find(characters(), length(), matchString->characters()[0], index);
 625
 626     // Check index & matchLength are in range.
 627     if (index > length())
 628         return notFound;
 629     unsigned searchLength = length() - index;
 630     if (matchLength > searchLength)
 631         return notFound;
 632     // delta is the number of additional times to test; delta == 0 means test only once.
 633     unsigned delta = searchLength - matchLength;
 634
 635     const UChar* searchCharacters = characters() + index;
 636     const UChar* matchCharacters = matchString->characters();
 637
 638     // Optimization 2: keep a running hash of the strings,
 639     // only call memcmp if the hashes match.
 640     unsigned searchHash = 0;
 641     unsigned matchHash = 0;
 642     for (unsigned i = 0; i < matchLength; ++i) {
 643         searchHash += searchCharacters[i];
 644         matchHash += matchCharacters[i];
 645     }
 646
 647     unsigned i = 0;
 648     // keep looping until we match
 649     while (searchHash != matchHash || memcmp(searchCharacters + i, matchCharacters, matchLength * sizeof(UChar))) {
 650         if (i == delta)
 651             return notFound;
 652         searchHash += searchCharacters[i + matchLength];
 653         searchHash -= searchCharacters[i];
 654         ++i;
 655     }
 656     return index + i;
 657 }
 658
 659 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index)
 660 {
 661     // Check for null or empty string to match against
 662     if (!matchString)
 663         return notFound;
 664     unsigned matchLength = matchString->length();
 665     if (!matchLength)
 666         return min(index, length());
 667
 668     // Check index & matchLength are in range.
 669     if (index > length())
 670         return notFound;
 671     unsigned searchLength = length() - index;
 672     if (matchLength > searchLength)
 673         return notFound;
 674     // delta is the number of additional times to test; delta == 0 means test only once.
 675     unsigned delta = searchLength - matchLength;
 676
 677     const UChar* searchCharacters = characters() + index;
 678     const UChar* matchCharacters = matchString->characters();
 679
 680     unsigned i = 0;
 681     // keep looping until we match
 682     while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) {
 683         if (i == delta)
 684             return notFound;
 685         ++i;
 686     }
 687     return index + i;
 688 }
 689
 690 size_t StringImpl::reverseFind(UChar c, unsigned index)
 691 {
 692     return WTF::reverseFind(m_data, m_length, c, index);
 693 }
 694
 695 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index)
 696 {
 697     // Check for null or empty string to match against
 698     if (!matchString)
 699         return notFound;
 700     unsigned matchLength = matchString->length();
 701     if (!matchLength)
 702         return min(index, length());
 703
 704     // Optimization 1: fast case for strings of length 1.
 705     if (matchLength == 1)
 706         return WTF::reverseFind(characters(), length(), matchString->characters()[0], index);
 707
 708     // Check index & matchLength are in range.
 709     if (matchLength > length())
 710         return notFound;
 711     // delta is the number of additional times to test; delta == 0 means test only once.
 712     unsigned delta = min(index, length() - matchLength);
 713
 714     const UChar *searchCharacters = characters();
 715     const UChar *matchCharacters = matchString->characters();
 716
 717     // Optimization 2: keep a running hash of the strings,
 718     // only call memcmp if the hashes match.
 719     unsigned searchHash = 0;
 720     unsigned matchHash = 0;
 721     for (unsigned i = 0; i < matchLength; ++i) {
 722         searchHash += searchCharacters[delta + i];
 723         matchHash += matchCharacters[i];
 724     }
 725
 726     // keep looping until we match
 727     while (searchHash != matchHash || memcmp(searchCharacters + delta, matchCharacters, matchLength * sizeof(UChar))) {
 728         if (!delta)
 729             return notFound;
 730         delta--;
 731         searchHash -= searchCharacters[delta + matchLength];
 732         searchHash += searchCharacters[delta];
 733     }
 734     return delta;
 735 }
 736
 737 size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned index)
 738 {
 739     // Check for null or empty string to match against
 740     if (!matchString)
 741         return notFound;
 742     unsigned matchLength = matchString->length();
 743     if (!matchLength)
 744         return min(index, length());
 745
 746     // Check index & matchLength are in range.
 747     if (matchLength > length())
 748         return notFound;
 749     // delta is the number of additional times to test; delta == 0 means test only once.
 750     unsigned delta = min(index, length() - matchLength);
 751
 752     const UChar *searchCharacters = characters();
 753     const UChar *matchCharacters = matchString->characters();
 754
 755     // keep looping until we match
 756     while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) {
 757         if (!delta)
 758             return notFound;
 759         delta--;
 760     }
 761     return delta;
 762 }
 763
 764 bool StringImpl::endsWith(StringImpl* matchString, bool caseSensitive)
 765 {
 766     ASSERT(matchString);
 767     if (m_length >= matchString->m_length) {
 768         unsigned start = m_length - matchString->m_length;
 769         return (caseSensitive ? find(matchString, start) : findIgnoringCase(matchString, start)) == start;
 770     }
 771     return false;
 772 }
 773
 774 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC)
 775 {
 776     if (oldC == newC)
 777         return this;
 778     unsigned i;
 779     for (i = 0; i != m_length; ++i)
 780         if (m_data[i] == oldC)
 781             break;
 782     if (i == m_length)
 783         return this;
 784
 785     UChar* data;
 786     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
 787
 788     for (i = 0; i != m_length; ++i) {
 789         UChar ch = m_data[i];
 790         if (ch == oldC)
 791             ch = newC;
 792         data[i] = ch;
 793     }
 794     return newImpl.release();
 795 }
 796
 797 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str)
 798 {
 799     position = min(position, length());
 800     lengthToReplace = min(lengthToReplace, length() - position);
 801     unsigned lengthToInsert = str ? str->length() : 0;
 802     if (!lengthToReplace && !lengthToInsert)
 803         return this;
 804     UChar* data;
 805
 806     if ((length() - lengthToReplace) >= (numeric_limits<unsigned>::max() - lengthToInsert))
 807         CRASH();
 808
 809     RefPtr<StringImpl> newImpl =
 810         createUninitialized(length() - lengthToReplace + lengthToInsert, data);
 811     memcpy(data, characters(), position * sizeof(UChar));
 812     if (str)
 813         memcpy(data + position, str->characters(), lengthToInsert * sizeof(UChar));
 814     memcpy(data + position + lengthToInsert, characters() + position + lengthToReplace,
 815         (length() - position - lengthToReplace) * sizeof(UChar));
 816     return newImpl.release();
 817 }
 818
 819 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement)
 820 {
 821     if (!replacement)
 822         return this;
 823
 824     unsigned repStrLength = replacement->length();
 825     size_t srcSegmentStart = 0;
 826     unsigned matchCount = 0;
 827
 828     // Count the matches
 829     while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) {
 830         ++matchCount;
 831         ++srcSegmentStart;
 832     }
 833
 834     // If we have 0 matches, we don't have to do any more work
 835     if (!matchCount)
 836         return this;
 837
 838     if (repStrLength && matchCount > numeric_limits<unsigned>::max() / repStrLength)
 839         CRASH();
 840
 841     unsigned replaceSize = matchCount * repStrLength;
 842     unsigned newSize = m_length - matchCount;
 843     if (newSize >= (numeric_limits<unsigned>::max() - replaceSize))
 844         CRASH();
 845
 846     newSize += replaceSize;
 847
 848     UChar* data;
 849     RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
 850
 851     // Construct the new data
 852     size_t srcSegmentEnd;
 853     unsigned srcSegmentLength;
 854     srcSegmentStart = 0;
 855     unsigned dstOffset = 0;
 856
 857     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {
 858         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
 859         memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
 860         dstOffset += srcSegmentLength;
 861         memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar));
 862         dstOffset += repStrLength;
 863         srcSegmentStart = srcSegmentEnd + 1;
 864     }
 865
 866     srcSegmentLength = m_length - srcSegmentStart;
 867     memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
 868
 869     ASSERT(dstOffset + srcSegmentLength == newImpl->length());
 870
 871     return newImpl.release();
 872 }
 873
 874 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement)
 875 {
 876     if (!pattern || !replacement)
 877         return this;
 878
 879     unsigned patternLength = pattern->length();
 880     if (!patternLength)
 881         return this;
 882
 883     unsigned repStrLength = replacement->length();
 884     size_t srcSegmentStart = 0;
 885     unsigned matchCount = 0;
 886
 887     // Count the matches
 888     while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) {
 889         ++matchCount;
 890         srcSegmentStart += patternLength;
 891     }
 892
 893     // If we have 0 matches, we don't have to do any more work
 894     if (!matchCount)
 895         return this;
 896
 897     unsigned newSize = m_length - matchCount * patternLength;
 898     if (repStrLength && matchCount > numeric_limits<unsigned>::max() / repStrLength)
 899         CRASH();
 900
 901     if (newSize > (numeric_limits<unsigned>::max() - matchCount * repStrLength))
 902         CRASH();
 903
 904     newSize += matchCount * repStrLength;
 905
 906     UChar* data;
 907     RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
 908
 909     // Construct the new data
 910     size_t srcSegmentEnd;
 911     unsigned srcSegmentLength;
 912     srcSegmentStart = 0;
 913     unsigned dstOffset = 0;
 914
 915     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {
 916         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
 917         memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
 918         dstOffset += srcSegmentLength;
 919         memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar));
 920         dstOffset += repStrLength;
 921         srcSegmentStart = srcSegmentEnd + patternLength;
 922     }
 923
 924     srcSegmentLength = m_length - srcSegmentStart;
 925     memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
 926
 927     ASSERT(dstOffset + srcSegmentLength == newImpl->length());
 928
 929     return newImpl.release();
 930 }
 931
 932 bool equal(const StringImpl* a, const StringImpl* b)
 933 {
 934     return StringHash::equal(a, b);
 935 }
 936
 937 bool equal(const StringImpl* a, const char* b)
 938 {
 939     if (!a)
 940         return !b;
 941     if (!b)
 942         return !a;
 943
 944     unsigned length = a->length();
 945     const UChar* as = a->characters();
 946     for (unsigned i = 0; i != length; ++i) {
 947         unsigned char bc = b[i];
 948         if (!bc)
 949             return false;
 950         if (as[i] != bc)
 951             return false;
 952     }
 953
 954     return !b[length];
 955 }
 956
 957 bool equalIgnoringCase(StringImpl* a, StringImpl* b)
 958 {
 959     return CaseFoldingHash::equal(a, b);
 960 }
 961
 962 bool equalIgnoringCase(StringImpl* a, const char* b)
 963 {
 964     if (!a)
 965         return !b;
 966     if (!b)
 967         return !a;
 968
 969     unsigned length = a->length();
 970     const UChar* as = a->characters();
 971
 972     // Do a faster loop for the case where all the characters are ASCII.
 973     UChar ored = 0;
 974     bool equal = true;
 975     for (unsigned i = 0; i != length; ++i) {
 976         char bc = b[i];
 977         if (!bc)
 978             return false;
 979         UChar ac = as[i];
 980         ored |= ac;
 981         equal = equal && (toASCIILower(ac) == toASCIILower(bc));
 982     }
 983
 984     // Do a slower implementation for cases that include non-ASCII characters.
 985     if (ored & ~0x7F) {
 986         equal = true;
 987         for (unsigned i = 0; i != length; ++i) {
 988             unsigned char bc = b[i];
 989             equal = equal && (foldCase(as[i]) == foldCase(bc));
 990         }
 991     }
 992
 993     return equal && !b[length];
 994 }
 995
 996 bool equalIgnoringNullity(StringImpl* a, StringImpl* b)
 997 {
 998     if (StringHash::equal(a, b))
 999         return true;
1000     if (!a && b && !b->length())
1001         return true;
1002     if (!b && a && !a->length())
1003         return true;
1004
1005     return false;
1006 }
1007
1008 WTF::Unicode::Direction StringImpl::defaultWritingDirection(bool* hasStrongDirectionality)
1009 {
1010     for (unsigned i = 0; i < m_length; ++i) {
1011         WTF::Unicode::Direction charDirection = WTF::Unicode::direction(m_data[i]);
1012         if (charDirection == WTF::Unicode::LeftToRight) {
1013             if (hasStrongDirectionality)
1014                 *hasStrongDirectionality = true;
1015             return WTF::Unicode::LeftToRight;
1016         }
1017         if (charDirection == WTF::Unicode::RightToLeft || charDirection == WTF::Unicode::RightToLeftArabic) {
1018             if (hasStrongDirectionality)
1019                 *hasStrongDirectionality = true;
1020             return WTF::Unicode::RightToLeft;
1021         }
1022     }
1023     if (hasStrongDirectionality)
1024         *hasStrongDirectionality = false;
1025     return WTF::Unicode::LeftToRight;
1026 }
1027
1028 // This is a hot function because it's used when parsing HTML.
1029 PassRefPtr<StringImpl> StringImpl::createStrippingNullCharactersSlowCase(const UChar* characters, unsigned length)
1030 {
1031     StringBuffer strippedCopy(length);
1032     unsigned strippedLength = 0;
1033     for (unsigned i = 0; i < length; i++) {
1034         if (int c = characters[i])
1035             strippedCopy[strippedLength++] = c;
1036     }
1037     ASSERT(strippedLength < length);  // Only take the slow case when stripping.
1038     strippedCopy.shrink(strippedLength);
1039     return adopt(strippedCopy);
1040 }
1041
1042 PassRefPtr<StringImpl> StringImpl::adopt(StringBuffer& buffer)
1043 {
1044     unsigned length = buffer.length();
1045     if (length == 0)
1046         return empty();
1047     return adoptRef(new StringImpl(buffer.release(), length));
1048 }
1049
1050 int StringImpl::wordCount(int maxWordsToCount)
1051 {
1052     unsigned wordCount = 0;
1053     unsigned i;
1054     bool atWord = false;
1055     for (i = 0; i < m_length; i++) {
1056         if (u_isspace(m_data[i])) {
1057             atWord = false;
1058         } else if (!atWord) {
1059             wordCount++;
1060             if (wordCount >= (unsigned)maxWordsToCount)
1061                 return wordCount;
1062             atWord = true;
1063         }
1064     }
1065     return wordCount;
1066 }
1067
1068 PassRefPtr<StringImpl> StringImpl::createWithTerminatingNullCharacter(const StringImpl& string)
1069 {
1070     // Use createUninitialized instead of 'new StringImpl' so that the string and its buffer
1071     // get allocated in a single memory block.
1072     UChar* data;
1073     unsigned length = string.m_length;
1074     if (length >= numeric_limits<unsigned>::max())
1075         CRASH();
1076     RefPtr<StringImpl> terminatedString = createUninitialized(length + 1, data);
1077     memcpy(data, string.m_data, length * sizeof(UChar));
1078     data[length] = 0;
1079     terminatedString->m_length--;
1080     terminatedString->m_hash = string.m_hash;
1081     terminatedString->m_refCountAndFlags |= s_refCountFlagHasTerminatingNullCharacter;
1082     return terminatedString.release();
1083 }
1084
1085 PassRefPtr<StringImpl> StringImpl::threadsafeCopy() const
1086 {
1087     return create(m_data, m_length);
1088 }
1089
1090 PassRefPtr<StringImpl> StringImpl::crossThreadString()
1091 {
1092     if (SharedUChar* sharedBuffer = this->sharedBuffer())
1093         return adoptRef(new StringImpl(m_data, m_length, sharedBuffer->crossThreadCopy()));
1094
1095     // If no shared buffer is available, create a copy.
1096     return threadsafeCopy();
1097 }
1098
1099 } // namespace WTF