wtf/text/StringImpl.cpp

   1 /*
   2  * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
   3  *           (C) 1999 Antti Koivisto (koivisto@kde.org)
   4  *           (C) 2001 Dirk Mueller ( mueller@kde.org )
   5  * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
   6  * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
   7  *
   8  * This library is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Library General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2 of the License, or (at your option) any later version.
  12  *
  13  * This library is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Library General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Library General Public License
  19  * along with this library; see the file COPYING.LIB.  If not, write to
  20  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  21  * Boston, MA 02110-1301, USA.
  22  *
  23  */
  24
  25 #include "config.h"
  26 #include "StringImpl.h"
  27
  28 #include "AtomicString.h"
  29 #include "StringBuffer.h"
  30 #include "StringHash.h"
  31 #include <wtf/StdLibExtras.h>
  32 #include <wtf/WTFThreadData.h>
  33
  34 using namespace WTF;
  35 using namespace Unicode;
  36 using namespace std;
  37
  38 namespace WebCore {
  39
  40 static const unsigned minLengthToShare = 20;
  41
  42 StringImpl::~StringImpl()
  43 {
  44     ASSERT(!isStatic());
  45
  46     if (isAtomic())
  47         AtomicString::remove(this);
  48 #if USE(JSC)
  49     if (isIdentifier())
  50         wtfThreadData().currentIdentifierTable()->remove(this);
  51 #endif
  52
  53     BufferOwnership ownership = bufferOwnership();
  54     if (ownership != BufferInternal) {
  55         if (ownership == BufferOwned) {
  56             ASSERT(!m_sharedBuffer);
  57             ASSERT(m_data);
  58             fastFree(const_cast<UChar*>(m_data));
  59         } else if (ownership == BufferSubstring) {
  60             ASSERT(m_substringBuffer);
  61             m_substringBuffer->deref();
  62         } else {
  63             ASSERT(ownership == BufferShared);
  64             ASSERT(m_sharedBuffer);
  65             m_sharedBuffer->deref();
  66         }
  67     }
  68 }
  69
  70 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data)
  71 {
  72     if (!length) {
  73         data = 0;
  74         return empty();
  75     }
  76
  77     // Allocate a single buffer large enough to contain the StringImpl
  78     // struct as well as the data which it contains. This removes one
  79     // heap allocation from this call.
  80     if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar)))
  81         CRASH();
  82     size_t size = sizeof(StringImpl) + length * sizeof(UChar);
  83     StringImpl* string = static_cast<StringImpl*>(fastMalloc(size));
  84
  85     data = reinterpret_cast<UChar*>(string + 1);
  86     return adoptRef(new (string) StringImpl(length));
  87 }
  88
  89 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length)
  90 {
  91     if (!characters || !length)
  92         return empty();
  93
  94     UChar* data;
  95     RefPtr<StringImpl> string = createUninitialized(length, data);
  96     memcpy(data, characters, length * sizeof(UChar));
  97     return string.release();
  98 }
  99
 100 PassRefPtr<StringImpl> StringImpl::create(const char* characters, unsigned length)
 101 {
 102     if (!characters || !length)
 103         return empty();
 104
 105     UChar* data;
 106     RefPtr<StringImpl> string = createUninitialized(length, data);
 107     for (unsigned i = 0; i != length; ++i) {
 108         unsigned char c = characters[i];
 109         data[i] = c;
 110     }
 111     return string.release();
 112 }
 113
 114 PassRefPtr<StringImpl> StringImpl::create(const char* string)
 115 {
 116     if (!string)
 117         return empty();
 118     size_t length = strlen(string);
 119     if (length > numeric_limits<unsigned>::max())
 120         CRASH();
 121     return create(string, length);
 122 }
 123
 124 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length, PassRefPtr<SharedUChar> sharedBuffer)
 125 {
 126     ASSERT(characters);
 127     ASSERT(minLengthToShare && length >= minLengthToShare);
 128     return adoptRef(new StringImpl(characters, length, sharedBuffer));
 129 }
 130
 131 SharedUChar* StringImpl::sharedBuffer()
 132 {
 133     if (m_length < minLengthToShare)
 134         return 0;
 135     // All static strings are smaller that the minimim length to share.
 136     ASSERT(!isStatic());
 137
 138     BufferOwnership ownership = bufferOwnership();
 139
 140     if (ownership == BufferInternal)
 141         return 0;
 142     if (ownership == BufferSubstring)
 143         return m_substringBuffer->sharedBuffer();
 144     if (ownership == BufferOwned) {
 145         ASSERT(!m_sharedBuffer);
 146         m_sharedBuffer = SharedUChar::create(new SharableUChar(m_data)).releaseRef();
 147         m_refCountAndFlags = (m_refCountAndFlags & ~s_refCountMaskBufferOwnership) | BufferShared;
 148     }
 149
 150     ASSERT(bufferOwnership() == BufferShared);
 151     ASSERT(m_sharedBuffer);
 152     return m_sharedBuffer;
 153 }
 154
 155 bool StringImpl::containsOnlyWhitespace()
 156 {
 157     // FIXME: The definition of whitespace here includes a number of characters
 158     // that are not whitespace from the point of view of RenderText; I wonder if
 159     // that's a problem in practice.
 160     for (unsigned i = 0; i < m_length; i++)
 161         if (!isASCIISpace(m_data[i]))
 162             return false;
 163     return true;
 164 }
 165
 166 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length)
 167 {
 168     if (start >= m_length)
 169         return empty();
 170     unsigned maxLength = m_length - start;
 171     if (length >= maxLength) {
 172         if (!start)
 173             return this;
 174         length = maxLength;
 175     }
 176     return create(m_data + start, length);
 177 }
 178
 179 UChar32 StringImpl::characterStartingAt(unsigned i)
 180 {
 181     if (U16_IS_SINGLE(m_data[i]))
 182         return m_data[i];
 183     if (i + 1 < m_length && U16_IS_LEAD(m_data[i]) && U16_IS_TRAIL(m_data[i + 1]))
 184         return U16_GET_SUPPLEMENTARY(m_data[i], m_data[i + 1]);
 185     return 0;
 186 }
 187
 188 PassRefPtr<StringImpl> StringImpl::lower()
 189 {
 190     // Note: This is a hot function in the Dromaeo benchmark, specifically the
 191     // no-op code path up through the first 'return' statement.
 192
 193     // First scan the string for uppercase and non-ASCII characters:
 194     UChar ored = 0;
 195     bool noUpper = true;
 196     const UChar *end = m_data + m_length;
 197     for (const UChar* chp = m_data; chp != end; chp++) {
 198         if (UNLIKELY(isASCIIUpper(*chp)))
 199             noUpper = false;
 200         ored |= *chp;
 201     }
 202
 203     // Nothing to do if the string is all ASCII with no uppercase.
 204     if (noUpper && !(ored & ~0x7F))
 205         return this;
 206
 207     if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
 208         CRASH();
 209     int32_t length = m_length;
 210
 211     UChar* data;
 212     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
 213
 214     if (!(ored & ~0x7F)) {
 215         // Do a faster loop for the case where all the characters are ASCII.
 216         for (int i = 0; i < length; i++) {
 217             UChar c = m_data[i];
 218             data[i] = toASCIILower(c);
 219         }
 220         return newImpl;
 221     }
 222
 223     // Do a slower implementation for cases that include non-ASCII characters.
 224     bool error;
 225     int32_t realLength = Unicode::toLower(data, length, m_data, m_length, &error);
 226     if (!error && realLength == length)
 227         return newImpl;
 228     newImpl = createUninitialized(realLength, data);
 229     Unicode::toLower(data, realLength, m_data, m_length, &error);
 230     if (error)
 231         return this;
 232     return newImpl;
 233 }
 234
 235 PassRefPtr<StringImpl> StringImpl::upper()
 236 {
 237     // This function could be optimized for no-op cases the way lower() is,
 238     // but in empirical testing, few actual calls to upper() are no-ops, so
 239     // it wouldn't be worth the extra time for pre-scanning.
 240     UChar* data;
 241     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
 242
 243     if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
 244         CRASH();
 245     int32_t length = m_length;
 246
 247     // Do a faster loop for the case where all the characters are ASCII.
 248     UChar ored = 0;
 249     for (int i = 0; i < length; i++) {
 250         UChar c = m_data[i];
 251         ored |= c;
 252         data[i] = toASCIIUpper(c);
 253     }
 254     if (!(ored & ~0x7F))
 255         return newImpl.release();
 256
 257     // Do a slower implementation for cases that include non-ASCII characters.
 258     bool error;
 259     int32_t realLength = Unicode::toUpper(data, length, m_data, m_length, &error);
 260     if (!error && realLength == length)
 261         return newImpl;
 262     newImpl = createUninitialized(realLength, data);
 263     Unicode::toUpper(data, realLength, m_data, m_length, &error);
 264     if (error)
 265         return this;
 266     return newImpl.release();
 267 }
 268
 269 PassRefPtr<StringImpl> StringImpl::secure(UChar character, bool hideLastCharacter)
 270 {
 271     UChar* data;
 272     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
 273     if (m_length) {
 274         const unsigned lastCharacterIndex = m_length - 1;
 275         for (unsigned i = 0; i < lastCharacterIndex; ++i)
 276             data[i] = character;
 277         data[lastCharacterIndex] = hideLastCharacter ? character : m_data[lastCharacterIndex];
 278     }
 279     return newImpl.release();
 280 }
 281
 282 PassRefPtr<StringImpl> StringImpl::foldCase()
 283 {
 284     UChar* data;
 285     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
 286
 287     if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
 288         CRASH();
 289     int32_t length = m_length;
 290
 291     // Do a faster loop for the case where all the characters are ASCII.
 292     UChar ored = 0;
 293     for (int32_t i = 0; i < length; i++) {
 294         UChar c = m_data[i];
 295         ored |= c;
 296         data[i] = toASCIILower(c);
 297     }
 298     if (!(ored & ~0x7F))
 299         return newImpl.release();
 300
 301     // Do a slower implementation for cases that include non-ASCII characters.
 302     bool error;
 303     int32_t realLength = Unicode::foldCase(data, length, m_data, m_length, &error);
 304     if (!error && realLength == length)
 305         return newImpl.release();
 306     newImpl = createUninitialized(realLength, data);
 307     Unicode::foldCase(data, realLength, m_data, m_length, &error);
 308     if (error)
 309         return this;
 310     return newImpl.release();
 311 }
 312
 313 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace()
 314 {
 315     if (!m_length)
 316         return empty();
 317
 318     unsigned start = 0;
 319     unsigned end = m_length - 1;
 320
 321     // skip white space from start
 322     while (start <= end && isSpaceOrNewline(m_data[start]))
 323         start++;
 324
 325     // only white space
 326     if (start > end)
 327         return empty();
 328
 329     // skip white space from end
 330     while (end && isSpaceOrNewline(m_data[end]))
 331         end--;
 332
 333     if (!start && end == m_length - 1)
 334         return this;
 335     return create(m_data + start, end + 1 - start);
 336 }
 337
 338 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch)
 339 {
 340     const UChar* from = m_data;
 341     const UChar* fromend = from + m_length;
 342
 343     // Assume the common case will not remove any characters
 344     while (from != fromend && !findMatch(*from))
 345         from++;
 346     if (from == fromend)
 347         return this;
 348
 349     StringBuffer data(m_length);
 350     UChar* to = data.characters();
 351     unsigned outc = from - m_data;
 352
 353     if (outc)
 354         memcpy(to, m_data, outc * sizeof(UChar));
 355
 356     while (true) {
 357         while (from != fromend && findMatch(*from))
 358             from++;
 359         while (from != fromend && !findMatch(*from))
 360             to[outc++] = *from++;
 361         if (from == fromend)
 362             break;
 363     }
 364
 365     data.shrink(outc);
 366
 367     return adopt(data);
 368 }
 369
 370 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace()
 371 {
 372     StringBuffer data(m_length);
 373
 374     const UChar* from = m_data;
 375     const UChar* fromend = from + m_length;
 376     int outc = 0;
 377     bool changedToSpace = false;
 378
 379     UChar* to = data.characters();
 380
 381     while (true) {
 382         while (from != fromend && isSpaceOrNewline(*from)) {
 383             if (*from != ' ')
 384                 changedToSpace = true;
 385             from++;
 386         }
 387         while (from != fromend && !isSpaceOrNewline(*from))
 388             to[outc++] = *from++;
 389         if (from != fromend)
 390             to[outc++] = ' ';
 391         else
 392             break;
 393     }
 394
 395     if (outc > 0 && to[outc - 1] == ' ')
 396         outc--;
 397
 398     if (static_cast<unsigned>(outc) == m_length && !changedToSpace)
 399         return this;
 400
 401     data.shrink(outc);
 402
 403     return adopt(data);
 404 }
 405
 406 int StringImpl::toIntStrict(bool* ok, int base)
 407 {
 408     return charactersToIntStrict(m_data, m_length, ok, base);
 409 }
 410
 411 unsigned StringImpl::toUIntStrict(bool* ok, int base)
 412 {
 413     return charactersToUIntStrict(m_data, m_length, ok, base);
 414 }
 415
 416 int64_t StringImpl::toInt64Strict(bool* ok, int base)
 417 {
 418     return charactersToInt64Strict(m_data, m_length, ok, base);
 419 }
 420
 421 uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
 422 {
 423     return charactersToUInt64Strict(m_data, m_length, ok, base);
 424 }
 425
 426 intptr_t StringImpl::toIntPtrStrict(bool* ok, int base)
 427 {
 428     return charactersToIntPtrStrict(m_data, m_length, ok, base);
 429 }
 430
 431 int StringImpl::toInt(bool* ok)
 432 {
 433     return charactersToInt(m_data, m_length, ok);
 434 }
 435
 436 unsigned StringImpl::toUInt(bool* ok)
 437 {
 438     return charactersToUInt(m_data, m_length, ok);
 439 }
 440
 441 int64_t StringImpl::toInt64(bool* ok)
 442 {
 443     return charactersToInt64(m_data, m_length, ok);
 444 }
 445
 446 uint64_t StringImpl::toUInt64(bool* ok)
 447 {
 448     return charactersToUInt64(m_data, m_length, ok);
 449 }
 450
 451 intptr_t StringImpl::toIntPtr(bool* ok)
 452 {
 453     return charactersToIntPtr(m_data, m_length, ok);
 454 }
 455
 456 double StringImpl::toDouble(bool* ok)
 457 {
 458     return charactersToDouble(m_data, m_length, ok);
 459 }
 460
 461 float StringImpl::toFloat(bool* ok)
 462 {
 463     return charactersToFloat(m_data, m_length, ok);
 464 }
 465
 466 static bool equal(const UChar* a, const char* b, int length)
 467 {
 468     ASSERT(length >= 0);
 469     while (length--) {
 470         unsigned char bc = *b++;
 471         if (*a++ != bc)
 472             return false;
 473     }
 474     return true;
 475 }
 476
 477 bool equalIgnoringCase(const UChar* a, const char* b, unsigned length)
 478 {
 479     while (length--) {
 480         unsigned char bc = *b++;
 481         if (foldCase(*a++) != foldCase(bc))
 482             return false;
 483     }
 484     return true;
 485 }
 486
 487 static inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length)
 488 {
 489     ASSERT(length >= 0);
 490     return umemcasecmp(a, b, length) == 0;
 491 }
 492
 493 int StringImpl::find(const char* chs, int index, bool caseSensitive)
 494 {
 495     if (!chs || index < 0)
 496         return -1;
 497
 498     size_t matchStringLength = strlen(chs);
 499     if (matchStringLength > static_cast<unsigned>(numeric_limits<int>::max()))
 500         CRASH();
 501     int chsLength = matchStringLength;
 502     int n = m_length - index;
 503     if (n < 0)
 504         return -1;
 505     n -= chsLength - 1;
 506     if (n <= 0)
 507         return -1;
 508
 509     const char* chsPlusOne = chs + 1;
 510     int chsLengthMinusOne = chsLength - 1;
 511
 512     const UChar* ptr = m_data + index - 1;
 513     if (caseSensitive) {
 514         UChar c = *chs;
 515         do {
 516             if (*++ptr == c && equal(ptr + 1, chsPlusOne, chsLengthMinusOne))
 517                 return m_length - chsLength - n + 1;
 518         } while (--n);
 519     } else {
 520         UChar lc = Unicode::foldCase(*chs);
 521         do {
 522             if (Unicode::foldCase(*++ptr) == lc && equalIgnoringCase(ptr + 1, chsPlusOne, chsLengthMinusOne))
 523                 return m_length - chsLength - n + 1;
 524         } while (--n);
 525     }
 526
 527     return -1;
 528 }
 529
 530 int StringImpl::find(UChar c, int start)
 531 {
 532     return WebCore::find(m_data, m_length, c, start);
 533 }
 534
 535 int StringImpl::find(CharacterMatchFunctionPtr matchFunction, int start)
 536 {
 537     return WebCore::find(m_data, m_length, matchFunction, start);
 538 }
 539
 540 int StringImpl::find(StringImpl* str, int index, bool caseSensitive)
 541 {
 542     /*
 543       We use a simple trick for efficiency's sake. Instead of
 544       comparing strings, we compare the sum of str with that of
 545       a part of this string. Only if that matches, we call memcmp
 546       or ucstrnicmp.
 547     */
 548     ASSERT(str);
 549     if (index < 0)
 550         index += m_length;
 551     int lstr = str->m_length;
 552     int lthis = m_length - index;
 553     if ((unsigned)lthis > m_length)
 554         return -1;
 555     int delta = lthis - lstr;
 556     if (delta < 0)
 557         return -1;
 558
 559     const UChar* uthis = m_data + index;
 560     const UChar* ustr = str->m_data;
 561     unsigned hthis = 0;
 562     unsigned hstr = 0;
 563     if (caseSensitive) {
 564         for (int i = 0; i < lstr; i++) {
 565             hthis += uthis[i];
 566             hstr += ustr[i];
 567         }
 568         int i = 0;
 569         while (1) {
 570             if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0)
 571                 return index + i;
 572             if (i == delta)
 573                 return -1;
 574             hthis += uthis[i + lstr];
 575             hthis -= uthis[i];
 576             i++;
 577         }
 578     } else {
 579         for (int i = 0; i < lstr; i++ ) {
 580             hthis += toASCIILower(uthis[i]);
 581             hstr += toASCIILower(ustr[i]);
 582         }
 583         int i = 0;
 584         while (1) {
 585             if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr))
 586                 return index + i;
 587             if (i == delta)
 588                 return -1;
 589             hthis += toASCIILower(uthis[i + lstr]);
 590             hthis -= toASCIILower(uthis[i]);
 591             i++;
 592         }
 593     }
 594 }
 595
 596 int StringImpl::reverseFind(UChar c, int index)
 597 {
 598     return WebCore::reverseFind(m_data, m_length, c, index);
 599 }
 600
 601 int StringImpl::reverseFind(StringImpl* str, int index, bool caseSensitive)
 602 {
 603     /*
 604      See StringImpl::find() for explanations.
 605      */
 606     ASSERT(str);
 607     int lthis = m_length;
 608     if (index < 0)
 609         index += lthis;
 610
 611     int lstr = str->m_length;
 612     int delta = lthis - lstr;
 613     if ( index < 0 || index > lthis || delta < 0 )
 614         return -1;
 615     if ( index > delta )
 616         index = delta;
 617
 618     const UChar *uthis = m_data;
 619     const UChar *ustr = str->m_data;
 620     unsigned hthis = 0;
 621     unsigned hstr = 0;
 622     int i;
 623     if (caseSensitive) {
 624         for ( i = 0; i < lstr; i++ ) {
 625             hthis += uthis[index + i];
 626             hstr += ustr[i];
 627         }
 628         i = index;
 629         while (1) {
 630             if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0)
 631                 return i;
 632             if (i == 0)
 633                 return -1;
 634             i--;
 635             hthis -= uthis[i + lstr];
 636             hthis += uthis[i];
 637         }
 638     } else {
 639         for (i = 0; i < lstr; i++) {
 640             hthis += toASCIILower(uthis[index + i]);
 641             hstr += toASCIILower(ustr[i]);
 642         }
 643         i = index;
 644         while (1) {
 645             if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr) )
 646                 return i;
 647             if (i == 0)
 648                 return -1;
 649             i--;
 650             hthis -= toASCIILower(uthis[i + lstr]);
 651             hthis += toASCIILower(uthis[i]);
 652         }
 653     }
 654
 655     // Should never get here.
 656     return -1;
 657 }
 658
 659 bool StringImpl::endsWith(StringImpl* m_data, bool caseSensitive)
 660 {
 661     ASSERT(m_data);
 662     int start = m_length - m_data->m_length;
 663     if (start >= 0)
 664         return (find(m_data, start, caseSensitive) == start);
 665     return false;
 666 }
 667
 668 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC)
 669 {
 670     if (oldC == newC)
 671         return this;
 672     unsigned i;
 673     for (i = 0; i != m_length; ++i)
 674         if (m_data[i] == oldC)
 675             break;
 676     if (i == m_length)
 677         return this;
 678
 679     UChar* data;
 680     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
 681
 682     for (i = 0; i != m_length; ++i) {
 683         UChar ch = m_data[i];
 684         if (ch == oldC)
 685             ch = newC;
 686         data[i] = ch;
 687     }
 688     return newImpl.release();
 689 }
 690
 691 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str)
 692 {
 693     position = min(position, length());
 694     lengthToReplace = min(lengthToReplace, length() - position);
 695     unsigned lengthToInsert = str ? str->length() : 0;
 696     if (!lengthToReplace && !lengthToInsert)
 697         return this;
 698     UChar* data;
 699
 700     if ((length() - lengthToReplace) >= (numeric_limits<unsigned>::max() - lengthToInsert))
 701         CRASH();
 702
 703     RefPtr<StringImpl> newImpl =
 704         createUninitialized(length() - lengthToReplace + lengthToInsert, data);
 705     memcpy(data, characters(), position * sizeof(UChar));
 706     if (str)
 707         memcpy(data + position, str->characters(), lengthToInsert * sizeof(UChar));
 708     memcpy(data + position + lengthToInsert, characters() + position + lengthToReplace,
 709         (length() - position - lengthToReplace) * sizeof(UChar));
 710     return newImpl.release();
 711 }
 712
 713 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement)
 714 {
 715     if (!replacement)
 716         return this;
 717
 718     int repStrLength = replacement->length();
 719     int srcSegmentStart = 0;
 720     unsigned matchCount = 0;
 721
 722     // Count the matches
 723     while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) {
 724         ++matchCount;
 725         ++srcSegmentStart;
 726     }
 727
 728     // If we have 0 matches, we don't have to do any more work
 729     if (!matchCount)
 730         return this;
 731
 732     if (repStrLength && matchCount > numeric_limits<unsigned>::max() / repStrLength)
 733         CRASH();
 734
 735     unsigned replaceSize = matchCount * repStrLength;
 736     unsigned newSize = m_length - matchCount;
 737     if (newSize >= (numeric_limits<unsigned>::max() - replaceSize))
 738         CRASH();
 739
 740     newSize += replaceSize;
 741
 742     UChar* data;
 743     RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
 744
 745     // Construct the new data
 746     int srcSegmentEnd;
 747     int srcSegmentLength;
 748     srcSegmentStart = 0;
 749     int dstOffset = 0;
 750
 751     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) {
 752         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
 753         memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
 754         dstOffset += srcSegmentLength;
 755         memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar));
 756         dstOffset += repStrLength;
 757         srcSegmentStart = srcSegmentEnd + 1;
 758     }
 759
 760     srcSegmentLength = m_length - srcSegmentStart;
 761     memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
 762
 763     ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length()));
 764
 765     return newImpl.release();
 766 }
 767
 768 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement)
 769 {
 770     if (!pattern || !replacement)
 771         return this;
 772
 773     int patternLength = pattern->length();
 774     if (!patternLength)
 775         return this;
 776
 777     int repStrLength = replacement->length();
 778     int srcSegmentStart = 0;
 779     unsigned matchCount = 0;
 780
 781     // Count the matches
 782     while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) {
 783         ++matchCount;
 784         srcSegmentStart += patternLength;
 785     }
 786
 787     // If we have 0 matches, we don't have to do any more work
 788     if (!matchCount)
 789         return this;
 790
 791     unsigned newSize = m_length - matchCount * patternLength;
 792     if (repStrLength && matchCount > numeric_limits<unsigned>::max() / repStrLength)
 793         CRASH();
 794
 795     if (newSize > (numeric_limits<unsigned>::max() - matchCount * repStrLength))
 796         CRASH();
 797
 798     newSize += matchCount * repStrLength;
 799
 800     UChar* data;
 801     RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
 802
 803     // Construct the new data
 804     int srcSegmentEnd;
 805     int srcSegmentLength;
 806     srcSegmentStart = 0;
 807     int dstOffset = 0;
 808
 809     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) {
 810         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
 811         memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
 812         dstOffset += srcSegmentLength;
 813         memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar));
 814         dstOffset += repStrLength;
 815         srcSegmentStart = srcSegmentEnd + patternLength;
 816     }
 817
 818     srcSegmentLength = m_length - srcSegmentStart;
 819     memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
 820
 821     ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length()));
 822
 823     return newImpl.release();
 824 }
 825
 826 bool equal(const StringImpl* a, const StringImpl* b)
 827 {
 828     return StringHash::equal(a, b);
 829 }
 830
 831 bool equal(const StringImpl* a, const char* b)
 832 {
 833     if (!a)
 834         return !b;
 835     if (!b)
 836         return !a;
 837
 838     unsigned length = a->length();
 839     const UChar* as = a->characters();
 840     for (unsigned i = 0; i != length; ++i) {
 841         unsigned char bc = b[i];
 842         if (!bc)
 843             return false;
 844         if (as[i] != bc)
 845             return false;
 846     }
 847
 848     return !b[length];
 849 }
 850
 851 bool equalIgnoringCase(StringImpl* a, StringImpl* b)
 852 {
 853     return CaseFoldingHash::equal(a, b);
 854 }
 855
 856 bool equalIgnoringCase(StringImpl* a, const char* b)
 857 {
 858     if (!a)
 859         return !b;
 860     if (!b)
 861         return !a;
 862
 863     unsigned length = a->length();
 864     const UChar* as = a->characters();
 865
 866     // Do a faster loop for the case where all the characters are ASCII.
 867     UChar ored = 0;
 868     bool equal = true;
 869     for (unsigned i = 0; i != length; ++i) {
 870         char bc = b[i];
 871         if (!bc)
 872             return false;
 873         UChar ac = as[i];
 874         ored |= ac;
 875         equal = equal && (toASCIILower(ac) == toASCIILower(bc));
 876     }
 877
 878     // Do a slower implementation for cases that include non-ASCII characters.
 879     if (ored & ~0x7F) {
 880         equal = true;
 881         for (unsigned i = 0; i != length; ++i) {
 882             unsigned char bc = b[i];
 883             equal = equal && (foldCase(as[i]) == foldCase(bc));
 884         }
 885     }
 886
 887     return equal && !b[length];
 888 }
 889
 890 bool equalIgnoringNullity(StringImpl* a, StringImpl* b)
 891 {
 892     if (StringHash::equal(a, b))
 893         return true;
 894     if (!a && b && !b->length())
 895         return true;
 896     if (!b && a && !a->length())
 897         return true;
 898
 899     return false;
 900 }
 901
 902 Vector<char> StringImpl::ascii()
 903 {
 904     Vector<char> buffer(m_length + 1);
 905     for (unsigned i = 0; i != m_length; ++i) {
 906         UChar c = m_data[i];
 907         if ((c >= 0x20 && c < 0x7F) || c == 0x00)
 908             buffer[i] = static_cast<char>(c);
 909         else
 910             buffer[i] = '?';
 911     }
 912     buffer[m_length] = '\0';
 913     return buffer;
 914 }
 915
 916 WTF::Unicode::Direction StringImpl::defaultWritingDirection()
 917 {
 918     for (unsigned i = 0; i < m_length; ++i) {
 919         WTF::Unicode::Direction charDirection = WTF::Unicode::direction(m_data[i]);
 920         if (charDirection == WTF::Unicode::LeftToRight)
 921             return WTF::Unicode::LeftToRight;
 922         if (charDirection == WTF::Unicode::RightToLeft || charDirection == WTF::Unicode::RightToLeftArabic)
 923             return WTF::Unicode::RightToLeft;
 924     }
 925     return WTF::Unicode::LeftToRight;
 926 }
 927
 928 // This is a hot function because it's used when parsing HTML.
 929 PassRefPtr<StringImpl> StringImpl::createStrippingNullCharactersSlowCase(const UChar* characters, unsigned length)
 930 {
 931     StringBuffer strippedCopy(length);
 932     unsigned strippedLength = 0;
 933     for (unsigned i = 0; i < length; i++) {
 934         if (int c = characters[i])
 935             strippedCopy[strippedLength++] = c;
 936     }
 937     ASSERT(strippedLength < length);  // Only take the slow case when stripping.
 938     strippedCopy.shrink(strippedLength);
 939     return adopt(strippedCopy);
 940 }
 941
 942 PassRefPtr<StringImpl> StringImpl::adopt(StringBuffer& buffer)
 943 {
 944     unsigned length = buffer.length();
 945     if (length == 0)
 946         return empty();
 947     return adoptRef(new StringImpl(buffer.release(), length));
 948 }
 949
 950 int StringImpl::wordCount(int maxWordsToCount)
 951 {
 952     unsigned wordCount = 0;
 953     unsigned i;
 954     bool atWord = false;
 955     for (i = 0; i < m_length; i++) {
 956         if (u_isspace(m_data[i])) {
 957             atWord = false;
 958         } else if (!atWord) {
 959             wordCount++;
 960             if (wordCount >= (unsigned)maxWordsToCount)
 961                 return wordCount;
 962             atWord = true;
 963         }
 964     }
 965     return wordCount;
 966 }
 967
 968 PassRefPtr<StringImpl> StringImpl::createWithTerminatingNullCharacter(const StringImpl& string)
 969 {
 970     // Use createUninitialized instead of 'new StringImpl' so that the string and its buffer
 971     // get allocated in a single memory block.
 972     UChar* data;
 973     unsigned length = string.m_length;
 974     if (length >= numeric_limits<unsigned>::max())
 975         CRASH();
 976     RefPtr<StringImpl> terminatedString = createUninitialized(length + 1, data);
 977     memcpy(data, string.m_data, length * sizeof(UChar));
 978     data[length] = 0;
 979     terminatedString->m_length--;
 980     terminatedString->m_hash = string.m_hash;
 981     terminatedString->m_refCountAndFlags |= s_refCountFlagHasTerminatingNullCharacter;
 982     return terminatedString.release();
 983 }
 984
 985 PassRefPtr<StringImpl> StringImpl::threadsafeCopy() const
 986 {
 987     return create(m_data, m_length);
 988 }
 989
 990 PassRefPtr<StringImpl> StringImpl::crossThreadString()
 991 {
 992     if (SharedUChar* sharedBuffer = this->sharedBuffer())
 993         return adoptRef(new StringImpl(m_data, m_length, sharedBuffer->crossThreadCopy()));
 994
 995     // If no shared buffer is available, create a copy.
 996     return threadsafeCopy();
 997 }
 998
 999 } // namespace WebCore