2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * (C) 1999 Antti Koivisto (koivisto@kde.org)
4 * (C) 2001 Dirk Mueller ( mueller@kde.org )
5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
26 #include "StringImpl.h"
28 #include "AtomicString.h"
29 #include "StringBuffer.h"
30 #include "StringHash.h"
31 #include <wtf/StdLibExtras.h>
32 #include <wtf/WTFThreadData.h>
// Makes names from the Unicode namespace usable unqualified in this file.
38 using namespace Unicode
;
// Length threshold for buffer sharing: create(characters, length, sharedBuffer)
// asserts length >= this value, and sharedBuffer() exits early below it.
40 static const unsigned minLengthToShare
= 20;
// Compile-time size check: StringImpl must occupy exactly two ints plus three pointers.
42 COMPILE_ASSERT(sizeof(StringImpl
) == 2 * sizeof(int) + 3 * sizeof(void*), StringImpl_should_stay_small
);
// Destructor: removes this string from the AtomicString table and the current
// thread's identifier table, then releases the character buffer according to
// its ownership mode.
// NOTE(review): the embedded numbering skips lines 45-48, 50-51 and 53-56, so
// the conditions guarding the two remove() calls, and what happens when the
// identifier-table remove() fails, are not visible in this listing.
44 StringImpl::~StringImpl()
49 AtomicString::remove(this);
52 if (!wtfThreadData().currentIdentifierTable()->remove(this))
// Nothing is released here for BufferInternal; the other three modes are handled below.
57 BufferOwnership ownership
= bufferOwnership();
58 if (ownership
!= BufferInternal
) {
// BufferOwned: the characters are freed directly with fastFree.
59 if (ownership
== BufferOwned
) {
60 ASSERT(!m_sharedBuffer
);
62 fastFree(const_cast<UChar
*>(m_data
));
// BufferSubstring: drop the reference held on m_substringBuffer.
63 } else if (ownership
== BufferSubstring
) {
64 ASSERT(m_substringBuffer
);
65 m_substringBuffer
->deref();
// Remaining case must be BufferShared: drop the reference on the shared buffer.
67 ASSERT(ownership
== BufferShared
);
68 ASSERT(m_sharedBuffer
);
69 m_sharedBuffer
->deref();
// Creates a StringImpl of `length` UChars without initializing the characters.
// On return `data` points at the character storage so the caller can fill it.
// NOTE(review): lines 75-80 and 85 of the original numbering are absent from
// this listing (presumably the zero-length path and the action taken when the
// overflow guard below fires) - confirm against the full file.
74 PassRefPtr
<StringImpl
> StringImpl::createUninitialized(unsigned length
, UChar
*& data
)
81 // Allocate a single buffer large enough to contain the StringImpl
82 // struct as well as the data which it contains. This removes one
83 // heap allocation from this call.
// Overflow guard: sizeof(StringImpl) + length * sizeof(UChar) must not exceed
// the largest unsigned value.
84 if (length
> ((std::numeric_limits
<unsigned>::max() - sizeof(StringImpl
)) / sizeof(UChar
)))
86 size_t size
= sizeof(StringImpl
) + length
* sizeof(UChar
);
87 StringImpl
* string
= static_cast<StringImpl
*>(fastMalloc(size
));
// The characters start immediately after the StringImpl object in the same allocation.
89 data
= reinterpret_cast<UChar
*>(string
+ 1);
// Placement-new constructs the StringImpl at the front of the block just allocated.
90 return adoptRef(new (string
) StringImpl(length
));
// Creates a StringImpl holding a copy of the `length` UChars at `characters`.
// NOTE(review): the result returned when `characters` is null or `length` is
// zero is on a line (96) missing from this listing.
93 PassRefPtr
<StringImpl
> StringImpl::create(const UChar
* characters
, unsigned length
)
95 if (!characters
|| !length
)
// Allocate uninitialized storage, then copy the source characters into it.
99 RefPtr
<StringImpl
> string
= createUninitialized(length
, data
);
100 memcpy(data
, characters
, length
* sizeof(UChar
));
101 return string
.release();
// Creates a StringImpl from the `length` bytes at `characters`.
// NOTE(review): the result returned when `characters` is null or `length` is
// zero (line 107) and the store into `data` (line 113) are not visible here.
104 PassRefPtr
<StringImpl
> StringImpl::create(const char* characters
, unsigned length
)
106 if (!characters
|| !length
)
110 RefPtr
<StringImpl
> string
= createUninitialized(length
, data
);
111 for (unsigned i
= 0; i
!= length
; ++i
) {
// Read each byte as unsigned char so values >= 0x80 are not sign-extended.
112 unsigned char c
= characters
[i
];
115 return string
.release();
// Creates a StringImpl from a NUL-terminated C string: measures it with strlen
// and forwards to create(const char*, unsigned).
// NOTE(review): lines 119-121 (possibly a null check on `string`) and line 124
// (the action when the length does not fit in unsigned) are not visible here.
118 PassRefPtr
<StringImpl
> StringImpl::create(const char* string
)
122 size_t length
= strlen(string
);
// Range-check the size_t length before it is narrowed to unsigned in the call below.
123 if (length
> numeric_limits
<unsigned>::max())
125 return create(string
, length
);
// Constructs a StringImpl directly from `characters`, `length` and
// `sharedBuffer`. Callers must only pass strings of at least minLengthToShare
// characters; this is asserted, not checked at runtime.
128 PassRefPtr
<StringImpl
> StringImpl::create(const UChar
* characters
, unsigned length
, PassRefPtr
<SharedUChar
> sharedBuffer
)
131 ASSERT(minLengthToShare
&& length
>= minLengthToShare
);
132 return adoptRef(new StringImpl(characters
, length
, sharedBuffer
));
// Returns the SharedUChar for this string's characters, converting an owned
// buffer into a shared one on first request.
// NOTE(review): the values returned by the short-string exit (line 138) and
// the BufferInternal exit (line 145) are not visible in this listing.
135 SharedUChar
* StringImpl::sharedBuffer()
137 if (m_length
< minLengthToShare
)
139 // All static strings are smaller than the minimum length to share.
142 BufferOwnership ownership
= bufferOwnership();
144 if (ownership
== BufferInternal
)
// A substring defers to the shared buffer of its m_substringBuffer.
146 if (ownership
== BufferSubstring
)
147 return m_substringBuffer
->sharedBuffer();
// Owned buffer: wrap m_data in a new SharedUChar and switch the ownership
// bits in m_refCountAndFlags to BufferShared.
148 if (ownership
== BufferOwned
) {
149 ASSERT(!m_sharedBuffer
);
150 m_sharedBuffer
= SharedUChar::create(new SharableUChar(m_data
)).leakRef();
151 m_refCountAndFlags
= (m_refCountAndFlags
& ~s_refCountMaskBufferOwnership
) | BufferShared
;
// From here on the string must be in BufferShared mode.
154 ASSERT(bufferOwnership() == BufferShared
);
155 ASSERT(m_sharedBuffer
);
156 return m_sharedBuffer
;
// Tests every character of the string with isASCIISpace.
// NOTE(review): the return statements (lines 166-167) are not visible here;
// presumably false on the first non-space character and true otherwise.
159 bool StringImpl::containsOnlyWhitespace()
161 // FIXME: The definition of whitespace here includes a number of characters
162 // that are not whitespace from the point of view of RenderText; I wonder if
163 // that's a problem in practice.
164 for (unsigned i
= 0; i
< m_length
; i
++)
165 if (!isASCIISpace(m_data
[i
]))
// Returns a new string made from up to `length` characters starting at `start`.
// NOTE(review): the result for start >= m_length (line 173) and the body of the
// length >= maxLength branch (lines 176-179) are not visible in this listing.
170 PassRefPtr
<StringImpl
> StringImpl::substring(unsigned start
, unsigned length
)
172 if (start
>= m_length
)
// maxLength is the number of characters available from `start` to the end.
174 unsigned maxLength
= m_length
- start
;
175 if (length
>= maxLength
) {
180 return create(m_data
+ start
, length
);
// Returns the code point that begins at index `i`. A lead surrogate followed
// by a trail surrogate is combined with U16_GET_SUPPLEMENTARY.
// NOTE(review): the values returned for a single code unit (line 186) and for
// an unpaired surrogate (line 189) are not visible in this listing.
183 UChar32
StringImpl::characterStartingAt(unsigned i
)
185 if (U16_IS_SINGLE(m_data
[i
]))
// The i + 1 < m_length test keeps the trail-surrogate read inside the buffer.
187 if (i
+ 1 < m_length
&& U16_IS_LEAD(m_data
[i
]) && U16_IS_TRAIL(m_data
[i
+ 1]))
188 return U16_GET_SUPPLEMENTARY(m_data
[i
], m_data
[i
+ 1]);
// Returns a lower-cased version of this string. A pre-scan detects the no-op
// case; ASCII-only strings use a per-character toASCIILower loop; anything
// else goes through Unicode::toLower, which is run a second time into a buffer
// of realLength characters when the first pass reports an error or a different
// length.
// NOTE(review): the declarations of noUpper/ored/data/error, the loop bodies
// and every return statement fall on lines missing from this listing.
192 PassRefPtr
<StringImpl
> StringImpl::lower()
194 // Note: This is a hot function in the Dromaeo benchmark, specifically the
195 // no-op code path up through the first 'return' statement.
197 // First scan the string for uppercase and non-ASCII characters:
200 const UChar
*end
= m_data
+ m_length
;
201 for (const UChar
* chp
= m_data
; chp
!= end
; chp
++) {
202 if (UNLIKELY(isASCIIUpper(*chp
)))
207 // Nothing to do if the string is all ASCII with no uppercase.
208 if (noUpper
&& !(ored
& ~0x7F))
// The length is stored in an int32_t below, so it must fit in one.
211 if (m_length
> static_cast<unsigned>(numeric_limits
<int32_t>::max()))
213 int32_t length
= m_length
;
216 RefPtr
<StringImpl
> newImpl
= createUninitialized(m_length
, data
);
// (ored & ~0x7F) == 0 means no character had a bit set above the ASCII range.
218 if (!(ored
& ~0x7F)) {
219 // Do a faster loop for the case where all the characters are ASCII.
220 for (int i
= 0; i
< length
; i
++) {
222 data
[i
] = toASCIILower(c
);
227 // Do a slower implementation for cases that include non-ASCII characters.
229 int32_t realLength
= Unicode::toLower(data
, length
, m_data
, m_length
, &error
);
230 if (!error
&& realLength
== length
)
// First pass did not fit or changed length: allocate realLength characters and convert again.
232 newImpl
= createUninitialized(realLength
, data
);
233 Unicode::toLower(data
, realLength
, m_data
, m_length
, &error
);
// Returns an upper-cased version of this string. An ASCII loop is tried
// first; strings with non-ASCII characters go through Unicode::toUpper, which
// is run a second time into a buffer of realLength characters when the first
// pass reports an error or a different length.
// NOTE(review): the declarations of data/ored/error, the condition selecting
// the early return at line 259 and the loop bodies are on missing lines. Also
// note the output buffer is allocated (line 245) before the int32_t range
// check (line 247).
239 PassRefPtr
<StringImpl
> StringImpl::upper()
241 // This function could be optimized for no-op cases the way lower() is,
242 // but in empirical testing, few actual calls to upper() are no-ops, so
243 // it wouldn't be worth the extra time for pre-scanning.
245 RefPtr
<StringImpl
> newImpl
= createUninitialized(m_length
, data
);
// The length is stored in an int32_t below, so it must fit in one.
247 if (m_length
> static_cast<unsigned>(numeric_limits
<int32_t>::max()))
249 int32_t length
= m_length
;
251 // Do a faster loop for the case where all the characters are ASCII.
253 for (int i
= 0; i
< length
; i
++) {
256 data
[i
] = toASCIIUpper(c
);
259 return newImpl
.release();
261 // Do a slower implementation for cases that include non-ASCII characters.
263 int32_t realLength
= Unicode::toUpper(data
, length
, m_data
, m_length
, &error
);
264 if (!error
&& realLength
== length
)
// First pass did not fit or changed length: allocate realLength characters and convert again.
266 newImpl
= createUninitialized(realLength
, data
);
267 Unicode::toUpper(data
, realLength
, m_data
, m_length
, &error
);
270 return newImpl
.release();
// Returns a same-length copy of this string for obscured display. The last
// position receives `character` when behavior == ObscureLastCharacter and the
// original last character otherwise.
// NOTE(review): the loop body for the earlier positions (line 282) is not
// visible; presumably it stores `character`. `m_length - 1` assumes a
// non-empty string - an empty-string guard would sit on the missing lines
// 275-276; confirm against the full file.
273 PassRefPtr
<StringImpl
> StringImpl::secure(UChar character
, LastCharacterBehavior behavior
)
279 RefPtr
<StringImpl
> newImpl
= createUninitialized(m_length
, data
);
280 unsigned lastCharacterIndex
= m_length
- 1;
281 for (unsigned i
= 0; i
< lastCharacterIndex
; ++i
)
283 data
[lastCharacterIndex
] = (behavior
== ObscureLastCharacter
) ? character
: m_data
[lastCharacterIndex
];
284 return newImpl
.release();
// Returns a case-folded version of this string. An ASCII loop using
// toASCIILower is tried first; strings with non-ASCII characters go through
// Unicode::foldCase, which is run a second time into a buffer of realLength
// characters when the first pass reports an error or a different length.
// NOTE(review): the declarations of data/ored/error, the condition selecting
// the early return at line 304 and the loop bodies are on missing lines. The
// output buffer is allocated (line 290) before the int32_t range check (line 292).
287 PassRefPtr
<StringImpl
> StringImpl::foldCase()
290 RefPtr
<StringImpl
> newImpl
= createUninitialized(m_length
, data
);
// The length is stored in an int32_t below, so it must fit in one.
292 if (m_length
> static_cast<unsigned>(numeric_limits
<int32_t>::max()))
294 int32_t length
= m_length
;
296 // Do a faster loop for the case where all the characters are ASCII.
298 for (int32_t i
= 0; i
< length
; i
++) {
301 data
[i
] = toASCIILower(c
);
304 return newImpl
.release();
306 // Do a slower implementation for cases that include non-ASCII characters.
308 int32_t realLength
= Unicode::foldCase(data
, length
, m_data
, m_length
, &error
);
// Success on the first pass: the buffer already holds the folded text.
309 if (!error
&& realLength
== length
)
310 return newImpl
.release();
// Otherwise allocate realLength characters and fold again.
311 newImpl
= createUninitialized(realLength
, data
);
312 Unicode::foldCase(data
, realLength
, m_data
, m_length
, &error
);
315 return newImpl
.release();
// Returns this string with leading and trailing isSpaceOrNewline characters
// removed.
// NOTE(review): `m_length - 1` assumes a non-empty string; a guard would sit
// on the missing lines 319-323. The loop bodies, the all-whitespace result and
// the result of the "nothing stripped" branch (line 339) are also not visible.
318 PassRefPtr
<StringImpl
> StringImpl::stripWhiteSpace()
324 unsigned end
= m_length
- 1;
326 // skip white space from start
327 while (start
<= end
&& isSpaceOrNewline(m_data
[start
]))
334 // skip white space from end
335 while (end
&& isSpaceOrNewline(m_data
[end
]))
// start == 0 and end still at the last index means no character was stripped.
338 if (!start
&& end
== m_length
- 1)
// `end` is inclusive, hence the + 1 when computing the new length.
340 return create(m_data
+ start
, end
+ 1 - start
);
// Builds a copy of this string without the characters for which `findMatch`
// returns true.
// NOTE(review): loop bodies at lines 350 and 363, the early result when nothing
// matches, the enclosing loop around lines 362-365 and the final return are on
// lines missing from this listing.
343 PassRefPtr
<StringImpl
> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch
)
345 const UChar
* from
= m_data
;
346 const UChar
* fromend
= from
+ m_length
;
348 // Assume the common case will not remove any characters
349 while (from
!= fromend
&& !findMatch(*from
))
// Scratch buffer sized for the whole string: the output can never be longer.
354 StringBuffer
data(m_length
);
355 UChar
* to
= data
.characters();
// outc = number of leading characters scanned above that need no filtering.
356 unsigned outc
= from
- m_data
;
359 memcpy(to
, m_data
, outc
* sizeof(UChar
));
// Alternate between a run of matching characters and copying a run of non-matching ones.
362 while (from
!= fromend
&& findMatch(*from
))
364 while (from
!= fromend
&& !findMatch(*from
))
365 to
[outc
++] = *from
++;
// Builds a whitespace-simplified copy of this string in a scratch buffer:
// runs for which isSpaceOrNewline is true are processed by the first inner
// loop, other characters are copied verbatim by the second.
// NOTE(review): the declaration of outc, the body of the whitespace loop
// (lines 388-390, only `changedToSpace = true` is visible), the separator
// handling, and the bodies of the two final `if`s (presumably trimming a
// trailing ' ' and returning the original when nothing changed) are missing.
375 PassRefPtr
<StringImpl
> StringImpl::simplifyWhiteSpace()
// Scratch buffer sized for the whole string: the output can never be longer.
377 StringBuffer
data(m_length
);
379 const UChar
* from
= m_data
;
380 const UChar
* fromend
= from
+ m_length
;
382 bool changedToSpace
= false;
384 UChar
* to
= data
.characters();
387 while (from
!= fromend
&& isSpaceOrNewline(*from
)) {
389 changedToSpace
= true;
392 while (from
!= fromend
&& !isSpaceOrNewline(*from
))
393 to
[outc
++] = *from
++;
// Checks whether the output currently ends with a space character.
400 if (outc
> 0 && to
[outc
- 1] == ' ')
// Same length and no whitespace rewritten: the output equals the input.
403 if (static_cast<unsigned>(outc
) == m_length
&& !changedToSpace
)
// Converts this string to int in the given `base` by forwarding its characters
// and length to charactersToIntStrict; `ok` is passed through to that helper.
411 int StringImpl::toIntStrict(bool* ok
, int base
)
413 return charactersToIntStrict(m_data
, m_length
, ok
, base
);
// Converts this string to unsigned in the given `base` by forwarding its
// characters and length to charactersToUIntStrict; `ok` is passed through.
416 unsigned StringImpl::toUIntStrict(bool* ok
, int base
)
418 return charactersToUIntStrict(m_data
, m_length
, ok
, base
);
// Converts this string to int64_t in the given `base` by forwarding its
// characters and length to charactersToInt64Strict; `ok` is passed through.
421 int64_t StringImpl::toInt64Strict(bool* ok
, int base
)
423 return charactersToInt64Strict(m_data
, m_length
, ok
, base
);
// Converts this string to uint64_t in the given `base` by forwarding its
// characters and length to charactersToUInt64Strict; `ok` is passed through.
426 uint64_t StringImpl::toUInt64Strict(bool* ok
, int base
)
428 return charactersToUInt64Strict(m_data
, m_length
, ok
, base
);
// Converts this string to intptr_t in the given `base` by forwarding its
// characters and length to charactersToIntPtrStrict; `ok` is passed through.
431 intptr_t StringImpl::toIntPtrStrict(bool* ok
, int base
)
433 return charactersToIntPtrStrict(m_data
, m_length
, ok
, base
);
// Converts this string to int by forwarding its characters and length to
// charactersToInt; `ok` is passed through to that helper.
436 int StringImpl::toInt(bool* ok
)
438 return charactersToInt(m_data
, m_length
, ok
);
// Converts this string to unsigned by forwarding its characters and length to
// charactersToUInt; `ok` is passed through to that helper.
441 unsigned StringImpl::toUInt(bool* ok
)
443 return charactersToUInt(m_data
, m_length
, ok
);
// Converts this string to int64_t by forwarding its characters and length to
// charactersToInt64; `ok` is passed through to that helper.
446 int64_t StringImpl::toInt64(bool* ok
)
448 return charactersToInt64(m_data
, m_length
, ok
);
// Converts this string to uint64_t by forwarding its characters and length to
// charactersToUInt64; `ok` is passed through to that helper.
451 uint64_t StringImpl::toUInt64(bool* ok
)
453 return charactersToUInt64(m_data
, m_length
, ok
);
// Converts this string to intptr_t by forwarding its characters and length to
// charactersToIntPtr; `ok` is passed through to that helper.
456 intptr_t StringImpl::toIntPtr(bool* ok
)
458 return charactersToIntPtr(m_data
, m_length
, ok
);
// Converts this string to double by forwarding its characters and length to
// charactersToDouble; `ok` and `didReadNumber` are passed through unchanged.
461 double StringImpl::toDouble(bool* ok
, bool* didReadNumber
)
463 return charactersToDouble(m_data
, m_length
, ok
, didReadNumber
);
// Converts this string to float by forwarding its characters and length to
// charactersToFloat; `ok` and `didReadNumber` are passed through unchanged.
466 float StringImpl::toFloat(bool* ok
, bool* didReadNumber
)
468 return charactersToFloat(m_data
, m_length
, ok
, didReadNumber
);
// File-local helper used by find(const char*, ...): compares `length` UChars
// at `a` with the bytes at `b`.
// NOTE(review): the loop, the comparison and the return statements are on
// lines missing from this listing; only the byte fetch is visible.
471 static bool equal(const UChar
* a
, const char* b
, int length
)
// Each byte is read as unsigned char, so values >= 0x80 are not sign-extended.
475 unsigned char bc
= *b
++;
// Compares `length` UChars at `a` with the bytes at `b`, applying foldCase to
// both sides before each comparison.
// NOTE(review): the loop header and the return statements are on lines
// missing from this listing.
482 bool equalIgnoringCase(const UChar
* a
, const char* b
, unsigned length
)
// Each byte is read as unsigned char, so values >= 0x80 are not sign-extended.
485 unsigned char bc
= *b
++;
486 if (foldCase(*a
++) != foldCase(bc
))
// File-local helper: true when umemcasecmp reports no difference between the
// first `length` UChars of `a` and `b`.
492 static inline bool equalIgnoringCase(const UChar
* a
, const UChar
* b
, int length
)
495 return umemcasecmp(a
, b
, length
) == 0;
// Three-way comparison of two strings by their UChar values. A null pointer
// is treated as a string of length 0. At the first differing position within
// the common prefix the result is 1 or -1; if there is none, the longer string
// compares greater.
// NOTE(review): the declaration and advance of `pos`, the condition selecting
// the return at line 513, and the equal-length result are on missing lines.
498 int codePointCompare(const StringImpl
* s1
, const StringImpl
* s2
)
500 const unsigned l1
= s1
? s1
->length() : 0;
501 const unsigned l2
= s2
? s2
->length() : 0;
// Only the first lmin characters exist in both strings.
502 const unsigned lmin
= l1
< l2
? l1
: l2
;
503 const UChar
* c1
= s1
? s1
->characters() : 0;
504 const UChar
* c2
= s2
? s2
->characters() : 0;
// Walk the common prefix while the characters agree.
506 while (pos
< lmin
&& *c1
== *c2
) {
513 return (c1
[0] > c2
[0]) ? 1 : -1;
518 return (l1
> l2
) ? 1 : -1;
// Searches this string for character `c` beginning at index `start`, by
// forwarding to WTF::find over the raw character buffer.
521 size_t StringImpl::find(UChar c
, unsigned start
)
523 return WTF::find(m_data
, m_length
, c
, start
);
// Searches this string, beginning at index `start`, using `matchFunction`, by
// forwarding to WTF::find over the raw character buffer.
526 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction
, unsigned start
)
528 return WTF::find(m_data
, m_length
, matchFunction
, start
);
// Searches this string for the C string `matchString`, starting at `index`.
// Uses an additive rolling hash over a window of matchLength characters and
// only runs the full comparison (equal) when the hashes agree.
// NOTE(review): the null check, the conditions guarding several early exits,
// the declaration of `i`, the loop's termination test against `delta` and the
// final return are on lines missing from this listing.
531 size_t StringImpl::find(const char* matchString
, unsigned index
)
533 // Check for null or empty string to match against
536 size_t matchStringLength
= strlen(matchString
);
// Range-check the size_t length before it is narrowed to unsigned below.
537 if (matchStringLength
> numeric_limits
<unsigned>::max())
539 unsigned matchLength
= matchStringLength
;
541 return min(index
, length());
543 // Optimization 1: fast case for strings of length 1.
544 if (matchLength
== 1)
545 return WTF::find(characters(), length(), *(const unsigned char*)matchString
, index
);
547 // Check index & matchLength are in range.
548 if (index
> length())
550 unsigned searchLength
= length() - index
;
551 if (matchLength
> searchLength
)
553 // delta is the number of additional times to test; delta == 0 means test only once.
554 unsigned delta
= searchLength
- matchLength
;
556 const UChar
* searchCharacters
= characters() + index
;
// The pattern bytes are read as unsigned char so they hash as values 0..255.
557 const unsigned char* matchCharacters
= (const unsigned char*)matchString
;
559 // Optimization 2: keep a running hash of the strings,
560 // only call memcmp if the hashes match.
561 unsigned searchHash
= 0;
562 unsigned matchHash
= 0;
// Both hashes are plain sums of the character values in the first window.
563 for (unsigned i
= 0; i
< matchLength
; ++i
) {
564 searchHash
+= searchCharacters
[i
];
565 matchHash
+= matchCharacters
[i
];
569 // keep looping until we match
570 while (searchHash
!= matchHash
|| !equal(searchCharacters
+ i
, matchString
, matchLength
)) {
// Slide the window one position: add the entering character, subtract the leaving one.
573 searchHash
+= searchCharacters
[i
+ matchLength
];
574 searchHash
-= searchCharacters
[i
];
// Case-insensitive search of this string for the C string `matchString`,
// starting at `index`. Each candidate position is tested with equalIgnoringCase.
// NOTE(review): the null check, the conditions guarding several early exits,
// the declaration of `i`, the loop body and the final return are on lines
// missing from this listing.
580 size_t StringImpl::findIgnoringCase(const char* matchString
, unsigned index
)
582 // Check for null or empty string to match against
585 size_t matchStringLength
= strlen(matchString
);
// Range-check the size_t length before it is narrowed to unsigned below.
586 if (matchStringLength
> numeric_limits
<unsigned>::max())
588 unsigned matchLength
= matchStringLength
;
590 return min(index
, length());
592 // Check index & matchLength are in range.
593 if (index
> length())
595 unsigned searchLength
= length() - index
;
596 if (matchLength
> searchLength
)
598 // delta is the number of additional times to test; delta == 0 means test only once.
599 unsigned delta
= searchLength
- matchLength
;
601 const UChar
* searchCharacters
= characters() + index
;
604 // keep looping until we match
605 while (!equalIgnoringCase(searchCharacters
+ i
, matchString
, matchLength
)) {
// Searches this string for `matchString`, starting at `index`. Uses an
// additive rolling hash over a window of matchLength characters and only runs
// memcmp when the hashes agree.
// NOTE(review): the null check, the conditions guarding several early exits,
// the declaration of `i`, the loop's termination test against `delta` and the
// final return are on lines missing from this listing.
613 size_t StringImpl::find(StringImpl
* matchString
, unsigned index
)
615 // Check for null or empty string to match against
618 unsigned matchLength
= matchString
->length();
620 return min(index
, length());
622 // Optimization 1: fast case for strings of length 1.
623 if (matchLength
== 1)
624 return WTF::find(characters(), length(), matchString
->characters()[0], index
);
626 // Check index & matchLength are in range.
627 if (index
> length())
629 unsigned searchLength
= length() - index
;
630 if (matchLength
> searchLength
)
632 // delta is the number of additional times to test; delta == 0 means test only once.
633 unsigned delta
= searchLength
- matchLength
;
635 const UChar
* searchCharacters
= characters() + index
;
636 const UChar
* matchCharacters
= matchString
->characters();
638 // Optimization 2: keep a running hash of the strings,
639 // only call memcmp if the hashes match.
640 unsigned searchHash
= 0;
641 unsigned matchHash
= 0;
// Both hashes are plain sums of the character values in the first window.
642 for (unsigned i
= 0; i
< matchLength
; ++i
) {
643 searchHash
+= searchCharacters
[i
];
644 matchHash
+= matchCharacters
[i
];
648 // keep looping until we match
649 while (searchHash
!= matchHash
|| memcmp(searchCharacters
+ i
, matchCharacters
, matchLength
* sizeof(UChar
))) {
// Slide the window one position: add the entering character, subtract the leaving one.
652 searchHash
+= searchCharacters
[i
+ matchLength
];
653 searchHash
-= searchCharacters
[i
];
// Case-insensitive search of this string for `matchString`, starting at
// `index`. Each candidate position is tested with equalIgnoringCase.
// NOTE(review): the null check, the conditions guarding several early exits,
// the declaration of `i`, the loop body and the final return are on lines
// missing from this listing.
659 size_t StringImpl::findIgnoringCase(StringImpl
* matchString
, unsigned index
)
661 // Check for null or empty string to match against
664 unsigned matchLength
= matchString
->length();
666 return min(index
, length());
668 // Check index & matchLength are in range.
669 if (index
> length())
671 unsigned searchLength
= length() - index
;
672 if (matchLength
> searchLength
)
674 // delta is the number of additional times to test; delta == 0 means test only once.
675 unsigned delta
= searchLength
- matchLength
;
677 const UChar
* searchCharacters
= characters() + index
;
678 const UChar
* matchCharacters
= matchString
->characters();
681 // keep looping until we match
682 while (!equalIgnoringCase(searchCharacters
+ i
, matchCharacters
, matchLength
)) {
// Searches this string for character `c` using `index` as the position
// argument, by forwarding to WTF::reverseFind over the raw character buffer.
690 size_t StringImpl::reverseFind(UChar c
, unsigned index
)
692 return WTF::reverseFind(m_data
, m_length
, c
, index
);
// Reverse search of this string for `matchString`; candidate positions start
// at min(index, length() - matchLength). Uses an additive rolling hash over a
// window of matchLength characters and only runs memcmp when the hashes agree.
// NOTE(review): the null check, the conditions guarding several early exits,
// the loop's termination test, the update of `delta` inside the loop and the
// final return are on lines missing from this listing.
695 size_t StringImpl::reverseFind(StringImpl
* matchString
, unsigned index
)
697 // Check for null or empty string to match against
700 unsigned matchLength
= matchString
->length();
702 return min(index
, length());
704 // Optimization 1: fast case for strings of length 1.
705 if (matchLength
== 1)
706 return WTF::reverseFind(characters(), length(), matchString
->characters()[0], index
);
708 // Check index & matchLength are in range.
709 if (matchLength
> length())
711 // delta is the number of additional times to test; delta == 0 means test only once.
712 unsigned delta
= min(index
, length() - matchLength
);
714 const UChar
*searchCharacters
= characters();
715 const UChar
*matchCharacters
= matchString
->characters();
717 // Optimization 2: keep a running hash of the strings,
718 // only call memcmp if the hashes match.
719 unsigned searchHash
= 0;
720 unsigned matchHash
= 0;
// The initial window starts at offset delta, the first position to be tested.
721 for (unsigned i
= 0; i
< matchLength
; ++i
) {
722 searchHash
+= searchCharacters
[delta
+ i
];
723 matchHash
+= matchCharacters
[i
];
726 // keep looping until we match
727 while (searchHash
!= matchHash
|| memcmp(searchCharacters
+ delta
, matchCharacters
, matchLength
* sizeof(UChar
))) {
// Update the window hash: subtract the character at delta + matchLength and add the one at delta.
731 searchHash
-= searchCharacters
[delta
+ matchLength
];
732 searchHash
+= searchCharacters
[delta
];
// Case-insensitive reverse search of this string for `matchString`; candidate
// positions start at min(index, length() - matchLength) and each one is tested
// with equalIgnoringCase.
// NOTE(review): the null check, the conditions guarding the early exits, the
// loop body and the final return are on lines missing from this listing.
737 size_t StringImpl::reverseFindIgnoringCase(StringImpl
* matchString
, unsigned index
)
739 // Check for null or empty string to match against
742 unsigned matchLength
= matchString
->length();
744 return min(index
, length());
746 // Check index & matchLength are in range.
747 if (matchLength
> length())
749 // delta is the number of additional times to test; delta == 0 means test only once.
750 unsigned delta
= min(index
, length() - matchLength
);
752 const UChar
*searchCharacters
= characters();
753 const UChar
*matchCharacters
= matchString
->characters();
755 // keep looping until we match
756 while (!equalIgnoringCase(searchCharacters
+ delta
, matchCharacters
, matchLength
)) {
// Reports whether this string ends with `matchString`, case-sensitively or
// not depending on `caseSensitive`.
// NOTE(review): lines 765-766 (possibly an assertion on `matchString`) and the
// result when this string is shorter than `matchString` are not visible here.
764 bool StringImpl::endsWith(StringImpl
* matchString
, bool caseSensitive
)
767 if (m_length
>= matchString
->m_length
) {
// `start` is the only index at which a suffix match can begin.
768 unsigned start
= m_length
- matchString
->m_length
;
// A search from `start` that reports a match at `start` means the tail equals matchString.
769 return (caseSensitive
? find(matchString
, start
) : findIgnoringCase(matchString
, start
)) == start
;
// Returns a string in which occurrences of `oldC` are replaced by `newC`.
// The first loop looks for any occurrence of `oldC`; the second builds the
// same-length result character by character.
// NOTE(review): the early exits (lines 775-785), the declaration of `i` and
// `data`, and the substitution/store inside the second loop (lines 790-792)
// are on lines missing from this listing.
774 PassRefPtr
<StringImpl
> StringImpl::replace(UChar oldC
, UChar newC
)
779 for (i
= 0; i
!= m_length
; ++i
)
780 if (m_data
[i
] == oldC
)
786 RefPtr
<StringImpl
> newImpl
= createUninitialized(m_length
, data
);
788 for (i
= 0; i
!= m_length
; ++i
) {
789 UChar ch
= m_data
[i
];
794 return newImpl
.release();
// Returns a string in which the `lengthToReplace` characters at `position`
// are replaced by the contents of `str` (a null `str` inserts nothing).
// Both `position` and `lengthToReplace` are clamped to this string's bounds.
// NOTE(review): the result for the no-op case (line 803), the action when the
// overflow guard fires (line 807) and line 812 (presumably a null check on
// `str` guarding the copy at line 813) are not visible in this listing.
797 PassRefPtr
<StringImpl
> StringImpl::replace(unsigned position
, unsigned lengthToReplace
, StringImpl
* str
)
799 position
= min(position
, length());
800 lengthToReplace
= min(lengthToReplace
, length() - position
);
801 unsigned lengthToInsert
= str
? str
->length() : 0;
// Nothing removed and nothing inserted.
802 if (!lengthToReplace
&& !lengthToInsert
)
// Overflow guard on the result length (length() - lengthToReplace + lengthToInsert).
806 if ((length() - lengthToReplace
) >= (numeric_limits
<unsigned>::max() - lengthToInsert
))
809 RefPtr
<StringImpl
> newImpl
=
810 createUninitialized(length() - lengthToReplace
+ lengthToInsert
, data
);
// Copy the prefix before `position`.
811 memcpy(data
, characters(), position
* sizeof(UChar
));
// Copy the inserted text.
813 memcpy(data
+ position
, str
->characters(), lengthToInsert
* sizeof(UChar
));
// Copy the suffix that follows the replaced range.
814 memcpy(data
+ position
+ lengthToInsert
, characters() + position
+ lengthToReplace
,
815 (length() - position
- lengthToReplace
) * sizeof(UChar
));
816 return newImpl
.release();
// Returns a string in which every occurrence of the character `pattern` is
// replaced by the string `replacement`. Works in two passes: count the
// matches to size the result, then copy segments and replacements into it.
// NOTE(review): the null check on `replacement`, the body of the counting
// loop, the zero-match result, the actions taken when the overflow guards
// fire, and the reset of srcSegmentStart before the second pass are on lines
// missing from this listing.
819 PassRefPtr
<StringImpl
> StringImpl::replace(UChar pattern
, StringImpl
* replacement
)
824 unsigned repStrLength
= replacement
->length();
825 size_t srcSegmentStart
= 0;
826 unsigned matchCount
= 0;
// Pass 1: locate each occurrence of `pattern`.
829 while ((srcSegmentStart
= find(pattern
, srcSegmentStart
)) != notFound
) {
834 // If we have 0 matches, we don't have to do any more work
// Overflow guard for matchCount * repStrLength.
838 if (repStrLength
&& matchCount
> numeric_limits
<unsigned>::max() / repStrLength
)
841 unsigned replaceSize
= matchCount
* repStrLength
;
// Each match removes exactly one character from the source length.
842 unsigned newSize
= m_length
- matchCount
;
// Overflow guard for newSize + replaceSize.
843 if (newSize
>= (numeric_limits
<unsigned>::max() - replaceSize
))
846 newSize
+= replaceSize
;
849 RefPtr
<StringImpl
> newImpl
= createUninitialized(newSize
, data
);
851 // Construct the new data
852 size_t srcSegmentEnd
;
853 unsigned srcSegmentLength
;
855 unsigned dstOffset
= 0;
// Pass 2: copy the text before each match, then the replacement, then step past the matched character.
857 while ((srcSegmentEnd
= find(pattern
, srcSegmentStart
)) != notFound
) {
858 srcSegmentLength
= srcSegmentEnd
- srcSegmentStart
;
859 memcpy(data
+ dstOffset
, m_data
+ srcSegmentStart
, srcSegmentLength
* sizeof(UChar
));
860 dstOffset
+= srcSegmentLength
;
861 memcpy(data
+ dstOffset
, replacement
->m_data
, repStrLength
* sizeof(UChar
));
862 dstOffset
+= repStrLength
;
863 srcSegmentStart
= srcSegmentEnd
+ 1;
// Copy whatever follows the last match.
866 srcSegmentLength
= m_length
- srcSegmentStart
;
867 memcpy(data
+ dstOffset
, m_data
+ srcSegmentStart
, srcSegmentLength
* sizeof(UChar
));
// The bytes written must add up to the size computed in pass 1.
869 ASSERT(dstOffset
+ srcSegmentLength
== newImpl
->length());
871 return newImpl
.release();
// Returns a string in which every occurrence of the string `pattern` is
// replaced by `replacement`. Works in two passes: count the matches to size
// the result, then copy segments and replacements into it.
// NOTE(review): the result when either argument is null, the handling of an
// empty pattern (lines 880-882), the match counting statement, the zero-match
// result, the actions taken when the overflow guards fire, and the reset of
// srcSegmentStart before the second pass are on lines missing from this listing.
874 PassRefPtr
<StringImpl
> StringImpl::replace(StringImpl
* pattern
, StringImpl
* replacement
)
876 if (!pattern
|| !replacement
)
879 unsigned patternLength
= pattern
->length();
883 unsigned repStrLength
= replacement
->length();
884 size_t srcSegmentStart
= 0;
885 unsigned matchCount
= 0;
// Pass 1: locate each occurrence, resuming the search just past the previous match.
888 while ((srcSegmentStart
= find(pattern
, srcSegmentStart
)) != notFound
) {
890 srcSegmentStart
+= patternLength
;
893 // If we have 0 matches, we don't have to do any more work
// Each match removes patternLength characters from the source length.
897 unsigned newSize
= m_length
- matchCount
* patternLength
;
// Overflow guard for matchCount * repStrLength.
898 if (repStrLength
&& matchCount
> numeric_limits
<unsigned>::max() / repStrLength
)
// Overflow guard for newSize + matchCount * repStrLength.
901 if (newSize
> (numeric_limits
<unsigned>::max() - matchCount
* repStrLength
))
904 newSize
+= matchCount
* repStrLength
;
907 RefPtr
<StringImpl
> newImpl
= createUninitialized(newSize
, data
);
909 // Construct the new data
910 size_t srcSegmentEnd
;
911 unsigned srcSegmentLength
;
913 unsigned dstOffset
= 0;
// Pass 2: copy the text before each match, then the replacement, then step past the matched pattern.
915 while ((srcSegmentEnd
= find(pattern
, srcSegmentStart
)) != notFound
) {
916 srcSegmentLength
= srcSegmentEnd
- srcSegmentStart
;
917 memcpy(data
+ dstOffset
, m_data
+ srcSegmentStart
, srcSegmentLength
* sizeof(UChar
));
918 dstOffset
+= srcSegmentLength
;
919 memcpy(data
+ dstOffset
, replacement
->m_data
, repStrLength
* sizeof(UChar
));
920 dstOffset
+= repStrLength
;
921 srcSegmentStart
= srcSegmentEnd
+ patternLength
;
// Copy whatever follows the last match.
924 srcSegmentLength
= m_length
- srcSegmentStart
;
925 memcpy(data
+ dstOffset
, m_data
+ srcSegmentStart
, srcSegmentLength
* sizeof(UChar
));
// The bytes written must add up to the size computed in pass 1.
927 ASSERT(dstOffset
+ srcSegmentLength
== newImpl
->length());
929 return newImpl
.release();
// Equality of two StringImpl pointers, delegated to StringHash::equal.
932 bool equal(const StringImpl
* a
, const StringImpl
* b
)
934 return StringHash::equal(a
, b
);
// Compares a StringImpl with a C string, walking a's characters alongside b's bytes.
// NOTE(review): the null handling (lines 938-943), the per-character
// comparison and the return statements are on lines missing from this
// listing; only the loop header and the byte fetch are visible.
937 bool equal(const StringImpl
* a
, const char* b
)
944 unsigned length
= a
->length();
945 const UChar
* as
= a
->characters();
946 for (unsigned i
= 0; i
!= length
; ++i
) {
// Each byte is read as unsigned char, so values >= 0x80 are not sign-extended.
947 unsigned char bc
= b
[i
];
// Case-insensitive equality of two StringImpl pointers, delegated to
// CaseFoldingHash::equal.
957 bool equalIgnoringCase(StringImpl
* a
, StringImpl
* b
)
959 return CaseFoldingHash::equal(a
, b
);
// Case-insensitive comparison of a StringImpl with a C string. A first loop
// compares with toASCIILower; a second loop uses foldCase for input that
// includes non-ASCII characters.
// NOTE(review): the null handling (lines 963-968), the declarations of
// `equal`, `ac` and `bc` in the first loop, and the condition that selects
// the second loop are on lines missing from this listing.
962 bool equalIgnoringCase(StringImpl
* a
, const char* b
)
969 unsigned length
= a
->length();
970 const UChar
* as
= a
->characters();
972 // Do a faster loop for the case where all the characters are ASCII.
975 for (unsigned i
= 0; i
!= length
; ++i
) {
981 equal
= equal
&& (toASCIILower(ac
) == toASCIILower(bc
));
984 // Do a slower implementation for cases that include non-ASCII characters.
987 for (unsigned i
= 0; i
!= length
; ++i
) {
988 unsigned char bc
= b
[i
];
989 equal
= equal
&& (foldCase(as
[i
]) == foldCase(bc
));
// b must also end exactly where a ends: b[length] has to be the NUL terminator.
993 return equal
&& !b
[length
];
// Equality test with special cases for a null pointer paired with a
// zero-length string, checked in both directions after StringHash::equal.
// NOTE(review): every return statement is on a line missing from this
// listing; presumably each of the three conditions yields true and the
// fall-through yields false - confirm against the full file.
996 bool equalIgnoringNullity(StringImpl
* a
, StringImpl
* b
)
998 if (StringHash::equal(a
, b
))
// a is null and b is an empty string.
1000 if (!a
&& b
&& !b
->length())
// b is null and a is an empty string.
1002 if (!b
&& a
&& !a
->length())
// Returns the direction of the first character whose direction is
// LeftToRight, RightToLeft or RightToLeftArabic (the latter two both yield
// RightToLeft). If no such character exists the result is LeftToRight.
// `hasStrongDirectionality` is optional; when non-null it is set to true if
// such a character was found and false otherwise.
1008 WTF::Unicode::Direction
StringImpl::defaultWritingDirection(bool* hasStrongDirectionality
)
1010 for (unsigned i
= 0; i
< m_length
; ++i
) {
1011 WTF::Unicode::Direction charDirection
= WTF::Unicode::direction(m_data
[i
]);
1012 if (charDirection
== WTF::Unicode::LeftToRight
) {
1013 if (hasStrongDirectionality
)
1014 *hasStrongDirectionality
= true;
1015 return WTF::Unicode::LeftToRight
;
1017 if (charDirection
== WTF::Unicode::RightToLeft
|| charDirection
== WTF::Unicode::RightToLeftArabic
) {
1018 if (hasStrongDirectionality
)
1019 *hasStrongDirectionality
= true;
1020 return WTF::Unicode::RightToLeft
;
// No character decided the direction: report that and fall back to LeftToRight.
1023 if (hasStrongDirectionality
)
1024 *hasStrongDirectionality
= false;
1025 return WTF::Unicode::LeftToRight
;
1028 // This is a hot function because it's used when parsing HTML.
// Builds a StringImpl from `characters` with every zero-valued UChar dropped.
// The non-zero characters are copied into a StringBuffer of `length` entries,
// the buffer is shrunk to the number kept, and the result is handed to adopt().
1029 PassRefPtr
<StringImpl
> StringImpl::createStrippingNullCharactersSlowCase(const UChar
* characters
, unsigned length
)
1031 StringBuffer
strippedCopy(length
);
1032 unsigned strippedLength
= 0;
1033 for (unsigned i
= 0; i
< length
; i
++) {
// The declaration doubles as the test: zero-valued characters are skipped.
1034 if (int c
= characters
[i
])
1035 strippedCopy
[strippedLength
++] = c
;
1037 ASSERT(strippedLength
< length
); // Only take the slow case when stripping.
1038 strippedCopy
.shrink(strippedLength
);
1039 return adopt(strippedCopy
);
// Creates a StringImpl from the contents of `buffer`: the length is read
// first, then the result of buffer.release() is passed to the constructor.
// NOTE(review): lines 1045-1046 (presumably a zero-length special case) are
// not visible in this listing.
1042 PassRefPtr
<StringImpl
> StringImpl::adopt(StringBuffer
& buffer
)
// Read the length before release() is called on the buffer.
1044 unsigned length
= buffer
.length();
1047 return adoptRef(new StringImpl(buffer
.release(), length
));
// Counts words in this string, using u_isspace to tell separators from word
// characters and `atWord` to track whether the scan is currently inside a word.
// NOTE(review): the declaration of `i`, the updates of `atWord` and
// `wordCount`, the action taken once the limit is reached and the final
// return are on lines missing from this listing.
1050 int StringImpl::wordCount(int maxWordsToCount
)
1052 unsigned wordCount
= 0;
1054 bool atWord
= false;
1055 for (i
= 0; i
< m_length
; i
++) {
1056 if (u_isspace(m_data
[i
])) {
// First non-space character after a separator (or at the start).
1058 } else if (!atWord
) {
// The limit is compared as unsigned, so a negative maxWordsToCount acts as a very large limit.
1060 if (wordCount
>= (unsigned)maxWordsToCount
)
// Creates a copy of `string` with room for one extra trailing character and
// the s_refCountFlagHasTerminatingNullCharacter flag set. The copy reports
// the same length and hash as the source.
// NOTE(review): the action when the length guard fires (line 1075) and the
// store of the terminator itself (line 1078) are not visible in this listing.
1068 PassRefPtr
<StringImpl
> StringImpl::createWithTerminatingNullCharacter(const StringImpl
& string
)
1070 // Use createUninitialized instead of 'new StringImpl' so that the string and its buffer
1071 // get allocated in a single memory block.
1073 unsigned length
= string
.m_length
;
// length + 1 below must not wrap around.
1074 if (length
>= numeric_limits
<unsigned>::max())
1076 RefPtr
<StringImpl
> terminatedString
= createUninitialized(length
+ 1, data
);
1077 memcpy(data
, string
.m_data
, length
* sizeof(UChar
));
// Undo the + 1 used for allocation so the extra slot is not counted in the length.
1079 terminatedString
->m_length
--;
1080 terminatedString
->m_hash
= string
.m_hash
;
1081 terminatedString
->m_refCountAndFlags
|= s_refCountFlagHasTerminatingNullCharacter
;
1082 return terminatedString
.release();
// Returns a new StringImpl created from this string's characters and length
// via create(const UChar*, unsigned), which copies the characters.
1085 PassRefPtr
<StringImpl
> StringImpl::threadsafeCopy() const
1087 return create(m_data
, m_length
);
1090 PassRefPtr
<StringImpl
> StringImpl::crossThreadString()
1092 if (SharedUChar
* sharedBuffer
= this->sharedBuffer())
1093 return adoptRef(new StringImpl(m_data
, m_length
, sharedBuffer
->crossThreadCopy()));
1095 // If no shared buffer is available, create a copy.
1096 return threadsafeCopy();