]>
Commit | Line | Data |
---|---|---|
4e4e5a6f A |
1 | /* |
2 | * Copyright (C) 1999 Lars Knoll (knoll@kde.org) | |
3 | * (C) 1999 Antti Koivisto (koivisto@kde.org) | |
4 | * (C) 2001 Dirk Mueller ( mueller@kde.org ) | |
5 | * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. | |
6 | * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net) | |
7 | * | |
8 | * This library is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Library General Public | |
10 | * License as published by the Free Software Foundation; either | |
11 | * version 2 of the License, or (at your option) any later version. | |
12 | * | |
13 | * This library is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | * Library General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU Library General Public License | |
19 | * along with this library; see the file COPYING.LIB. If not, write to | |
20 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
21 | * Boston, MA 02110-1301, USA. | |
22 | * | |
23 | */ | |
24 | ||
25 | #include "config.h" | |
26 | #include "StringImpl.h" | |
27 | ||
28 | #include "AtomicString.h" | |
29 | #include "StringBuffer.h" | |
30 | #include "StringHash.h" | |
31 | #include <wtf/StdLibExtras.h> | |
32 | #include <wtf/WTFThreadData.h> | |
33 | ||
34 | using namespace WTF; | |
35 | using namespace Unicode; | |
36 | using namespace std; | |
37 | ||
38 | namespace WebCore { | |
39 | ||
40 | static const unsigned minLengthToShare = 20; | |
41 | ||
42 | StringImpl::~StringImpl() | |
43 | { | |
44 | ASSERT(!isStatic()); | |
45 | ||
46 | if (isAtomic()) | |
47 | AtomicString::remove(this); | |
48 | #if USE(JSC) | |
49 | if (isIdentifier()) | |
50 | wtfThreadData().currentIdentifierTable()->remove(this); | |
51 | #endif | |
52 | ||
53 | BufferOwnership ownership = bufferOwnership(); | |
54 | if (ownership != BufferInternal) { | |
55 | if (ownership == BufferOwned) { | |
56 | ASSERT(!m_sharedBuffer); | |
57 | ASSERT(m_data); | |
58 | fastFree(const_cast<UChar*>(m_data)); | |
59 | } else if (ownership == BufferSubstring) { | |
60 | ASSERT(m_substringBuffer); | |
61 | m_substringBuffer->deref(); | |
62 | } else { | |
63 | ASSERT(ownership == BufferShared); | |
64 | ASSERT(m_sharedBuffer); | |
65 | m_sharedBuffer->deref(); | |
66 | } | |
67 | } | |
68 | } | |
69 | ||
70 | PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data) | |
71 | { | |
72 | if (!length) { | |
73 | data = 0; | |
74 | return empty(); | |
75 | } | |
76 | ||
77 | // Allocate a single buffer large enough to contain the StringImpl | |
78 | // struct as well as the data which it contains. This removes one | |
79 | // heap allocation from this call. | |
b80e6193 | 80 | if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar))) |
4e4e5a6f A |
81 | CRASH(); |
82 | size_t size = sizeof(StringImpl) + length * sizeof(UChar); | |
83 | StringImpl* string = static_cast<StringImpl*>(fastMalloc(size)); | |
84 | ||
85 | data = reinterpret_cast<UChar*>(string + 1); | |
86 | return adoptRef(new (string) StringImpl(length)); | |
87 | } | |
88 | ||
89 | PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length) | |
90 | { | |
91 | if (!characters || !length) | |
92 | return empty(); | |
93 | ||
94 | UChar* data; | |
b80e6193 | 95 | RefPtr<StringImpl> string = createUninitialized(length, data); |
4e4e5a6f | 96 | memcpy(data, characters, length * sizeof(UChar)); |
b80e6193 | 97 | return string.release(); |
4e4e5a6f A |
98 | } |
99 | ||
100 | PassRefPtr<StringImpl> StringImpl::create(const char* characters, unsigned length) | |
101 | { | |
102 | if (!characters || !length) | |
103 | return empty(); | |
104 | ||
105 | UChar* data; | |
b80e6193 | 106 | RefPtr<StringImpl> string = createUninitialized(length, data); |
4e4e5a6f A |
107 | for (unsigned i = 0; i != length; ++i) { |
108 | unsigned char c = characters[i]; | |
109 | data[i] = c; | |
110 | } | |
b80e6193 | 111 | return string.release(); |
4e4e5a6f A |
112 | } |
113 | ||
114 | PassRefPtr<StringImpl> StringImpl::create(const char* string) | |
115 | { | |
116 | if (!string) | |
117 | return empty(); | |
b80e6193 A |
118 | size_t length = strlen(string); |
119 | if (length > numeric_limits<unsigned>::max()) | |
120 | CRASH(); | |
121 | return create(string, length); | |
4e4e5a6f A |
122 | } |
123 | ||
124 | PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length, PassRefPtr<SharedUChar> sharedBuffer) | |
125 | { | |
126 | ASSERT(characters); | |
127 | ASSERT(minLengthToShare && length >= minLengthToShare); | |
128 | return adoptRef(new StringImpl(characters, length, sharedBuffer)); | |
129 | } | |
130 | ||
131 | SharedUChar* StringImpl::sharedBuffer() | |
132 | { | |
133 | if (m_length < minLengthToShare) | |
134 | return 0; | |
135 | // All static strings are smaller that the minimim length to share. | |
136 | ASSERT(!isStatic()); | |
137 | ||
138 | BufferOwnership ownership = bufferOwnership(); | |
139 | ||
140 | if (ownership == BufferInternal) | |
141 | return 0; | |
142 | if (ownership == BufferSubstring) | |
143 | return m_substringBuffer->sharedBuffer(); | |
144 | if (ownership == BufferOwned) { | |
145 | ASSERT(!m_sharedBuffer); | |
146 | m_sharedBuffer = SharedUChar::create(new SharableUChar(m_data)).releaseRef(); | |
147 | m_refCountAndFlags = (m_refCountAndFlags & ~s_refCountMaskBufferOwnership) | BufferShared; | |
148 | } | |
149 | ||
150 | ASSERT(bufferOwnership() == BufferShared); | |
151 | ASSERT(m_sharedBuffer); | |
152 | return m_sharedBuffer; | |
153 | } | |
154 | ||
155 | bool StringImpl::containsOnlyWhitespace() | |
156 | { | |
157 | // FIXME: The definition of whitespace here includes a number of characters | |
158 | // that are not whitespace from the point of view of RenderText; I wonder if | |
159 | // that's a problem in practice. | |
160 | for (unsigned i = 0; i < m_length; i++) | |
161 | if (!isASCIISpace(m_data[i])) | |
162 | return false; | |
163 | return true; | |
164 | } | |
165 | ||
166 | PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length) | |
167 | { | |
168 | if (start >= m_length) | |
169 | return empty(); | |
170 | unsigned maxLength = m_length - start; | |
171 | if (length >= maxLength) { | |
172 | if (!start) | |
173 | return this; | |
174 | length = maxLength; | |
175 | } | |
176 | return create(m_data + start, length); | |
177 | } | |
178 | ||
179 | UChar32 StringImpl::characterStartingAt(unsigned i) | |
180 | { | |
181 | if (U16_IS_SINGLE(m_data[i])) | |
182 | return m_data[i]; | |
183 | if (i + 1 < m_length && U16_IS_LEAD(m_data[i]) && U16_IS_TRAIL(m_data[i + 1])) | |
184 | return U16_GET_SUPPLEMENTARY(m_data[i], m_data[i + 1]); | |
185 | return 0; | |
186 | } | |
187 | ||
188 | PassRefPtr<StringImpl> StringImpl::lower() | |
189 | { | |
190 | // Note: This is a hot function in the Dromaeo benchmark, specifically the | |
191 | // no-op code path up through the first 'return' statement. | |
192 | ||
193 | // First scan the string for uppercase and non-ASCII characters: | |
194 | UChar ored = 0; | |
195 | bool noUpper = true; | |
196 | const UChar *end = m_data + m_length; | |
197 | for (const UChar* chp = m_data; chp != end; chp++) { | |
198 | if (UNLIKELY(isASCIIUpper(*chp))) | |
199 | noUpper = false; | |
200 | ored |= *chp; | |
201 | } | |
202 | ||
203 | // Nothing to do if the string is all ASCII with no uppercase. | |
204 | if (noUpper && !(ored & ~0x7F)) | |
205 | return this; | |
206 | ||
b80e6193 A |
207 | if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) |
208 | CRASH(); | |
4e4e5a6f | 209 | int32_t length = m_length; |
b80e6193 | 210 | |
4e4e5a6f A |
211 | UChar* data; |
212 | RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
213 | ||
214 | if (!(ored & ~0x7F)) { | |
215 | // Do a faster loop for the case where all the characters are ASCII. | |
216 | for (int i = 0; i < length; i++) { | |
217 | UChar c = m_data[i]; | |
218 | data[i] = toASCIILower(c); | |
219 | } | |
220 | return newImpl; | |
221 | } | |
222 | ||
223 | // Do a slower implementation for cases that include non-ASCII characters. | |
224 | bool error; | |
225 | int32_t realLength = Unicode::toLower(data, length, m_data, m_length, &error); | |
226 | if (!error && realLength == length) | |
227 | return newImpl; | |
228 | newImpl = createUninitialized(realLength, data); | |
229 | Unicode::toLower(data, realLength, m_data, m_length, &error); | |
230 | if (error) | |
231 | return this; | |
232 | return newImpl; | |
233 | } | |
234 | ||
235 | PassRefPtr<StringImpl> StringImpl::upper() | |
236 | { | |
237 | // This function could be optimized for no-op cases the way lower() is, | |
238 | // but in empirical testing, few actual calls to upper() are no-ops, so | |
239 | // it wouldn't be worth the extra time for pre-scanning. | |
240 | UChar* data; | |
b80e6193 A |
241 | RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); |
242 | ||
243 | if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) | |
244 | CRASH(); | |
4e4e5a6f A |
245 | int32_t length = m_length; |
246 | ||
247 | // Do a faster loop for the case where all the characters are ASCII. | |
248 | UChar ored = 0; | |
249 | for (int i = 0; i < length; i++) { | |
250 | UChar c = m_data[i]; | |
251 | ored |= c; | |
252 | data[i] = toASCIIUpper(c); | |
253 | } | |
254 | if (!(ored & ~0x7F)) | |
b80e6193 | 255 | return newImpl.release(); |
4e4e5a6f A |
256 | |
257 | // Do a slower implementation for cases that include non-ASCII characters. | |
258 | bool error; | |
259 | int32_t realLength = Unicode::toUpper(data, length, m_data, m_length, &error); | |
260 | if (!error && realLength == length) | |
261 | return newImpl; | |
262 | newImpl = createUninitialized(realLength, data); | |
263 | Unicode::toUpper(data, realLength, m_data, m_length, &error); | |
264 | if (error) | |
265 | return this; | |
b80e6193 | 266 | return newImpl.release(); |
4e4e5a6f A |
267 | } |
268 | ||
b80e6193 | 269 | PassRefPtr<StringImpl> StringImpl::secure(UChar character, bool hideLastCharacter) |
4e4e5a6f | 270 | { |
b80e6193 A |
271 | UChar* data; |
272 | RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); | |
273 | if (m_length) { | |
274 | const unsigned lastCharacterIndex = m_length - 1; | |
275 | for (unsigned i = 0; i < lastCharacterIndex; ++i) | |
276 | data[i] = character; | |
277 | data[lastCharacterIndex] = hideLastCharacter ? character : m_data[lastCharacterIndex]; | |
4e4e5a6f | 278 | } |
b80e6193 | 279 | return newImpl.release(); |
4e4e5a6f A |
280 | } |
281 | ||
282 | PassRefPtr<StringImpl> StringImpl::foldCase() | |
283 | { | |
284 | UChar* data; | |
b80e6193 A |
285 | RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); |
286 | ||
287 | if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max())) | |
288 | CRASH(); | |
4e4e5a6f A |
289 | int32_t length = m_length; |
290 | ||
291 | // Do a faster loop for the case where all the characters are ASCII. | |
292 | UChar ored = 0; | |
b80e6193 | 293 | for (int32_t i = 0; i < length; i++) { |
4e4e5a6f A |
294 | UChar c = m_data[i]; |
295 | ored |= c; | |
296 | data[i] = toASCIILower(c); | |
297 | } | |
298 | if (!(ored & ~0x7F)) | |
b80e6193 | 299 | return newImpl.release(); |
4e4e5a6f A |
300 | |
301 | // Do a slower implementation for cases that include non-ASCII characters. | |
302 | bool error; | |
303 | int32_t realLength = Unicode::foldCase(data, length, m_data, m_length, &error); | |
304 | if (!error && realLength == length) | |
b80e6193 | 305 | return newImpl.release(); |
4e4e5a6f A |
306 | newImpl = createUninitialized(realLength, data); |
307 | Unicode::foldCase(data, realLength, m_data, m_length, &error); | |
308 | if (error) | |
309 | return this; | |
b80e6193 | 310 | return newImpl.release(); |
4e4e5a6f A |
311 | } |
312 | ||
313 | PassRefPtr<StringImpl> StringImpl::stripWhiteSpace() | |
314 | { | |
315 | if (!m_length) | |
316 | return empty(); | |
317 | ||
318 | unsigned start = 0; | |
319 | unsigned end = m_length - 1; | |
320 | ||
321 | // skip white space from start | |
322 | while (start <= end && isSpaceOrNewline(m_data[start])) | |
323 | start++; | |
324 | ||
325 | // only white space | |
326 | if (start > end) | |
327 | return empty(); | |
328 | ||
329 | // skip white space from end | |
330 | while (end && isSpaceOrNewline(m_data[end])) | |
331 | end--; | |
332 | ||
333 | if (!start && end == m_length - 1) | |
334 | return this; | |
335 | return create(m_data + start, end + 1 - start); | |
336 | } | |
337 | ||
338 | PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch) | |
339 | { | |
340 | const UChar* from = m_data; | |
341 | const UChar* fromend = from + m_length; | |
342 | ||
343 | // Assume the common case will not remove any characters | |
344 | while (from != fromend && !findMatch(*from)) | |
345 | from++; | |
346 | if (from == fromend) | |
347 | return this; | |
348 | ||
349 | StringBuffer data(m_length); | |
350 | UChar* to = data.characters(); | |
351 | unsigned outc = from - m_data; | |
352 | ||
353 | if (outc) | |
354 | memcpy(to, m_data, outc * sizeof(UChar)); | |
355 | ||
356 | while (true) { | |
357 | while (from != fromend && findMatch(*from)) | |
358 | from++; | |
359 | while (from != fromend && !findMatch(*from)) | |
360 | to[outc++] = *from++; | |
361 | if (from == fromend) | |
362 | break; | |
363 | } | |
364 | ||
365 | data.shrink(outc); | |
366 | ||
367 | return adopt(data); | |
368 | } | |
369 | ||
370 | PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace() | |
371 | { | |
372 | StringBuffer data(m_length); | |
373 | ||
374 | const UChar* from = m_data; | |
375 | const UChar* fromend = from + m_length; | |
376 | int outc = 0; | |
377 | bool changedToSpace = false; | |
378 | ||
379 | UChar* to = data.characters(); | |
380 | ||
381 | while (true) { | |
382 | while (from != fromend && isSpaceOrNewline(*from)) { | |
383 | if (*from != ' ') | |
384 | changedToSpace = true; | |
385 | from++; | |
386 | } | |
387 | while (from != fromend && !isSpaceOrNewline(*from)) | |
388 | to[outc++] = *from++; | |
389 | if (from != fromend) | |
390 | to[outc++] = ' '; | |
391 | else | |
392 | break; | |
393 | } | |
394 | ||
395 | if (outc > 0 && to[outc - 1] == ' ') | |
396 | outc--; | |
397 | ||
398 | if (static_cast<unsigned>(outc) == m_length && !changedToSpace) | |
399 | return this; | |
400 | ||
401 | data.shrink(outc); | |
402 | ||
403 | return adopt(data); | |
404 | } | |
405 | ||
406 | int StringImpl::toIntStrict(bool* ok, int base) | |
407 | { | |
408 | return charactersToIntStrict(m_data, m_length, ok, base); | |
409 | } | |
410 | ||
411 | unsigned StringImpl::toUIntStrict(bool* ok, int base) | |
412 | { | |
413 | return charactersToUIntStrict(m_data, m_length, ok, base); | |
414 | } | |
415 | ||
416 | int64_t StringImpl::toInt64Strict(bool* ok, int base) | |
417 | { | |
418 | return charactersToInt64Strict(m_data, m_length, ok, base); | |
419 | } | |
420 | ||
421 | uint64_t StringImpl::toUInt64Strict(bool* ok, int base) | |
422 | { | |
423 | return charactersToUInt64Strict(m_data, m_length, ok, base); | |
424 | } | |
425 | ||
426 | intptr_t StringImpl::toIntPtrStrict(bool* ok, int base) | |
427 | { | |
428 | return charactersToIntPtrStrict(m_data, m_length, ok, base); | |
429 | } | |
430 | ||
431 | int StringImpl::toInt(bool* ok) | |
432 | { | |
433 | return charactersToInt(m_data, m_length, ok); | |
434 | } | |
435 | ||
436 | unsigned StringImpl::toUInt(bool* ok) | |
437 | { | |
438 | return charactersToUInt(m_data, m_length, ok); | |
439 | } | |
440 | ||
441 | int64_t StringImpl::toInt64(bool* ok) | |
442 | { | |
443 | return charactersToInt64(m_data, m_length, ok); | |
444 | } | |
445 | ||
446 | uint64_t StringImpl::toUInt64(bool* ok) | |
447 | { | |
448 | return charactersToUInt64(m_data, m_length, ok); | |
449 | } | |
450 | ||
451 | intptr_t StringImpl::toIntPtr(bool* ok) | |
452 | { | |
453 | return charactersToIntPtr(m_data, m_length, ok); | |
454 | } | |
455 | ||
456 | double StringImpl::toDouble(bool* ok) | |
457 | { | |
458 | return charactersToDouble(m_data, m_length, ok); | |
459 | } | |
460 | ||
461 | float StringImpl::toFloat(bool* ok) | |
462 | { | |
463 | return charactersToFloat(m_data, m_length, ok); | |
464 | } | |
465 | ||
466 | static bool equal(const UChar* a, const char* b, int length) | |
467 | { | |
468 | ASSERT(length >= 0); | |
469 | while (length--) { | |
470 | unsigned char bc = *b++; | |
471 | if (*a++ != bc) | |
472 | return false; | |
473 | } | |
474 | return true; | |
475 | } | |
476 | ||
477 | bool equalIgnoringCase(const UChar* a, const char* b, unsigned length) | |
478 | { | |
479 | while (length--) { | |
480 | unsigned char bc = *b++; | |
481 | if (foldCase(*a++) != foldCase(bc)) | |
482 | return false; | |
483 | } | |
484 | return true; | |
485 | } | |
486 | ||
487 | static inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length) | |
488 | { | |
489 | ASSERT(length >= 0); | |
490 | return umemcasecmp(a, b, length) == 0; | |
491 | } | |
492 | ||
493 | int StringImpl::find(const char* chs, int index, bool caseSensitive) | |
494 | { | |
495 | if (!chs || index < 0) | |
496 | return -1; | |
497 | ||
b80e6193 A |
498 | size_t matchStringLength = strlen(chs); |
499 | if (matchStringLength > static_cast<unsigned>(numeric_limits<int>::max())) | |
500 | CRASH(); | |
501 | int chsLength = matchStringLength; | |
4e4e5a6f A |
502 | int n = m_length - index; |
503 | if (n < 0) | |
504 | return -1; | |
505 | n -= chsLength - 1; | |
506 | if (n <= 0) | |
507 | return -1; | |
508 | ||
509 | const char* chsPlusOne = chs + 1; | |
510 | int chsLengthMinusOne = chsLength - 1; | |
511 | ||
512 | const UChar* ptr = m_data + index - 1; | |
513 | if (caseSensitive) { | |
514 | UChar c = *chs; | |
515 | do { | |
516 | if (*++ptr == c && equal(ptr + 1, chsPlusOne, chsLengthMinusOne)) | |
517 | return m_length - chsLength - n + 1; | |
518 | } while (--n); | |
519 | } else { | |
520 | UChar lc = Unicode::foldCase(*chs); | |
521 | do { | |
522 | if (Unicode::foldCase(*++ptr) == lc && equalIgnoringCase(ptr + 1, chsPlusOne, chsLengthMinusOne)) | |
523 | return m_length - chsLength - n + 1; | |
524 | } while (--n); | |
525 | } | |
526 | ||
527 | return -1; | |
528 | } | |
529 | ||
530 | int StringImpl::find(UChar c, int start) | |
531 | { | |
532 | return WebCore::find(m_data, m_length, c, start); | |
533 | } | |
534 | ||
535 | int StringImpl::find(CharacterMatchFunctionPtr matchFunction, int start) | |
536 | { | |
537 | return WebCore::find(m_data, m_length, matchFunction, start); | |
538 | } | |
539 | ||
540 | int StringImpl::find(StringImpl* str, int index, bool caseSensitive) | |
541 | { | |
542 | /* | |
543 | We use a simple trick for efficiency's sake. Instead of | |
544 | comparing strings, we compare the sum of str with that of | |
545 | a part of this string. Only if that matches, we call memcmp | |
546 | or ucstrnicmp. | |
547 | */ | |
548 | ASSERT(str); | |
549 | if (index < 0) | |
550 | index += m_length; | |
551 | int lstr = str->m_length; | |
552 | int lthis = m_length - index; | |
553 | if ((unsigned)lthis > m_length) | |
554 | return -1; | |
555 | int delta = lthis - lstr; | |
556 | if (delta < 0) | |
557 | return -1; | |
558 | ||
559 | const UChar* uthis = m_data + index; | |
560 | const UChar* ustr = str->m_data; | |
561 | unsigned hthis = 0; | |
562 | unsigned hstr = 0; | |
563 | if (caseSensitive) { | |
564 | for (int i = 0; i < lstr; i++) { | |
565 | hthis += uthis[i]; | |
566 | hstr += ustr[i]; | |
567 | } | |
568 | int i = 0; | |
569 | while (1) { | |
570 | if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0) | |
571 | return index + i; | |
572 | if (i == delta) | |
573 | return -1; | |
574 | hthis += uthis[i + lstr]; | |
575 | hthis -= uthis[i]; | |
576 | i++; | |
577 | } | |
578 | } else { | |
579 | for (int i = 0; i < lstr; i++ ) { | |
580 | hthis += toASCIILower(uthis[i]); | |
581 | hstr += toASCIILower(ustr[i]); | |
582 | } | |
583 | int i = 0; | |
584 | while (1) { | |
585 | if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr)) | |
586 | return index + i; | |
587 | if (i == delta) | |
588 | return -1; | |
589 | hthis += toASCIILower(uthis[i + lstr]); | |
590 | hthis -= toASCIILower(uthis[i]); | |
591 | i++; | |
592 | } | |
593 | } | |
594 | } | |
595 | ||
596 | int StringImpl::reverseFind(UChar c, int index) | |
597 | { | |
598 | return WebCore::reverseFind(m_data, m_length, c, index); | |
599 | } | |
600 | ||
601 | int StringImpl::reverseFind(StringImpl* str, int index, bool caseSensitive) | |
602 | { | |
603 | /* | |
604 | See StringImpl::find() for explanations. | |
605 | */ | |
606 | ASSERT(str); | |
607 | int lthis = m_length; | |
608 | if (index < 0) | |
609 | index += lthis; | |
610 | ||
611 | int lstr = str->m_length; | |
612 | int delta = lthis - lstr; | |
613 | if ( index < 0 || index > lthis || delta < 0 ) | |
614 | return -1; | |
615 | if ( index > delta ) | |
616 | index = delta; | |
617 | ||
618 | const UChar *uthis = m_data; | |
619 | const UChar *ustr = str->m_data; | |
620 | unsigned hthis = 0; | |
621 | unsigned hstr = 0; | |
622 | int i; | |
623 | if (caseSensitive) { | |
624 | for ( i = 0; i < lstr; i++ ) { | |
625 | hthis += uthis[index + i]; | |
626 | hstr += ustr[i]; | |
627 | } | |
628 | i = index; | |
629 | while (1) { | |
630 | if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0) | |
631 | return i; | |
632 | if (i == 0) | |
633 | return -1; | |
634 | i--; | |
635 | hthis -= uthis[i + lstr]; | |
636 | hthis += uthis[i]; | |
637 | } | |
638 | } else { | |
639 | for (i = 0; i < lstr; i++) { | |
640 | hthis += toASCIILower(uthis[index + i]); | |
641 | hstr += toASCIILower(ustr[i]); | |
642 | } | |
643 | i = index; | |
644 | while (1) { | |
645 | if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr) ) | |
646 | return i; | |
647 | if (i == 0) | |
648 | return -1; | |
649 | i--; | |
650 | hthis -= toASCIILower(uthis[i + lstr]); | |
651 | hthis += toASCIILower(uthis[i]); | |
652 | } | |
653 | } | |
654 | ||
655 | // Should never get here. | |
656 | return -1; | |
657 | } | |
658 | ||
659 | bool StringImpl::endsWith(StringImpl* m_data, bool caseSensitive) | |
660 | { | |
661 | ASSERT(m_data); | |
662 | int start = m_length - m_data->m_length; | |
663 | if (start >= 0) | |
664 | return (find(m_data, start, caseSensitive) == start); | |
665 | return false; | |
666 | } | |
667 | ||
668 | PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC) | |
669 | { | |
670 | if (oldC == newC) | |
671 | return this; | |
672 | unsigned i; | |
673 | for (i = 0; i != m_length; ++i) | |
674 | if (m_data[i] == oldC) | |
675 | break; | |
676 | if (i == m_length) | |
677 | return this; | |
678 | ||
679 | UChar* data; | |
b80e6193 | 680 | RefPtr<StringImpl> newImpl = createUninitialized(m_length, data); |
4e4e5a6f A |
681 | |
682 | for (i = 0; i != m_length; ++i) { | |
683 | UChar ch = m_data[i]; | |
684 | if (ch == oldC) | |
685 | ch = newC; | |
686 | data[i] = ch; | |
687 | } | |
b80e6193 | 688 | return newImpl.release(); |
4e4e5a6f A |
689 | } |
690 | ||
691 | PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str) | |
692 | { | |
693 | position = min(position, length()); | |
694 | lengthToReplace = min(lengthToReplace, length() - position); | |
695 | unsigned lengthToInsert = str ? str->length() : 0; | |
696 | if (!lengthToReplace && !lengthToInsert) | |
697 | return this; | |
698 | UChar* data; | |
699 | ||
700 | if ((length() - lengthToReplace) >= (numeric_limits<unsigned>::max() - lengthToInsert)) | |
701 | CRASH(); | |
702 | ||
b80e6193 | 703 | RefPtr<StringImpl> newImpl = |
4e4e5a6f A |
704 | createUninitialized(length() - lengthToReplace + lengthToInsert, data); |
705 | memcpy(data, characters(), position * sizeof(UChar)); | |
706 | if (str) | |
707 | memcpy(data + position, str->characters(), lengthToInsert * sizeof(UChar)); | |
708 | memcpy(data + position + lengthToInsert, characters() + position + lengthToReplace, | |
709 | (length() - position - lengthToReplace) * sizeof(UChar)); | |
b80e6193 | 710 | return newImpl.release(); |
4e4e5a6f A |
711 | } |
712 | ||
713 | PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement) | |
714 | { | |
715 | if (!replacement) | |
716 | return this; | |
717 | ||
718 | int repStrLength = replacement->length(); | |
719 | int srcSegmentStart = 0; | |
720 | unsigned matchCount = 0; | |
721 | ||
722 | // Count the matches | |
723 | while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) { | |
724 | ++matchCount; | |
725 | ++srcSegmentStart; | |
726 | } | |
727 | ||
728 | // If we have 0 matches, we don't have to do any more work | |
729 | if (!matchCount) | |
730 | return this; | |
731 | ||
732 | if (repStrLength && matchCount > numeric_limits<unsigned>::max() / repStrLength) | |
733 | CRASH(); | |
734 | ||
735 | unsigned replaceSize = matchCount * repStrLength; | |
736 | unsigned newSize = m_length - matchCount; | |
737 | if (newSize >= (numeric_limits<unsigned>::max() - replaceSize)) | |
738 | CRASH(); | |
739 | ||
740 | newSize += replaceSize; | |
741 | ||
742 | UChar* data; | |
b80e6193 | 743 | RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); |
4e4e5a6f A |
744 | |
745 | // Construct the new data | |
746 | int srcSegmentEnd; | |
747 | int srcSegmentLength; | |
748 | srcSegmentStart = 0; | |
749 | int dstOffset = 0; | |
750 | ||
751 | while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) { | |
752 | srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
753 | memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); | |
754 | dstOffset += srcSegmentLength; | |
755 | memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar)); | |
756 | dstOffset += repStrLength; | |
757 | srcSegmentStart = srcSegmentEnd + 1; | |
758 | } | |
759 | ||
760 | srcSegmentLength = m_length - srcSegmentStart; | |
761 | memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); | |
762 | ||
763 | ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length())); | |
764 | ||
b80e6193 | 765 | return newImpl.release(); |
4e4e5a6f A |
766 | } |
767 | ||
768 | PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement) | |
769 | { | |
770 | if (!pattern || !replacement) | |
771 | return this; | |
772 | ||
773 | int patternLength = pattern->length(); | |
774 | if (!patternLength) | |
775 | return this; | |
776 | ||
777 | int repStrLength = replacement->length(); | |
778 | int srcSegmentStart = 0; | |
779 | unsigned matchCount = 0; | |
780 | ||
781 | // Count the matches | |
782 | while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) { | |
783 | ++matchCount; | |
784 | srcSegmentStart += patternLength; | |
785 | } | |
786 | ||
787 | // If we have 0 matches, we don't have to do any more work | |
788 | if (!matchCount) | |
789 | return this; | |
790 | ||
791 | unsigned newSize = m_length - matchCount * patternLength; | |
792 | if (repStrLength && matchCount > numeric_limits<unsigned>::max() / repStrLength) | |
793 | CRASH(); | |
794 | ||
795 | if (newSize > (numeric_limits<unsigned>::max() - matchCount * repStrLength)) | |
796 | CRASH(); | |
797 | ||
798 | newSize += matchCount * repStrLength; | |
799 | ||
800 | UChar* data; | |
b80e6193 | 801 | RefPtr<StringImpl> newImpl = createUninitialized(newSize, data); |
4e4e5a6f A |
802 | |
803 | // Construct the new data | |
804 | int srcSegmentEnd; | |
805 | int srcSegmentLength; | |
806 | srcSegmentStart = 0; | |
807 | int dstOffset = 0; | |
808 | ||
809 | while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) { | |
810 | srcSegmentLength = srcSegmentEnd - srcSegmentStart; | |
811 | memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); | |
812 | dstOffset += srcSegmentLength; | |
813 | memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar)); | |
814 | dstOffset += repStrLength; | |
815 | srcSegmentStart = srcSegmentEnd + patternLength; | |
816 | } | |
817 | ||
818 | srcSegmentLength = m_length - srcSegmentStart; | |
819 | memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); | |
820 | ||
821 | ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length())); | |
822 | ||
b80e6193 | 823 | return newImpl.release(); |
4e4e5a6f A |
824 | } |
825 | ||
826 | bool equal(const StringImpl* a, const StringImpl* b) | |
827 | { | |
828 | return StringHash::equal(a, b); | |
829 | } | |
830 | ||
831 | bool equal(const StringImpl* a, const char* b) | |
832 | { | |
833 | if (!a) | |
834 | return !b; | |
835 | if (!b) | |
836 | return !a; | |
837 | ||
838 | unsigned length = a->length(); | |
839 | const UChar* as = a->characters(); | |
840 | for (unsigned i = 0; i != length; ++i) { | |
841 | unsigned char bc = b[i]; | |
842 | if (!bc) | |
843 | return false; | |
844 | if (as[i] != bc) | |
845 | return false; | |
846 | } | |
847 | ||
848 | return !b[length]; | |
849 | } | |
850 | ||
851 | bool equalIgnoringCase(StringImpl* a, StringImpl* b) | |
852 | { | |
853 | return CaseFoldingHash::equal(a, b); | |
854 | } | |
855 | ||
856 | bool equalIgnoringCase(StringImpl* a, const char* b) | |
857 | { | |
858 | if (!a) | |
859 | return !b; | |
860 | if (!b) | |
861 | return !a; | |
862 | ||
863 | unsigned length = a->length(); | |
864 | const UChar* as = a->characters(); | |
865 | ||
866 | // Do a faster loop for the case where all the characters are ASCII. | |
867 | UChar ored = 0; | |
868 | bool equal = true; | |
869 | for (unsigned i = 0; i != length; ++i) { | |
870 | char bc = b[i]; | |
871 | if (!bc) | |
872 | return false; | |
873 | UChar ac = as[i]; | |
874 | ored |= ac; | |
875 | equal = equal && (toASCIILower(ac) == toASCIILower(bc)); | |
876 | } | |
877 | ||
878 | // Do a slower implementation for cases that include non-ASCII characters. | |
879 | if (ored & ~0x7F) { | |
880 | equal = true; | |
881 | for (unsigned i = 0; i != length; ++i) { | |
882 | unsigned char bc = b[i]; | |
883 | equal = equal && (foldCase(as[i]) == foldCase(bc)); | |
884 | } | |
885 | } | |
886 | ||
887 | return equal && !b[length]; | |
888 | } | |
889 | ||
890 | bool equalIgnoringNullity(StringImpl* a, StringImpl* b) | |
891 | { | |
892 | if (StringHash::equal(a, b)) | |
893 | return true; | |
894 | if (!a && b && !b->length()) | |
895 | return true; | |
896 | if (!b && a && !a->length()) | |
897 | return true; | |
898 | ||
899 | return false; | |
900 | } | |
901 | ||
902 | Vector<char> StringImpl::ascii() | |
903 | { | |
904 | Vector<char> buffer(m_length + 1); | |
905 | for (unsigned i = 0; i != m_length; ++i) { | |
906 | UChar c = m_data[i]; | |
907 | if ((c >= 0x20 && c < 0x7F) || c == 0x00) | |
908 | buffer[i] = static_cast<char>(c); | |
909 | else | |
910 | buffer[i] = '?'; | |
911 | } | |
912 | buffer[m_length] = '\0'; | |
913 | return buffer; | |
914 | } | |
915 | ||
916 | WTF::Unicode::Direction StringImpl::defaultWritingDirection() | |
917 | { | |
918 | for (unsigned i = 0; i < m_length; ++i) { | |
919 | WTF::Unicode::Direction charDirection = WTF::Unicode::direction(m_data[i]); | |
920 | if (charDirection == WTF::Unicode::LeftToRight) | |
921 | return WTF::Unicode::LeftToRight; | |
922 | if (charDirection == WTF::Unicode::RightToLeft || charDirection == WTF::Unicode::RightToLeftArabic) | |
923 | return WTF::Unicode::RightToLeft; | |
924 | } | |
925 | return WTF::Unicode::LeftToRight; | |
926 | } | |
927 | ||
928 | // This is a hot function because it's used when parsing HTML. | |
929 | PassRefPtr<StringImpl> StringImpl::createStrippingNullCharactersSlowCase(const UChar* characters, unsigned length) | |
930 | { | |
931 | StringBuffer strippedCopy(length); | |
932 | unsigned strippedLength = 0; | |
933 | for (unsigned i = 0; i < length; i++) { | |
934 | if (int c = characters[i]) | |
935 | strippedCopy[strippedLength++] = c; | |
936 | } | |
937 | ASSERT(strippedLength < length); // Only take the slow case when stripping. | |
938 | strippedCopy.shrink(strippedLength); | |
939 | return adopt(strippedCopy); | |
940 | } | |
941 | ||
942 | PassRefPtr<StringImpl> StringImpl::adopt(StringBuffer& buffer) | |
943 | { | |
944 | unsigned length = buffer.length(); | |
945 | if (length == 0) | |
946 | return empty(); | |
947 | return adoptRef(new StringImpl(buffer.release(), length)); | |
948 | } | |
949 | ||
950 | int StringImpl::wordCount(int maxWordsToCount) | |
951 | { | |
952 | unsigned wordCount = 0; | |
953 | unsigned i; | |
954 | bool atWord = false; | |
955 | for (i = 0; i < m_length; i++) { | |
956 | if (u_isspace(m_data[i])) { | |
957 | atWord = false; | |
958 | } else if (!atWord) { | |
959 | wordCount++; | |
960 | if (wordCount >= (unsigned)maxWordsToCount) | |
961 | return wordCount; | |
962 | atWord = true; | |
963 | } | |
964 | } | |
965 | return wordCount; | |
966 | } | |
967 | ||
968 | PassRefPtr<StringImpl> StringImpl::createWithTerminatingNullCharacter(const StringImpl& string) | |
969 | { | |
970 | // Use createUninitialized instead of 'new StringImpl' so that the string and its buffer | |
b80e6193 | 971 | // get allocated in a single memory block. |
4e4e5a6f | 972 | UChar* data; |
b80e6193 A |
973 | unsigned length = string.m_length; |
974 | if (length >= numeric_limits<unsigned>::max()) | |
975 | CRASH(); | |
4e4e5a6f A |
976 | RefPtr<StringImpl> terminatedString = createUninitialized(length + 1, data); |
977 | memcpy(data, string.m_data, length * sizeof(UChar)); | |
978 | data[length] = 0; | |
979 | terminatedString->m_length--; | |
980 | terminatedString->m_hash = string.m_hash; | |
981 | terminatedString->m_refCountAndFlags |= s_refCountFlagHasTerminatingNullCharacter; | |
982 | return terminatedString.release(); | |
983 | } | |
984 | ||
985 | PassRefPtr<StringImpl> StringImpl::threadsafeCopy() const | |
986 | { | |
987 | return create(m_data, m_length); | |
988 | } | |
989 | ||
990 | PassRefPtr<StringImpl> StringImpl::crossThreadString() | |
991 | { | |
992 | if (SharedUChar* sharedBuffer = this->sharedBuffer()) | |
993 | return adoptRef(new StringImpl(m_data, m_length, sharedBuffer->crossThreadCopy())); | |
994 | ||
995 | // If no shared buffer is available, create a copy. | |
996 | return threadsafeCopy(); | |
997 | } | |
998 | ||
999 | } // namespace WebCore |