]>
Commit | Line | Data |
---|---|---|
9dae56ea A |
1 | /* |
2 | * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) | |
3 | * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. | |
4 | * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca) | |
5 | * Copyright (c) 2009, Google Inc. All rights reserved. | |
6 | * | |
7 | * This library is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU Library General Public | |
9 | * License as published by the Free Software Foundation; either | |
10 | * version 2 of the License, or (at your option) any later version. | |
11 | * | |
12 | * This library is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | * Library General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU Library General Public License | |
18 | * along with this library; see the file COPYING.LIB. If not, write to | |
19 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
20 | * Boston, MA 02110-1301, USA. | |
21 | * | |
22 | */ | |
23 | ||
24 | #include "config.h" | |
25 | #include "UString.h" | |
26 | ||
27 | #include "JSGlobalObjectFunctions.h" | |
28 | #include "Collector.h" | |
29 | #include "dtoa.h" | |
30 | #include "Identifier.h" | |
31 | #include "Operations.h" | |
32 | #include <ctype.h> | |
33 | #include <float.h> | |
34 | #include <limits.h> | |
35 | #include <math.h> | |
36 | #include <stdio.h> | |
37 | #include <stdlib.h> | |
38 | #include <wtf/ASCIICType.h> | |
39 | #include <wtf/Assertions.h> | |
40 | #include <wtf/MathExtras.h> | |
41 | #include <wtf/Vector.h> | |
42 | #include <wtf/unicode/UTF8.h> | |
43 | ||
44 | #if HAVE(STRING_H) | |
45 | #include <string.h> | |
46 | #endif | |
47 | #if HAVE(STRINGS_H) | |
48 | #include <strings.h> | |
49 | #endif | |
50 | ||
51 | using namespace WTF; | |
52 | using namespace WTF::Unicode; | |
53 | using namespace std; | |
54 | ||
55 | // This can be tuned differently per platform by putting platform #ifs right here. | |
56 | // If you don't define this macro at all, then copyChars will just call directly | |
57 | // to memcpy. | |
58 | #define USTRING_COPY_CHARS_INLINE_CUTOFF 20 | |
59 | ||
60 | namespace JSC { | |
61 | ||
62 | extern const double NaN; | |
63 | extern const double Inf; | |
64 | ||
// Sentinel size returned by the size helpers in this file when a requested
// size cannot be represented: the largest value a size_t can hold.
static inline size_t overflowIndicator()
{
    return std::numeric_limits<size_t>::max();
}
66 | static inline size_t maxUChars() { return std::numeric_limits<size_t>::max() / sizeof(UChar); } | |
67 | ||
68 | static inline UChar* allocChars(size_t length) | |
69 | { | |
70 | ASSERT(length); | |
71 | if (length > maxUChars()) | |
72 | return 0; | |
73 | return static_cast<UChar*>(tryFastMalloc(sizeof(UChar) * length)); | |
74 | } | |
75 | ||
76 | static inline UChar* reallocChars(UChar* buffer, size_t length) | |
77 | { | |
78 | ASSERT(length); | |
79 | if (length > maxUChars()) | |
80 | return 0; | |
81 | return static_cast<UChar*>(tryFastRealloc(buffer, sizeof(UChar) * length)); | |
82 | } | |
83 | ||
84 | static inline void copyChars(UChar* destination, const UChar* source, unsigned numCharacters) | |
85 | { | |
86 | #ifdef USTRING_COPY_CHARS_INLINE_CUTOFF | |
87 | if (numCharacters <= USTRING_COPY_CHARS_INLINE_CUTOFF) { | |
88 | for (unsigned i = 0; i < numCharacters; ++i) | |
89 | destination[i] = source[i]; | |
90 | return; | |
91 | } | |
92 | #endif | |
93 | memcpy(destination, source, numCharacters * sizeof(UChar)); | |
94 | } | |
95 | ||
96 | COMPILE_ASSERT(sizeof(UChar) == 2, uchar_is_2_bytes) | |
97 | ||
98 | CString::CString(const char* c) | |
99 | : m_length(strlen(c)) | |
100 | , m_data(new char[m_length + 1]) | |
101 | { | |
102 | memcpy(m_data, c, m_length + 1); | |
103 | } | |
104 | ||
105 | CString::CString(const char* c, size_t length) | |
106 | : m_length(length) | |
107 | , m_data(new char[length + 1]) | |
108 | { | |
109 | memcpy(m_data, c, m_length); | |
110 | m_data[m_length] = 0; | |
111 | } | |
112 | ||
113 | CString::CString(const CString& b) | |
114 | { | |
115 | m_length = b.m_length; | |
116 | if (b.m_data) { | |
117 | m_data = new char[m_length + 1]; | |
118 | memcpy(m_data, b.m_data, m_length + 1); | |
119 | } else | |
120 | m_data = 0; | |
121 | } | |
122 | ||
123 | CString::~CString() | |
124 | { | |
125 | delete [] m_data; | |
126 | } | |
127 | ||
128 | CString CString::adopt(char* c, size_t length) | |
129 | { | |
130 | CString s; | |
131 | s.m_data = c; | |
132 | s.m_length = length; | |
133 | return s; | |
134 | } | |
135 | ||
136 | CString& CString::append(const CString& t) | |
137 | { | |
138 | char* n; | |
139 | n = new char[m_length + t.m_length + 1]; | |
140 | if (m_length) | |
141 | memcpy(n, m_data, m_length); | |
142 | if (t.m_length) | |
143 | memcpy(n + m_length, t.m_data, t.m_length); | |
144 | m_length += t.m_length; | |
145 | n[m_length] = 0; | |
146 | ||
147 | delete [] m_data; | |
148 | m_data = n; | |
149 | ||
150 | return *this; | |
151 | } | |
152 | ||
153 | CString& CString::operator=(const char* c) | |
154 | { | |
155 | if (m_data) | |
156 | delete [] m_data; | |
157 | m_length = strlen(c); | |
158 | m_data = new char[m_length + 1]; | |
159 | memcpy(m_data, c, m_length + 1); | |
160 | ||
161 | return *this; | |
162 | } | |
163 | ||
164 | CString& CString::operator=(const CString& str) | |
165 | { | |
166 | if (this == &str) | |
167 | return *this; | |
168 | ||
169 | if (m_data) | |
170 | delete [] m_data; | |
171 | m_length = str.m_length; | |
172 | if (str.m_data) { | |
173 | m_data = new char[m_length + 1]; | |
174 | memcpy(m_data, str.m_data, m_length + 1); | |
175 | } else | |
176 | m_data = 0; | |
177 | ||
178 | return *this; | |
179 | } | |
180 | ||
181 | bool operator==(const CString& c1, const CString& c2) | |
182 | { | |
183 | size_t len = c1.size(); | |
184 | return len == c2.size() && (len == 0 || memcmp(c1.c_str(), c2.c_str(), len) == 0); | |
185 | } | |
186 | ||
187 | // These static strings are immutable, except for rc, whose initial value is chosen to | |
188 | // reduce the possibility of it becoming zero due to ref/deref not being thread-safe. | |
189 | static UChar sharedEmptyChar; | |
190 | UString::BaseString* UString::Rep::nullBaseString; | |
191 | UString::BaseString* UString::Rep::emptyBaseString; | |
192 | UString* UString::nullUString; | |
193 | ||
194 | static void initializeStaticBaseString(int len, UChar* buf, UString::BaseString& base) | |
195 | { | |
196 | base.offset = 0; | |
197 | base.len = len; | |
198 | base.rc = INT_MAX / 2; | |
199 | base._hash = 0; | |
200 | base.m_identifierTableAndFlags.setFlag(UString::Rep::StaticFlag); | |
201 | base.m_baseString = 0; | |
202 | base.buf = buf; | |
203 | base.preCapacity = 0; | |
204 | base.usedPreCapacity = 0; | |
205 | base.capacity = 0; | |
206 | base.usedCapacity = 0; | |
207 | base.reportedCost = 0; | |
208 | base.checkConsistency(); | |
209 | } | |
210 | ||
211 | void initializeUString() | |
212 | { | |
213 | UString::Rep::nullBaseString = new UString::BaseString; | |
214 | initializeStaticBaseString(0, 0, *UString::Rep::nullBaseString); | |
215 | ||
216 | UString::Rep::emptyBaseString = new UString::BaseString; | |
217 | initializeStaticBaseString(0, &sharedEmptyChar, *UString::Rep::emptyBaseString); | |
218 | ||
219 | UString::nullUString = new UString; | |
220 | } | |
221 | ||
222 | static char* statBuffer = 0; // Only used for debugging via UString::ascii(). | |
223 | ||
224 | PassRefPtr<UString::Rep> UString::Rep::createCopying(const UChar* d, int l) | |
225 | { | |
226 | UChar* copyD = static_cast<UChar*>(fastMalloc(l * sizeof(UChar))); | |
227 | copyChars(copyD, d, l); | |
228 | return create(copyD, l); | |
229 | } | |
230 | ||
231 | PassRefPtr<UString::Rep> UString::Rep::create(UChar* d, int l) | |
232 | { | |
233 | BaseString* r = new BaseString; | |
234 | r->offset = 0; | |
235 | r->len = l; | |
236 | r->rc = 1; | |
237 | r->_hash = 0; | |
238 | r->m_baseString = 0; | |
239 | r->reportedCost = 0; | |
240 | r->buf = d; | |
241 | r->usedCapacity = l; | |
242 | r->capacity = l; | |
243 | r->usedPreCapacity = 0; | |
244 | r->preCapacity = 0; | |
245 | ||
246 | r->checkConsistency(); | |
247 | ||
248 | // steal the single reference this Rep was created with | |
249 | return adoptRef(r); | |
250 | } | |
251 | ||
252 | PassRefPtr<UString::Rep> UString::Rep::create(PassRefPtr<Rep> rep, int offset, int length) | |
253 | { | |
254 | ASSERT(rep); | |
255 | rep->checkConsistency(); | |
256 | ||
257 | int repOffset = rep->offset; | |
258 | ||
259 | PassRefPtr<BaseString> base = rep->baseString(); | |
260 | ||
261 | ASSERT(-(offset + repOffset) <= base->usedPreCapacity); | |
262 | ASSERT(offset + repOffset + length <= base->usedCapacity); | |
263 | ||
264 | Rep* r = new Rep; | |
265 | r->offset = repOffset + offset; | |
266 | r->len = length; | |
267 | r->rc = 1; | |
268 | r->_hash = 0; | |
269 | r->setBaseString(base); | |
270 | ||
271 | r->checkConsistency(); | |
272 | ||
273 | // steal the single reference this Rep was created with | |
274 | return adoptRef(r); | |
275 | } | |
276 | ||
277 | PassRefPtr<UString::Rep> UString::Rep::createFromUTF8(const char* string) | |
278 | { | |
279 | if (!string) | |
280 | return &UString::Rep::null(); | |
281 | ||
282 | size_t length = strlen(string); | |
283 | Vector<UChar, 1024> buffer(length); | |
284 | UChar* p = buffer.data(); | |
285 | if (conversionOK != convertUTF8ToUTF16(&string, string + length, &p, p + length)) | |
286 | return &UString::Rep::null(); | |
287 | ||
288 | return UString::Rep::createCopying(buffer.data(), p - buffer.data()); | |
289 | } | |
290 | ||
291 | void UString::Rep::destroy() | |
292 | { | |
293 | checkConsistency(); | |
294 | ||
295 | // Static null and empty strings can never be destroyed, but we cannot rely on | |
296 | // reference counting, because ref/deref are not thread-safe. | |
297 | if (!isStatic()) { | |
298 | if (identifierTable()) | |
299 | Identifier::remove(this); | |
300 | UString::BaseString* base = baseString(); | |
301 | if (base == this) | |
302 | fastFree(base->buf); | |
303 | else | |
304 | base->deref(); | |
305 | ||
306 | delete this; | |
307 | } | |
308 | } | |
309 | ||
310 | // Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's | |
311 | // or anything like that. | |
312 | const unsigned PHI = 0x9e3779b9U; | |
313 | ||
314 | // Paul Hsieh's SuperFastHash | |
315 | // http://www.azillionmonkeys.com/qed/hash.html | |
316 | unsigned UString::Rep::computeHash(const UChar* s, int len) | |
317 | { | |
318 | unsigned l = len; | |
319 | uint32_t hash = PHI; | |
320 | uint32_t tmp; | |
321 | ||
322 | int rem = l & 1; | |
323 | l >>= 1; | |
324 | ||
325 | // Main loop | |
326 | for (; l > 0; l--) { | |
327 | hash += s[0]; | |
328 | tmp = (s[1] << 11) ^ hash; | |
329 | hash = (hash << 16) ^ tmp; | |
330 | s += 2; | |
331 | hash += hash >> 11; | |
332 | } | |
333 | ||
334 | // Handle end case | |
335 | if (rem) { | |
336 | hash += s[0]; | |
337 | hash ^= hash << 11; | |
338 | hash += hash >> 17; | |
339 | } | |
340 | ||
341 | // Force "avalanching" of final 127 bits | |
342 | hash ^= hash << 3; | |
343 | hash += hash >> 5; | |
344 | hash ^= hash << 2; | |
345 | hash += hash >> 15; | |
346 | hash ^= hash << 10; | |
347 | ||
348 | // this avoids ever returning a hash code of 0, since that is used to | |
349 | // signal "hash not computed yet", using a value that is likely to be | |
350 | // effectively the same as 0 when the low bits are masked | |
351 | if (hash == 0) | |
352 | hash = 0x80000000; | |
353 | ||
354 | return hash; | |
355 | } | |
356 | ||
357 | // Paul Hsieh's SuperFastHash | |
358 | // http://www.azillionmonkeys.com/qed/hash.html | |
359 | unsigned UString::Rep::computeHash(const char* s, int l) | |
360 | { | |
361 | // This hash is designed to work on 16-bit chunks at a time. But since the normal case | |
362 | // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they | |
363 | // were 16-bit chunks, which should give matching results | |
364 | ||
365 | uint32_t hash = PHI; | |
366 | uint32_t tmp; | |
367 | ||
368 | size_t rem = l & 1; | |
369 | l >>= 1; | |
370 | ||
371 | // Main loop | |
372 | for (; l > 0; l--) { | |
373 | hash += static_cast<unsigned char>(s[0]); | |
374 | tmp = (static_cast<unsigned char>(s[1]) << 11) ^ hash; | |
375 | hash = (hash << 16) ^ tmp; | |
376 | s += 2; | |
377 | hash += hash >> 11; | |
378 | } | |
379 | ||
380 | // Handle end case | |
381 | if (rem) { | |
382 | hash += static_cast<unsigned char>(s[0]); | |
383 | hash ^= hash << 11; | |
384 | hash += hash >> 17; | |
385 | } | |
386 | ||
387 | // Force "avalanching" of final 127 bits | |
388 | hash ^= hash << 3; | |
389 | hash += hash >> 5; | |
390 | hash ^= hash << 2; | |
391 | hash += hash >> 15; | |
392 | hash ^= hash << 10; | |
393 | ||
394 | // this avoids ever returning a hash code of 0, since that is used to | |
395 | // signal "hash not computed yet", using a value that is likely to be | |
396 | // effectively the same as 0 when the low bits are masked | |
397 | if (hash == 0) | |
398 | hash = 0x80000000; | |
399 | ||
400 | return hash; | |
401 | } | |
402 | ||
403 | #ifndef NDEBUG | |
404 | void UString::Rep::checkConsistency() const | |
405 | { | |
406 | const UString::BaseString* base = baseString(); | |
407 | ||
408 | // There is no recursion for base strings. | |
409 | ASSERT(base == base->baseString()); | |
410 | ||
411 | if (isStatic()) { | |
412 | // There are only two static strings: null and empty. | |
413 | ASSERT(!len); | |
414 | ||
415 | // Static strings cannot get in identifier tables, because they are globally shared. | |
416 | ASSERT(!identifierTable()); | |
417 | } | |
418 | ||
419 | // The string fits in buffer. | |
420 | ASSERT(base->usedPreCapacity <= base->preCapacity); | |
421 | ASSERT(base->usedCapacity <= base->capacity); | |
422 | ASSERT(-offset <= base->usedPreCapacity); | |
423 | ASSERT(offset + len <= base->usedCapacity); | |
424 | } | |
425 | #endif | |
426 | ||
427 | // put these early so they can be inlined | |
428 | static inline size_t expandedSize(size_t size, size_t otherSize) | |
429 | { | |
430 | // Do the size calculation in two parts, returning overflowIndicator if | |
431 | // we overflow the maximum value that we can handle. | |
432 | ||
433 | if (size > maxUChars()) | |
434 | return overflowIndicator(); | |
435 | ||
436 | size_t expandedSize = ((size + 10) / 10 * 11) + 1; | |
437 | if (maxUChars() - expandedSize < otherSize) | |
438 | return overflowIndicator(); | |
439 | ||
440 | return expandedSize + otherSize; | |
441 | } | |
442 | ||
443 | static inline bool expandCapacity(UString::Rep* rep, int requiredLength) | |
444 | { | |
445 | rep->checkConsistency(); | |
446 | ||
447 | UString::BaseString* base = rep->baseString(); | |
448 | ||
449 | if (requiredLength > base->capacity) { | |
450 | size_t newCapacity = expandedSize(requiredLength, base->preCapacity); | |
451 | UChar* oldBuf = base->buf; | |
452 | base->buf = reallocChars(base->buf, newCapacity); | |
453 | if (!base->buf) { | |
454 | base->buf = oldBuf; | |
455 | return false; | |
456 | } | |
457 | base->capacity = newCapacity - base->preCapacity; | |
458 | } | |
459 | if (requiredLength > base->usedCapacity) | |
460 | base->usedCapacity = requiredLength; | |
461 | ||
462 | rep->checkConsistency(); | |
463 | return true; | |
464 | } | |
465 | ||
466 | void UString::expandCapacity(int requiredLength) | |
467 | { | |
468 | if (!JSC::expandCapacity(m_rep.get(), requiredLength)) | |
469 | makeNull(); | |
470 | } | |
471 | ||
472 | void UString::expandPreCapacity(int requiredPreCap) | |
473 | { | |
474 | m_rep->checkConsistency(); | |
475 | ||
476 | BaseString* base = m_rep->baseString(); | |
477 | ||
478 | if (requiredPreCap > base->preCapacity) { | |
479 | size_t newCapacity = expandedSize(requiredPreCap, base->capacity); | |
480 | int delta = newCapacity - base->capacity - base->preCapacity; | |
481 | ||
482 | UChar* newBuf = allocChars(newCapacity); | |
483 | if (!newBuf) { | |
484 | makeNull(); | |
485 | return; | |
486 | } | |
487 | copyChars(newBuf + delta, base->buf, base->capacity + base->preCapacity); | |
488 | fastFree(base->buf); | |
489 | base->buf = newBuf; | |
490 | ||
491 | base->preCapacity = newCapacity - base->capacity; | |
492 | } | |
493 | if (requiredPreCap > base->usedPreCapacity) | |
494 | base->usedPreCapacity = requiredPreCap; | |
495 | ||
496 | m_rep->checkConsistency(); | |
497 | } | |
498 | ||
499 | static PassRefPtr<UString::Rep> createRep(const char* c) | |
500 | { | |
501 | if (!c) | |
502 | return &UString::Rep::null(); | |
503 | ||
504 | if (!c[0]) | |
505 | return &UString::Rep::empty(); | |
506 | ||
507 | size_t length = strlen(c); | |
508 | UChar* d = allocChars(length); | |
509 | if (!d) | |
510 | return &UString::Rep::null(); | |
511 | else { | |
512 | for (size_t i = 0; i < length; i++) | |
513 | d[i] = static_cast<unsigned char>(c[i]); // use unsigned char to zero-extend instead of sign-extend | |
514 | return UString::Rep::create(d, static_cast<int>(length)); | |
515 | } | |
516 | ||
517 | } | |
518 | ||
519 | UString::UString(const char* c) | |
520 | : m_rep(createRep(c)) | |
521 | { | |
522 | } | |
523 | ||
524 | UString::UString(const UChar* c, int length) | |
525 | { | |
526 | if (length == 0) | |
527 | m_rep = &Rep::empty(); | |
528 | else | |
529 | m_rep = Rep::createCopying(c, length); | |
530 | } | |
531 | ||
532 | UString::UString(UChar* c, int length, bool copy) | |
533 | { | |
534 | if (length == 0) | |
535 | m_rep = &Rep::empty(); | |
536 | else if (copy) | |
537 | m_rep = Rep::createCopying(c, length); | |
538 | else | |
539 | m_rep = Rep::create(c, length); | |
540 | } | |
541 | ||
542 | UString::UString(const Vector<UChar>& buffer) | |
543 | { | |
544 | if (!buffer.size()) | |
545 | m_rep = &Rep::empty(); | |
546 | else | |
547 | m_rep = Rep::createCopying(buffer.data(), buffer.size()); | |
548 | } | |
549 | ||
550 | static ALWAYS_INLINE int newCapacityWithOverflowCheck(const int currentCapacity, const int extendLength, const bool plusOne = false) | |
551 | { | |
552 | ASSERT_WITH_MESSAGE(extendLength >= 0, "extendedLength = %d", extendLength); | |
553 | ||
554 | const int plusLength = plusOne ? 1 : 0; | |
555 | if (currentCapacity > std::numeric_limits<int>::max() - extendLength - plusLength) | |
556 | CRASH(); | |
557 | ||
558 | return currentCapacity + extendLength + plusLength; | |
559 | } | |
560 | ||
561 | static ALWAYS_INLINE PassRefPtr<UString::Rep> concatenate(PassRefPtr<UString::Rep> r, const UChar* tData, int tSize) | |
562 | { | |
563 | RefPtr<UString::Rep> rep = r; | |
564 | ||
565 | rep->checkConsistency(); | |
566 | ||
567 | int thisSize = rep->size(); | |
568 | int thisOffset = rep->offset; | |
569 | int length = thisSize + tSize; | |
570 | UString::BaseString* base = rep->baseString(); | |
571 | ||
572 | // possible cases: | |
573 | if (tSize == 0) { | |
574 | // t is empty | |
575 | } else if (thisSize == 0) { | |
576 | // this is empty | |
577 | rep = UString::Rep::createCopying(tData, tSize); | |
578 | } else if (rep == base && rep->rc == 1) { | |
579 | // this is direct and has refcount of 1 (so we can just alter it directly) | |
580 | if (!expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length))) | |
581 | rep = &UString::Rep::null(); | |
582 | if (rep->data()) { | |
583 | copyChars(rep->data() + thisSize, tData, tSize); | |
584 | rep->len = length; | |
585 | rep->_hash = 0; | |
586 | } | |
587 | } else if (thisOffset + thisSize == base->usedCapacity && thisSize >= minShareSize) { | |
588 | // this reaches the end of the buffer - extend it if it's long enough to append to | |
589 | if (!expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length))) | |
590 | rep = &UString::Rep::null(); | |
591 | if (rep->data()) { | |
592 | copyChars(rep->data() + thisSize, tData, tSize); | |
593 | rep = UString::Rep::create(rep, 0, length); | |
594 | } | |
595 | } else { | |
596 | // this is shared with someone using more capacity, gotta make a whole new string | |
597 | size_t newCapacity = expandedSize(length, 0); | |
598 | UChar* d = allocChars(newCapacity); | |
599 | if (!d) | |
600 | rep = &UString::Rep::null(); | |
601 | else { | |
602 | copyChars(d, rep->data(), thisSize); | |
603 | copyChars(d + thisSize, tData, tSize); | |
604 | rep = UString::Rep::create(d, length); | |
605 | rep->baseString()->capacity = newCapacity; | |
606 | } | |
607 | } | |
608 | ||
609 | rep->checkConsistency(); | |
610 | ||
611 | return rep.release(); | |
612 | } | |
613 | ||
614 | static ALWAYS_INLINE PassRefPtr<UString::Rep> concatenate(PassRefPtr<UString::Rep> r, const char* t) | |
615 | { | |
616 | RefPtr<UString::Rep> rep = r; | |
617 | ||
618 | rep->checkConsistency(); | |
619 | ||
620 | int thisSize = rep->size(); | |
621 | int thisOffset = rep->offset; | |
622 | int tSize = static_cast<int>(strlen(t)); | |
623 | int length = thisSize + tSize; | |
624 | UString::BaseString* base = rep->baseString(); | |
625 | ||
626 | // possible cases: | |
627 | if (thisSize == 0) { | |
628 | // this is empty | |
629 | rep = createRep(t); | |
630 | } else if (tSize == 0) { | |
631 | // t is empty, we'll just return *this below. | |
632 | } else if (rep == base && rep->rc == 1) { | |
633 | // this is direct and has refcount of 1 (so we can just alter it directly) | |
634 | expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length)); | |
635 | UChar* d = rep->data(); | |
636 | if (d) { | |
637 | for (int i = 0; i < tSize; ++i) | |
638 | d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend | |
639 | rep->len = length; | |
640 | rep->_hash = 0; | |
641 | } | |
642 | } else if (thisOffset + thisSize == base->usedCapacity && thisSize >= minShareSize) { | |
643 | // this string reaches the end of the buffer - extend it | |
644 | expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length)); | |
645 | UChar* d = rep->data(); | |
646 | if (d) { | |
647 | for (int i = 0; i < tSize; ++i) | |
648 | d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend | |
649 | rep = UString::Rep::create(rep, 0, length); | |
650 | } | |
651 | } else { | |
652 | // this is shared with someone using more capacity, gotta make a whole new string | |
653 | size_t newCapacity = expandedSize(length, 0); | |
654 | UChar* d = allocChars(newCapacity); | |
655 | if (!d) | |
656 | rep = &UString::Rep::null(); | |
657 | else { | |
658 | copyChars(d, rep->data(), thisSize); | |
659 | for (int i = 0; i < tSize; ++i) | |
660 | d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend | |
661 | rep = UString::Rep::create(d, length); | |
662 | rep->baseString()->capacity = newCapacity; | |
663 | } | |
664 | } | |
665 | ||
666 | rep->checkConsistency(); | |
667 | ||
668 | return rep.release(); | |
669 | } | |
670 | ||
671 | PassRefPtr<UString::Rep> concatenate(UString::Rep* a, UString::Rep* b) | |
672 | { | |
673 | a->checkConsistency(); | |
674 | b->checkConsistency(); | |
675 | ||
676 | int aSize = a->size(); | |
677 | int aOffset = a->offset; | |
678 | int bSize = b->size(); | |
679 | int bOffset = b->offset; | |
680 | int length = aSize + bSize; | |
681 | ||
682 | // possible cases: | |
683 | ||
684 | // a is empty | |
685 | if (aSize == 0) | |
686 | return b; | |
687 | // b is empty | |
688 | if (bSize == 0) | |
689 | return a; | |
690 | ||
691 | UString::BaseString* aBase = a->baseString(); | |
692 | if (bSize == 1 && aOffset + aSize == aBase->usedCapacity && aOffset + length <= aBase->capacity) { | |
693 | // b is a single character (common fast case) | |
694 | aBase->usedCapacity = aOffset + length; | |
695 | a->data()[aSize] = b->data()[0]; | |
696 | return UString::Rep::create(a, 0, length); | |
697 | } | |
698 | ||
699 | UString::BaseString* bBase = b->baseString(); | |
700 | if (aOffset + aSize == aBase->usedCapacity && aSize >= minShareSize && 4 * aSize >= bSize | |
701 | && (-bOffset != bBase->usedPreCapacity || aSize >= bSize)) { | |
702 | // - a reaches the end of its buffer so it qualifies for shared append | |
703 | // - also, it's at least a quarter the length of b - appending to a much shorter | |
704 | // string does more harm than good | |
705 | // - however, if b qualifies for prepend and is longer than a, we'd rather prepend | |
706 | ||
707 | UString x(a); | |
708 | x.expandCapacity(newCapacityWithOverflowCheck(aOffset, length)); | |
709 | if (!a->data() || !x.data()) | |
710 | return 0; | |
711 | copyChars(a->data() + aSize, b->data(), bSize); | |
712 | PassRefPtr<UString::Rep> result = UString::Rep::create(a, 0, length); | |
713 | ||
714 | a->checkConsistency(); | |
715 | b->checkConsistency(); | |
716 | result->checkConsistency(); | |
717 | ||
718 | return result; | |
719 | } | |
720 | ||
721 | if (-bOffset == bBase->usedPreCapacity && bSize >= minShareSize && 4 * bSize >= aSize) { | |
722 | // - b reaches the beginning of its buffer so it qualifies for shared prepend | |
723 | // - also, it's at least a quarter the length of a - prepending to a much shorter | |
724 | // string does more harm than good | |
725 | UString y(b); | |
726 | y.expandPreCapacity(-bOffset + aSize); | |
727 | if (!b->data() || !y.data()) | |
728 | return 0; | |
729 | copyChars(b->data() - aSize, a->data(), aSize); | |
730 | PassRefPtr<UString::Rep> result = UString::Rep::create(b, -aSize, length); | |
731 | ||
732 | a->checkConsistency(); | |
733 | b->checkConsistency(); | |
734 | result->checkConsistency(); | |
735 | ||
736 | return result; | |
737 | } | |
738 | ||
739 | // a does not qualify for append, and b does not qualify for prepend, gotta make a whole new string | |
740 | size_t newCapacity = expandedSize(length, 0); | |
741 | UChar* d = allocChars(newCapacity); | |
742 | if (!d) | |
743 | return 0; | |
744 | copyChars(d, a->data(), aSize); | |
745 | copyChars(d + aSize, b->data(), bSize); | |
746 | PassRefPtr<UString::Rep> result = UString::Rep::create(d, length); | |
747 | result->baseString()->capacity = newCapacity; | |
748 | ||
749 | a->checkConsistency(); | |
750 | b->checkConsistency(); | |
751 | result->checkConsistency(); | |
752 | ||
753 | return result; | |
754 | } | |
755 | ||
756 | PassRefPtr<UString::Rep> concatenate(UString::Rep* rep, int i) | |
757 | { | |
758 | UChar buf[1 + sizeof(i) * 3]; | |
759 | UChar* end = buf + sizeof(buf) / sizeof(UChar); | |
760 | UChar* p = end; | |
761 | ||
762 | if (i == 0) | |
763 | *--p = '0'; | |
764 | else if (i == INT_MIN) { | |
765 | char minBuf[1 + sizeof(i) * 3]; | |
766 | sprintf(minBuf, "%d", INT_MIN); | |
767 | return concatenate(rep, minBuf); | |
768 | } else { | |
769 | bool negative = false; | |
770 | if (i < 0) { | |
771 | negative = true; | |
772 | i = -i; | |
773 | } | |
774 | while (i) { | |
775 | *--p = static_cast<unsigned short>((i % 10) + '0'); | |
776 | i /= 10; | |
777 | } | |
778 | if (negative) | |
779 | *--p = '-'; | |
780 | } | |
781 | ||
782 | return concatenate(rep, p, static_cast<int>(end - p)); | |
783 | ||
784 | } | |
785 | ||
786 | PassRefPtr<UString::Rep> concatenate(UString::Rep* rep, double d) | |
787 | { | |
788 | // avoid ever printing -NaN, in JS conceptually there is only one NaN value | |
789 | if (isnan(d)) | |
790 | return concatenate(rep, "NaN"); | |
791 | ||
792 | if (d == 0.0) // stringify -0 as 0 | |
793 | d = 0.0; | |
794 | ||
795 | char buf[80]; | |
796 | int decimalPoint; | |
797 | int sign; | |
798 | ||
799 | char* result = WTF::dtoa(d, 0, &decimalPoint, &sign, NULL); | |
800 | int length = static_cast<int>(strlen(result)); | |
801 | ||
802 | int i = 0; | |
803 | if (sign) | |
804 | buf[i++] = '-'; | |
805 | ||
806 | if (decimalPoint <= 0 && decimalPoint > -6) { | |
807 | buf[i++] = '0'; | |
808 | buf[i++] = '.'; | |
809 | for (int j = decimalPoint; j < 0; j++) | |
810 | buf[i++] = '0'; | |
811 | strcpy(buf + i, result); | |
812 | } else if (decimalPoint <= 21 && decimalPoint > 0) { | |
813 | if (length <= decimalPoint) { | |
814 | strcpy(buf + i, result); | |
815 | i += length; | |
816 | for (int j = 0; j < decimalPoint - length; j++) | |
817 | buf[i++] = '0'; | |
818 | buf[i] = '\0'; | |
819 | } else { | |
820 | strncpy(buf + i, result, decimalPoint); | |
821 | i += decimalPoint; | |
822 | buf[i++] = '.'; | |
823 | strcpy(buf + i, result + decimalPoint); | |
824 | } | |
825 | } else if (result[0] < '0' || result[0] > '9') | |
826 | strcpy(buf + i, result); | |
827 | else { | |
828 | buf[i++] = result[0]; | |
829 | if (length > 1) { | |
830 | buf[i++] = '.'; | |
831 | strcpy(buf + i, result + 1); | |
832 | i += length - 1; | |
833 | } | |
834 | ||
835 | buf[i++] = 'e'; | |
836 | buf[i++] = (decimalPoint >= 0) ? '+' : '-'; | |
837 | // decimalPoint can't be more than 3 digits decimal given the | |
838 | // nature of float representation | |
839 | int exponential = decimalPoint - 1; | |
840 | if (exponential < 0) | |
841 | exponential = -exponential; | |
842 | if (exponential >= 100) | |
843 | buf[i++] = static_cast<char>('0' + exponential / 100); | |
844 | if (exponential >= 10) | |
845 | buf[i++] = static_cast<char>('0' + (exponential % 100) / 10); | |
846 | buf[i++] = static_cast<char>('0' + exponential % 10); | |
847 | buf[i++] = '\0'; | |
848 | } | |
849 | ||
850 | WTF::freedtoa(result); | |
851 | ||
852 | return concatenate(rep, buf); | |
853 | } | |
854 | ||
855 | UString UString::from(int i) | |
856 | { | |
857 | UChar buf[1 + sizeof(i) * 3]; | |
858 | UChar* end = buf + sizeof(buf) / sizeof(UChar); | |
859 | UChar* p = end; | |
860 | ||
861 | if (i == 0) | |
862 | *--p = '0'; | |
863 | else if (i == INT_MIN) { | |
864 | char minBuf[1 + sizeof(i) * 3]; | |
865 | snprintf(minBuf, 1 + sizeof(i) * 3, "%d", INT_MIN); | |
866 | return UString(minBuf); | |
867 | } else { | |
868 | bool negative = false; | |
869 | if (i < 0) { | |
870 | negative = true; | |
871 | i = -i; | |
872 | } | |
873 | while (i) { | |
874 | *--p = static_cast<unsigned short>((i % 10) + '0'); | |
875 | i /= 10; | |
876 | } | |
877 | if (negative) | |
878 | *--p = '-'; | |
879 | } | |
880 | ||
881 | return UString(p, static_cast<int>(end - p)); | |
882 | } | |
883 | ||
884 | UString UString::from(unsigned int u) | |
885 | { | |
886 | UChar buf[sizeof(u) * 3]; | |
887 | UChar* end = buf + sizeof(buf) / sizeof(UChar); | |
888 | UChar* p = end; | |
889 | ||
890 | if (u == 0) | |
891 | *--p = '0'; | |
892 | else { | |
893 | while (u) { | |
894 | *--p = static_cast<unsigned short>((u % 10) + '0'); | |
895 | u /= 10; | |
896 | } | |
897 | } | |
898 | ||
899 | return UString(p, static_cast<int>(end - p)); | |
900 | } | |
901 | ||
902 | UString UString::from(long l) | |
903 | { | |
904 | UChar buf[1 + sizeof(l) * 3]; | |
905 | UChar* end = buf + sizeof(buf) / sizeof(UChar); | |
906 | UChar* p = end; | |
907 | ||
908 | if (l == 0) | |
909 | *--p = '0'; | |
910 | else if (l == LONG_MIN) { | |
911 | char minBuf[1 + sizeof(l) * 3]; | |
912 | snprintf(minBuf, 1 + sizeof(l) * 3, "%ld", LONG_MIN); | |
913 | return UString(minBuf); | |
914 | } else { | |
915 | bool negative = false; | |
916 | if (l < 0) { | |
917 | negative = true; | |
918 | l = -l; | |
919 | } | |
920 | while (l) { | |
921 | *--p = static_cast<unsigned short>((l % 10) + '0'); | |
922 | l /= 10; | |
923 | } | |
924 | if (negative) | |
925 | *--p = '-'; | |
926 | } | |
927 | ||
928 | return UString(p, static_cast<int>(end - p)); | |
929 | } | |
930 | ||
931 | UString UString::from(double d) | |
932 | { | |
933 | // avoid ever printing -NaN, in JS conceptually there is only one NaN value | |
934 | if (isnan(d)) | |
935 | return "NaN"; | |
936 | ||
937 | char buf[80]; | |
938 | int decimalPoint; | |
939 | int sign; | |
940 | ||
941 | char* result = WTF::dtoa(d, 0, &decimalPoint, &sign, NULL); | |
942 | int length = static_cast<int>(strlen(result)); | |
943 | ||
944 | int i = 0; | |
945 | if (sign) | |
946 | buf[i++] = '-'; | |
947 | ||
948 | if (decimalPoint <= 0 && decimalPoint > -6) { | |
949 | buf[i++] = '0'; | |
950 | buf[i++] = '.'; | |
951 | for (int j = decimalPoint; j < 0; j++) | |
952 | buf[i++] = '0'; | |
953 | strlcpy(buf + i, result, sizeof(buf) - i); | |
954 | } else if (decimalPoint <= 21 && decimalPoint > 0) { | |
955 | if (length <= decimalPoint) { | |
956 | strlcpy(buf + i, result, sizeof(buf) - i); | |
957 | i += length; | |
958 | for (int j = 0; j < decimalPoint - length; j++) | |
959 | buf[i++] = '0'; | |
960 | buf[i] = '\0'; | |
961 | } else { | |
962 | int len = (decimalPoint <= static_cast<int>(sizeof(buf)) - i ? decimalPoint : sizeof(buf) - i); | |
963 | strncpy(buf + i, result, len); | |
964 | i += len; | |
965 | buf[i++] = '.'; | |
966 | strlcpy(buf + i, result + decimalPoint, sizeof(buf) - i); | |
967 | } | |
968 | } else if (result[0] < '0' || result[0] > '9') | |
969 | strlcpy(buf + i, result, sizeof(buf) - i); | |
970 | else { | |
971 | buf[i++] = result[0]; | |
972 | if (length > 1) { | |
973 | buf[i++] = '.'; | |
974 | strlcpy(buf + i, result + 1, sizeof(buf) - i); | |
975 | i += length - 1; | |
976 | } | |
977 | ||
978 | buf[i++] = 'e'; | |
979 | buf[i++] = (decimalPoint >= 0) ? '+' : '-'; | |
980 | // decimalPoint can't be more than 3 digits decimal given the | |
981 | // nature of float representation | |
982 | int exponential = decimalPoint - 1; | |
983 | if (exponential < 0) | |
984 | exponential = -exponential; | |
985 | if (exponential >= 100) | |
986 | buf[i++] = static_cast<char>('0' + exponential / 100); | |
987 | if (exponential >= 10) | |
988 | buf[i++] = static_cast<char>('0' + (exponential % 100) / 10); | |
989 | buf[i++] = static_cast<char>('0' + exponential % 10); | |
990 | buf[i++] = '\0'; | |
991 | ASSERT(i <= static_cast<int>(sizeof(buf))); | |
992 | } | |
993 | ||
994 | WTF::freedtoa(result); | |
995 | ||
996 | return UString(buf); | |
997 | } | |
998 | ||
999 | UString UString::spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const | |
1000 | { | |
1001 | m_rep->checkConsistency(); | |
1002 | ||
1003 | if (rangeCount == 1 && separatorCount == 0) { | |
1004 | int thisSize = size(); | |
1005 | int position = substringRanges[0].position; | |
1006 | int length = substringRanges[0].length; | |
1007 | if (position <= 0 && length >= thisSize) | |
1008 | return *this; | |
1009 | return UString::Rep::create(m_rep, max(0, position), min(thisSize, length)); | |
1010 | } | |
1011 | ||
1012 | int totalLength = 0; | |
1013 | for (int i = 0; i < rangeCount; i++) | |
1014 | totalLength += substringRanges[i].length; | |
1015 | for (int i = 0; i < separatorCount; i++) | |
1016 | totalLength += separators[i].size(); | |
1017 | ||
1018 | if (totalLength == 0) | |
1019 | return ""; | |
1020 | ||
1021 | UChar* buffer = allocChars(totalLength); | |
1022 | if (!buffer) | |
1023 | return null(); | |
1024 | ||
1025 | int maxCount = max(rangeCount, separatorCount); | |
1026 | int bufferPos = 0; | |
1027 | for (int i = 0; i < maxCount; i++) { | |
1028 | if (i < rangeCount) { | |
1029 | copyChars(buffer + bufferPos, data() + substringRanges[i].position, substringRanges[i].length); | |
1030 | bufferPos += substringRanges[i].length; | |
1031 | } | |
1032 | if (i < separatorCount) { | |
1033 | copyChars(buffer + bufferPos, separators[i].data(), separators[i].size()); | |
1034 | bufferPos += separators[i].size(); | |
1035 | } | |
1036 | } | |
1037 | ||
1038 | return UString::Rep::create(buffer, totalLength); | |
1039 | } | |
1040 | ||
1041 | UString& UString::append(const UString &t) | |
1042 | { | |
1043 | m_rep->checkConsistency(); | |
1044 | t.rep()->checkConsistency(); | |
1045 | ||
1046 | int thisSize = size(); | |
1047 | int thisOffset = m_rep->offset; | |
1048 | int tSize = t.size(); | |
1049 | int length = thisSize + tSize; | |
1050 | BaseString* base = m_rep->baseString(); | |
1051 | ||
1052 | // possible cases: | |
1053 | if (thisSize == 0) { | |
1054 | // this is empty | |
1055 | *this = t; | |
1056 | } else if (tSize == 0) { | |
1057 | // t is empty | |
1058 | } else if (m_rep == base && m_rep->rc == 1) { | |
1059 | // this is direct and has refcount of 1 (so we can just alter it directly) | |
1060 | expandCapacity(newCapacityWithOverflowCheck(thisOffset, length)); | |
1061 | if (data()) { | |
1062 | copyChars(m_rep->data() + thisSize, t.data(), tSize); | |
1063 | m_rep->len = length; | |
1064 | m_rep->_hash = 0; | |
1065 | } | |
1066 | } else if (thisOffset + thisSize == base->usedCapacity && thisSize >= minShareSize) { | |
1067 | // this reaches the end of the buffer - extend it if it's long enough to append to | |
1068 | expandCapacity(newCapacityWithOverflowCheck(thisOffset, length)); | |
1069 | if (data()) { | |
1070 | copyChars(m_rep->data() + thisSize, t.data(), tSize); | |
1071 | m_rep = Rep::create(m_rep, 0, length); | |
1072 | } | |
1073 | } else { | |
1074 | // this is shared with someone using more capacity, gotta make a whole new string | |
1075 | size_t newCapacity = expandedSize(length, 0); | |
1076 | UChar* d = allocChars(newCapacity); | |
1077 | if (!d) | |
1078 | makeNull(); | |
1079 | else { | |
1080 | copyChars(d, data(), thisSize); | |
1081 | copyChars(d + thisSize, t.data(), tSize); | |
1082 | m_rep = Rep::create(d, length); | |
1083 | m_rep->baseString()->capacity = newCapacity; | |
1084 | } | |
1085 | } | |
1086 | ||
1087 | m_rep->checkConsistency(); | |
1088 | t.rep()->checkConsistency(); | |
1089 | ||
1090 | return *this; | |
1091 | } | |
1092 | ||
1093 | UString& UString::append(const UChar* tData, int tSize) | |
1094 | { | |
1095 | m_rep = concatenate(m_rep.release(), tData, tSize); | |
1096 | return *this; | |
1097 | } | |
1098 | ||
1099 | UString& UString::append(const char* t) | |
1100 | { | |
1101 | m_rep = concatenate(m_rep.release(), t); | |
1102 | return *this; | |
1103 | } | |
1104 | ||
1105 | UString& UString::append(UChar c) | |
1106 | { | |
1107 | m_rep->checkConsistency(); | |
1108 | ||
1109 | int thisOffset = m_rep->offset; | |
1110 | int length = size(); | |
1111 | BaseString* base = m_rep->baseString(); | |
1112 | ||
1113 | // possible cases: | |
1114 | if (length == 0) { | |
1115 | // this is empty - must make a new m_rep because we don't want to pollute the shared empty one | |
1116 | size_t newCapacity = expandedSize(1, 0); | |
1117 | UChar* d = allocChars(newCapacity); | |
1118 | if (!d) | |
1119 | makeNull(); | |
1120 | else { | |
1121 | d[0] = c; | |
1122 | m_rep = Rep::create(d, 1); | |
1123 | m_rep->baseString()->capacity = newCapacity; | |
1124 | } | |
1125 | } else if (m_rep == base && m_rep->rc == 1) { | |
1126 | // this is direct and has refcount of 1 (so we can just alter it directly) | |
1127 | expandCapacity(newCapacityWithOverflowCheck(thisOffset, length, true)); | |
1128 | UChar* d = m_rep->data(); | |
1129 | if (d) { | |
1130 | d[length] = c; | |
1131 | m_rep->len = length + 1; | |
1132 | m_rep->_hash = 0; | |
1133 | } | |
1134 | } else if (thisOffset + length == base->usedCapacity && length >= minShareSize) { | |
1135 | // this reaches the end of the string - extend it and share | |
1136 | expandCapacity(newCapacityWithOverflowCheck(thisOffset, length, true)); | |
1137 | UChar* d = m_rep->data(); | |
1138 | if (d) { | |
1139 | d[length] = c; | |
1140 | m_rep = Rep::create(m_rep, 0, length + 1); | |
1141 | } | |
1142 | } else { | |
1143 | // this is shared with someone using more capacity, gotta make a whole new string | |
1144 | size_t newCapacity = expandedSize(length + 1, 0); | |
1145 | UChar* d = allocChars(newCapacity); | |
1146 | if (!d) | |
1147 | makeNull(); | |
1148 | else { | |
1149 | copyChars(d, data(), length); | |
1150 | d[length] = c; | |
1151 | m_rep = Rep::create(d, length + 1); | |
1152 | m_rep->baseString()->capacity = newCapacity; | |
1153 | } | |
1154 | } | |
1155 | ||
1156 | m_rep->checkConsistency(); | |
1157 | ||
1158 | return *this; | |
1159 | } | |
1160 | ||
1161 | bool UString::getCString(CStringBuffer& buffer) const | |
1162 | { | |
1163 | int length = size(); | |
1164 | int neededSize = length + 1; | |
1165 | buffer.resize(neededSize); | |
1166 | char* buf = buffer.data(); | |
1167 | ||
1168 | UChar ored = 0; | |
1169 | const UChar* p = data(); | |
1170 | char* q = buf; | |
1171 | const UChar* limit = p + length; | |
1172 | while (p != limit) { | |
1173 | UChar c = p[0]; | |
1174 | ored |= c; | |
1175 | *q = static_cast<char>(c); | |
1176 | ++p; | |
1177 | ++q; | |
1178 | } | |
1179 | *q = '\0'; | |
1180 | ||
1181 | return !(ored & 0xFF00); | |
1182 | } | |
1183 | ||
1184 | char* UString::ascii() const | |
1185 | { | |
1186 | int length = size(); | |
1187 | int neededSize = length + 1; | |
1188 | delete[] statBuffer; | |
1189 | statBuffer = new char[neededSize]; | |
1190 | ||
1191 | const UChar* p = data(); | |
1192 | char* q = statBuffer; | |
1193 | const UChar* limit = p + length; | |
1194 | while (p != limit) { | |
1195 | *q = static_cast<char>(p[0]); | |
1196 | ++p; | |
1197 | ++q; | |
1198 | } | |
1199 | *q = '\0'; | |
1200 | ||
1201 | return statBuffer; | |
1202 | } | |
1203 | ||
1204 | UString& UString::operator=(const char* c) | |
1205 | { | |
1206 | if (!c) { | |
1207 | m_rep = &Rep::null(); | |
1208 | return *this; | |
1209 | } | |
1210 | ||
1211 | if (!c[0]) { | |
1212 | m_rep = &Rep::empty(); | |
1213 | return *this; | |
1214 | } | |
1215 | ||
1216 | int l = static_cast<int>(strlen(c)); | |
1217 | UChar* d; | |
1218 | BaseString* base = m_rep->baseString(); | |
1219 | if (m_rep->rc == 1 && l <= base->capacity && m_rep == base && m_rep->offset == 0 && base->preCapacity == 0) { | |
1220 | d = base->buf; | |
1221 | m_rep->_hash = 0; | |
1222 | m_rep->len = l; | |
1223 | } else { | |
1224 | d = allocChars(l); | |
1225 | if (!d) { | |
1226 | makeNull(); | |
1227 | return *this; | |
1228 | } | |
1229 | m_rep = Rep::create(d, l); | |
1230 | } | |
1231 | for (int i = 0; i < l; i++) | |
1232 | d[i] = static_cast<unsigned char>(c[i]); // use unsigned char to zero-extend instead of sign-extend | |
1233 | ||
1234 | return *this; | |
1235 | } | |
1236 | ||
1237 | bool UString::is8Bit() const | |
1238 | { | |
1239 | const UChar* u = data(); | |
1240 | const UChar* limit = u + size(); | |
1241 | while (u < limit) { | |
1242 | if (u[0] > 0xFF) | |
1243 | return false; | |
1244 | ++u; | |
1245 | } | |
1246 | ||
1247 | return true; | |
1248 | } | |
1249 | ||
1250 | UChar UString::operator[](int pos) const | |
1251 | { | |
1252 | if (pos >= size()) | |
1253 | return '\0'; | |
1254 | return data()[pos]; | |
1255 | } | |
1256 | ||
1257 | double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const | |
1258 | { | |
1259 | if (size() == 1) { | |
1260 | UChar c = data()[0]; | |
1261 | if (isASCIIDigit(c)) | |
1262 | return c - '0'; | |
1263 | if (isASCIISpace(c) && tolerateEmptyString) | |
1264 | return 0; | |
1265 | return NaN; | |
1266 | } | |
1267 | ||
1268 | // FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk | |
1269 | // after the number, so this is too strict a check. | |
1270 | CStringBuffer s; | |
1271 | if (!getCString(s)) | |
1272 | return NaN; | |
1273 | const char* c = s.data(); | |
1274 | ||
1275 | // skip leading white space | |
1276 | while (isASCIISpace(*c)) | |
1277 | c++; | |
1278 | ||
1279 | // empty string ? | |
1280 | if (*c == '\0') | |
1281 | return tolerateEmptyString ? 0.0 : NaN; | |
1282 | ||
1283 | double d; | |
1284 | ||
1285 | // hex number ? | |
1286 | if (*c == '0' && (*(c + 1) == 'x' || *(c + 1) == 'X')) { | |
1287 | const char* firstDigitPosition = c + 2; | |
1288 | c++; | |
1289 | d = 0.0; | |
1290 | while (*(++c)) { | |
1291 | if (*c >= '0' && *c <= '9') | |
1292 | d = d * 16.0 + *c - '0'; | |
1293 | else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f')) | |
1294 | d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0; | |
1295 | else | |
1296 | break; | |
1297 | } | |
1298 | ||
1299 | if (d >= mantissaOverflowLowerBound) | |
1300 | d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16); | |
1301 | } else { | |
1302 | // regular number ? | |
1303 | char* end; | |
1304 | d = WTF::strtod(c, &end); | |
1305 | if ((d != 0.0 || end != c) && d != Inf && d != -Inf) { | |
1306 | c = end; | |
1307 | } else { | |
1308 | double sign = 1.0; | |
1309 | ||
1310 | if (*c == '+') | |
1311 | c++; | |
1312 | else if (*c == '-') { | |
1313 | sign = -1.0; | |
1314 | c++; | |
1315 | } | |
1316 | ||
1317 | // We used strtod() to do the conversion. However, strtod() handles | |
1318 | // infinite values slightly differently than JavaScript in that it | |
1319 | // converts the string "inf" with any capitalization to infinity, | |
1320 | // whereas the ECMA spec requires that it be converted to NaN. | |
1321 | ||
1322 | if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') { | |
1323 | d = sign * Inf; | |
1324 | c += 8; | |
1325 | } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i') | |
1326 | c = end; | |
1327 | else | |
1328 | return NaN; | |
1329 | } | |
1330 | } | |
1331 | ||
1332 | // allow trailing white space | |
1333 | while (isASCIISpace(*c)) | |
1334 | c++; | |
1335 | // don't allow anything after - unless tolerant=true | |
1336 | if (!tolerateTrailingJunk && *c != '\0') | |
1337 | d = NaN; | |
1338 | ||
1339 | return d; | |
1340 | } | |
1341 | ||
1342 | double UString::toDouble(bool tolerateTrailingJunk) const | |
1343 | { | |
1344 | return toDouble(tolerateTrailingJunk, true); | |
1345 | } | |
1346 | ||
1347 | double UString::toDouble() const | |
1348 | { | |
1349 | return toDouble(false, true); | |
1350 | } | |
1351 | ||
1352 | uint32_t UString::toUInt32(bool* ok) const | |
1353 | { | |
1354 | double d = toDouble(); | |
1355 | bool b = true; | |
1356 | ||
1357 | if (d != static_cast<uint32_t>(d)) { | |
1358 | b = false; | |
1359 | d = 0; | |
1360 | } | |
1361 | ||
1362 | if (ok) | |
1363 | *ok = b; | |
1364 | ||
1365 | return static_cast<uint32_t>(d); | |
1366 | } | |
1367 | ||
1368 | uint32_t UString::toUInt32(bool* ok, bool tolerateEmptyString) const | |
1369 | { | |
1370 | double d = toDouble(false, tolerateEmptyString); | |
1371 | bool b = true; | |
1372 | ||
1373 | if (d != static_cast<uint32_t>(d)) { | |
1374 | b = false; | |
1375 | d = 0; | |
1376 | } | |
1377 | ||
1378 | if (ok) | |
1379 | *ok = b; | |
1380 | ||
1381 | return static_cast<uint32_t>(d); | |
1382 | } | |
1383 | ||
1384 | uint32_t UString::toStrictUInt32(bool* ok) const | |
1385 | { | |
1386 | if (ok) | |
1387 | *ok = false; | |
1388 | ||
1389 | // Empty string is not OK. | |
1390 | int len = m_rep->len; | |
1391 | if (len == 0) | |
1392 | return 0; | |
1393 | const UChar* p = m_rep->data(); | |
1394 | unsigned short c = p[0]; | |
1395 | ||
1396 | // If the first digit is 0, only 0 itself is OK. | |
1397 | if (c == '0') { | |
1398 | if (len == 1 && ok) | |
1399 | *ok = true; | |
1400 | return 0; | |
1401 | } | |
1402 | ||
1403 | // Convert to UInt32, checking for overflow. | |
1404 | uint32_t i = 0; | |
1405 | while (1) { | |
1406 | // Process character, turning it into a digit. | |
1407 | if (c < '0' || c > '9') | |
1408 | return 0; | |
1409 | const unsigned d = c - '0'; | |
1410 | ||
1411 | // Multiply by 10, checking for overflow out of 32 bits. | |
1412 | if (i > 0xFFFFFFFFU / 10) | |
1413 | return 0; | |
1414 | i *= 10; | |
1415 | ||
1416 | // Add in the digit, checking for overflow out of 32 bits. | |
1417 | const unsigned max = 0xFFFFFFFFU - d; | |
1418 | if (i > max) | |
1419 | return 0; | |
1420 | i += d; | |
1421 | ||
1422 | // Handle end of string. | |
1423 | if (--len == 0) { | |
1424 | if (ok) | |
1425 | *ok = true; | |
1426 | return i; | |
1427 | } | |
1428 | ||
1429 | // Get next character. | |
1430 | c = *(++p); | |
1431 | } | |
1432 | } | |
1433 | ||
1434 | int UString::find(const UString& f, int pos) const | |
1435 | { | |
1436 | int fsz = f.size(); | |
1437 | ||
1438 | if (pos < 0) | |
1439 | pos = 0; | |
1440 | ||
1441 | if (fsz == 1) { | |
1442 | UChar ch = f[0]; | |
1443 | const UChar* end = data() + size(); | |
1444 | for (const UChar* c = data() + pos; c < end; c++) { | |
1445 | if (*c == ch) | |
1446 | return static_cast<int>(c - data()); | |
1447 | } | |
1448 | return -1; | |
1449 | } | |
1450 | ||
1451 | int sz = size(); | |
1452 | if (sz < fsz) | |
1453 | return -1; | |
1454 | if (fsz == 0) | |
1455 | return pos; | |
1456 | const UChar* end = data() + sz - fsz; | |
1457 | int fsizeminusone = (fsz - 1) * sizeof(UChar); | |
1458 | const UChar* fdata = f.data(); | |
1459 | unsigned short fchar = fdata[0]; | |
1460 | ++fdata; | |
1461 | for (const UChar* c = data() + pos; c <= end; c++) { | |
1462 | if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone)) | |
1463 | return static_cast<int>(c - data()); | |
1464 | } | |
1465 | ||
1466 | return -1; | |
1467 | } | |
1468 | ||
1469 | int UString::find(UChar ch, int pos) const | |
1470 | { | |
1471 | if (pos < 0) | |
1472 | pos = 0; | |
1473 | const UChar* end = data() + size(); | |
1474 | for (const UChar* c = data() + pos; c < end; c++) { | |
1475 | if (*c == ch) | |
1476 | return static_cast<int>(c - data()); | |
1477 | } | |
1478 | ||
1479 | return -1; | |
1480 | } | |
1481 | ||
1482 | int UString::rfind(const UString& f, int pos) const | |
1483 | { | |
1484 | int sz = size(); | |
1485 | int fsz = f.size(); | |
1486 | if (sz < fsz) | |
1487 | return -1; | |
1488 | if (pos < 0) | |
1489 | pos = 0; | |
1490 | if (pos > sz - fsz) | |
1491 | pos = sz - fsz; | |
1492 | if (fsz == 0) | |
1493 | return pos; | |
1494 | int fsizeminusone = (fsz - 1) * sizeof(UChar); | |
1495 | const UChar* fdata = f.data(); | |
1496 | for (const UChar* c = data() + pos; c >= data(); c--) { | |
1497 | if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone)) | |
1498 | return static_cast<int>(c - data()); | |
1499 | } | |
1500 | ||
1501 | return -1; | |
1502 | } | |
1503 | ||
1504 | int UString::rfind(UChar ch, int pos) const | |
1505 | { | |
1506 | if (isEmpty()) | |
1507 | return -1; | |
1508 | if (pos + 1 >= size()) | |
1509 | pos = size() - 1; | |
1510 | for (const UChar* c = data() + pos; c >= data(); c--) { | |
1511 | if (*c == ch) | |
1512 | return static_cast<int>(c - data()); | |
1513 | } | |
1514 | ||
1515 | return -1; | |
1516 | } | |
1517 | ||
1518 | UString UString::substr(int pos, int len) const | |
1519 | { | |
1520 | int s = size(); | |
1521 | ||
1522 | if (pos < 0) | |
1523 | pos = 0; | |
1524 | else if (pos >= s) | |
1525 | pos = s; | |
1526 | if (len < 0) | |
1527 | len = s; | |
1528 | if (pos + len >= s) | |
1529 | len = s - pos; | |
1530 | ||
1531 | if (pos == 0 && len == s) | |
1532 | return *this; | |
1533 | ||
1534 | return UString(Rep::create(m_rep, pos, len)); | |
1535 | } | |
1536 | ||
1537 | bool operator==(const UString& s1, const UString& s2) | |
1538 | { | |
1539 | int size = s1.size(); | |
1540 | switch (size) { | |
1541 | case 0: | |
1542 | return !s2.size(); | |
1543 | case 1: | |
1544 | return s2.size() == 1 && s1.data()[0] == s2.data()[0]; | |
1545 | default: | |
1546 | return s2.size() == size && memcmp(s1.data(), s2.data(), size * sizeof(UChar)) == 0; | |
1547 | } | |
1548 | } | |
1549 | ||
1550 | bool operator==(const UString& s1, const char *s2) | |
1551 | { | |
1552 | if (s2 == 0) | |
1553 | return s1.isEmpty(); | |
1554 | ||
1555 | const UChar* u = s1.data(); | |
1556 | const UChar* uend = u + s1.size(); | |
1557 | while (u != uend && *s2) { | |
1558 | if (u[0] != (unsigned char)*s2) | |
1559 | return false; | |
1560 | s2++; | |
1561 | u++; | |
1562 | } | |
1563 | ||
1564 | return u == uend && *s2 == 0; | |
1565 | } | |
1566 | ||
1567 | bool operator<(const UString& s1, const UString& s2) | |
1568 | { | |
1569 | const int l1 = s1.size(); | |
1570 | const int l2 = s2.size(); | |
1571 | const int lmin = l1 < l2 ? l1 : l2; | |
1572 | const UChar* c1 = s1.data(); | |
1573 | const UChar* c2 = s2.data(); | |
1574 | int l = 0; | |
1575 | while (l < lmin && *c1 == *c2) { | |
1576 | c1++; | |
1577 | c2++; | |
1578 | l++; | |
1579 | } | |
1580 | if (l < lmin) | |
1581 | return (c1[0] < c2[0]); | |
1582 | ||
1583 | return (l1 < l2); | |
1584 | } | |
1585 | ||
1586 | bool operator>(const UString& s1, const UString& s2) | |
1587 | { | |
1588 | const int l1 = s1.size(); | |
1589 | const int l2 = s2.size(); | |
1590 | const int lmin = l1 < l2 ? l1 : l2; | |
1591 | const UChar* c1 = s1.data(); | |
1592 | const UChar* c2 = s2.data(); | |
1593 | int l = 0; | |
1594 | while (l < lmin && *c1 == *c2) { | |
1595 | c1++; | |
1596 | c2++; | |
1597 | l++; | |
1598 | } | |
1599 | if (l < lmin) | |
1600 | return (c1[0] > c2[0]); | |
1601 | ||
1602 | return (l1 > l2); | |
1603 | } | |
1604 | ||
1605 | int compare(const UString& s1, const UString& s2) | |
1606 | { | |
1607 | const int l1 = s1.size(); | |
1608 | const int l2 = s2.size(); | |
1609 | const int lmin = l1 < l2 ? l1 : l2; | |
1610 | const UChar* c1 = s1.data(); | |
1611 | const UChar* c2 = s2.data(); | |
1612 | int l = 0; | |
1613 | while (l < lmin && *c1 == *c2) { | |
1614 | c1++; | |
1615 | c2++; | |
1616 | l++; | |
1617 | } | |
1618 | ||
1619 | if (l < lmin) | |
1620 | return (c1[0] > c2[0]) ? 1 : -1; | |
1621 | ||
1622 | if (l1 == l2) | |
1623 | return 0; | |
1624 | ||
1625 | return (l1 > l2) ? 1 : -1; | |
1626 | } | |
1627 | ||
1628 | bool equal(const UString::Rep* r, const UString::Rep* b) | |
1629 | { | |
1630 | int length = r->len; | |
1631 | if (length != b->len) | |
1632 | return false; | |
1633 | const UChar* d = r->data(); | |
1634 | const UChar* s = b->data(); | |
1635 | for (int i = 0; i != length; ++i) { | |
1636 | if (d[i] != s[i]) | |
1637 | return false; | |
1638 | } | |
1639 | return true; | |
1640 | } | |
1641 | ||
1642 | CString UString::UTF8String(bool strict) const | |
1643 | { | |
1644 | // Allocate a buffer big enough to hold all the characters. | |
1645 | const int length = size(); | |
1646 | Vector<char, 1024> buffer(length * 3); | |
1647 | ||
1648 | // Convert to runs of 8-bit characters. | |
1649 | char* p = buffer.data(); | |
1650 | const UChar* d = reinterpret_cast<const UChar*>(&data()[0]); | |
1651 | ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict); | |
1652 | if (result != conversionOK) | |
1653 | return CString(); | |
1654 | ||
1655 | return CString(buffer.data(), p - buffer.data()); | |
1656 | } | |
1657 | ||
1658 | // For use in error handling code paths -- having this not be inlined helps avoid PIC branches to fetch the global on Mac OS X. | |
1659 | NEVER_INLINE void UString::makeNull() | |
1660 | { | |
1661 | m_rep = &Rep::null(); | |
1662 | } | |
1663 | ||
1664 | // For use in error handling code paths -- having this not be inlined helps avoid PIC branches to fetch the global on Mac OS X. | |
1665 | NEVER_INLINE UString::Rep* UString::nullRep() | |
1666 | { | |
1667 | return &Rep::null(); | |
1668 | } | |
1669 | ||
1670 | } // namespace JSC |