2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Google Inc. All rights reserved.
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
26 #include "Collector.h"
29 #include <wtf/Assertions.h>
30 #include <wtf/CrossThreadRefCounted.h>
31 #include <wtf/OwnFastMallocPtr.h>
32 #include <wtf/PassRefPtr.h>
33 #include <wtf/PtrAndFlags.h>
34 #include <wtf/RefPtr.h>
35 #include <wtf/Vector.h>
36 #include <wtf/unicode/Unicode.h>
40 using WTF::PlacementNewAdoptType
;
41 using WTF::PlacementNewAdopt
;
43 class IdentifierTable
;
54 CString(const char*, size_t);
55 CString(const CString
&);
59 static CString
adopt(char*, size_t); // buffer should be allocated with new[].
61 CString
& append(const CString
&);
62 CString
& operator=(const char* c
);
63 CString
& operator=(const CString
&);
64 CString
& operator+=(const CString
& c
) { return append(c
); }
66 size_t size() const { return m_length
; }
67 const char* c_str() const { return m_data
; }
74 typedef Vector
<char, 32> CStringBuffer
;
80 typedef CrossThreadRefCounted
<OwnFastMallocPtr
<UChar
> > SharedUChar
;
82 struct Rep
: Noncopyable
{
85 static PassRefPtr
<Rep
> create(UChar
* buffer
, int length
)
87 return adoptRef(new BaseString(buffer
, length
));
90 static PassRefPtr
<Rep
> createEmptyBuffer(size_t size
)
92 // Guard against integer overflow
93 if (size
< (std::numeric_limits
<size_t>::max() / sizeof(UChar
))) {
94 if (void * buf
= tryFastMalloc(size
* sizeof(UChar
)))
95 return adoptRef(new BaseString(static_cast<UChar
*>(buf
), 0, size
));
97 return adoptRef(new BaseString(0, 0, 0));
100 static PassRefPtr
<Rep
> createCopying(const UChar
*, int);
101 static PassRefPtr
<Rep
> create(PassRefPtr
<Rep
> base
, int offset
, int length
);
103 // Constructs a string from a UTF-8 string, using strict conversion (see comments in UTF8.h).
104 // Returns UString::Rep::null for null input or conversion failure.
105 static PassRefPtr
<Rep
> createFromUTF8(const char*);
107 // Uses SharedUChar to have joint ownership over the UChar*.
108 static PassRefPtr
<Rep
> create(UChar
*, int, PassRefPtr
<SharedUChar
>);
110 SharedUChar
* sharedBuffer();
113 bool baseIsSelf() const { return m_identifierTableAndFlags
.isFlagSet(BaseStringFlag
); }
115 int size() const { return len
; }
117 unsigned hash() const { if (_hash
== 0) _hash
= computeHash(data(), len
); return _hash
; }
118 unsigned computedHash() const { ASSERT(_hash
); return _hash
; } // fast path for Identifiers
120 static unsigned computeHash(const UChar
*, int length
);
121 static unsigned computeHash(const char*, int length
);
122 static unsigned computeHash(const char* s
) { return computeHash(s
, strlen(s
)); }
124 IdentifierTable
* identifierTable() const { return m_identifierTableAndFlags
.get(); }
125 void setIdentifierTable(IdentifierTable
* table
) { ASSERT(!isStatic()); m_identifierTableAndFlags
.set(table
); }
127 bool isStatic() const { return m_identifierTableAndFlags
.isFlagSet(StaticFlag
); }
128 void setStatic(bool);
129 void setBaseString(PassRefPtr
<BaseString
>);
130 BaseString
* baseString();
131 const BaseString
* baseString() const;
133 Rep
* ref() { ++rc
; return this; }
134 ALWAYS_INLINE
void deref() { if (--rc
== 0) destroy(); }
136 void checkConsistency() const;
145 int rc
; // For null and empty static strings, this field does not reflect a correct count, because ref/deref are not thread-safe. A special case in destroy() guarantees that these do not get deleted.
146 mutable unsigned _hash
;
147 PtrAndFlags
<IdentifierTable
, UStringFlags
> m_identifierTableAndFlags
;
149 static BaseString
& null() { return *nullBaseString
; }
150 static BaseString
& empty() { return *emptyBaseString
; }
152 bool reserveCapacity(int capacity
);
155 // Constructor for use by BaseString subclass; they use the union with m_baseString for another purpose.
165 Rep(PassRefPtr
<BaseString
> base
, int offsetInBase
, int length
)
166 : offset(offsetInBase
)
170 , m_baseString(base
.releaseRef())
177 BaseString
* m_baseString
;
179 SharedUChar
* m_sharedBuffer
;
183 // For SmallStringsStorage, which allocates an array and does initialization manually.
186 friend class SmallStringsStorage
;
187 friend void initializeUString();
188 JS_EXPORTDATA
static BaseString
* nullBaseString
;
189 JS_EXPORTDATA
static BaseString
* emptyBaseString
;
193 struct BaseString
: public Rep
{
194 bool isShared() { return rc
!= 1 || isBufferReadOnly(); }
195 void setSharedBuffer(PassRefPtr
<SharedUChar
>);
197 bool isBufferReadOnly()
201 return slowIsBufferReadOnly();
204 // potentially shared data.
214 BaseString(UChar
* buffer
, int length
, int additionalCapacity
= 0)
219 , capacity(length
+ additionalCapacity
)
220 , usedCapacity(length
)
223 m_identifierTableAndFlags
.setFlag(BaseStringFlag
);
227 SharedUChar
* sharedBuffer();
228 bool slowIsBufferReadOnly();
231 friend class SmallStringsStorage
;
232 friend void initializeUString();
237 UString(const char*);
238 UString(const UChar
*, int length
);
239 UString(UChar
*, int length
, bool copy
);
241 UString(const UString
& s
)
246 UString(const Vector
<UChar
>& buffer
);
252 // Special constructor for cases where we overwrite an object in place.
253 UString(PlacementNewAdoptType
)
254 : m_rep(PlacementNewAdopt
)
258 static UString
from(int);
259 static UString
from(unsigned int);
260 static UString
from(long);
261 static UString
from(double);
265 Range(int pos
, int len
)
279 UString
spliceSubstringsWithSeparators(const Range
* substringRanges
, int rangeCount
, const UString
* separators
, int separatorCount
) const;
281 UString
replaceRange(int rangeStart
, int RangeEnd
, const UString
& replacement
) const;
283 UString
& append(const UString
&);
284 UString
& append(const char*);
285 UString
& append(UChar
);
286 UString
& append(char c
) { return append(static_cast<UChar
>(static_cast<unsigned char>(c
))); }
287 UString
& append(const UChar
*, int size
);
288 UString
& appendNumeric(int);
289 UString
& appendNumeric(double);
291 bool getCString(CStringBuffer
&) const;
293 // NOTE: This method should only be used for *debugging* purposes as it
294 // is neither Unicode safe nor free from side effects nor thread-safe.
298 * Convert the string to UTF-8, assuming it is UTF-16 encoded.
299 * In non-strict mode, this function is tolerant of badly formed UTF-16; it
300 * can create UTF-8 strings that are invalid because they have characters in
301 * the range U+D800-U+DFFF, U+FFFE, or U+FFFF, but the UTF-8 string is
302 * guaranteed to be otherwise valid.
303 * In strict mode, an error is reported by returning a null CString.
305 CString
UTF8String(bool strict
= false) const;
307 UString
& operator=(const char*c
);
309 UString
& operator+=(const UString
& s
) { return append(s
); }
310 UString
& operator+=(const char* s
) { return append(s
); }
312 const UChar
* data() const { return m_rep
->data(); }
314 bool isNull() const { return (m_rep
== &Rep::null()); }
315 bool isEmpty() const { return (!m_rep
->len
); }
319 int size() const { return m_rep
->size(); }
321 UChar
operator[](int pos
) const;
323 double toDouble(bool tolerateTrailingJunk
, bool tolerateEmptyString
) const;
324 double toDouble(bool tolerateTrailingJunk
) const;
325 double toDouble() const;
327 uint32_t toUInt32(bool* ok
= 0) const;
328 uint32_t toUInt32(bool* ok
, bool tolerateEmptyString
) const;
329 uint32_t toStrictUInt32(bool* ok
= 0) const;
331 unsigned toArrayIndex(bool* ok
= 0) const;
333 int find(const UString
& f
, int pos
= 0) const;
334 int find(UChar
, int pos
= 0) const;
335 int rfind(const UString
& f
, int pos
) const;
336 int rfind(UChar
, int pos
) const;
338 UString
substr(int pos
= 0, int len
= -1) const;
340 static const UString
& null() { return *nullUString
; }
342 Rep
* rep() const { return m_rep
.get(); }
343 static Rep
* nullRep();
345 UString(PassRefPtr
<Rep
> r
)
353 // Attempt to grow this string such that it can grow to a total length of 'capacity'
354 // without reallocation. This may fail for a number of reasons: if the BaseString is
355 // shared and another string is using part of the capacity beyond our end point, if
356 // the realloc fails, or if this string is empty and has no storage.
358 // This method returns a boolean indicating success.
359 bool reserveCapacity(int capacity
)
361 return m_rep
->reserveCapacity(capacity
);
365 void expandCapacity(int requiredLength
);
366 void expandPreCapacity(int requiredPreCap
);
370 static UString
* nullUString
;
372 friend void initializeUString();
373 friend bool operator==(const UString
&, const UString
&);
374 friend PassRefPtr
<Rep
> concatenate(Rep
*, Rep
*); // returns 0 if out of memory
376 PassRefPtr
<UString::Rep
> concatenate(UString::Rep
*, UString::Rep
*);
377 PassRefPtr
<UString::Rep
> concatenate(UString::Rep
*, int);
378 PassRefPtr
<UString::Rep
> concatenate(UString::Rep
*, double);
// Equality of two UStrings, decided from their sizes and UChar contents.
// NOTE(review): the embedded numbering skips original lines 383-386, 388-390
// and 394-395, so the statements that select between the returns below are
// not visible here; presumably they dispatch on `size` - confirm against the
// full file before editing.
380 inline bool operator==(const UString
& s1
, const UString
& s2
)
382 int size
= s1
.size();
// Visible return #1: s2 has exactly one UChar and the first UChars match.
387 return s2
.size() == 1 && s1
.data()[0] == s2
.data()[0];
391 const UChar
* d1
= s1
.data();
392 const UChar
* d2
= s2
.data();
// Visible return #2: compares the first two UChars; note the non-short-circuit
// '&', so both comparisons are always evaluated.
393 return (d1
[0] == d2
[0]) & (d1
[1] == d2
[1]);
// Visible return #3: sizes must match, then memcmp over size * sizeof(UChar)
// bytes.
396 return s2
.size() == size
&& memcmp(s1
.data(), s2
.data(), size
* sizeof(UChar
)) == 0;
401 inline bool operator!=(const UString
& s1
, const UString
& s2
)
403 return !JSC::operator==(s1
, s2
);
406 bool operator<(const UString
& s1
, const UString
& s2
);
407 bool operator>(const UString
& s1
, const UString
& s2
);
409 bool operator==(const UString
& s1
, const char* s2
);
411 inline bool operator!=(const UString
& s1
, const char* s2
)
413 return !JSC::operator==(s1
, s2
);
416 inline bool operator==(const char *s1
, const UString
& s2
)
418 return operator==(s2
, s1
);
421 inline bool operator!=(const char *s1
, const UString
& s2
)
423 return !JSC::operator==(s1
, s2
);
426 bool operator==(const CString
&, const CString
&);
428 inline UString
operator+(const UString
& s1
, const UString
& s2
)
430 RefPtr
<UString::Rep
> result
= concatenate(s1
.rep(), s2
.rep());
431 return UString(result
? result
.release() : UString::nullRep());
434 int compare(const UString
&, const UString
&);
436 bool equal(const UString::Rep
*, const UString::Rep
*);
// Creates a Rep that refers to part of the base string of an existing `rep`.
// The new Rep's offset is rep->offset + `offset`, and its length is `length`.
// NOTE(review): the embedded numbering skips original lines 439-440, so one
// statement before checkConsistency() may be missing from this view.
438 inline PassRefPtr
<UString::Rep
> UString::Rep::create(PassRefPtr
<UString::Rep
> rep
, int offset
, int length
)
441 rep
->checkConsistency();
// Offsets passed in are relative to `rep`, so remember where `rep` itself
// starts.
443 int repOffset
= rep
->offset
;
445 PassRefPtr
<BaseString
> base
= rep
->baseString();
// The requested range must lie inside the base string's used pre-capacity
// and used capacity.
447 ASSERT(-(offset
+ repOffset
) <= base
->usedPreCapacity
);
448 ASSERT(offset
+ repOffset
+ length
<= base
->usedCapacity
);
450 // Steal the single reference this Rep was created with.
451 return adoptRef(new Rep(base
, repOffset
+ offset
, length
));
454 inline UChar
* UString::Rep::data() const
456 const BaseString
* base
= baseString();
457 return base
->buf
+ base
->preCapacity
+ offset
;
// Sets or clears StaticFlag on this Rep. Asserts that no identifier table is
// attached.
// NOTE(review): the embedded numbering skips original lines 463 and 465, so
// the condition choosing between setFlag and clearFlag is not visible here;
// presumably it tests `v` - confirm against the full file.
460 inline void UString::Rep::setStatic(bool v
)
462 ASSERT(!identifierTable());
464 m_identifierTableAndFlags
.setFlag(StaticFlag
);
466 m_identifierTableAndFlags
.clearFlag(StaticFlag
);
469 inline void UString::Rep::setBaseString(PassRefPtr
<BaseString
> base
)
471 ASSERT(base
!= this);
472 ASSERT(!baseIsSelf());
473 m_baseString
= base
.releaseRef();
476 inline UString::BaseString
* UString::Rep::baseString()
478 return !baseIsSelf() ? m_baseString
: reinterpret_cast<BaseString
*>(this) ;
481 inline const UString::BaseString
* UString::Rep::baseString() const
483 return const_cast<Rep
*>(this)->baseString();
487 inline void UString::Rep::checkConsistency() const
492 inline UString::UString()
493 : m_rep(&Rep::null())
497 // Rule from ECMA 15.2 about what an array index is.
498 // Must exactly match string form of an unsigned integer, and be less than 2^32 - 1.
// Parses this string with toStrictUInt32() and then tests the result against
// 0xFFFFFFFF when `ok` is non-null.
// NOTE(review): the embedded numbering skips original lines 503-504, so the
// statement controlled by the `if` and the function's return are not visible
// here; confirm against the full file.
499 inline unsigned UString::toArrayIndex(bool* ok
) const
501 unsigned i
= toStrictUInt32(ok
);
502 if (ok
&& i
>= 0xFFFFFFFFU
)
507 // We'd rather not do shared substring append for small strings, since
508 // this runs too much risk of a tiny initial string holding down a huge buffer.
510 // FIXME: this should be size_t but that would cause warnings until we
511 // fix UString sizes to be size_t instead of int
512 static const int minShareSize
= Heap::minExtraCostSize
/ sizeof(UChar
);
// Computes how many bytes of the base string's capacity have not yet been
// recorded in its reportedCost, updates reportedCost, and returns that delta.
// NOTE(review): the embedded numbering skips original line 524, so the
// statement controlled by the `if` below is not visible here; presumably an
// early return for small deltas - confirm against the full file.
514 inline size_t UString::cost() const
516 BaseString
* base
= m_rep
->baseString();
// Total bytes covered by the base string's capacity and preCapacity.
517 size_t capacity
= (base
->capacity
+ base
->preCapacity
) * sizeof(UChar
);
518 size_t reportedCost
= base
->reportedCost
;
519 ASSERT(capacity
>= reportedCost
);
521 size_t capacityDelta
= capacity
- reportedCost
;
// Deltas below minShareSize take the branch whose body is missing from view.
523 if (capacityDelta
< static_cast<size_t>(minShareSize
))
// Record the full capacity as reported so it is not counted again.
526 base
->reportedCost
= capacity
;
528 return capacityDelta
;
531 struct IdentifierRepHash
: PtrHash
<RefPtr
<JSC::UString::Rep
> > {
532 static unsigned hash(const RefPtr
<JSC::UString::Rep
>& key
) { return key
->computedHash(); }
533 static unsigned hash(JSC::UString::Rep
* key
) { return key
->computedHash(); }
536 void initializeUString();
541 template<typename T
> struct DefaultHash
;
542 template<typename T
> struct StrHash
;
544 template<> struct StrHash
<JSC::UString::Rep
*> {
545 static unsigned hash(const JSC::UString::Rep
* key
) { return key
->hash(); }
546 static bool equal(const JSC::UString::Rep
* a
, const JSC::UString::Rep
* b
) { return JSC::equal(a
, b
); }
547 static const bool safeToCompareToEmptyOrDeleted
= false;
550 template<> struct StrHash
<RefPtr
<JSC::UString::Rep
> > : public StrHash
<JSC::UString::Rep
*> {
551 using StrHash
<JSC::UString::Rep
*>::hash
;
552 static unsigned hash(const RefPtr
<JSC::UString::Rep
>& key
) { return key
->hash(); }
553 using StrHash
<JSC::UString::Rep
*>::equal
;
554 static bool equal(const RefPtr
<JSC::UString::Rep
>& a
, const RefPtr
<JSC::UString::Rep
>& b
) { return JSC::equal(a
.get(), b
.get()); }
555 static bool equal(const JSC::UString::Rep
* a
, const RefPtr
<JSC::UString::Rep
>& b
) { return JSC::equal(a
, b
.get()); }
556 static bool equal(const RefPtr
<JSC::UString::Rep
>& a
, const JSC::UString::Rep
* b
) { return JSC::equal(a
.get(), b
); }
558 static const bool safeToCompareToEmptyOrDeleted
= false;
561 template<> struct DefaultHash
<JSC::UString::Rep
*> {
562 typedef StrHash
<JSC::UString::Rep
*> Hash
;
565 template<> struct DefaultHash
<RefPtr
<JSC::UString::Rep
> > {
566 typedef StrHash
<RefPtr
<JSC::UString::Rep
> > Hash
;