2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 * Copyright (c) 2009, Google Inc. All rights reserved.
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
26 #include "Collector.h"
29 #include <wtf/Assertions.h>
30 #include <wtf/PassRefPtr.h>
31 #include <wtf/PtrAndFlags.h>
32 #include <wtf/RefPtr.h>
33 #include <wtf/Vector.h>
34 #include <wtf/unicode/Unicode.h>
38 using WTF::PlacementNewAdoptType
;
39 using WTF::PlacementNewAdopt
;
41 class IdentifierTable
;
52 CString(const char*, size_t);
53 CString(const CString
&);
57 static CString
adopt(char*, size_t); // buffer should be allocated with new[].
59 CString
& append(const CString
&);
60 CString
& operator=(const char* c
);
61 CString
& operator=(const CString
&);
62 CString
& operator+=(const CString
& c
) { return append(c
); }
64 size_t size() const { return m_length
; }
65 const char* c_str() const { return m_data
; }
72 typedef Vector
<char, 32> CStringBuffer
;
79 struct Rep
: Noncopyable
{
82 static PassRefPtr
<Rep
> create(UChar
*, int);
83 static PassRefPtr
<Rep
> createCopying(const UChar
*, int);
84 static PassRefPtr
<Rep
> create(PassRefPtr
<Rep
> base
, int offset
, int length
);
86 // Constructs a string from a UTF-8 string, using strict conversion (see comments in UTF8.h).
87 // Returns UString::Rep::null for null input or conversion failure.
88 static PassRefPtr
<Rep
> createFromUTF8(const char*);
92 bool baseIsSelf() const { return m_identifierTableAndFlags
.isFlagSet(BaseStringFlag
); }
94 int size() const { return len
; }
96 unsigned hash() const { if (_hash
== 0) _hash
= computeHash(data(), len
); return _hash
; }
97 unsigned computedHash() const { ASSERT(_hash
); return _hash
; } // fast path for Identifiers
99 static unsigned computeHash(const UChar
*, int length
);
100 static unsigned computeHash(const char*, int length
);
101 static unsigned computeHash(const char* s
) { return computeHash(s
, strlen(s
)); }
103 IdentifierTable
* identifierTable() const { return m_identifierTableAndFlags
.get(); }
104 void setIdentifierTable(IdentifierTable
* table
) { ASSERT(!isStatic()); m_identifierTableAndFlags
.set(table
); }
106 bool isStatic() const { return m_identifierTableAndFlags
.isFlagSet(StaticFlag
); }
107 void setStatic(bool);
108 void setBaseString(PassRefPtr
<BaseString
>);
109 BaseString
* baseString();
110 const BaseString
* baseString() const;
112 Rep
* ref() { ++rc
; return this; }
113 ALWAYS_INLINE
void deref() { if (--rc
== 0) destroy(); }
115 void checkConsistency() const;
124 int rc
; // For null and empty static strings, this field does not reflect a correct count, because ref/deref are not thread-safe. A special case in destroy() guarantees that these do not get deleted.
125 mutable unsigned _hash
;
126 PtrAndFlags
<IdentifierTable
, UStringFlags
> m_identifierTableAndFlags
;
127 void* m_baseString
; // If "this" is a BaseString instance, it is 0. BaseString* otherwise.
129 static BaseString
& null() { return *nullBaseString
; }
130 static BaseString
& empty() { return *emptyBaseString
; }
133 friend void initializeUString();
134 static BaseString
* nullBaseString
;
135 static BaseString
* emptyBaseString
;
138 struct BaseString
: public Rep
{
141 m_identifierTableAndFlags
.setFlag(BaseStringFlag
);
144 // potentially shared data.
156 UString(const char*);
157 UString(const UChar
*, int length
);
158 UString(UChar
*, int length
, bool copy
);
160 UString(const UString
& s
)
165 UString(const Vector
<UChar
>& buffer
);
171 // Special constructor for cases where we overwrite an object in place.
172 UString(PlacementNewAdoptType
)
173 : m_rep(PlacementNewAdopt
)
177 static UString
from(int);
178 static UString
from(unsigned int);
179 static UString
from(long);
180 static UString
from(double);
184 Range(int pos
, int len
)
198 UString
spliceSubstringsWithSeparators(const Range
* substringRanges
, int rangeCount
, const UString
* separators
, int separatorCount
) const;
200 UString
& append(const UString
&);
201 UString
& append(const char*);
202 UString
& append(UChar
);
203 UString
& append(char c
) { return append(static_cast<UChar
>(static_cast<unsigned char>(c
))); }
204 UString
& append(const UChar
*, int size
);
206 bool getCString(CStringBuffer
&) const;
// NOTE: This method should only be used for *debugging* purposes, as it
// is neither Unicode-safe, nor free from side effects, nor thread-safe.
213 * Convert the string to UTF-8, assuming it is UTF-16 encoded.
214 * In non-strict mode, this function is tolerant of badly formed UTF-16, it
215 * can create UTF-8 strings that are invalid because they have characters in
216 * the range U+D800-U+DDFF, U+FFFE, or U+FFFF, but the UTF-8 string is
217 * guaranteed to be otherwise valid.
218 * In strict mode, error is returned as null CString.
220 CString
UTF8String(bool strict
= false) const;
222 UString
& operator=(const char*c
);
224 UString
& operator+=(const UString
& s
) { return append(s
); }
225 UString
& operator+=(const char* s
) { return append(s
); }
227 const UChar
* data() const { return m_rep
->data(); }
229 bool isNull() const { return (m_rep
== &Rep::null()); }
230 bool isEmpty() const { return (!m_rep
->len
); }
234 int size() const { return m_rep
->size(); }
236 UChar
operator[](int pos
) const;
238 double toDouble(bool tolerateTrailingJunk
, bool tolerateEmptyString
) const;
239 double toDouble(bool tolerateTrailingJunk
) const;
240 double toDouble() const;
242 uint32_t toUInt32(bool* ok
= 0) const;
243 uint32_t toUInt32(bool* ok
, bool tolerateEmptyString
) const;
244 uint32_t toStrictUInt32(bool* ok
= 0) const;
246 unsigned toArrayIndex(bool* ok
= 0) const;
248 int find(const UString
& f
, int pos
= 0) const;
249 int find(UChar
, int pos
= 0) const;
250 int rfind(const UString
& f
, int pos
) const;
251 int rfind(UChar
, int pos
) const;
253 UString
substr(int pos
= 0, int len
= -1) const;
255 static const UString
& null() { return *nullUString
; }
257 Rep
* rep() const { return m_rep
.get(); }
258 static Rep
* nullRep();
260 UString(PassRefPtr
<Rep
> r
)
269 void expandCapacity(int requiredLength
);
270 void expandPreCapacity(int requiredPreCap
);
274 static UString
* nullUString
;
276 friend void initializeUString();
277 friend bool operator==(const UString
&, const UString
&);
278 friend PassRefPtr
<Rep
> concatenate(Rep
*, Rep
*); // returns 0 if out of memory
280 PassRefPtr
<UString::Rep
> concatenate(UString::Rep
*, UString::Rep
*);
281 PassRefPtr
<UString::Rep
> concatenate(UString::Rep
*, int);
282 PassRefPtr
<UString::Rep
> concatenate(UString::Rep
*, double);
284 bool operator==(const UString
&, const UString
&);
286 inline bool operator!=(const UString
& s1
, const UString
& s2
)
288 return !JSC::operator==(s1
, s2
);
291 bool operator<(const UString
& s1
, const UString
& s2
);
292 bool operator>(const UString
& s1
, const UString
& s2
);
294 bool operator==(const UString
& s1
, const char* s2
);
296 inline bool operator!=(const UString
& s1
, const char* s2
)
298 return !JSC::operator==(s1
, s2
);
301 inline bool operator==(const char *s1
, const UString
& s2
)
303 return operator==(s2
, s1
);
306 inline bool operator!=(const char *s1
, const UString
& s2
)
308 return !JSC::operator==(s1
, s2
);
311 bool operator==(const CString
&, const CString
&);
313 inline UString
operator+(const UString
& s1
, const UString
& s2
)
315 RefPtr
<UString::Rep
> result
= concatenate(s1
.rep(), s2
.rep());
316 return UString(result
? result
.release() : UString::nullRep());
319 int compare(const UString
&, const UString
&);
321 bool equal(const UString::Rep
*, const UString::Rep
*);
323 inline UChar
* UString::Rep::data() const
325 const BaseString
* base
= baseString();
326 return base
->buf
+ base
->preCapacity
+ offset
;
329 inline void UString::Rep::setStatic(bool v
)
331 ASSERT(!identifierTable());
333 m_identifierTableAndFlags
.setFlag(StaticFlag
);
335 m_identifierTableAndFlags
.clearFlag(StaticFlag
);
338 inline void UString::Rep::setBaseString(PassRefPtr
<BaseString
> base
)
340 ASSERT(base
!= this);
341 m_baseString
= base
.releaseRef();
344 inline UString::BaseString
* UString::Rep::baseString()
346 return reinterpret_cast<BaseString
*>(baseIsSelf() ? this : m_baseString
);
349 inline const UString::BaseString
* UString::Rep::baseString() const
351 return const_cast<const BaseString
*>(const_cast<Rep
*>(this)->baseString());
355 inline void UString::Rep::checkConsistency() const
360 inline UString::UString()
361 : m_rep(&Rep::null())
365 // Rule from ECMA 15.2 about what an array index is.
366 // Must exactly match string form of an unsigned integer, and be less than 2^32 - 1.
367 inline unsigned UString::toArrayIndex(bool* ok
) const
369 unsigned i
= toStrictUInt32(ok
);
370 if (ok
&& i
>= 0xFFFFFFFFU
)
375 // We'd rather not do shared substring append for small strings, since
376 // this runs too much risk of a tiny initial string holding down a
378 // FIXME: this should be size_t but that would cause warnings until we
379 // fix UString sizes to be size_t instead of int
380 static const int minShareSize
= Heap::minExtraCostSize
/ sizeof(UChar
);
382 inline size_t UString::cost() const
384 BaseString
* base
= m_rep
->baseString();
385 size_t capacity
= (base
->capacity
+ base
->preCapacity
) * sizeof(UChar
);
386 size_t reportedCost
= base
->reportedCost
;
387 ASSERT(capacity
>= reportedCost
);
389 size_t capacityDelta
= capacity
- reportedCost
;
391 if (capacityDelta
< static_cast<size_t>(minShareSize
))
394 base
->reportedCost
= capacity
;
396 return capacityDelta
;
399 struct IdentifierRepHash
: PtrHash
<RefPtr
<JSC::UString::Rep
> > {
400 static unsigned hash(const RefPtr
<JSC::UString::Rep
>& key
) { return key
->computedHash(); }
401 static unsigned hash(JSC::UString::Rep
* key
) { return key
->computedHash(); }
// Declared a friend of UString and UString::Rep; defined elsewhere.
void initializeUString();
// Forward declarations of the hash-trait templates that are specialized for Rep.
template<typename T> struct DefaultHash;
template<typename T> struct StrHash;
412 template<> struct StrHash
<JSC::UString::Rep
*> {
413 static unsigned hash(const JSC::UString::Rep
* key
) { return key
->hash(); }
414 static bool equal(const JSC::UString::Rep
* a
, const JSC::UString::Rep
* b
) { return JSC::equal(a
, b
); }
415 static const bool safeToCompareToEmptyOrDeleted
= false;
418 template<> struct StrHash
<RefPtr
<JSC::UString::Rep
> > : public StrHash
<JSC::UString::Rep
*> {
419 using StrHash
<JSC::UString::Rep
*>::hash
;
420 static unsigned hash(const RefPtr
<JSC::UString::Rep
>& key
) { return key
->hash(); }
421 using StrHash
<JSC::UString::Rep
*>::equal
;
422 static bool equal(const RefPtr
<JSC::UString::Rep
>& a
, const RefPtr
<JSC::UString::Rep
>& b
) { return JSC::equal(a
.get(), b
.get()); }
423 static bool equal(const JSC::UString::Rep
* a
, const RefPtr
<JSC::UString::Rep
>& b
) { return JSC::equal(a
, b
.get()); }
424 static bool equal(const RefPtr
<JSC::UString::Rep
>& a
, const JSC::UString::Rep
* b
) { return JSC::equal(a
.get(), b
); }
426 static const bool safeToCompareToEmptyOrDeleted
= false;
429 template<> struct DefaultHash
<JSC::UString::Rep
*> {
430 typedef StrHash
<JSC::UString::Rep
*> Hash
;
433 template<> struct DefaultHash
<RefPtr
<JSC::UString::Rep
> > {
434 typedef StrHash
<RefPtr
<JSC::UString::Rep
> > Hash
;