// git.saurik.com Git - apple/javascriptcore.git / blob - runtime/UString.h
// 59a7665059ffb34d0e3f0219f6b20b33ffe7cf5e
1 /*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 * Copyright (c) 2009, Google Inc. All rights reserved.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23 #ifndef UString_h
24 #define UString_h
25
26 #include "Collector.h"
27 #include <stdint.h>
28 #include <string.h>
29 #include <wtf/Assertions.h>
30 #include <wtf/PassRefPtr.h>
31 #include <wtf/PtrAndFlags.h>
32 #include <wtf/RefPtr.h>
33 #include <wtf/Vector.h>
34 #include <wtf/unicode/Unicode.h>
35
36 namespace JSC {
37
38 using WTF::PlacementNewAdoptType;
39 using WTF::PlacementNewAdopt;
40
41 class IdentifierTable;
42
// Owning wrapper around a heap-allocated char buffer together with its length.
// A default-constructed CString has a null data pointer and a length of zero.
class CString {
public:
    // Creates a CString with no buffer (c_str() returns 0, size() returns 0).
    CString()
        : m_length(0)
        , m_data(0)
    {
    }

    CString(const char*);
    CString(const char*, size_t);
    CString(const CString&);

    ~CString();

    // Builds a CString around an existing buffer.
    // The buffer should be allocated with new[].
    static CString adopt(char*, size_t);

    CString& append(const CString&);
    CString& operator=(const char* c);
    CString& operator=(const CString&);

    // Shorthand for append().
    CString& operator+=(const CString& c)
    {
        return append(c);
    }

    // Stored length of the buffer.
    size_t size() const
    {
        return m_length;
    }

    // Raw pointer to the stored characters; 0 for a default-constructed CString.
    const char* c_str() const
    {
        return m_data;
    }

private:
    size_t m_length;
    char* m_data;
};
71
72 typedef Vector<char, 32> CStringBuffer;
73
74 class UString {
75 friend class JIT;
76
77 public:
78 struct BaseString;
79 struct Rep : Noncopyable {
80 friend class JIT;
81
82 static PassRefPtr<Rep> create(UChar*, int);
83 static PassRefPtr<Rep> createCopying(const UChar*, int);
84 static PassRefPtr<Rep> create(PassRefPtr<Rep> base, int offset, int length);
85
86 // Constructs a string from a UTF-8 string, using strict conversion (see comments in UTF8.h).
87 // Returns UString::Rep::null for null input or conversion failure.
88 static PassRefPtr<Rep> createFromUTF8(const char*);
89
90 void destroy();
91
92 bool baseIsSelf() const { return m_identifierTableAndFlags.isFlagSet(BaseStringFlag); }
93 UChar* data() const;
94 int size() const { return len; }
95
96 unsigned hash() const { if (_hash == 0) _hash = computeHash(data(), len); return _hash; }
97 unsigned computedHash() const { ASSERT(_hash); return _hash; } // fast path for Identifiers
98
99 static unsigned computeHash(const UChar*, int length);
100 static unsigned computeHash(const char*, int length);
101 static unsigned computeHash(const char* s) { return computeHash(s, strlen(s)); }
102
103 IdentifierTable* identifierTable() const { return m_identifierTableAndFlags.get(); }
104 void setIdentifierTable(IdentifierTable* table) { ASSERT(!isStatic()); m_identifierTableAndFlags.set(table); }
105
106 bool isStatic() const { return m_identifierTableAndFlags.isFlagSet(StaticFlag); }
107 void setStatic(bool);
108 void setBaseString(PassRefPtr<BaseString>);
109 BaseString* baseString();
110 const BaseString* baseString() const;
111
112 Rep* ref() { ++rc; return this; }
113 ALWAYS_INLINE void deref() { if (--rc == 0) destroy(); }
114
115 void checkConsistency() const;
116 enum UStringFlags {
117 StaticFlag,
118 BaseStringFlag
119 };
120
121 // unshared data
122 int offset;
123 int len;
124 int rc; // For null and empty static strings, this field does not reflect a correct count, because ref/deref are not thread-safe. A special case in destroy() guarantees that these do not get deleted.
125 mutable unsigned _hash;
126 PtrAndFlags<IdentifierTable, UStringFlags> m_identifierTableAndFlags;
127 void* m_baseString; // If "this" is a BaseString instance, it is 0. BaseString* otherwise.
128
129 static BaseString& null() { return *nullBaseString; }
130 static BaseString& empty() { return *emptyBaseString; }
131
132 private:
133 friend void initializeUString();
134 static BaseString* nullBaseString;
135 static BaseString* emptyBaseString;
136 };
137
138 struct BaseString : public Rep {
139 BaseString()
140 {
141 m_identifierTableAndFlags.setFlag(BaseStringFlag);
142 }
143
144 // potentially shared data.
145 UChar* buf;
146 int preCapacity;
147 int usedPreCapacity;
148 int capacity;
149 int usedCapacity;
150
151 size_t reportedCost;
152 };
153
154 public:
155 UString();
156 UString(const char*);
157 UString(const UChar*, int length);
158 UString(UChar*, int length, bool copy);
159
160 UString(const UString& s)
161 : m_rep(s.m_rep)
162 {
163 }
164
165 UString(const Vector<UChar>& buffer);
166
167 ~UString()
168 {
169 }
170
171 // Special constructor for cases where we overwrite an object in place.
172 UString(PlacementNewAdoptType)
173 : m_rep(PlacementNewAdopt)
174 {
175 }
176
177 static UString from(int);
178 static UString from(unsigned int);
179 static UString from(long);
180 static UString from(double);
181
182 struct Range {
183 public:
184 Range(int pos, int len)
185 : position(pos)
186 , length(len)
187 {
188 }
189
190 Range()
191 {
192 }
193
194 int position;
195 int length;
196 };
197
198 UString spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const;
199
200 UString& append(const UString&);
201 UString& append(const char*);
202 UString& append(UChar);
203 UString& append(char c) { return append(static_cast<UChar>(static_cast<unsigned char>(c))); }
204 UString& append(const UChar*, int size);
205
206 bool getCString(CStringBuffer&) const;
207
208 // NOTE: This method should only be used for *debugging* purposes as it
209 // is neither Unicode safe nor free from side effects nor thread-safe.
210 char* ascii() const;
211
212 /**
213 * Convert the string to UTF-8, assuming it is UTF-16 encoded.
214 * In non-strict mode, this function is tolerant of badly formed UTF-16, it
215 * can create UTF-8 strings that are invalid because they have characters in
216 * the range U+D800-U+DDFF, U+FFFE, or U+FFFF, but the UTF-8 string is
217 * guaranteed to be otherwise valid.
218 * In strict mode, error is returned as null CString.
219 */
220 CString UTF8String(bool strict = false) const;
221
222 UString& operator=(const char*c);
223
224 UString& operator+=(const UString& s) { return append(s); }
225 UString& operator+=(const char* s) { return append(s); }
226
227 const UChar* data() const { return m_rep->data(); }
228
229 bool isNull() const { return (m_rep == &Rep::null()); }
230 bool isEmpty() const { return (!m_rep->len); }
231
232 bool is8Bit() const;
233
234 int size() const { return m_rep->size(); }
235
236 UChar operator[](int pos) const;
237
238 double toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const;
239 double toDouble(bool tolerateTrailingJunk) const;
240 double toDouble() const;
241
242 uint32_t toUInt32(bool* ok = 0) const;
243 uint32_t toUInt32(bool* ok, bool tolerateEmptyString) const;
244 uint32_t toStrictUInt32(bool* ok = 0) const;
245
246 unsigned toArrayIndex(bool* ok = 0) const;
247
248 int find(const UString& f, int pos = 0) const;
249 int find(UChar, int pos = 0) const;
250 int rfind(const UString& f, int pos) const;
251 int rfind(UChar, int pos) const;
252
253 UString substr(int pos = 0, int len = -1) const;
254
255 static const UString& null() { return *nullUString; }
256
257 Rep* rep() const { return m_rep.get(); }
258 static Rep* nullRep();
259
260 UString(PassRefPtr<Rep> r)
261 : m_rep(r)
262 {
263 ASSERT(m_rep);
264 }
265
266 size_t cost() const;
267
268 private:
269 void expandCapacity(int requiredLength);
270 void expandPreCapacity(int requiredPreCap);
271 void makeNull();
272
273 RefPtr<Rep> m_rep;
274 static UString* nullUString;
275
276 friend void initializeUString();
277 friend bool operator==(const UString&, const UString&);
278 friend PassRefPtr<Rep> concatenate(Rep*, Rep*); // returns 0 if out of memory
279 };
280 PassRefPtr<UString::Rep> concatenate(UString::Rep*, UString::Rep*);
281 PassRefPtr<UString::Rep> concatenate(UString::Rep*, int);
282 PassRefPtr<UString::Rep> concatenate(UString::Rep*, double);
283
284 bool operator==(const UString&, const UString&);
285
286 inline bool operator!=(const UString& s1, const UString& s2)
287 {
288 return !JSC::operator==(s1, s2);
289 }
290
291 bool operator<(const UString& s1, const UString& s2);
292 bool operator>(const UString& s1, const UString& s2);
293
294 bool operator==(const UString& s1, const char* s2);
295
296 inline bool operator!=(const UString& s1, const char* s2)
297 {
298 return !JSC::operator==(s1, s2);
299 }
300
301 inline bool operator==(const char *s1, const UString& s2)
302 {
303 return operator==(s2, s1);
304 }
305
306 inline bool operator!=(const char *s1, const UString& s2)
307 {
308 return !JSC::operator==(s1, s2);
309 }
310
311 bool operator==(const CString&, const CString&);
312
313 inline UString operator+(const UString& s1, const UString& s2)
314 {
315 RefPtr<UString::Rep> result = concatenate(s1.rep(), s2.rep());
316 return UString(result ? result.release() : UString::nullRep());
317 }
318
319 int compare(const UString&, const UString&);
320
321 bool equal(const UString::Rep*, const UString::Rep*);
322
323 inline UChar* UString::Rep::data() const
324 {
325 const BaseString* base = baseString();
326 return base->buf + base->preCapacity + offset;
327 }
328
329 inline void UString::Rep::setStatic(bool v)
330 {
331 ASSERT(!identifierTable());
332 if (v)
333 m_identifierTableAndFlags.setFlag(StaticFlag);
334 else
335 m_identifierTableAndFlags.clearFlag(StaticFlag);
336 }
337
338 inline void UString::Rep::setBaseString(PassRefPtr<BaseString> base)
339 {
340 ASSERT(base != this);
341 m_baseString = base.releaseRef();
342 }
343
344 inline UString::BaseString* UString::Rep::baseString()
345 {
346 return reinterpret_cast<BaseString*>(baseIsSelf() ? this : m_baseString);
347 }
348
349 inline const UString::BaseString* UString::Rep::baseString() const
350 {
351 return const_cast<const BaseString*>(const_cast<Rep*>(this)->baseString());
352 }
353
#if defined(NDEBUG)
// With NDEBUG defined, consistency checking is an inline no-op; this header
// provides no definition otherwise.
inline void UString::Rep::checkConsistency() const
{
}
#endif
359
360 inline UString::UString()
361 : m_rep(&Rep::null())
362 {
363 }
364
365 // Rule from ECMA 15.2 about what an array index is.
366 // Must exactly match string form of an unsigned integer, and be less than 2^32 - 1.
367 inline unsigned UString::toArrayIndex(bool* ok) const
368 {
369 unsigned i = toStrictUInt32(ok);
370 if (ok && i >= 0xFFFFFFFFU)
371 *ok = false;
372 return i;
373 }
374
375 // We'd rather not do shared substring append for small strings, since
376 // this runs too much risk of a tiny initial string holding down a
377 // huge buffer.
378 // FIXME: this should be size_t but that would cause warnings until we
379 // fix UString sizes to be size_t instead of int
380 static const int minShareSize = Heap::minExtraCostSize / sizeof(UChar);
381
382 inline size_t UString::cost() const
383 {
384 BaseString* base = m_rep->baseString();
385 size_t capacity = (base->capacity + base->preCapacity) * sizeof(UChar);
386 size_t reportedCost = base->reportedCost;
387 ASSERT(capacity >= reportedCost);
388
389 size_t capacityDelta = capacity - reportedCost;
390
391 if (capacityDelta < static_cast<size_t>(minShareSize))
392 return 0;
393
394 base->reportedCost = capacity;
395
396 return capacityDelta;
397 }
398
399 struct IdentifierRepHash : PtrHash<RefPtr<JSC::UString::Rep> > {
400 static unsigned hash(const RefPtr<JSC::UString::Rep>& key) { return key->computedHash(); }
401 static unsigned hash(JSC::UString::Rep* key) { return key->computedHash(); }
402 };
403
404 void initializeUString();
405 } // namespace JSC
406
407 namespace WTF {
408
409 template<typename T> struct DefaultHash;
410 template<typename T> struct StrHash;
411
412 template<> struct StrHash<JSC::UString::Rep*> {
413 static unsigned hash(const JSC::UString::Rep* key) { return key->hash(); }
414 static bool equal(const JSC::UString::Rep* a, const JSC::UString::Rep* b) { return JSC::equal(a, b); }
415 static const bool safeToCompareToEmptyOrDeleted = false;
416 };
417
418 template<> struct StrHash<RefPtr<JSC::UString::Rep> > : public StrHash<JSC::UString::Rep*> {
419 using StrHash<JSC::UString::Rep*>::hash;
420 static unsigned hash(const RefPtr<JSC::UString::Rep>& key) { return key->hash(); }
421 using StrHash<JSC::UString::Rep*>::equal;
422 static bool equal(const RefPtr<JSC::UString::Rep>& a, const RefPtr<JSC::UString::Rep>& b) { return JSC::equal(a.get(), b.get()); }
423 static bool equal(const JSC::UString::Rep* a, const RefPtr<JSC::UString::Rep>& b) { return JSC::equal(a, b.get()); }
424 static bool equal(const RefPtr<JSC::UString::Rep>& a, const JSC::UString::Rep* b) { return JSC::equal(a.get(), b); }
425
426 static const bool safeToCompareToEmptyOrDeleted = false;
427 };
428
429 template<> struct DefaultHash<JSC::UString::Rep*> {
430 typedef StrHash<JSC::UString::Rep*> Hash;
431 };
432
433 template<> struct DefaultHash<RefPtr<JSC::UString::Rep> > {
434 typedef StrHash<RefPtr<JSC::UString::Rep> > Hash;
435
436 };
437
438 } // namespace WTF
439
440 #endif