]> git.saurik.com Git - apple/javascriptcore.git/blob - runtime/UString.h
d01b75de7e3bdbce0bf023f405d636664fc647cf
[apple/javascriptcore.git] / runtime / UString.h
1 /*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Google Inc. All rights reserved.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23 #ifndef UString_h
24 #define UString_h
25
26 #include "Collector.h"
27 #include <stdint.h>
28 #include <string.h>
29 #include <wtf/Assertions.h>
30 #include <wtf/CrossThreadRefCounted.h>
31 #include <wtf/OwnFastMallocPtr.h>
32 #include <wtf/PassRefPtr.h>
33 #include <wtf/PtrAndFlags.h>
34 #include <wtf/RefPtr.h>
35 #include <wtf/Vector.h>
36 #include <wtf/unicode/Unicode.h>
37
38 namespace JSC {
39
40 using WTF::PlacementNewAdoptType;
41 using WTF::PlacementNewAdopt;
42
43 class IdentifierTable;
44
// A char buffer paired with an explicitly stored length.
// Only the trivial members are defined inline here; construction from data,
// copying, appending and destruction are declared here and defined elsewhere.
class CString {
public:
    // Creates a string with a null data pointer and a length of zero.
    CString()
        : m_length(0)
        , m_data(0)
    {
    }

    CString(const char*);
    CString(const char*, size_t);
    CString(const CString&);

    ~CString();

    static CString adopt(char*, size_t); // buffer should be allocated with new[].

    CString& append(const CString&);
    CString& operator=(const char* c);
    CString& operator=(const CString&);

    // Shorthand for append().
    CString& operator+=(const CString& c)
    {
        return append(c);
    }

    // Stored length.
    size_t size() const
    {
        return m_length;
    }

    // Stored data pointer; this is null for a default-constructed CString.
    const char* c_str() const
    {
        return m_data;
    }

private:
    size_t m_length;
    char* m_data;
};
73
74 typedef Vector<char, 32> CStringBuffer;
75
76 class UString {
77 friend class JIT;
78
79 public:
80 typedef CrossThreadRefCounted<OwnFastMallocPtr<UChar> > SharedUChar;
81 struct BaseString;
82 struct Rep : Noncopyable {
83 friend class JIT;
84
85 static PassRefPtr<Rep> create(UChar* buffer, int length)
86 {
87 return adoptRef(new BaseString(buffer, length));
88 }
89
90 static PassRefPtr<Rep> createEmptyBuffer(size_t size)
91 {
92 // Guard against integer overflow
93 if (size < (std::numeric_limits<size_t>::max() / sizeof(UChar))) {
94 if (void * buf = tryFastMalloc(size * sizeof(UChar)))
95 return adoptRef(new BaseString(static_cast<UChar*>(buf), 0, size));
96 }
97 return adoptRef(new BaseString(0, 0, 0));
98 }
99
100 static PassRefPtr<Rep> createCopying(const UChar*, int);
101 static PassRefPtr<Rep> create(PassRefPtr<Rep> base, int offset, int length);
102
103 // Constructs a string from a UTF-8 string, using strict conversion (see comments in UTF8.h).
104 // Returns UString::Rep::null for null input or conversion failure.
105 static PassRefPtr<Rep> createFromUTF8(const char*);
106
107 // Uses SharedUChar to have joint ownership over the UChar*.
108 static PassRefPtr<Rep> create(UChar*, int, PassRefPtr<SharedUChar>);
109
110 SharedUChar* sharedBuffer();
111 void destroy();
112
113 bool baseIsSelf() const { return m_identifierTableAndFlags.isFlagSet(BaseStringFlag); }
114 UChar* data() const;
115 int size() const { return len; }
116
117 unsigned hash() const { if (_hash == 0) _hash = computeHash(data(), len); return _hash; }
118 unsigned computedHash() const { ASSERT(_hash); return _hash; } // fast path for Identifiers
119
120 static unsigned computeHash(const UChar*, int length);
121 static unsigned computeHash(const char*, int length);
122 static unsigned computeHash(const char* s) { return computeHash(s, strlen(s)); }
123
124 IdentifierTable* identifierTable() const { return m_identifierTableAndFlags.get(); }
125 void setIdentifierTable(IdentifierTable* table) { ASSERT(!isStatic()); m_identifierTableAndFlags.set(table); }
126
127 bool isStatic() const { return m_identifierTableAndFlags.isFlagSet(StaticFlag); }
128 void setStatic(bool);
129 void setBaseString(PassRefPtr<BaseString>);
130 BaseString* baseString();
131 const BaseString* baseString() const;
132
133 Rep* ref() { ++rc; return this; }
134 ALWAYS_INLINE void deref() { if (--rc == 0) destroy(); }
135
136 void checkConsistency() const;
137 enum UStringFlags {
138 StaticFlag,
139 BaseStringFlag
140 };
141
142 // unshared data
143 int offset;
144 int len;
145 int rc; // For null and empty static strings, this field does not reflect a correct count, because ref/deref are not thread-safe. A special case in destroy() guarantees that these do not get deleted.
146 mutable unsigned _hash;
147 PtrAndFlags<IdentifierTable, UStringFlags> m_identifierTableAndFlags;
148
149 static BaseString& null() { return *nullBaseString; }
150 static BaseString& empty() { return *emptyBaseString; }
151
152 bool reserveCapacity(int capacity);
153
154 protected:
155 // Constructor for use by BaseString subclass; they use the union with m_baseString for another purpose.
156 Rep(int length)
157 : offset(0)
158 , len(length)
159 , rc(1)
160 , _hash(0)
161 , m_baseString(0)
162 {
163 }
164
165 Rep(PassRefPtr<BaseString> base, int offsetInBase, int length)
166 : offset(offsetInBase)
167 , len(length)
168 , rc(1)
169 , _hash(0)
170 , m_baseString(base.releaseRef())
171 {
172 checkConsistency();
173 }
174
175 union {
176 // If !baseIsSelf()
177 BaseString* m_baseString;
178 // If baseIsSelf()
179 SharedUChar* m_sharedBuffer;
180 };
181
182 private:
183 // For SmallStringStorage which allocates an array and does initialization manually.
184 Rep() { }
185
186 friend class SmallStringsStorage;
187 friend void initializeUString();
188 JS_EXPORTDATA static BaseString* nullBaseString;
189 JS_EXPORTDATA static BaseString* emptyBaseString;
190 };
191
192
193 struct BaseString : public Rep {
194 bool isShared() { return rc != 1 || isBufferReadOnly(); }
195 void setSharedBuffer(PassRefPtr<SharedUChar>);
196
197 bool isBufferReadOnly()
198 {
199 if (!m_sharedBuffer)
200 return false;
201 return slowIsBufferReadOnly();
202 }
203
204 // potentially shared data.
205 UChar* buf;
206 int preCapacity;
207 int usedPreCapacity;
208 int capacity;
209 int usedCapacity;
210
211 size_t reportedCost;
212
213 private:
214 BaseString(UChar* buffer, int length, int additionalCapacity = 0)
215 : Rep(length)
216 , buf(buffer)
217 , preCapacity(0)
218 , usedPreCapacity(0)
219 , capacity(length + additionalCapacity)
220 , usedCapacity(length)
221 , reportedCost(0)
222 {
223 m_identifierTableAndFlags.setFlag(BaseStringFlag);
224 checkConsistency();
225 }
226
227 SharedUChar* sharedBuffer();
228 bool slowIsBufferReadOnly();
229
230 friend struct Rep;
231 friend class SmallStringsStorage;
232 friend void initializeUString();
233 };
234
235 public:
236 UString();
237 UString(const char*);
238 UString(const UChar*, int length);
239 UString(UChar*, int length, bool copy);
240
241 UString(const UString& s)
242 : m_rep(s.m_rep)
243 {
244 }
245
246 UString(const Vector<UChar>& buffer);
247
248 ~UString()
249 {
250 }
251
252 // Special constructor for cases where we overwrite an object in place.
253 UString(PlacementNewAdoptType)
254 : m_rep(PlacementNewAdopt)
255 {
256 }
257
258 static UString from(int);
259 static UString from(unsigned int);
260 static UString from(long);
261 static UString from(double);
262
263 struct Range {
264 public:
265 Range(int pos, int len)
266 : position(pos)
267 , length(len)
268 {
269 }
270
271 Range()
272 {
273 }
274
275 int position;
276 int length;
277 };
278
279 UString spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const;
280
281 UString replaceRange(int rangeStart, int RangeEnd, const UString& replacement) const;
282
283 UString& append(const UString&);
284 UString& append(const char*);
285 UString& append(UChar);
286 UString& append(char c) { return append(static_cast<UChar>(static_cast<unsigned char>(c))); }
287 UString& append(const UChar*, int size);
288 UString& appendNumeric(int);
289 UString& appendNumeric(double);
290
291 bool getCString(CStringBuffer&) const;
292
293 // NOTE: This method should only be used for *debugging* purposes as it
294 // is neither Unicode safe nor free from side effects nor thread-safe.
295 char* ascii() const;
296
297 /**
298 * Convert the string to UTF-8, assuming it is UTF-16 encoded.
299 * In non-strict mode, this function is tolerant of badly formed UTF-16, it
300 * can create UTF-8 strings that are invalid because they have characters in
301 * the range U+D800-U+DDFF, U+FFFE, or U+FFFF, but the UTF-8 string is
302 * guaranteed to be otherwise valid.
303 * In strict mode, error is returned as null CString.
304 */
305 CString UTF8String(bool strict = false) const;
306
307 UString& operator=(const char*c);
308
309 UString& operator+=(const UString& s) { return append(s); }
310 UString& operator+=(const char* s) { return append(s); }
311
312 const UChar* data() const { return m_rep->data(); }
313
314 bool isNull() const { return (m_rep == &Rep::null()); }
315 bool isEmpty() const { return (!m_rep->len); }
316
317 bool is8Bit() const;
318
319 int size() const { return m_rep->size(); }
320
321 UChar operator[](int pos) const;
322
323 double toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const;
324 double toDouble(bool tolerateTrailingJunk) const;
325 double toDouble() const;
326
327 uint32_t toUInt32(bool* ok = 0) const;
328 uint32_t toUInt32(bool* ok, bool tolerateEmptyString) const;
329 uint32_t toStrictUInt32(bool* ok = 0) const;
330
331 unsigned toArrayIndex(bool* ok = 0) const;
332
333 int find(const UString& f, int pos = 0) const;
334 int find(UChar, int pos = 0) const;
335 int rfind(const UString& f, int pos) const;
336 int rfind(UChar, int pos) const;
337
338 UString substr(int pos = 0, int len = -1) const;
339
340 static const UString& null() { return *nullUString; }
341
342 Rep* rep() const { return m_rep.get(); }
343 static Rep* nullRep();
344
345 UString(PassRefPtr<Rep> r)
346 : m_rep(r)
347 {
348 ASSERT(m_rep);
349 }
350
351 size_t cost() const;
352
353 // Attempt to grow this string such that it can grow to a total length of 'capacity'
354 // without reallocation. This may fail a number of reasons - if the BasicString is
355 // shared and another string is using part of the capacity beyond our end point, if
356 // the realloc fails, or if this string is empty and has no storage.
357 //
358 // This method returns a boolean indicating success.
359 bool reserveCapacity(int capacity)
360 {
361 return m_rep->reserveCapacity(capacity);
362 }
363
364 private:
365 void expandCapacity(int requiredLength);
366 void expandPreCapacity(int requiredPreCap);
367 void makeNull();
368
369 RefPtr<Rep> m_rep;
370 static UString* nullUString;
371
372 friend void initializeUString();
373 friend bool operator==(const UString&, const UString&);
374 friend PassRefPtr<Rep> concatenate(Rep*, Rep*); // returns 0 if out of memory
375 };
376 PassRefPtr<UString::Rep> concatenate(UString::Rep*, UString::Rep*);
377 PassRefPtr<UString::Rep> concatenate(UString::Rep*, int);
378 PassRefPtr<UString::Rep> concatenate(UString::Rep*, double);
379
380 inline bool operator==(const UString& s1, const UString& s2)
381 {
382 int size = s1.size();
383 switch (size) {
384 case 0:
385 return !s2.size();
386 case 1:
387 return s2.size() == 1 && s1.data()[0] == s2.data()[0];
388 case 2: {
389 if (s2.size() != 2)
390 return false;
391 const UChar* d1 = s1.data();
392 const UChar* d2 = s2.data();
393 return (d1[0] == d2[0]) & (d1[1] == d2[1]);
394 }
395 default:
396 return s2.size() == size && memcmp(s1.data(), s2.data(), size * sizeof(UChar)) == 0;
397 }
398 }
399
400
401 inline bool operator!=(const UString& s1, const UString& s2)
402 {
403 return !JSC::operator==(s1, s2);
404 }
405
406 bool operator<(const UString& s1, const UString& s2);
407 bool operator>(const UString& s1, const UString& s2);
408
409 bool operator==(const UString& s1, const char* s2);
410
411 inline bool operator!=(const UString& s1, const char* s2)
412 {
413 return !JSC::operator==(s1, s2);
414 }
415
416 inline bool operator==(const char *s1, const UString& s2)
417 {
418 return operator==(s2, s1);
419 }
420
421 inline bool operator!=(const char *s1, const UString& s2)
422 {
423 return !JSC::operator==(s1, s2);
424 }
425
426 bool operator==(const CString&, const CString&);
427
428 inline UString operator+(const UString& s1, const UString& s2)
429 {
430 RefPtr<UString::Rep> result = concatenate(s1.rep(), s2.rep());
431 return UString(result ? result.release() : UString::nullRep());
432 }
433
434 int compare(const UString&, const UString&);
435
436 bool equal(const UString::Rep*, const UString::Rep*);
437
438 inline PassRefPtr<UString::Rep> UString::Rep::create(PassRefPtr<UString::Rep> rep, int offset, int length)
439 {
440 ASSERT(rep);
441 rep->checkConsistency();
442
443 int repOffset = rep->offset;
444
445 PassRefPtr<BaseString> base = rep->baseString();
446
447 ASSERT(-(offset + repOffset) <= base->usedPreCapacity);
448 ASSERT(offset + repOffset + length <= base->usedCapacity);
449
450 // Steal the single reference this Rep was created with.
451 return adoptRef(new Rep(base, repOffset + offset, length));
452 }
453
454 inline UChar* UString::Rep::data() const
455 {
456 const BaseString* base = baseString();
457 return base->buf + base->preCapacity + offset;
458 }
459
460 inline void UString::Rep::setStatic(bool v)
461 {
462 ASSERT(!identifierTable());
463 if (v)
464 m_identifierTableAndFlags.setFlag(StaticFlag);
465 else
466 m_identifierTableAndFlags.clearFlag(StaticFlag);
467 }
468
469 inline void UString::Rep::setBaseString(PassRefPtr<BaseString> base)
470 {
471 ASSERT(base != this);
472 ASSERT(!baseIsSelf());
473 m_baseString = base.releaseRef();
474 }
475
476 inline UString::BaseString* UString::Rep::baseString()
477 {
478 return !baseIsSelf() ? m_baseString : reinterpret_cast<BaseString*>(this) ;
479 }
480
481 inline const UString::BaseString* UString::Rep::baseString() const
482 {
483 return const_cast<Rep*>(this)->baseString();
484 }
485
#ifdef NDEBUG
// When NDEBUG is defined the consistency check is an empty inline function.
inline void UString::Rep::checkConsistency() const { }
#endif
491
492 inline UString::UString()
493 : m_rep(&Rep::null())
494 {
495 }
496
497 // Rule from ECMA 15.2 about what an array index is.
498 // Must exactly match string form of an unsigned integer, and be less than 2^32 - 1.
499 inline unsigned UString::toArrayIndex(bool* ok) const
500 {
501 unsigned i = toStrictUInt32(ok);
502 if (ok && i >= 0xFFFFFFFFU)
503 *ok = false;
504 return i;
505 }
506
507 // We'd rather not do shared substring append for small strings, since
508 // this runs too much risk of a tiny initial string holding down a
509 // huge buffer.
510 // FIXME: this should be size_t but that would cause warnings until we
511 // fix UString sizes to be size_t instead of int
512 static const int minShareSize = Heap::minExtraCostSize / sizeof(UChar);
513
514 inline size_t UString::cost() const
515 {
516 BaseString* base = m_rep->baseString();
517 size_t capacity = (base->capacity + base->preCapacity) * sizeof(UChar);
518 size_t reportedCost = base->reportedCost;
519 ASSERT(capacity >= reportedCost);
520
521 size_t capacityDelta = capacity - reportedCost;
522
523 if (capacityDelta < static_cast<size_t>(minShareSize))
524 return 0;
525
526 base->reportedCost = capacity;
527
528 return capacityDelta;
529 }
530
531 struct IdentifierRepHash : PtrHash<RefPtr<JSC::UString::Rep> > {
532 static unsigned hash(const RefPtr<JSC::UString::Rep>& key) { return key->computedHash(); }
533 static unsigned hash(JSC::UString::Rep* key) { return key->computedHash(); }
534 };
535
536 void initializeUString();
537 } // namespace JSC
538
539 namespace WTF {
540
541 template<typename T> struct DefaultHash;
542 template<typename T> struct StrHash;
543
544 template<> struct StrHash<JSC::UString::Rep*> {
545 static unsigned hash(const JSC::UString::Rep* key) { return key->hash(); }
546 static bool equal(const JSC::UString::Rep* a, const JSC::UString::Rep* b) { return JSC::equal(a, b); }
547 static const bool safeToCompareToEmptyOrDeleted = false;
548 };
549
550 template<> struct StrHash<RefPtr<JSC::UString::Rep> > : public StrHash<JSC::UString::Rep*> {
551 using StrHash<JSC::UString::Rep*>::hash;
552 static unsigned hash(const RefPtr<JSC::UString::Rep>& key) { return key->hash(); }
553 using StrHash<JSC::UString::Rep*>::equal;
554 static bool equal(const RefPtr<JSC::UString::Rep>& a, const RefPtr<JSC::UString::Rep>& b) { return JSC::equal(a.get(), b.get()); }
555 static bool equal(const JSC::UString::Rep* a, const RefPtr<JSC::UString::Rep>& b) { return JSC::equal(a, b.get()); }
556 static bool equal(const RefPtr<JSC::UString::Rep>& a, const JSC::UString::Rep* b) { return JSC::equal(a.get(), b); }
557
558 static const bool safeToCompareToEmptyOrDeleted = false;
559 };
560
561 template<> struct DefaultHash<JSC::UString::Rep*> {
562 typedef StrHash<JSC::UString::Rep*> Hash;
563 };
564
565 template<> struct DefaultHash<RefPtr<JSC::UString::Rep> > {
566 typedef StrHash<RefPtr<JSC::UString::Rep> > Hash;
567
568 };
569
570 } // namespace WTF
571
572 #endif