]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ****************************************************************************** | |
73c04bcf | 3 | * Copyright (C) 1999-2005, International Business Machines Corporation and * |
b75a7d8f A |
4 | * others. All Rights Reserved. * |
5 | ****************************************************************************** | |
6 | * | |
7 | * File unistr.cpp | |
8 | * | |
9 | * Modification History: | |
10 | * | |
11 | * Date Name Description | |
12 | * 09/25/98 stephen Creation. | |
13 | * 04/20/99 stephen Overhauled per 4/16 code review. | |
14 | * 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX | |
15 | * 11/18/99 aliu Added handleReplaceBetween() to make inherit from | |
16 | * Replaceable. | |
17 | * 06/25/01 grhoten Removed the dependency on iostream | |
18 | ****************************************************************************** | |
19 | */ | |
20 | ||
21 | #include "unicode/utypes.h" | |
22 | #include "unicode/putil.h" | |
b75a7d8f A |
23 | #include "cstring.h" |
24 | #include "cmemory.h" | |
25 | #include "unicode/ustring.h" | |
26 | #include "unicode/unistr.h" | |
b75a7d8f A |
27 | #include "uhash.h" |
28 | #include "ustr_imp.h" | |
b75a7d8f A |
29 | #include "umutex.h" |
30 | ||
31 | #if 0 | |
32 | ||
33 | #if U_IOSTREAM_SOURCE >= 199711 | |
34 | #include <iostream> | |
35 | using namespace std; | |
36 | #elif U_IOSTREAM_SOURCE >= 198506 | |
37 | #include <iostream.h> | |
38 | #endif | |
39 | ||
40 | //DEBUGGING | |
41 | void | |
42 | print(const UnicodeString& s, | |
43 | const char *name) | |
44 | { | |
45 | UChar c; | |
46 | cout << name << ":|"; | |
47 | for(int i = 0; i < s.length(); ++i) { | |
48 | c = s[i]; | |
49 | if(c>= 0x007E || c < 0x0020) | |
50 | cout << "[0x" << hex << s[i] << "]"; | |
51 | else | |
52 | cout << (char) s[i]; | |
53 | } | |
54 | cout << '|' << endl; | |
55 | } | |
56 | ||
57 | void | |
58 | print(const UChar *s, | |
59 | int32_t len, | |
60 | const char *name) | |
61 | { | |
62 | UChar c; | |
63 | cout << name << ":|"; | |
64 | for(int i = 0; i < len; ++i) { | |
65 | c = s[i]; | |
66 | if(c>= 0x007E || c < 0x0020) | |
67 | cout << "[0x" << hex << s[i] << "]"; | |
68 | else | |
69 | cout << (char) s[i]; | |
70 | } | |
71 | cout << '|' << endl; | |
72 | } | |
73 | // END DEBUGGING | |
74 | #endif | |
75 | ||
76 | // Local function definitions for now | |
77 | ||
78 | // need to copy areas that may overlap | |
79 | static | |
80 | inline void | |
81 | us_arrayCopy(const UChar *src, int32_t srcStart, | |
82 | UChar *dst, int32_t dstStart, int32_t count) | |
83 | { | |
84 | if(count>0) { | |
85 | uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src))); | |
86 | } | |
87 | } | |
88 | ||
89 | // u_unescapeAt() callback to get a UChar from a UnicodeString | |
90 | U_CDECL_BEGIN | |
91 | static UChar U_CALLCONV | |
92 | UnicodeString_charAt(int32_t offset, void *context) { | |
93 | return ((UnicodeString*) context)->charAt(offset); | |
94 | } | |
95 | U_CDECL_END | |
96 | ||
97 | U_NAMESPACE_BEGIN | |
98 | ||
374ca955 A |
99 | /* The Replaceable virtual destructor can't be defined in the header |
100 | due to how AIX works with multiple definitions of virtual functions. | |
101 | */ | |
102 | Replaceable::~Replaceable() {} | |
103 | Replaceable::Replaceable() {} | |
104 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString) | |
105 | ||
106 | UnicodeString U_EXPORT2 | |
107 | operator+ (const UnicodeString &s1, const UnicodeString &s2) { | |
108 | return | |
109 | UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0). | |
110 | append(s1). | |
111 | append(s2); | |
112 | } | |
b75a7d8f A |
113 | |
114 | //======================================== | |
115 | // Reference Counting functions, put at top of file so that optimizing compilers | |
116 | // have a chance to automatically inline. | |
117 | //======================================== | |
118 | ||
119 | void | |
120 | UnicodeString::addRef() | |
121 | { umtx_atomic_inc((int32_t *)fArray - 1);} | |
122 | ||
123 | int32_t | |
124 | UnicodeString::removeRef() | |
125 | { return umtx_atomic_dec((int32_t *)fArray - 1);} | |
126 | ||
127 | int32_t | |
128 | UnicodeString::refCount() const | |
129 | { | |
130 | umtx_lock(NULL); | |
131 | // Note: without the lock to force a memory barrier, we might see a very | |
132 | // stale value on some multi-processor systems. | |
133 | int32_t count = *((int32_t *)fArray - 1); | |
134 | umtx_unlock(NULL); | |
135 | return count; | |
136 | } | |
137 | ||
138 | void | |
139 | UnicodeString::releaseArray() { | |
140 | if((fFlags & kRefCounted) && removeRef() == 0) { | |
141 | uprv_free((int32_t *)fArray - 1); | |
142 | } | |
143 | } | |
144 | ||
145 | ||
146 | ||
147 | //======================================== | |
148 | // Constructors | |
149 | //======================================== | |
150 | UnicodeString::UnicodeString() | |
151 | : fLength(0), | |
152 | fCapacity(US_STACKBUF_SIZE), | |
153 | fArray(fStackBuffer), | |
154 | fFlags(kShortString) | |
155 | {} | |
156 | ||
157 | UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) | |
158 | : fLength(0), | |
159 | fCapacity(US_STACKBUF_SIZE), | |
160 | fArray(0), | |
161 | fFlags(0) | |
162 | { | |
163 | if(count <= 0 || (uint32_t)c > 0x10ffff) { | |
164 | // just allocate and do not do anything else | |
165 | allocate(capacity); | |
166 | } else { | |
167 | // count > 0, allocate and fill the new string with count c's | |
168 | int32_t unitCount = UTF_CHAR_LENGTH(c), length = count * unitCount; | |
169 | if(capacity < length) { | |
170 | capacity = length; | |
171 | } | |
172 | if(allocate(capacity)) { | |
173 | int32_t i = 0; | |
174 | ||
175 | // fill the new string with c | |
176 | if(unitCount == 1) { | |
177 | // fill with length UChars | |
178 | while(i < length) { | |
179 | fArray[i++] = (UChar)c; | |
180 | } | |
181 | } else { | |
182 | // get the code units for c | |
183 | UChar units[UTF_MAX_CHAR_LENGTH]; | |
184 | UTF_APPEND_CHAR_UNSAFE(units, i, c); | |
185 | ||
186 | // now it must be i==unitCount | |
187 | i = 0; | |
188 | ||
189 | // for Unicode, unitCount can only be 1, 2, 3, or 4 | |
190 | // 1 is handled above | |
191 | while(i < length) { | |
192 | int32_t unitIdx = 0; | |
193 | while(unitIdx < unitCount) { | |
194 | fArray[i++]=units[unitIdx++]; | |
195 | } | |
196 | } | |
197 | } | |
198 | } | |
199 | fLength = length; | |
200 | } | |
201 | } | |
202 | ||
203 | UnicodeString::UnicodeString(UChar ch) | |
204 | : fLength(1), | |
205 | fCapacity(US_STACKBUF_SIZE), | |
206 | fArray(fStackBuffer), | |
207 | fFlags(kShortString) | |
208 | { | |
209 | fStackBuffer[0] = ch; | |
210 | } | |
211 | ||
212 | UnicodeString::UnicodeString(UChar32 ch) | |
213 | : fLength(1), | |
214 | fCapacity(US_STACKBUF_SIZE), | |
215 | fArray(fStackBuffer), | |
216 | fFlags(kShortString) | |
217 | { | |
218 | int32_t i = 0; | |
219 | UBool isError = FALSE; | |
220 | U16_APPEND(fStackBuffer, i, US_STACKBUF_SIZE, ch, isError); | |
221 | fLength = i; | |
222 | } | |
223 | ||
224 | UnicodeString::UnicodeString(const UChar *text) | |
225 | : fLength(0), | |
226 | fCapacity(US_STACKBUF_SIZE), | |
227 | fArray(fStackBuffer), | |
228 | fFlags(kShortString) | |
229 | { | |
230 | doReplace(0, 0, text, 0, -1); | |
231 | } | |
232 | ||
233 | UnicodeString::UnicodeString(const UChar *text, | |
234 | int32_t textLength) | |
235 | : fLength(0), | |
236 | fCapacity(US_STACKBUF_SIZE), | |
237 | fArray(fStackBuffer), | |
238 | fFlags(kShortString) | |
239 | { | |
240 | doReplace(0, 0, text, 0, textLength); | |
241 | } | |
242 | ||
243 | UnicodeString::UnicodeString(UBool isTerminated, | |
244 | const UChar *text, | |
245 | int32_t textLength) | |
246 | : fLength(textLength), | |
247 | fCapacity(isTerminated ? textLength + 1 : textLength), | |
248 | fArray((UChar *)text), | |
249 | fFlags(kReadonlyAlias) | |
250 | { | |
251 | if(text == NULL) { | |
252 | // treat as an empty string, do not alias | |
253 | fLength = 0; | |
254 | fCapacity = US_STACKBUF_SIZE; | |
255 | fArray = fStackBuffer; | |
256 | fFlags = kShortString; | |
257 | } else if(textLength < -1 || | |
258 | (textLength == -1 && !isTerminated) || | |
259 | (textLength >= 0 && isTerminated && text[textLength] != 0) | |
260 | ) { | |
261 | setToBogus(); | |
262 | } else if(textLength == -1) { | |
263 | // text is terminated, or else it would have failed the above test | |
264 | fLength = u_strlen(text); | |
265 | fCapacity = fLength + 1; | |
266 | } | |
267 | } | |
268 | ||
269 | UnicodeString::UnicodeString(UChar *buff, | |
270 | int32_t buffLength, | |
271 | int32_t buffCapacity) | |
272 | : fLength(buffLength), | |
273 | fCapacity(buffCapacity), | |
274 | fArray(buff), | |
275 | fFlags(kWritableAlias) | |
276 | { | |
277 | if(buff == NULL) { | |
278 | // treat as an empty string, do not alias | |
279 | fLength = 0; | |
280 | fCapacity = US_STACKBUF_SIZE; | |
281 | fArray = fStackBuffer; | |
282 | fFlags = kShortString; | |
374ca955 | 283 | } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) { |
b75a7d8f A |
284 | setToBogus(); |
285 | } else if(buffLength == -1) { | |
286 | // fLength = u_strlen(buff); but do not look beyond buffCapacity | |
287 | const UChar *p = buff, *limit = buff + buffCapacity; | |
288 | while(p != limit && *p != 0) { | |
289 | ++p; | |
290 | } | |
291 | fLength = (int32_t)(p - buff); | |
292 | } | |
293 | } | |
294 | ||
374ca955 | 295 | UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) |
b75a7d8f A |
296 | : fLength(0), |
297 | fCapacity(US_STACKBUF_SIZE), | |
298 | fArray(fStackBuffer), | |
299 | fFlags(kShortString) | |
300 | { | |
374ca955 A |
301 | if(src==NULL) { |
302 | // treat as an empty string | |
303 | } else { | |
304 | if(length<0) { | |
73c04bcf | 305 | length=(int32_t)uprv_strlen(src); |
b75a7d8f | 306 | } |
374ca955 A |
307 | if(cloneArrayIfNeeded(length, length, FALSE)) { |
308 | u_charsToUChars(src, getArrayStart(), length); | |
309 | fLength = length; | |
310 | } else { | |
b75a7d8f A |
311 | setToBogus(); |
312 | } | |
313 | } | |
314 | } | |
315 | ||
316 | UnicodeString::UnicodeString(const UnicodeString& that) | |
317 | : Replaceable(), | |
318 | fLength(0), | |
319 | fCapacity(US_STACKBUF_SIZE), | |
320 | fArray(fStackBuffer), | |
321 | fFlags(kShortString) | |
322 | { | |
323 | copyFrom(that); | |
324 | } | |
325 | ||
326 | UnicodeString::UnicodeString(const UnicodeString& that, | |
327 | int32_t srcStart) | |
328 | : Replaceable(), | |
329 | fLength(0), | |
330 | fCapacity(US_STACKBUF_SIZE), | |
331 | fArray(fStackBuffer), | |
332 | fFlags(kShortString) | |
333 | { | |
334 | setTo(that, srcStart); | |
335 | } | |
336 | ||
337 | UnicodeString::UnicodeString(const UnicodeString& that, | |
338 | int32_t srcStart, | |
339 | int32_t srcLength) | |
340 | : Replaceable(), | |
341 | fLength(0), | |
342 | fCapacity(US_STACKBUF_SIZE), | |
343 | fArray(fStackBuffer), | |
344 | fFlags(kShortString) | |
345 | { | |
346 | setTo(that, srcStart, srcLength); | |
347 | } | |
348 | ||
349 | // Replaceable base class clone() default implementation, does not clone | |
350 | Replaceable * | |
351 | Replaceable::clone() const { | |
352 | return NULL; | |
353 | } | |
354 | ||
355 | // UnicodeString overrides clone() with a real implementation | |
356 | Replaceable * | |
357 | UnicodeString::clone() const { | |
358 | return new UnicodeString(*this); | |
359 | } | |
360 | ||
361 | //======================================== | |
362 | // array allocation | |
363 | //======================================== | |
364 | ||
365 | UBool | |
366 | UnicodeString::allocate(int32_t capacity) { | |
367 | if(capacity <= US_STACKBUF_SIZE) { | |
368 | fArray = fStackBuffer; | |
369 | fCapacity = US_STACKBUF_SIZE; | |
370 | fFlags = kShortString; | |
371 | } else { | |
372 | // count bytes for the refCounter and the string capacity, and | |
373 | // round up to a multiple of 16; then divide by 4 and allocate int32_t's | |
374 | // to be safely aligned for the refCount | |
375 | int32_t words = (int32_t)(((sizeof(int32_t) + capacity * U_SIZEOF_UCHAR + 15) & ~15) >> 2); | |
376 | int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words ); | |
377 | if(array != 0) { | |
378 | // set initial refCount and point behind the refCount | |
379 | *array++ = 1; | |
380 | ||
381 | // have fArray point to the first UChar | |
382 | fArray = (UChar *)array; | |
383 | fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR)); | |
384 | fFlags = kLongString; | |
385 | } else { | |
386 | fLength = 0; | |
387 | fCapacity = 0; | |
388 | fFlags = kIsBogus; | |
389 | return FALSE; | |
390 | } | |
391 | } | |
392 | return TRUE; | |
393 | } | |
394 | ||
395 | //======================================== | |
396 | // Destructor | |
397 | //======================================== | |
398 | UnicodeString::~UnicodeString() | |
399 | { | |
400 | releaseArray(); | |
401 | } | |
402 | ||
403 | ||
404 | //======================================== | |
405 | // Assignment | |
406 | //======================================== | |
407 | ||
408 | UnicodeString & | |
409 | UnicodeString::operator=(const UnicodeString &src) { | |
410 | return copyFrom(src); | |
411 | } | |
412 | ||
413 | UnicodeString & | |
414 | UnicodeString::fastCopyFrom(const UnicodeString &src) { | |
415 | return copyFrom(src, TRUE); | |
416 | } | |
417 | ||
418 | UnicodeString & | |
419 | UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) { | |
420 | // if assigning to ourselves, do nothing | |
421 | if(this == 0 || this == &src) { | |
422 | return *this; | |
423 | } | |
424 | ||
425 | // is the right side bogus? | |
426 | if(&src == 0 || src.isBogus()) { | |
427 | setToBogus(); | |
428 | return *this; | |
429 | } | |
430 | ||
431 | // delete the current contents | |
432 | releaseArray(); | |
433 | ||
434 | // we always copy the length | |
435 | fLength = src.fLength; | |
436 | if(fLength == 0) { | |
437 | // empty string - use the stack buffer | |
438 | fArray = fStackBuffer; | |
439 | fCapacity = US_STACKBUF_SIZE; | |
440 | fFlags = kShortString; | |
441 | return *this; | |
442 | } | |
443 | ||
444 | // fLength>0 and not an "open" src.getBuffer(minCapacity) | |
445 | switch(src.fFlags) { | |
446 | case kShortString: | |
447 | // short string using the stack buffer, do the same | |
448 | fArray = fStackBuffer; | |
449 | fCapacity = US_STACKBUF_SIZE; | |
450 | fFlags = kShortString; | |
451 | uprv_memcpy(fStackBuffer, src.fArray, fLength * U_SIZEOF_UCHAR); | |
452 | break; | |
453 | case kLongString: | |
454 | // src uses a refCounted string buffer, use that buffer with refCount | |
455 | // src is const, use a cast - we don't really change it | |
456 | ((UnicodeString &)src).addRef(); | |
457 | // copy all fields, share the reference-counted buffer | |
458 | fArray = src.fArray; | |
459 | fCapacity = src.fCapacity; | |
460 | fFlags = src.fFlags; | |
461 | break; | |
462 | case kReadonlyAlias: | |
463 | if(fastCopy) { | |
464 | // src is a readonly alias, do the same | |
465 | // -> maintain the readonly alias as such | |
466 | fArray = src.fArray; | |
467 | fCapacity = src.fCapacity; | |
468 | fFlags = src.fFlags; | |
469 | break; | |
470 | } | |
471 | // else if(!fastCopy) fall through to case kWritableAlias | |
472 | // -> allocate a new buffer and copy the contents | |
473 | case kWritableAlias: | |
474 | // src is a writable alias; we make a copy of that instead | |
475 | if(allocate(fLength)) { | |
476 | uprv_memcpy(fArray, src.fArray, fLength * U_SIZEOF_UCHAR); | |
477 | break; | |
478 | } | |
479 | // if there is not enough memory, then fall through to setting to bogus | |
480 | default: | |
481 | // if src is bogus, set ourselves to bogus | |
482 | // do not call setToBogus() here because fArray and fFlags are not consistent here | |
483 | fArray = 0; | |
484 | fLength = 0; | |
485 | fCapacity = 0; | |
486 | fFlags = kIsBogus; | |
487 | break; | |
488 | } | |
489 | ||
490 | return *this; | |
491 | } | |
492 | ||
493 | //======================================== | |
494 | // Miscellaneous operations | |
495 | //======================================== | |
496 | ||
497 | UnicodeString UnicodeString::unescape() const { | |
498 | UnicodeString result; | |
499 | for (int32_t i=0; i<length(); ) { | |
500 | UChar32 c = charAt(i++); | |
501 | if (c == 0x005C /*'\\'*/) { | |
502 | c = unescapeAt(i); // advances i | |
503 | if (c == (UChar32)0xFFFFFFFF) { | |
504 | result.remove(); // return empty string | |
505 | break; // invalid escape sequence | |
506 | } | |
507 | } | |
508 | result.append(c); | |
509 | } | |
510 | return result; | |
511 | } | |
512 | ||
513 | UChar32 UnicodeString::unescapeAt(int32_t &offset) const { | |
514 | return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this); | |
515 | } | |
516 | ||
517 | //======================================== | |
518 | // Read-only implementation | |
519 | //======================================== | |
520 | int8_t | |
521 | UnicodeString::doCompare( int32_t start, | |
522 | int32_t length, | |
523 | const UChar *srcChars, | |
524 | int32_t srcStart, | |
525 | int32_t srcLength) const | |
526 | { | |
527 | // compare illegal string values | |
528 | // treat const UChar *srcChars==NULL as an empty string | |
529 | if(isBogus()) { | |
530 | return -1; | |
531 | } | |
532 | ||
533 | // pin indices to legal values | |
534 | pinIndices(start, length); | |
535 | ||
536 | if(srcChars == NULL) { | |
537 | srcStart = srcLength = 0; | |
538 | } | |
539 | ||
540 | // get the correct pointer | |
541 | const UChar *chars = getArrayStart(); | |
542 | ||
543 | chars += start; | |
544 | srcChars += srcStart; | |
545 | ||
546 | int32_t minLength; | |
547 | int8_t lengthResult; | |
548 | ||
549 | // get the srcLength if necessary | |
550 | if(srcLength < 0) { | |
551 | srcLength = u_strlen(srcChars + srcStart); | |
552 | } | |
553 | ||
554 | // are we comparing different lengths? | |
555 | if(length != srcLength) { | |
556 | if(length < srcLength) { | |
557 | minLength = length; | |
558 | lengthResult = -1; | |
559 | } else { | |
560 | minLength = srcLength; | |
561 | lengthResult = 1; | |
562 | } | |
563 | } else { | |
564 | minLength = length; | |
565 | lengthResult = 0; | |
566 | } | |
567 | ||
568 | /* | |
569 | * note that uprv_memcmp() returns an int but we return an int8_t; | |
570 | * we need to take care not to truncate the result - | |
571 | * one way to do this is to right-shift the value to | |
572 | * move the sign bit into the lower 8 bits and making sure that this | |
573 | * does not become 0 itself | |
574 | */ | |
575 | ||
576 | if(minLength > 0 && chars != srcChars) { | |
577 | int32_t result; | |
578 | ||
579 | # if U_IS_BIG_ENDIAN | |
580 | // big-endian: byte comparison works | |
581 | result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar)); | |
582 | if(result != 0) { | |
583 | return (int8_t)(result >> 15 | 1); | |
584 | } | |
585 | # else | |
586 | // little-endian: compare UChar units | |
587 | do { | |
588 | result = ((int32_t)*(chars++) - (int32_t)*(srcChars++)); | |
589 | if(result != 0) { | |
590 | return (int8_t)(result >> 15 | 1); | |
591 | } | |
592 | } while(--minLength > 0); | |
593 | # endif | |
594 | } | |
595 | return lengthResult; | |
596 | } | |
597 | ||
598 | /* String compare in code point order - doCompare() compares in code unit order. */ | |
599 | int8_t | |
600 | UnicodeString::doCompareCodePointOrder(int32_t start, | |
601 | int32_t length, | |
602 | const UChar *srcChars, | |
603 | int32_t srcStart, | |
604 | int32_t srcLength) const | |
605 | { | |
606 | // compare illegal string values | |
607 | // treat const UChar *srcChars==NULL as an empty string | |
608 | if(isBogus()) { | |
609 | return -1; | |
610 | } | |
611 | ||
612 | // pin indices to legal values | |
613 | pinIndices(start, length); | |
614 | ||
615 | if(srcChars == NULL) { | |
616 | srcStart = srcLength = 0; | |
617 | } | |
618 | ||
619 | int32_t diff = uprv_strCompare(fArray + start, length, srcChars + srcStart, srcLength, FALSE, TRUE); | |
620 | /* translate the 32-bit result into an 8-bit one */ | |
621 | if(diff!=0) { | |
622 | return (int8_t)(diff >> 15 | 1); | |
623 | } else { | |
624 | return 0; | |
625 | } | |
626 | } | |
627 | ||
b75a7d8f A |
628 | int32_t |
629 | UnicodeString::getLength() const { | |
630 | return length(); | |
631 | } | |
632 | ||
633 | UChar | |
634 | UnicodeString::getCharAt(int32_t offset) const { | |
635 | return charAt(offset); | |
636 | } | |
637 | ||
638 | UChar32 | |
639 | UnicodeString::getChar32At(int32_t offset) const { | |
640 | return char32At(offset); | |
641 | } | |
642 | ||
643 | int32_t | |
644 | UnicodeString::countChar32(int32_t start, int32_t length) const { | |
645 | pinIndices(start, length); | |
646 | // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL | |
647 | return u_countChar32(fArray+start, length); | |
648 | } | |
649 | ||
650 | UBool | |
651 | UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const { | |
652 | pinIndices(start, length); | |
653 | // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL | |
654 | return u_strHasMoreChar32Than(fArray+start, length, number); | |
655 | } | |
656 | ||
657 | int32_t | |
658 | UnicodeString::moveIndex32(int32_t index, int32_t delta) const { | |
659 | // pin index | |
660 | if(index<0) { | |
661 | index=0; | |
662 | } else if(index>fLength) { | |
663 | index=fLength; | |
664 | } | |
665 | ||
666 | if(delta>0) { | |
667 | UTF_FWD_N(fArray, index, fLength, delta); | |
668 | } else { | |
669 | UTF_BACK_N(fArray, 0, index, -delta); | |
670 | } | |
671 | ||
672 | return index; | |
673 | } | |
674 | ||
675 | void | |
676 | UnicodeString::doExtract(int32_t start, | |
677 | int32_t length, | |
678 | UChar *dst, | |
679 | int32_t dstStart) const | |
680 | { | |
681 | // pin indices to legal values | |
682 | pinIndices(start, length); | |
683 | ||
684 | // do not copy anything if we alias dst itself | |
685 | if(fArray + start != dst + dstStart) { | |
686 | us_arrayCopy(getArrayStart(), start, dst, dstStart, length); | |
687 | } | |
688 | } | |
689 | ||
690 | int32_t | |
691 | UnicodeString::extract(UChar *dest, int32_t destCapacity, | |
692 | UErrorCode &errorCode) const { | |
693 | if(U_SUCCESS(errorCode)) { | |
694 | if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) { | |
695 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
696 | } else { | |
697 | if(fLength>0 && fLength<=destCapacity && fArray!=dest) { | |
698 | uprv_memcpy(dest, fArray, fLength*U_SIZEOF_UCHAR); | |
699 | } | |
700 | return u_terminateUChars(dest, destCapacity, fLength, &errorCode); | |
701 | } | |
702 | } | |
703 | ||
704 | return fLength; | |
705 | } | |
706 | ||
374ca955 A |
707 | int32_t |
708 | UnicodeString::extract(int32_t start, | |
709 | int32_t length, | |
710 | char *target, | |
711 | int32_t targetCapacity, | |
712 | enum EInvariant) const | |
713 | { | |
714 | // if the arguments are illegal, then do nothing | |
715 | if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) { | |
716 | return 0; | |
717 | } | |
718 | ||
719 | // pin the indices to legal values | |
720 | pinIndices(start, length); | |
721 | ||
722 | if(length <= targetCapacity) { | |
723 | u_UCharsToChars(getArrayStart() + start, target, length); | |
724 | } | |
725 | UErrorCode status = U_ZERO_ERROR; | |
726 | return u_terminateChars(target, targetCapacity, length, &status); | |
727 | } | |
728 | ||
729 | void | |
730 | UnicodeString::extractBetween(int32_t start, | |
731 | int32_t limit, | |
732 | UnicodeString& target) const { | |
733 | pinIndex(start); | |
734 | pinIndex(limit); | |
735 | doExtract(start, limit - start, target); | |
736 | } | |
737 | ||
b75a7d8f A |
738 | int32_t |
739 | UnicodeString::indexOf(const UChar *srcChars, | |
740 | int32_t srcStart, | |
741 | int32_t srcLength, | |
742 | int32_t start, | |
743 | int32_t length) const | |
744 | { | |
745 | if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) { | |
746 | return -1; | |
747 | } | |
748 | ||
749 | // UnicodeString does not find empty substrings | |
750 | if(srcLength < 0 && srcChars[srcStart] == 0) { | |
751 | return -1; | |
752 | } | |
753 | ||
754 | // get the indices within bounds | |
755 | pinIndices(start, length); | |
756 | ||
757 | // find the first occurrence of the substring | |
758 | const UChar *match = u_strFindFirst(fArray + start, length, srcChars + srcStart, srcLength); | |
759 | if(match == NULL) { | |
760 | return -1; | |
761 | } else { | |
73c04bcf | 762 | return (int32_t)(match - fArray); |
b75a7d8f A |
763 | } |
764 | } | |
765 | ||
766 | int32_t | |
767 | UnicodeString::doIndexOf(UChar c, | |
768 | int32_t start, | |
769 | int32_t length) const | |
770 | { | |
771 | // pin indices | |
772 | pinIndices(start, length); | |
773 | ||
774 | // find the first occurrence of c | |
775 | const UChar *match = u_memchr(fArray + start, c, length); | |
776 | if(match == NULL) { | |
777 | return -1; | |
778 | } else { | |
73c04bcf | 779 | return (int32_t)(match - fArray); |
b75a7d8f A |
780 | } |
781 | } | |
782 | ||
783 | int32_t | |
784 | UnicodeString::doIndexOf(UChar32 c, | |
785 | int32_t start, | |
786 | int32_t length) const { | |
787 | // pin indices | |
788 | pinIndices(start, length); | |
789 | ||
790 | // find the first occurrence of c | |
791 | const UChar *match = u_memchr32(fArray + start, c, length); | |
792 | if(match == NULL) { | |
793 | return -1; | |
794 | } else { | |
73c04bcf | 795 | return (int32_t)(match - fArray); |
b75a7d8f A |
796 | } |
797 | } | |
798 | ||
799 | int32_t | |
800 | UnicodeString::lastIndexOf(const UChar *srcChars, | |
801 | int32_t srcStart, | |
802 | int32_t srcLength, | |
803 | int32_t start, | |
804 | int32_t length) const | |
805 | { | |
806 | if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) { | |
807 | return -1; | |
808 | } | |
809 | ||
810 | // UnicodeString does not find empty substrings | |
811 | if(srcLength < 0 && srcChars[srcStart] == 0) { | |
812 | return -1; | |
813 | } | |
814 | ||
815 | // get the indices within bounds | |
816 | pinIndices(start, length); | |
817 | ||
818 | // find the last occurrence of the substring | |
819 | const UChar *match = u_strFindLast(fArray + start, length, srcChars + srcStart, srcLength); | |
820 | if(match == NULL) { | |
821 | return -1; | |
822 | } else { | |
73c04bcf | 823 | return (int32_t)(match - fArray); |
b75a7d8f A |
824 | } |
825 | } | |
826 | ||
827 | int32_t | |
828 | UnicodeString::doLastIndexOf(UChar c, | |
829 | int32_t start, | |
830 | int32_t length) const | |
831 | { | |
832 | if(isBogus()) { | |
833 | return -1; | |
834 | } | |
835 | ||
836 | // pin indices | |
837 | pinIndices(start, length); | |
838 | ||
839 | // find the last occurrence of c | |
840 | const UChar *match = u_memrchr(fArray + start, c, length); | |
841 | if(match == NULL) { | |
842 | return -1; | |
843 | } else { | |
73c04bcf | 844 | return (int32_t)(match - fArray); |
b75a7d8f A |
845 | } |
846 | } | |
847 | ||
848 | int32_t | |
849 | UnicodeString::doLastIndexOf(UChar32 c, | |
850 | int32_t start, | |
851 | int32_t length) const { | |
852 | // pin indices | |
853 | pinIndices(start, length); | |
854 | ||
855 | // find the last occurrence of c | |
856 | const UChar *match = u_memrchr32(fArray + start, c, length); | |
857 | if(match == NULL) { | |
858 | return -1; | |
859 | } else { | |
73c04bcf | 860 | return (int32_t)(match - fArray); |
b75a7d8f A |
861 | } |
862 | } | |
863 | ||
864 | //======================================== | |
865 | // Write implementation | |
866 | //======================================== | |
867 | ||
868 | UnicodeString& | |
869 | UnicodeString::findAndReplace(int32_t start, | |
870 | int32_t length, | |
871 | const UnicodeString& oldText, | |
872 | int32_t oldStart, | |
873 | int32_t oldLength, | |
874 | const UnicodeString& newText, | |
875 | int32_t newStart, | |
876 | int32_t newLength) | |
877 | { | |
878 | if(isBogus() || oldText.isBogus() || newText.isBogus()) { | |
879 | return *this; | |
880 | } | |
881 | ||
882 | pinIndices(start, length); | |
883 | oldText.pinIndices(oldStart, oldLength); | |
884 | newText.pinIndices(newStart, newLength); | |
885 | ||
886 | if(oldLength == 0) { | |
887 | return *this; | |
888 | } | |
889 | ||
890 | while(length > 0 && length >= oldLength) { | |
891 | int32_t pos = indexOf(oldText, oldStart, oldLength, start, length); | |
892 | if(pos < 0) { | |
893 | // no more oldText's here: done | |
894 | break; | |
895 | } else { | |
896 | // we found oldText, replace it by newText and go beyond it | |
897 | replace(pos, oldLength, newText, newStart, newLength); | |
898 | length -= pos + oldLength - start; | |
899 | start = pos + newLength; | |
900 | } | |
901 | } | |
902 | ||
903 | return *this; | |
904 | } | |
905 | ||
906 | ||
907 | void | |
908 | UnicodeString::setToBogus() | |
909 | { | |
910 | releaseArray(); | |
911 | ||
912 | fArray = 0; | |
913 | fCapacity = fLength = 0; | |
914 | fFlags = kIsBogus; | |
915 | } | |
916 | ||
917 | // turn a bogus string into an empty one | |
918 | void | |
919 | UnicodeString::unBogus() { | |
920 | if(fFlags & kIsBogus) { | |
921 | fArray = fStackBuffer; | |
922 | fLength = 0; | |
923 | fCapacity = US_STACKBUF_SIZE; | |
924 | fFlags = kShortString; | |
925 | } | |
926 | } | |
927 | ||
928 | // setTo() analogous to the readonly-aliasing constructor with the same signature | |
929 | UnicodeString & | |
930 | UnicodeString::setTo(UBool isTerminated, | |
931 | const UChar *text, | |
932 | int32_t textLength) | |
933 | { | |
934 | if(fFlags & kOpenGetBuffer) { | |
935 | // do not modify a string that has an "open" getBuffer(minCapacity) | |
936 | return *this; | |
937 | } | |
938 | ||
939 | if(text == NULL) { | |
940 | // treat as an empty string, do not alias | |
941 | releaseArray(); | |
942 | fLength = 0; | |
943 | fCapacity = US_STACKBUF_SIZE; | |
944 | fArray = fStackBuffer; | |
945 | fFlags = kShortString; | |
946 | return *this; | |
947 | } | |
948 | ||
949 | if( textLength < -1 || | |
950 | (textLength == -1 && !isTerminated) || | |
951 | (textLength >= 0 && isTerminated && text[textLength] != 0) | |
952 | ) { | |
953 | setToBogus(); | |
954 | return *this; | |
955 | } | |
956 | ||
957 | releaseArray(); | |
958 | ||
959 | fArray = (UChar *)text; | |
960 | if(textLength != -1) { | |
961 | fLength = textLength; | |
962 | fCapacity = isTerminated ? fLength + 1 : fLength; | |
963 | } else { | |
964 | // text is terminated, or else it would have failed the above test | |
965 | fLength = u_strlen(text); | |
966 | fCapacity = fLength + 1; | |
967 | } | |
968 | ||
969 | fFlags = kReadonlyAlias; | |
970 | return *this; | |
971 | } | |
972 | ||
973 | // setTo() analogous to the writable-aliasing constructor with the same signature | |
974 | UnicodeString & | |
975 | UnicodeString::setTo(UChar *buffer, | |
976 | int32_t buffLength, | |
977 | int32_t buffCapacity) { | |
978 | if(fFlags & kOpenGetBuffer) { | |
979 | // do not modify a string that has an "open" getBuffer(minCapacity) | |
980 | return *this; | |
981 | } | |
982 | ||
983 | if(buffer == NULL) { | |
984 | // treat as an empty string, do not alias | |
985 | releaseArray(); | |
986 | fLength = 0; | |
987 | fCapacity = US_STACKBUF_SIZE; | |
988 | fArray = fStackBuffer; | |
989 | fFlags = kShortString; | |
990 | return *this; | |
991 | } | |
992 | ||
374ca955 | 993 | if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) { |
b75a7d8f A |
994 | setToBogus(); |
995 | return *this; | |
374ca955 A |
996 | } else if(buffLength == -1) { |
997 | // buffLength = u_strlen(buff); but do not look beyond buffCapacity | |
998 | const UChar *p = buffer, *limit = buffer + buffCapacity; | |
999 | while(p != limit && *p != 0) { | |
1000 | ++p; | |
1001 | } | |
1002 | buffLength = (int32_t)(p - buffer); | |
b75a7d8f A |
1003 | } |
1004 | ||
1005 | releaseArray(); | |
1006 | ||
1007 | fArray = buffer; | |
1008 | fLength = buffLength; | |
1009 | fCapacity = buffCapacity; | |
1010 | fFlags = kWritableAlias; | |
1011 | return *this; | |
1012 | } | |
1013 | ||
1014 | UnicodeString& | |
1015 | UnicodeString::setCharAt(int32_t offset, | |
1016 | UChar c) | |
1017 | { | |
1018 | if(cloneArrayIfNeeded() && fLength > 0) { | |
1019 | if(offset < 0) { | |
1020 | offset = 0; | |
1021 | } else if(offset >= fLength) { | |
1022 | offset = fLength - 1; | |
1023 | } | |
1024 | ||
1025 | fArray[offset] = c; | |
1026 | } | |
1027 | return *this; | |
1028 | } | |
1029 | ||
b75a7d8f A |
1030 | UnicodeString& |
1031 | UnicodeString::doReplace( int32_t start, | |
1032 | int32_t length, | |
1033 | const UnicodeString& src, | |
1034 | int32_t srcStart, | |
1035 | int32_t srcLength) | |
1036 | { | |
1037 | if(!src.isBogus()) { | |
1038 | // pin the indices to legal values | |
1039 | src.pinIndices(srcStart, srcLength); | |
1040 | ||
1041 | // get the characters from src | |
1042 | // and replace the range in ourselves with them | |
1043 | return doReplace(start, length, src.getArrayStart(), srcStart, srcLength); | |
1044 | } else { | |
1045 | // remove the range | |
1046 | return doReplace(start, length, 0, 0, 0); | |
1047 | } | |
1048 | } | |
1049 | ||
1050 | UnicodeString& | |
1051 | UnicodeString::doReplace(int32_t start, | |
1052 | int32_t length, | |
1053 | const UChar *srcChars, | |
1054 | int32_t srcStart, | |
1055 | int32_t srcLength) | |
1056 | { | |
1057 | if(isBogus()) { | |
1058 | return *this; | |
1059 | } | |
1060 | ||
1061 | if(srcChars == 0) { | |
1062 | srcStart = srcLength = 0; | |
1063 | } else if(srcLength < 0) { | |
1064 | // get the srcLength if necessary | |
1065 | srcLength = u_strlen(srcChars + srcStart); | |
1066 | } | |
1067 | ||
1068 | int32_t *bufferToDelete = 0; | |
1069 | ||
1070 | // the following may change fArray but will not copy the current contents; | |
1071 | // therefore we need to keep the current fArray | |
1072 | UChar *oldArray = fArray; | |
1073 | int32_t oldLength = fLength; | |
1074 | ||
1075 | // pin the indices to legal values | |
1076 | pinIndices(start, length); | |
1077 | ||
1078 | // calculate the size of the string after the replace | |
1079 | int32_t newSize = oldLength - length + srcLength; | |
1080 | ||
1081 | // clone our array and allocate a bigger array if needed | |
1082 | if(!cloneArrayIfNeeded(newSize, newSize + (newSize >> 2) + kGrowSize, | |
1083 | FALSE, &bufferToDelete) | |
1084 | ) { | |
1085 | return *this; | |
1086 | } | |
1087 | ||
1088 | // now do the replace | |
1089 | ||
1090 | if(fArray != oldArray) { | |
1091 | // if fArray changed, then we need to copy everything except what will change | |
1092 | us_arrayCopy(oldArray, 0, fArray, 0, start); | |
1093 | us_arrayCopy(oldArray, start + length, | |
1094 | fArray, start + srcLength, | |
1095 | oldLength - (start + length)); | |
1096 | } else if(length != srcLength) { | |
1097 | // fArray did not change; copy only the portion that isn't changing, leaving a hole | |
1098 | us_arrayCopy(oldArray, start + length, | |
1099 | fArray, start + srcLength, | |
1100 | oldLength - (start + length)); | |
1101 | } | |
1102 | ||
1103 | // now fill in the hole with the new string | |
1104 | us_arrayCopy(srcChars, srcStart, getArrayStart(), start, srcLength); | |
1105 | ||
1106 | fLength = newSize; | |
1107 | ||
1108 | // delayed delete in case srcChars == fArray when we started, and | |
1109 | // to keep oldArray alive for the above operations | |
1110 | if (bufferToDelete) { | |
1111 | uprv_free(bufferToDelete); | |
1112 | } | |
1113 | ||
1114 | return *this; | |
1115 | } | |
1116 | ||
1117 | /** | |
1118 | * Replaceable API | |
1119 | */ | |
1120 | void | |
1121 | UnicodeString::handleReplaceBetween(int32_t start, | |
1122 | int32_t limit, | |
1123 | const UnicodeString& text) { | |
1124 | replaceBetween(start, limit, text); | |
1125 | } | |
1126 | ||
1127 | /** | |
1128 | * Replaceable API | |
1129 | */ | |
1130 | void | |
1131 | UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) { | |
1132 | if (limit <= start) { | |
1133 | return; // Nothing to do; avoid bogus malloc call | |
1134 | } | |
1135 | UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) ); | |
1136 | extractBetween(start, limit, text, 0); | |
1137 | insert(dest, text, 0, limit - start); | |
1138 | uprv_free(text); | |
1139 | } | |
1140 | ||
1141 | /** | |
1142 | * Replaceable API | |
1143 | * | |
1144 | * NOTE: This is for the Replaceable class. There is no rep.cpp, | |
1145 | * so we implement this function here. | |
1146 | */ | |
1147 | UBool Replaceable::hasMetaData() const { | |
1148 | return TRUE; | |
1149 | } | |
1150 | ||
1151 | /** | |
1152 | * Replaceable API | |
1153 | */ | |
1154 | UBool UnicodeString::hasMetaData() const { | |
1155 | return FALSE; | |
1156 | } | |
1157 | ||
1158 | UnicodeString& | |
1159 | UnicodeString::doReverse(int32_t start, | |
1160 | int32_t length) | |
1161 | { | |
1162 | if(fLength <= 1 || !cloneArrayIfNeeded()) { | |
1163 | return *this; | |
1164 | } | |
1165 | ||
1166 | // pin the indices to legal values | |
1167 | pinIndices(start, length); | |
1168 | ||
1169 | UChar *left = getArrayStart() + start; | |
1170 | UChar *right = getArrayStart() + start + length; | |
1171 | UChar swap; | |
1172 | UBool hasSupplementary = FALSE; | |
1173 | ||
1174 | while(left < --right) { | |
1175 | hasSupplementary |= (UBool)UTF_IS_LEAD(swap = *left); | |
1176 | hasSupplementary |= (UBool)UTF_IS_LEAD(*left++ = *right); | |
1177 | *right = swap; | |
1178 | } | |
1179 | ||
1180 | /* if there are supplementary code points in the reversed range, then re-swap their surrogates */ | |
1181 | if(hasSupplementary) { | |
1182 | UChar swap2; | |
1183 | ||
1184 | left = getArrayStart() + start; | |
1185 | right = getArrayStart() + start + length - 1; // -1 so that we can look at *(left+1) if left<right | |
1186 | while(left < right) { | |
1187 | if(UTF_IS_TRAIL(swap = *left) && UTF_IS_LEAD(swap2 = *(left + 1))) { | |
1188 | *left++ = swap2; | |
1189 | *left++ = swap; | |
1190 | } else { | |
1191 | ++left; | |
1192 | } | |
1193 | } | |
1194 | } | |
1195 | ||
1196 | return *this; | |
1197 | } | |
1198 | ||
1199 | UBool | |
1200 | UnicodeString::padLeading(int32_t targetLength, | |
1201 | UChar padChar) | |
1202 | { | |
1203 | if(fLength >= targetLength || !cloneArrayIfNeeded(targetLength)) { | |
1204 | return FALSE; | |
1205 | } else { | |
1206 | // move contents up by padding width | |
1207 | int32_t start = targetLength - fLength; | |
1208 | us_arrayCopy(fArray, 0, fArray, start, fLength); | |
1209 | ||
1210 | // fill in padding character | |
1211 | while(--start >= 0) { | |
1212 | fArray[start] = padChar; | |
1213 | } | |
1214 | fLength = targetLength; | |
1215 | return TRUE; | |
1216 | } | |
1217 | } | |
1218 | ||
1219 | UBool | |
1220 | UnicodeString::padTrailing(int32_t targetLength, | |
1221 | UChar padChar) | |
1222 | { | |
1223 | if(fLength >= targetLength || !cloneArrayIfNeeded(targetLength)) { | |
1224 | return FALSE; | |
1225 | } else { | |
1226 | // fill in padding character | |
1227 | int32_t length = targetLength; | |
1228 | while(--length >= fLength) { | |
1229 | fArray[length] = padChar; | |
1230 | } | |
1231 | fLength = targetLength; | |
1232 | return TRUE; | |
1233 | } | |
1234 | } | |
1235 | ||
b75a7d8f A |
1236 | //======================================== |
1237 | // Hashing | |
1238 | //======================================== | |
1239 | int32_t | |
1240 | UnicodeString::doHashCode() const | |
1241 | { | |
1242 | /* Delegate hash computation to uhash. This makes UnicodeString | |
1243 | * hashing consistent with UChar* hashing. */ | |
1244 | int32_t hashCode = uhash_hashUCharsN(getArrayStart(), fLength); | |
1245 | if (hashCode == kInvalidHashCode) { | |
1246 | hashCode = kEmptyHashCode; | |
1247 | } | |
1248 | return hashCode; | |
1249 | } | |
1250 | ||
b75a7d8f A |
1251 | //======================================== |
1252 | // External Buffer | |
1253 | //======================================== | |
1254 | ||
1255 | UChar * | |
1256 | UnicodeString::getBuffer(int32_t minCapacity) { | |
1257 | if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) { | |
1258 | fFlags|=kOpenGetBuffer; | |
1259 | fLength=0; | |
1260 | return fArray; | |
1261 | } else { | |
1262 | return 0; | |
1263 | } | |
1264 | } | |
1265 | ||
1266 | void | |
1267 | UnicodeString::releaseBuffer(int32_t newLength) { | |
1268 | if(fFlags&kOpenGetBuffer && newLength>=-1) { | |
1269 | // set the new fLength | |
1270 | if(newLength==-1) { | |
1271 | // the new length is the string length, capped by fCapacity | |
1272 | const UChar *p=fArray, *limit=fArray+fCapacity; | |
1273 | while(p<limit && *p!=0) { | |
1274 | ++p; | |
1275 | } | |
1276 | fLength=(int32_t)(p-fArray); | |
1277 | } else if(newLength<=fCapacity) { | |
1278 | fLength=newLength; | |
1279 | } else { | |
1280 | fLength=fCapacity; | |
1281 | } | |
1282 | fFlags&=~kOpenGetBuffer; | |
1283 | } | |
1284 | } | |
1285 | ||
1286 | //======================================== | |
1287 | // Miscellaneous | |
1288 | //======================================== | |
1289 | UBool | |
1290 | UnicodeString::cloneArrayIfNeeded(int32_t newCapacity, | |
1291 | int32_t growCapacity, | |
1292 | UBool doCopyArray, | |
1293 | int32_t **pBufferToDelete, | |
1294 | UBool forceClone) { | |
1295 | // default parameters need to be static, therefore | |
1296 | // the defaults are -1 to have convenience defaults | |
1297 | if(newCapacity == -1) { | |
1298 | newCapacity = fCapacity; | |
1299 | } | |
1300 | ||
1301 | // while a getBuffer(minCapacity) is "open", | |
1302 | // prevent any modifications of the string by returning FALSE here | |
1303 | // if the string is bogus, then only an assignment or similar can revive it | |
1304 | if((fFlags&(kOpenGetBuffer|kIsBogus))!=0) { | |
1305 | return FALSE; | |
1306 | } | |
1307 | ||
1308 | /* | |
1309 | * We need to make a copy of the array if | |
1310 | * the buffer is read-only, or | |
1311 | * the buffer is refCounted (shared), and refCount>1, or | |
1312 | * the buffer is too small. | |
1313 | * Return FALSE if memory could not be allocated. | |
1314 | */ | |
1315 | if(forceClone || | |
1316 | fFlags & kBufferIsReadonly || | |
1317 | fFlags & kRefCounted && refCount() > 1 || | |
1318 | newCapacity > fCapacity | |
1319 | ) { | |
1320 | // save old values | |
1321 | UChar *array = fArray; | |
1322 | uint16_t flags = fFlags; | |
1323 | ||
1324 | // check growCapacity for default value and use of the stack buffer | |
1325 | if(growCapacity == -1) { | |
1326 | growCapacity = newCapacity; | |
1327 | } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) { | |
1328 | growCapacity = US_STACKBUF_SIZE; | |
1329 | } | |
1330 | ||
1331 | // allocate a new array | |
1332 | if(allocate(growCapacity) || | |
1333 | newCapacity < growCapacity && allocate(newCapacity) | |
1334 | ) { | |
1335 | if(doCopyArray) { | |
1336 | // copy the contents | |
1337 | // do not copy more than what fits - it may be smaller than before | |
1338 | if(fCapacity < fLength) { | |
1339 | fLength = fCapacity; | |
1340 | } | |
1341 | us_arrayCopy(array, 0, fArray, 0, fLength); | |
1342 | } else { | |
1343 | fLength = 0; | |
1344 | } | |
1345 | ||
1346 | // release the old array | |
1347 | if(flags & kRefCounted) { | |
1348 | // the array is refCounted; decrement and release if 0 | |
1349 | int32_t *pRefCount = ((int32_t *)array - 1); | |
1350 | if(umtx_atomic_dec(pRefCount) == 0) { | |
1351 | if(pBufferToDelete == 0) { | |
1352 | uprv_free(pRefCount); | |
1353 | } else { | |
1354 | // the caller requested to delete it himself | |
1355 | *pBufferToDelete = pRefCount; | |
1356 | } | |
1357 | } | |
1358 | } | |
1359 | } else { | |
1360 | // not enough memory for growCapacity and not even for the smaller newCapacity | |
1361 | // reset the old values for setToBogus() to release the array | |
1362 | fArray = array; | |
1363 | fFlags = flags; | |
1364 | setToBogus(); | |
1365 | return FALSE; | |
1366 | } | |
1367 | } | |
1368 | return TRUE; | |
1369 | } | |
1370 | U_NAMESPACE_END | |
73c04bcf A |
1371 | |
#ifdef U_STATIC_IMPLEMENTATION
/*
This function is never meant to be called. It exists only so that the
virtual vector deleting destructor is defined within unistr.cpp.
The vector deleting destructor is already a part of UObject,
but defining it here makes sure that it is included with this object file.
This keeps static library dependencies to a minimum.
*/
static void uprv_UnicodeStringDummy(void) {
    U_NAMESPACE_USE
    // array new/delete of UnicodeString is all that is needed here
    UnicodeString *pair = new UnicodeString[2];
    delete [] pair;
}
#endif
1385 |