icuSources/common/unistr.cpp

   1 /*
   2 ******************************************************************************
   3 * Copyright (C) 1999-2010, International Business Machines Corporation and   *
   4 * others. All Rights Reserved.                                               *
   5 ******************************************************************************
   6 *
   7 * File unistr.cpp
   8 *
   9 * Modification History:
  10 *
  11 *   Date        Name        Description
  12 *   09/25/98    stephen     Creation.
  13 *   04/20/99    stephen     Overhauled per 4/16 code review.
  14 *   07/09/99    stephen     Renamed {hi,lo},{byte,word} to icu_X for HP/UX
  15 *   11/18/99    aliu        Added handleReplaceBetween() to make inherit from
  16 *                           Replaceable.
  17 *   06/25/01    grhoten     Removed the dependency on iostream
  18 ******************************************************************************
  19 */
  20
  21 #include "unicode/utypes.h"
  22 #include "unicode/putil.h"
  23 #include "cstring.h"
  24 #include "cmemory.h"
  25 #include "unicode/ustring.h"
  26 #include "unicode/unistr.h"
  27 #include "uhash.h"
  28 #include "ustr_imp.h"
  29 #include "umutex.h"
  30
  31 #if 0
  32
  33 #if U_IOSTREAM_SOURCE >= 199711
  34 #include <iostream>
  35 using namespace std;
  36 #elif U_IOSTREAM_SOURCE >= 198506
  37 #include <iostream.h>
  38 #endif
  39
  40 //DEBUGGING
  41 void
  42 print(const UnicodeString& s,
  43       const char *name)
  44 {
  45   UChar c;
  46   cout << name << ":|";
  47   for(int i = 0; i < s.length(); ++i) {
  48     c = s[i];
  49     if(c>= 0x007E || c < 0x0020)
  50       cout << "[0x" << hex << s[i] << "]";
  51     else
  52       cout << (char) s[i];
  53   }
  54   cout << '|' << endl;
  55 }
  56
  57 void
  58 print(const UChar *s,
  59       int32_t len,
  60       const char *name)
  61 {
  62   UChar c;
  63   cout << name << ":|";
  64   for(int i = 0; i < len; ++i) {
  65     c = s[i];
  66     if(c>= 0x007E || c < 0x0020)
  67       cout << "[0x" << hex << s[i] << "]";
  68     else
  69       cout << (char) s[i];
  70   }
  71   cout << '|' << endl;
  72 }
  73 // END DEBUGGING
  74 #endif
  75
  76 // Local function definitions for now
  77
  78 // need to copy areas that may overlap
  79 static
  80 inline void
  81 us_arrayCopy(const UChar *src, int32_t srcStart,
  82          UChar *dst, int32_t dstStart, int32_t count)
  83 {
  84   if(count>0) {
  85     uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src)));
  86   }
  87 }
  88
  89 // u_unescapeAt() callback to get a UChar from a UnicodeString
  90 U_CDECL_BEGIN
  91 static UChar U_CALLCONV
  92 UnicodeString_charAt(int32_t offset, void *context) {
  93     return ((U_NAMESPACE_QUALIFIER UnicodeString*) context)->charAt(offset);
  94 }
  95 U_CDECL_END
  96
  97 U_NAMESPACE_BEGIN
  98
  99 /* The Replaceable virtual destructor can't be defined in the header
 100    due to how AIX works with multiple definitions of virtual functions.
 101 */
 102 Replaceable::~Replaceable() {}
 103 Replaceable::Replaceable() {}
 104 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
 105
 106 UnicodeString U_EXPORT2
 107 operator+ (const UnicodeString &s1, const UnicodeString &s2) {
 108     return
 109         UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
 110             append(s1).
 111                 append(s2);
 112 }
 113
 114 //========================================
 115 // Reference Counting functions, put at top of file so that optimizing compilers
 116 //                               have a chance to automatically inline.
 117 //========================================
 118
 119 void
 120 UnicodeString::addRef()
 121 {  umtx_atomic_inc((int32_t *)fUnion.fFields.fArray - 1);}
 122
 123 int32_t
 124 UnicodeString::removeRef()
 125 { return umtx_atomic_dec((int32_t *)fUnion.fFields.fArray - 1);}
 126
 127 int32_t
 128 UnicodeString::refCount() const
 129 {
 130     umtx_lock(NULL);
 131     // Note: without the lock to force a memory barrier, we might see a very
 132     //       stale value on some multi-processor systems.
 133     int32_t  count = *((int32_t *)fUnion.fFields.fArray - 1);
 134     umtx_unlock(NULL);
 135     return count;
 136  }
 137
 138 void
 139 UnicodeString::releaseArray() {
 140   if((fFlags & kRefCounted) && removeRef() == 0) {
 141     uprv_free((int32_t *)fUnion.fFields.fArray - 1);
 142   }
 143 }
 144
 145
 146
 147 //========================================
 148 // Constructors
 149 //========================================
 150 UnicodeString::UnicodeString()
 151   : fShortLength(0),
 152     fFlags(kShortString)
 153 {}
 154
 155 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
 156   : fShortLength(0),
 157     fFlags(0)
 158 {
 159   if(count <= 0 || (uint32_t)c > 0x10ffff) {
 160     // just allocate and do not do anything else
 161     allocate(capacity);
 162   } else {
 163     // count > 0, allocate and fill the new string with count c's
 164     int32_t unitCount = UTF_CHAR_LENGTH(c), length = count * unitCount;
 165     if(capacity < length) {
 166       capacity = length;
 167     }
 168     if(allocate(capacity)) {
 169       UChar *array = getArrayStart();
 170       int32_t i = 0;
 171
 172       // fill the new string with c
 173       if(unitCount == 1) {
 174         // fill with length UChars
 175         while(i < length) {
 176           array[i++] = (UChar)c;
 177         }
 178       } else {
 179         // get the code units for c
 180         UChar units[UTF_MAX_CHAR_LENGTH];
 181         UTF_APPEND_CHAR_UNSAFE(units, i, c);
 182
 183         // now it must be i==unitCount
 184         i = 0;
 185
 186         // for Unicode, unitCount can only be 1, 2, 3, or 4
 187         // 1 is handled above
 188         while(i < length) {
 189           int32_t unitIdx = 0;
 190           while(unitIdx < unitCount) {
 191             array[i++]=units[unitIdx++];
 192           }
 193         }
 194       }
 195     }
 196     setLength(length);
 197   }
 198 }
 199
 200 UnicodeString::UnicodeString(UChar ch)
 201   : fShortLength(1),
 202     fFlags(kShortString)
 203 {
 204   fUnion.fStackBuffer[0] = ch;
 205 }
 206
 207 UnicodeString::UnicodeString(UChar32 ch)
 208   : fShortLength(0),
 209     fFlags(kShortString)
 210 {
 211   int32_t i = 0;
 212   UBool isError = FALSE;
 213   U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);
 214   fShortLength = (int8_t)i;
 215 }
 216
 217 UnicodeString::UnicodeString(const UChar *text)
 218   : fShortLength(0),
 219     fFlags(kShortString)
 220 {
 221   doReplace(0, 0, text, 0, -1);
 222 }
 223
 224 UnicodeString::UnicodeString(const UChar *text,
 225                              int32_t textLength)
 226   : fShortLength(0),
 227     fFlags(kShortString)
 228 {
 229   doReplace(0, 0, text, 0, textLength);
 230 }
 231
 232 UnicodeString::UnicodeString(UBool isTerminated,
 233                              const UChar *text,
 234                              int32_t textLength)
 235   : fShortLength(0),
 236     fFlags(kReadonlyAlias)
 237 {
 238   if(text == NULL) {
 239     // treat as an empty string, do not alias
 240     setToEmpty();
 241   } else if(textLength < -1 ||
 242             (textLength == -1 && !isTerminated) ||
 243             (textLength >= 0 && isTerminated && text[textLength] != 0)
 244   ) {
 245     setToBogus();
 246   } else {
 247     if(textLength == -1) {
 248       // text is terminated, or else it would have failed the above test
 249       textLength = u_strlen(text);
 250     }
 251     setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
 252   }
 253 }
 254
 255 UnicodeString::UnicodeString(UChar *buff,
 256                              int32_t buffLength,
 257                              int32_t buffCapacity)
 258   : fShortLength(0),
 259     fFlags(kWritableAlias)
 260 {
 261   if(buff == NULL) {
 262     // treat as an empty string, do not alias
 263     setToEmpty();
 264   } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
 265     setToBogus();
 266   } else {
 267     if(buffLength == -1) {
 268       // fLength = u_strlen(buff); but do not look beyond buffCapacity
 269       const UChar *p = buff, *limit = buff + buffCapacity;
 270       while(p != limit && *p != 0) {
 271         ++p;
 272       }
 273       buffLength = (int32_t)(p - buff);
 274     }
 275     setArray(buff, buffLength, buffCapacity);
 276   }
 277 }
 278
 279 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)
 280   : fShortLength(0),
 281     fFlags(kShortString)
 282 {
 283   if(src==NULL) {
 284     // treat as an empty string
 285   } else {
 286     if(length<0) {
 287       length=(int32_t)uprv_strlen(src);
 288     }
 289     if(cloneArrayIfNeeded(length, length, FALSE)) {
 290       u_charsToUChars(src, getArrayStart(), length);
 291       setLength(length);
 292     } else {
 293       setToBogus();
 294     }
 295   }
 296 }
 297
 298 #if U_CHARSET_IS_UTF8
 299
 300 UnicodeString::UnicodeString(const char *codepageData)
 301   : fShortLength(0),
 302     fFlags(kShortString) {
 303   if(codepageData != 0) {
 304     setToUTF8(codepageData);
 305   }
 306 }
 307
 308 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength)
 309   : fShortLength(0),
 310     fFlags(kShortString) {
 311   // if there's nothing to convert, do nothing
 312   if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
 313     return;
 314   }
 315   if(dataLength == -1) {
 316     dataLength = (int32_t)uprv_strlen(codepageData);
 317   }
 318   setToUTF8(StringPiece(codepageData, dataLength));
 319 }
 320
 321 // else see unistr_cnv.cpp
 322 #endif
 323
 324 UnicodeString::UnicodeString(const UnicodeString& that)
 325   : Replaceable(),
 326     fShortLength(0),
 327     fFlags(kShortString)
 328 {
 329   copyFrom(that);
 330 }
 331
 332 UnicodeString::UnicodeString(const UnicodeString& that,
 333                              int32_t srcStart)
 334   : Replaceable(),
 335     fShortLength(0),
 336     fFlags(kShortString)
 337 {
 338   setTo(that, srcStart);
 339 }
 340
 341 UnicodeString::UnicodeString(const UnicodeString& that,
 342                              int32_t srcStart,
 343                              int32_t srcLength)
 344   : Replaceable(),
 345     fShortLength(0),
 346     fFlags(kShortString)
 347 {
 348   setTo(that, srcStart, srcLength);
 349 }
 350
 351 // Replaceable base class clone() default implementation, does not clone
 352 Replaceable *
 353 Replaceable::clone() const {
 354   return NULL;
 355 }
 356
 357 // UnicodeString overrides clone() with a real implementation
 358 Replaceable *
 359 UnicodeString::clone() const {
 360   return new UnicodeString(*this);
 361 }
 362
 363 //========================================
 364 // array allocation
 365 //========================================
 366
 367 UBool
 368 UnicodeString::allocate(int32_t capacity) {
 369   if(capacity <= US_STACKBUF_SIZE) {
 370     fFlags = kShortString;
 371   } else {
 372     // count bytes for the refCounter and the string capacity, and
 373     // round up to a multiple of 16; then divide by 4 and allocate int32_t's
 374     // to be safely aligned for the refCount
 375     // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer()
 376     int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2);
 377     int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words );
 378     if(array != 0) {
 379       // set initial refCount and point behind the refCount
 380       *array++ = 1;
 381
 382       // have fArray point to the first UChar
 383       fUnion.fFields.fArray = (UChar *)array;
 384       fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
 385       fFlags = kLongString;
 386     } else {
 387       fShortLength = 0;
 388       fUnion.fFields.fArray = 0;
 389       fUnion.fFields.fCapacity = 0;
 390       fFlags = kIsBogus;
 391       return FALSE;
 392     }
 393   }
 394   return TRUE;
 395 }
 396
 397 //========================================
 398 // Destructor
 399 //========================================
 400 UnicodeString::~UnicodeString()
 401 {
 402   releaseArray();
 403 }
 404
 405 //========================================
 406 // Factory methods
 407 //========================================
 408
 409 UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) {
 410   UnicodeString result;
 411   result.setToUTF8(utf8);
 412   return result;
 413 }
 414
 415 UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
 416   UnicodeString result;
 417   int32_t capacity;
 418   // Most UTF-32 strings will be BMP-only and result in a same-length
 419   // UTF-16 string. We overestimate the capacity just slightly,
 420   // just in case there are a few supplementary characters.
 421   if(length <= US_STACKBUF_SIZE) {
 422     capacity = US_STACKBUF_SIZE;
 423   } else {
 424     capacity = length + (length >> 4) + 4;
 425   }
 426   do {
 427     UChar *utf16 = result.getBuffer(capacity);
 428     int32_t length16;
 429     UErrorCode errorCode = U_ZERO_ERROR;
 430     u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
 431         utf32, length,
 432         0xfffd,  // Substitution character.
 433         NULL,    // Don't care about number of substitutions.
 434         &errorCode);
 435     result.releaseBuffer(length16);
 436     if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
 437       capacity = length16 + 1;  // +1 for the terminating NUL.
 438       continue;
 439     } else if(U_FAILURE(errorCode)) {
 440       result.setToBogus();
 441     }
 442     break;
 443   } while(TRUE);
 444   return result;
 445 }
 446
 447 //========================================
 448 // Assignment
 449 //========================================
 450
 451 UnicodeString &
 452 UnicodeString::operator=(const UnicodeString &src) {
 453   return copyFrom(src);
 454 }
 455
 456 UnicodeString &
 457 UnicodeString::fastCopyFrom(const UnicodeString &src) {
 458   return copyFrom(src, TRUE);
 459 }
 460
 461 UnicodeString &
 462 UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
 463   // if assigning to ourselves, do nothing
 464   if(this == 0 || this == &src) {
 465     return *this;
 466   }
 467
 468   // is the right side bogus?
 469   if(&src == 0 || src.isBogus()) {
 470     setToBogus();
 471     return *this;
 472   }
 473
 474   // delete the current contents
 475   releaseArray();
 476
 477   if(src.isEmpty()) {
 478     // empty string - use the stack buffer
 479     setToEmpty();
 480     return *this;
 481   }
 482
 483   // we always copy the length
 484   int32_t srcLength = src.length();
 485   setLength(srcLength);
 486
 487   // fLength>0 and not an "open" src.getBuffer(minCapacity)
 488   switch(src.fFlags) {
 489   case kShortString:
 490     // short string using the stack buffer, do the same
 491     fFlags = kShortString;
 492     uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, srcLength * U_SIZEOF_UCHAR);
 493     break;
 494   case kLongString:
 495     // src uses a refCounted string buffer, use that buffer with refCount
 496     // src is const, use a cast - we don't really change it
 497     ((UnicodeString &)src).addRef();
 498     // copy all fields, share the reference-counted buffer
 499     fUnion.fFields.fArray = src.fUnion.fFields.fArray;
 500     fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
 501     fFlags = src.fFlags;
 502     break;
 503   case kReadonlyAlias:
 504     if(fastCopy) {
 505       // src is a readonly alias, do the same
 506       // -> maintain the readonly alias as such
 507       fUnion.fFields.fArray = src.fUnion.fFields.fArray;
 508       fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
 509       fFlags = src.fFlags;
 510       break;
 511     }
 512     // else if(!fastCopy) fall through to case kWritableAlias
 513     // -> allocate a new buffer and copy the contents
 514   case kWritableAlias:
 515     // src is a writable alias; we make a copy of that instead
 516     if(allocate(srcLength)) {
 517       uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR);
 518       break;
 519     }
 520     // if there is not enough memory, then fall through to setting to bogus
 521   default:
 522     // if src is bogus, set ourselves to bogus
 523     // do not call setToBogus() here because fArray and fFlags are not consistent here
 524     fShortLength = 0;
 525     fUnion.fFields.fArray = 0;
 526     fUnion.fFields.fCapacity = 0;
 527     fFlags = kIsBogus;
 528     break;
 529   }
 530
 531   return *this;
 532 }
 533
 534 //========================================
 535 // Miscellaneous operations
 536 //========================================
 537
 538 UnicodeString UnicodeString::unescape() const {
 539     UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
 540     const UChar *array = getBuffer();
 541     int32_t len = length();
 542     int32_t prev = 0;
 543     for (int32_t i=0;;) {
 544         if (i == len) {
 545             result.append(array, prev, len - prev);
 546             break;
 547         }
 548         if (array[i++] == 0x5C /*'\\'*/) {
 549             result.append(array, prev, (i - 1) - prev);
 550             UChar32 c = unescapeAt(i); // advances i
 551             if (c < 0) {
 552                 result.remove(); // return empty string
 553                 break; // invalid escape sequence
 554             }
 555             result.append(c);
 556             prev = i;
 557         }
 558     }
 559     return result;
 560 }
 561
 562 UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
 563     return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
 564 }
 565
 566 //========================================
 567 // Read-only implementation
 568 //========================================
 569 int8_t
 570 UnicodeString::doCompare( int32_t start,
 571               int32_t length,
 572               const UChar *srcChars,
 573               int32_t srcStart,
 574               int32_t srcLength) const
 575 {
 576   // compare illegal string values
 577   // treat const UChar *srcChars==NULL as an empty string
 578   if(isBogus()) {
 579     return -1;
 580   }
 581
 582   // pin indices to legal values
 583   pinIndices(start, length);
 584
 585   if(srcChars == NULL) {
 586     srcStart = srcLength = 0;
 587   }
 588
 589   // get the correct pointer
 590   const UChar *chars = getArrayStart();
 591
 592   chars += start;
 593   srcChars += srcStart;
 594
 595   int32_t minLength;
 596   int8_t lengthResult;
 597
 598   // get the srcLength if necessary
 599   if(srcLength < 0) {
 600     srcLength = u_strlen(srcChars + srcStart);
 601   }
 602
 603   // are we comparing different lengths?
 604   if(length != srcLength) {
 605     if(length < srcLength) {
 606       minLength = length;
 607       lengthResult = -1;
 608     } else {
 609       minLength = srcLength;
 610       lengthResult = 1;
 611     }
 612   } else {
 613     minLength = length;
 614     lengthResult = 0;
 615   }
 616
 617   /*
 618    * note that uprv_memcmp() returns an int but we return an int8_t;
 619    * we need to take care not to truncate the result -
 620    * one way to do this is to right-shift the value to
 621    * move the sign bit into the lower 8 bits and making sure that this
 622    * does not become 0 itself
 623    */
 624
 625   if(minLength > 0 && chars != srcChars) {
 626     int32_t result;
 627
 628 #   if U_IS_BIG_ENDIAN
 629       // big-endian: byte comparison works
 630       result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
 631       if(result != 0) {
 632         return (int8_t)(result >> 15 | 1);
 633       }
 634 #   else
 635       // little-endian: compare UChar units
 636       do {
 637         result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
 638         if(result != 0) {
 639           return (int8_t)(result >> 15 | 1);
 640         }
 641       } while(--minLength > 0);
 642 #   endif
 643   }
 644   return lengthResult;
 645 }
 646
 647 /* String compare in code point order - doCompare() compares in code unit order. */
 648 int8_t
 649 UnicodeString::doCompareCodePointOrder(int32_t start,
 650                                        int32_t length,
 651                                        const UChar *srcChars,
 652                                        int32_t srcStart,
 653                                        int32_t srcLength) const
 654 {
 655   // compare illegal string values
 656   // treat const UChar *srcChars==NULL as an empty string
 657   if(isBogus()) {
 658     return -1;
 659   }
 660
 661   // pin indices to legal values
 662   pinIndices(start, length);
 663
 664   if(srcChars == NULL) {
 665     srcStart = srcLength = 0;
 666   }
 667
 668   int32_t diff = uprv_strCompare(getArrayStart() + start, length, srcChars + srcStart, srcLength, FALSE, TRUE);
 669   /* translate the 32-bit result into an 8-bit one */
 670   if(diff!=0) {
 671     return (int8_t)(diff >> 15 | 1);
 672   } else {
 673     return 0;
 674   }
 675 }
 676
 677 int32_t
 678 UnicodeString::getLength() const {
 679     return length();
 680 }
 681
 682 UChar
 683 UnicodeString::getCharAt(int32_t offset) const {
 684   return charAt(offset);
 685 }
 686
 687 UChar32
 688 UnicodeString::getChar32At(int32_t offset) const {
 689   return char32At(offset);
 690 }
 691
 692 int32_t
 693 UnicodeString::countChar32(int32_t start, int32_t length) const {
 694   pinIndices(start, length);
 695   // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
 696   return u_countChar32(getArrayStart()+start, length);
 697 }
 698
 699 UBool
 700 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
 701   pinIndices(start, length);
 702   // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
 703   return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
 704 }
 705
 706 int32_t
 707 UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
 708   // pin index
 709   int32_t len = length();
 710   if(index<0) {
 711     index=0;
 712   } else if(index>len) {
 713     index=len;
 714   }
 715
 716   const UChar *array = getArrayStart();
 717   if(delta>0) {
 718     UTF_FWD_N(array, index, len, delta);
 719   } else {
 720     UTF_BACK_N(array, 0, index, -delta);
 721   }
 722
 723   return index;
 724 }
 725
 726 void
 727 UnicodeString::doExtract(int32_t start,
 728              int32_t length,
 729              UChar *dst,
 730              int32_t dstStart) const
 731 {
 732   // pin indices to legal values
 733   pinIndices(start, length);
 734
 735   // do not copy anything if we alias dst itself
 736   const UChar *array = getArrayStart();
 737   if(array + start != dst + dstStart) {
 738     us_arrayCopy(array, start, dst, dstStart, length);
 739   }
 740 }
 741
 742 int32_t
 743 UnicodeString::extract(UChar *dest, int32_t destCapacity,
 744                        UErrorCode &errorCode) const {
 745   int32_t len = length();
 746   if(U_SUCCESS(errorCode)) {
 747     if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
 748       errorCode=U_ILLEGAL_ARGUMENT_ERROR;
 749     } else {
 750       const UChar *array = getArrayStart();
 751       if(len>0 && len<=destCapacity && array!=dest) {
 752         uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR);
 753       }
 754       return u_terminateUChars(dest, destCapacity, len, &errorCode);
 755     }
 756   }
 757
 758   return len;
 759 }
 760
 761 int32_t
 762 UnicodeString::extract(int32_t start,
 763                        int32_t length,
 764                        char *target,
 765                        int32_t targetCapacity,
 766                        enum EInvariant) const
 767 {
 768   // if the arguments are illegal, then do nothing
 769   if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {
 770     return 0;
 771   }
 772
 773   // pin the indices to legal values
 774   pinIndices(start, length);
 775
 776   if(length <= targetCapacity) {
 777     u_UCharsToChars(getArrayStart() + start, target, length);
 778   }
 779   UErrorCode status = U_ZERO_ERROR;
 780   return u_terminateChars(target, targetCapacity, length, &status);
 781 }
 782
 783 UnicodeString
 784 UnicodeString::tempSubString(int32_t start, int32_t len) const {
 785   pinIndices(start, len);
 786   const UChar *array = getBuffer();  // not getArrayStart() to check kIsBogus & kOpenGetBuffer
 787   if(array==NULL) {
 788     array=fUnion.fStackBuffer;  // anything not NULL because that would make an empty string
 789     len=-2;  // bogus result string
 790   }
 791   return UnicodeString(FALSE, array + start, len);
 792 }
 793
 794 int32_t
 795 UnicodeString::toUTF8(int32_t start, int32_t len,
 796                       char *target, int32_t capacity) const {
 797   pinIndices(start, len);
 798   int32_t length8;
 799   UErrorCode errorCode = U_ZERO_ERROR;
 800   u_strToUTF8WithSub(target, capacity, &length8,
 801                      getBuffer() + start, len,
 802                      0xFFFD,  // Standard substitution character.
 803                      NULL,    // Don't care about number of substitutions.
 804                      &errorCode);
 805   return length8;
 806 }
 807
 808 #if U_CHARSET_IS_UTF8
 809
 810 int32_t
 811 UnicodeString::extract(int32_t start, int32_t len,
 812                        char *target, uint32_t dstSize) const {
 813   // if the arguments are illegal, then do nothing
 814   if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
 815     return 0;
 816   }
 817   return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
 818 }
 819
 820 // else see unistr_cnv.cpp
 821 #endif
 822
 823 void
 824 UnicodeString::extractBetween(int32_t start,
 825                   int32_t limit,
 826                   UnicodeString& target) const {
 827   pinIndex(start);
 828   pinIndex(limit);
 829   doExtract(start, limit - start, target);
 830 }
 831
 832 // When converting from UTF-16 to UTF-8, the result will have at most 3 times
 833 // as many bytes as the source has UChars.
 834 // The "worst cases" are writing systems like Indic, Thai and CJK with
 835 // 3:1 bytes:UChars.
 836 void
 837 UnicodeString::toUTF8(ByteSink &sink) const {
 838   int32_t length16 = length();
 839   if(length16 != 0) {
 840     char stackBuffer[1024];
 841     int32_t capacity = (int32_t)sizeof(stackBuffer);
 842     UBool utf8IsOwned = FALSE;
 843     char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
 844                                       3*length16,
 845                                       stackBuffer, capacity,
 846                                       &capacity);
 847     int32_t length8 = 0;
 848     UErrorCode errorCode = U_ZERO_ERROR;
 849     u_strToUTF8WithSub(utf8, capacity, &length8,
 850                        getBuffer(), length16,
 851                        0xFFFD,  // Standard substitution character.
 852                        NULL,    // Don't care about number of substitutions.
 853                        &errorCode);
 854     if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
 855       utf8 = (char *)uprv_malloc(length8);
 856       if(utf8 != NULL) {
 857         utf8IsOwned = TRUE;
 858         errorCode = U_ZERO_ERROR;
 859         u_strToUTF8WithSub(utf8, length8, &length8,
 860                            getBuffer(), length16,
 861                            0xFFFD,  // Standard substitution character.
 862                            NULL,    // Don't care about number of substitutions.
 863                            &errorCode);
 864       } else {
 865         errorCode = U_MEMORY_ALLOCATION_ERROR;
 866       }
 867     }
 868     if(U_SUCCESS(errorCode)) {
 869       sink.Append(utf8, length8);
 870       sink.Flush();
 871     }
 872     if(utf8IsOwned) {
 873       uprv_free(utf8);
 874     }
 875   }
 876 }
 877
 878 int32_t
 879 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
 880   int32_t length32=0;
 881   if(U_SUCCESS(errorCode)) {
 882     // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
 883     u_strToUTF32WithSub(utf32, capacity, &length32,
 884         getBuffer(), length(),
 885         0xfffd,  // Substitution character.
 886         NULL,    // Don't care about number of substitutions.
 887         &errorCode);
 888   }
 889   return length32;
 890 }
 891
 892 int32_t
 893 UnicodeString::indexOf(const UChar *srcChars,
 894                int32_t srcStart,
 895                int32_t srcLength,
 896                int32_t start,
 897                int32_t length) const
 898 {
 899   if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
 900     return -1;
 901   }
 902
 903   // UnicodeString does not find empty substrings
 904   if(srcLength < 0 && srcChars[srcStart] == 0) {
 905     return -1;
 906   }
 907
 908   // get the indices within bounds
 909   pinIndices(start, length);
 910
 911   // find the first occurrence of the substring
 912   const UChar *array = getArrayStart();
 913   const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
 914   if(match == NULL) {
 915     return -1;
 916   } else {
 917     return (int32_t)(match - array);
 918   }
 919 }
 920
 921 int32_t
 922 UnicodeString::doIndexOf(UChar c,
 923              int32_t start,
 924              int32_t length) const
 925 {
 926   // pin indices
 927   pinIndices(start, length);
 928
 929   // find the first occurrence of c
 930   const UChar *array = getArrayStart();
 931   const UChar *match = u_memchr(array + start, c, length);
 932   if(match == NULL) {
 933     return -1;
 934   } else {
 935     return (int32_t)(match - array);
 936   }
 937 }
 938
 939 int32_t
 940 UnicodeString::doIndexOf(UChar32 c,
 941                          int32_t start,
 942                          int32_t length) const {
 943   // pin indices
 944   pinIndices(start, length);
 945
 946   // find the first occurrence of c
 947   const UChar *array = getArrayStart();
 948   const UChar *match = u_memchr32(array + start, c, length);
 949   if(match == NULL) {
 950     return -1;
 951   } else {
 952     return (int32_t)(match - array);
 953   }
 954 }
 955
 956 int32_t
 957 UnicodeString::lastIndexOf(const UChar *srcChars,
 958                int32_t srcStart,
 959                int32_t srcLength,
 960                int32_t start,
 961                int32_t length) const
 962 {
 963   if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
 964     return -1;
 965   }
 966
 967   // UnicodeString does not find empty substrings
 968   if(srcLength < 0 && srcChars[srcStart] == 0) {
 969     return -1;
 970   }
 971
 972   // get the indices within bounds
 973   pinIndices(start, length);
 974
 975   // find the last occurrence of the substring
 976   const UChar *array = getArrayStart();
 977   const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
 978   if(match == NULL) {
 979     return -1;
 980   } else {
 981     return (int32_t)(match - array);
 982   }
 983 }
 984
 985 int32_t
 986 UnicodeString::doLastIndexOf(UChar c,
 987                  int32_t start,
 988                  int32_t length) const
 989 {
 990   if(isBogus()) {
 991     return -1;
 992   }
 993
 994   // pin indices
 995   pinIndices(start, length);
 996
 997   // find the last occurrence of c
 998   const UChar *array = getArrayStart();
 999   const UChar *match = u_memrchr(array + start, c, length);
1000   if(match == NULL) {
1001     return -1;
1002   } else {
1003     return (int32_t)(match - array);
1004   }
1005 }
1006
1007 int32_t
1008 UnicodeString::doLastIndexOf(UChar32 c,
1009                              int32_t start,
1010                              int32_t length) const {
1011   // pin indices
1012   pinIndices(start, length);
1013
1014   // find the last occurrence of c
1015   const UChar *array = getArrayStart();
1016   const UChar *match = u_memrchr32(array + start, c, length);
1017   if(match == NULL) {
1018     return -1;
1019   } else {
1020     return (int32_t)(match - array);
1021   }
1022 }
1023
1024 //========================================
1025 // Write implementation
1026 //========================================
1027
1028 UnicodeString&
1029 UnicodeString::findAndReplace(int32_t start,
1030                   int32_t length,
1031                   const UnicodeString& oldText,
1032                   int32_t oldStart,
1033                   int32_t oldLength,
1034                   const UnicodeString& newText,
1035                   int32_t newStart,
1036                   int32_t newLength)
1037 {
1038   if(isBogus() || oldText.isBogus() || newText.isBogus()) {
1039     return *this;
1040   }
1041
1042   pinIndices(start, length);
1043   oldText.pinIndices(oldStart, oldLength);
1044   newText.pinIndices(newStart, newLength);
1045
1046   if(oldLength == 0) {
1047     return *this;
1048   }
1049
1050   while(length > 0 && length >= oldLength) {
1051     int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
1052     if(pos < 0) {
1053       // no more oldText's here: done
1054       break;
1055     } else {
1056       // we found oldText, replace it by newText and go beyond it
1057       replace(pos, oldLength, newText, newStart, newLength);
1058       length -= pos + oldLength - start;
1059       start = pos + newLength;
1060     }
1061   }
1062
1063   return *this;
1064 }
1065
1066
1067 void
1068 UnicodeString::setToBogus()
1069 {
1070   releaseArray();
1071
1072   fShortLength = 0;
1073   fUnion.fFields.fArray = 0;
1074   fUnion.fFields.fCapacity = 0;
1075   fFlags = kIsBogus;
1076 }
1077
1078 // turn a bogus string into an empty one
1079 void
1080 UnicodeString::unBogus() {
1081   if(fFlags & kIsBogus) {
1082     setToEmpty();
1083   }
1084 }
1085
1086 // setTo() analogous to the readonly-aliasing constructor with the same signature
1087 UnicodeString &
1088 UnicodeString::setTo(UBool isTerminated,
1089                      const UChar *text,
1090                      int32_t textLength)
1091 {
1092   if(fFlags & kOpenGetBuffer) {
1093     // do not modify a string that has an "open" getBuffer(minCapacity)
1094     return *this;
1095   }
1096
1097   if(text == NULL) {
1098     // treat as an empty string, do not alias
1099     releaseArray();
1100     setToEmpty();
1101     return *this;
1102   }
1103
1104   if( textLength < -1 ||
1105       (textLength == -1 && !isTerminated) ||
1106       (textLength >= 0 && isTerminated && text[textLength] != 0)
1107   ) {
1108     setToBogus();
1109     return *this;
1110   }
1111
1112   releaseArray();
1113
1114   if(textLength == -1) {
1115     // text is terminated, or else it would have failed the above test
1116     textLength = u_strlen(text);
1117   }
1118   setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
1119
1120   fFlags = kReadonlyAlias;
1121   return *this;
1122 }
1123
1124 // setTo() analogous to the writable-aliasing constructor with the same signature
1125 UnicodeString &
1126 UnicodeString::setTo(UChar *buffer,
1127                      int32_t buffLength,
1128                      int32_t buffCapacity) {
1129   if(fFlags & kOpenGetBuffer) {
1130     // do not modify a string that has an "open" getBuffer(minCapacity)
1131     return *this;
1132   }
1133
1134   if(buffer == NULL) {
1135     // treat as an empty string, do not alias
1136     releaseArray();
1137     setToEmpty();
1138     return *this;
1139   }
1140
1141   if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
1142     setToBogus();
1143     return *this;
1144   } else if(buffLength == -1) {
1145     // buffLength = u_strlen(buff); but do not look beyond buffCapacity
1146     const UChar *p = buffer, *limit = buffer + buffCapacity;
1147     while(p != limit && *p != 0) {
1148       ++p;
1149     }
1150     buffLength = (int32_t)(p - buffer);
1151   }
1152
1153   releaseArray();
1154
1155   setArray(buffer, buffLength, buffCapacity);
1156   fFlags = kWritableAlias;
1157   return *this;
1158 }
1159
1160 UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) {
1161   unBogus();
1162   int32_t length = utf8.length();
1163   int32_t capacity;
1164   // The UTF-16 string will be at most as long as the UTF-8 string.
1165   if(length <= US_STACKBUF_SIZE) {
1166     capacity = US_STACKBUF_SIZE;
1167   } else {
1168     capacity = length + 1;  // +1 for the terminating NUL.
1169   }
1170   UChar *utf16 = getBuffer(capacity);
1171   int32_t length16;
1172   UErrorCode errorCode = U_ZERO_ERROR;
1173   u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
1174       utf8.data(), length,
1175       0xfffd,  // Substitution character.
1176       NULL,    // Don't care about number of substitutions.
1177       &errorCode);
1178   releaseBuffer(length16);
1179   if(U_FAILURE(errorCode)) {
1180     setToBogus();
1181   }
1182   return *this;
1183 }
1184
1185 UnicodeString&
1186 UnicodeString::setCharAt(int32_t offset,
1187              UChar c)
1188 {
1189   int32_t len = length();
1190   if(cloneArrayIfNeeded() && len > 0) {
1191     if(offset < 0) {
1192       offset = 0;
1193     } else if(offset >= len) {
1194       offset = len - 1;
1195     }
1196
1197     getArrayStart()[offset] = c;
1198   }
1199   return *this;
1200 }
1201
1202 UnicodeString&
1203 UnicodeString::doReplace( int32_t start,
1204               int32_t length,
1205               const UnicodeString& src,
1206               int32_t srcStart,
1207               int32_t srcLength)
1208 {
1209   if(!src.isBogus()) {
1210     // pin the indices to legal values
1211     src.pinIndices(srcStart, srcLength);
1212
1213     // get the characters from src
1214     // and replace the range in ourselves with them
1215     return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
1216   } else {
1217     // remove the range
1218     return doReplace(start, length, 0, 0, 0);
1219   }
1220 }
1221
1222 UnicodeString&
1223 UnicodeString::doReplace(int32_t start,
1224              int32_t length,
1225              const UChar *srcChars,
1226              int32_t srcStart,
1227              int32_t srcLength)
1228 {
1229   if(!isWritable()) {
1230     return *this;
1231   }
1232
1233   int32_t oldLength = this->length();
1234
1235   // optimize (read-only alias).remove(0, start) and .remove(start, end)
1236   if((fFlags&kBufferIsReadonly) && srcLength == 0) {
1237     if(start == 0) {
1238       // remove prefix by adjusting the array pointer
1239       pinIndex(length);
1240       fUnion.fFields.fArray += length;
1241       fUnion.fFields.fCapacity -= length;
1242       setLength(oldLength - length);
1243       return *this;
1244     } else {
1245       pinIndex(start);
1246       if(length >= (oldLength - start)) {
1247         // remove suffix by reducing the length (like truncate())
1248         setLength(start);
1249         fUnion.fFields.fCapacity = start;  // not NUL-terminated any more
1250         return *this;
1251       }
1252     }
1253   }
1254
1255   if(srcChars == 0) {
1256     srcStart = srcLength = 0;
1257   } else if(srcLength < 0) {
1258     // get the srcLength if necessary
1259     srcLength = u_strlen(srcChars + srcStart);
1260   }
1261
1262   // calculate the size of the string after the replace
1263   int32_t newSize;
1264
1265   // optimize append() onto a large-enough, owned string
1266   if(start >= oldLength) {
1267     newSize = oldLength + srcLength;
1268     if(newSize <= getCapacity() && isBufferWritable()) {
1269       us_arrayCopy(srcChars, srcStart, getArrayStart(), oldLength, srcLength);
1270       setLength(newSize);
1271       return *this;
1272     } else {
1273       // pin the indices to legal values
1274       start = oldLength;
1275       length = 0;
1276     }
1277   } else {
1278     // pin the indices to legal values
1279     pinIndices(start, length);
1280
1281     newSize = oldLength - length + srcLength;
1282   }
1283
1284   // the following may change fArray but will not copy the current contents;
1285   // therefore we need to keep the current fArray
1286   UChar oldStackBuffer[US_STACKBUF_SIZE];
1287   UChar *oldArray;
1288   if((fFlags&kUsingStackBuffer) && (newSize > US_STACKBUF_SIZE)) {
1289     // copy the stack buffer contents because it will be overwritten with
1290     // fUnion.fFields values
1291     u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength);
1292     oldArray = oldStackBuffer;
1293   } else {
1294     oldArray = getArrayStart();
1295   }
1296
1297   // clone our array and allocate a bigger array if needed
1298   int32_t *bufferToDelete = 0;
1299   if(!cloneArrayIfNeeded(newSize, newSize + (newSize >> 2) + kGrowSize,
1300                          FALSE, &bufferToDelete)
1301   ) {
1302     return *this;
1303   }
1304
1305   // now do the replace
1306
1307   UChar *newArray = getArrayStart();
1308   if(newArray != oldArray) {
1309     // if fArray changed, then we need to copy everything except what will change
1310     us_arrayCopy(oldArray, 0, newArray, 0, start);
1311     us_arrayCopy(oldArray, start + length,
1312                  newArray, start + srcLength,
1313                  oldLength - (start + length));
1314   } else if(length != srcLength) {
1315     // fArray did not change; copy only the portion that isn't changing, leaving a hole
1316     us_arrayCopy(oldArray, start + length,
1317                  newArray, start + srcLength,
1318                  oldLength - (start + length));
1319   }
1320
1321   // now fill in the hole with the new string
1322   us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);
1323
1324   setLength(newSize);
1325
1326   // delayed delete in case srcChars == fArray when we started, and
1327   // to keep oldArray alive for the above operations
1328   if (bufferToDelete) {
1329     uprv_free(bufferToDelete);
1330   }
1331
1332   return *this;
1333 }
1334
1335 /**
1336  * Replaceable API
1337  */
1338 void
1339 UnicodeString::handleReplaceBetween(int32_t start,
1340                                     int32_t limit,
1341                                     const UnicodeString& text) {
1342     replaceBetween(start, limit, text);
1343 }
1344
1345 /**
1346  * Replaceable API
1347  */
1348 void
1349 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
1350     if (limit <= start) {
1351         return; // Nothing to do; avoid bogus malloc call
1352     }
1353     UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
1354     // Check to make sure text is not null.
1355     if (text != NULL) {
1356             extractBetween(start, limit, text, 0);
1357             insert(dest, text, 0, limit - start);
1358             uprv_free(text);
1359     }
1360 }
1361
1362 /**
1363  * Replaceable API
1364  *
1365  * NOTE: This is for the Replaceable class.  There is no rep.cpp,
1366  * so we implement this function here.
1367  */
1368 UBool Replaceable::hasMetaData() const {
1369     return TRUE;
1370 }
1371
1372 /**
1373  * Replaceable API
1374  */
1375 UBool UnicodeString::hasMetaData() const {
1376     return FALSE;
1377 }
1378
1379 UnicodeString&
1380 UnicodeString::doReverse(int32_t start, int32_t length) {
1381   if(length <= 1 || !cloneArrayIfNeeded()) {
1382     return *this;
1383   }
1384
1385   // pin the indices to legal values
1386   pinIndices(start, length);
1387   if(length <= 1) {  // pinIndices() might have shrunk the length
1388     return *this;
1389   }
1390
1391   UChar *left = getArrayStart() + start;
1392   UChar *right = left + length - 1;  // -1 for inclusive boundary (length>=2)
1393   UChar swap;
1394   UBool hasSupplementary = FALSE;
1395
1396   // Before the loop we know left<right because length>=2.
1397   do {
1398     hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
1399     hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
1400     *right-- = swap;
1401   } while(left < right);
1402   // Make sure to test the middle code unit of an odd-length string.
1403   // Redundant if the length is even.
1404   hasSupplementary |= (UBool)U16_IS_LEAD(*left);
1405
1406   /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1407   if(hasSupplementary) {
1408     UChar swap2;
1409
1410     left = getArrayStart() + start;
1411     right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
1412     while(left < right) {
1413       if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {
1414         *left++ = swap2;
1415         *left++ = swap;
1416       } else {
1417         ++left;
1418       }
1419     }
1420   }
1421
1422   return *this;
1423 }
1424
1425 UBool
1426 UnicodeString::padLeading(int32_t targetLength,
1427                           UChar padChar)
1428 {
1429   int32_t oldLength = length();
1430   if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1431     return FALSE;
1432   } else {
1433     // move contents up by padding width
1434     UChar *array = getArrayStart();
1435     int32_t start = targetLength - oldLength;
1436     us_arrayCopy(array, 0, array, start, oldLength);
1437
1438     // fill in padding character
1439     while(--start >= 0) {
1440       array[start] = padChar;
1441     }
1442     setLength(targetLength);
1443     return TRUE;
1444   }
1445 }
1446
1447 UBool
1448 UnicodeString::padTrailing(int32_t targetLength,
1449                            UChar padChar)
1450 {
1451   int32_t oldLength = length();
1452   if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1453     return FALSE;
1454   } else {
1455     // fill in padding character
1456     UChar *array = getArrayStart();
1457     int32_t length = targetLength;
1458     while(--length >= oldLength) {
1459       array[length] = padChar;
1460     }
1461     setLength(targetLength);
1462     return TRUE;
1463   }
1464 }
1465
1466 //========================================
1467 // Hashing
1468 //========================================
1469 int32_t
1470 UnicodeString::doHashCode() const
1471 {
1472     /* Delegate hash computation to uhash.  This makes UnicodeString
1473      * hashing consistent with UChar* hashing.  */
1474     int32_t hashCode = uhash_hashUCharsN(getArrayStart(), length());
1475     if (hashCode == kInvalidHashCode) {
1476         hashCode = kEmptyHashCode;
1477     }
1478     return hashCode;
1479 }
1480
1481 //========================================
1482 // External Buffer
1483 //========================================
1484
1485 UChar *
1486 UnicodeString::getBuffer(int32_t minCapacity) {
1487   if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
1488     fFlags|=kOpenGetBuffer;
1489     fShortLength=0;
1490     return getArrayStart();
1491   } else {
1492     return 0;
1493   }
1494 }
1495
1496 void
1497 UnicodeString::releaseBuffer(int32_t newLength) {
1498   if(fFlags&kOpenGetBuffer && newLength>=-1) {
1499     // set the new fLength
1500     int32_t capacity=getCapacity();
1501     if(newLength==-1) {
1502       // the new length is the string length, capped by fCapacity
1503       const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
1504       while(p<limit && *p!=0) {
1505         ++p;
1506       }
1507       newLength=(int32_t)(p-array);
1508     } else if(newLength>capacity) {
1509       newLength=capacity;
1510     }
1511     setLength(newLength);
1512     fFlags&=~kOpenGetBuffer;
1513   }
1514 }
1515
1516 //========================================
1517 // Miscellaneous
1518 //========================================
1519 UBool
1520 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
1521                                   int32_t growCapacity,
1522                                   UBool doCopyArray,
1523                                   int32_t **pBufferToDelete,
1524                                   UBool forceClone) {
1525   // default parameters need to be static, therefore
1526   // the defaults are -1 to have convenience defaults
1527   if(newCapacity == -1) {
1528     newCapacity = getCapacity();
1529   }
1530
1531   // while a getBuffer(minCapacity) is "open",
1532   // prevent any modifications of the string by returning FALSE here
1533   // if the string is bogus, then only an assignment or similar can revive it
1534   if(!isWritable()) {
1535     return FALSE;
1536   }
1537
1538   /*
1539    * We need to make a copy of the array if
1540    * the buffer is read-only, or
1541    * the buffer is refCounted (shared), and refCount>1, or
1542    * the buffer is too small.
1543    * Return FALSE if memory could not be allocated.
1544    */
1545   if(forceClone ||
1546      fFlags & kBufferIsReadonly ||
1547      (fFlags & kRefCounted && refCount() > 1) ||
1548      newCapacity > getCapacity()
1549   ) {
1550     // check growCapacity for default value and use of the stack buffer
1551     if(growCapacity == -1) {
1552       growCapacity = newCapacity;
1553     } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
1554       growCapacity = US_STACKBUF_SIZE;
1555     }
1556
1557     // save old values
1558     UChar oldStackBuffer[US_STACKBUF_SIZE];
1559     UChar *oldArray;
1560     uint8_t flags = fFlags;
1561
1562     if(flags&kUsingStackBuffer) {
1563       if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
1564         // copy the stack buffer contents because it will be overwritten with
1565         // fUnion.fFields values
1566         us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength);
1567         oldArray = oldStackBuffer;
1568       } else {
1569         oldArray = 0; // no need to copy from stack buffer to itself
1570       }
1571     } else {
1572       oldArray = fUnion.fFields.fArray;
1573     }
1574
1575     // allocate a new array
1576     if(allocate(growCapacity) ||
1577        (newCapacity < growCapacity && allocate(newCapacity))
1578     ) {
1579       if(doCopyArray && oldArray != 0) {
1580         // copy the contents
1581         // do not copy more than what fits - it may be smaller than before
1582         int32_t minLength = length();
1583         newCapacity = getCapacity();
1584         if(newCapacity < minLength) {
1585           minLength = newCapacity;
1586           setLength(minLength);
1587         }
1588         us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
1589       } else {
1590         fShortLength = 0;
1591       }
1592
1593       // release the old array
1594       if(flags & kRefCounted) {
1595         // the array is refCounted; decrement and release if 0
1596         int32_t *pRefCount = ((int32_t *)oldArray - 1);
1597         if(umtx_atomic_dec(pRefCount) == 0) {
1598           if(pBufferToDelete == 0) {
1599             uprv_free(pRefCount);
1600           } else {
1601             // the caller requested to delete it himself
1602             *pBufferToDelete = pRefCount;
1603           }
1604         }
1605       }
1606     } else {
1607       // not enough memory for growCapacity and not even for the smaller newCapacity
1608       // reset the old values for setToBogus() to release the array
1609       if(!(flags&kUsingStackBuffer)) {
1610         fUnion.fFields.fArray = oldArray;
1611       }
1612       fFlags = flags;
1613       setToBogus();
1614       return FALSE;
1615     }
1616   }
1617   return TRUE;
1618 }
1619 U_NAMESPACE_END
1620
#ifdef U_STATIC_IMPLEMENTATION
/*
This function is never meant to be called. It exists only so that the
virtual vector deleting destructor gets defined within unistr.cpp.
UObject already provides the vector deleting destructor, but defining it
here guarantees that it is included with this object file, which keeps
static library dependencies to a minimum.
*/
static void uprv_UnicodeStringDummy(void) {
    U_NAMESPACE_USE
    UnicodeString *dummies = new UnicodeString[2];
    delete [] dummies;
}
#endif