icuSources/test/intltest/ustrtest.cpp

   1 // © 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /********************************************************************
   4  * COPYRIGHT:
   5  * Copyright (c) 1997-2016, International Business Machines Corporation and
   6  * others. All Rights Reserved.
   7  ********************************************************************/
   8
   9 #include "ustrtest.h"
  10 #include "unicode/appendable.h"
  11 #include "unicode/std_string.h"
  12 #include "unicode/unistr.h"
  13 #include "unicode/uchar.h"
  14 #include "unicode/ustring.h"
  15 #include "unicode/locid.h"
  16 #include "unicode/strenum.h"
  17 #include "unicode/ucnv.h"
  18 #include "unicode/uenum.h"
  19 #include "unicode/utf16.h"
  20 #include "cmemory.h"
  21 #include "charstr.h"
  22
  23 #if 0
  24 #include "unicode/ustream.h"
  25
  26 #include <iostream>
  27 using namespace std;
  28
  29 #endif
  30
  31 UnicodeStringTest::~UnicodeStringTest() {}
  32
  33 extern IntlTest *createStringCaseTest();
  34
  35 void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *par)
  36 {
  37     if (exec) logln("TestSuite UnicodeStringTest: ");
  38     TESTCASE_AUTO_BEGIN;
  39     TESTCASE_AUTO_CREATE_CLASS(StringCaseTest);
  40     TESTCASE_AUTO(TestBasicManipulation);
  41     TESTCASE_AUTO(TestCompare);
  42     TESTCASE_AUTO(TestExtract);
  43     TESTCASE_AUTO(TestRemoveReplace);
  44     TESTCASE_AUTO(TestSearching);
  45     TESTCASE_AUTO(TestSpacePadding);
  46     TESTCASE_AUTO(TestPrefixAndSuffix);
  47     TESTCASE_AUTO(TestFindAndReplace);
  48     TESTCASE_AUTO(TestBogus);
  49     TESTCASE_AUTO(TestReverse);
  50     TESTCASE_AUTO(TestMiscellaneous);
  51     TESTCASE_AUTO(TestStackAllocation);
  52     TESTCASE_AUTO(TestUnescape);
  53     TESTCASE_AUTO(TestCountChar32);
  54     TESTCASE_AUTO(TestStringEnumeration);
  55     TESTCASE_AUTO(TestNameSpace);
  56     TESTCASE_AUTO(TestUTF32);
  57     TESTCASE_AUTO(TestUTF8);
  58     TESTCASE_AUTO(TestReadOnlyAlias);
  59     TESTCASE_AUTO(TestAppendable);
  60     TESTCASE_AUTO(TestUnicodeStringImplementsAppendable);
  61     TESTCASE_AUTO(TestSizeofUnicodeString);
  62     TESTCASE_AUTO(TestStartsWithAndEndsWithNulTerminated);
  63     TESTCASE_AUTO(TestMoveSwap);
  64     TESTCASE_AUTO(TestUInt16Pointers);
  65     TESTCASE_AUTO(TestWCharPointers);
  66     TESTCASE_AUTO(TestNullPointers);
  67     TESTCASE_AUTO_END;
  68 }
  69
  70 void
  71 UnicodeStringTest::TestBasicManipulation()
  72 {
  73     UnicodeString   test1("Now is the time for all men to come swiftly to the aid of the party.\n");
  74     UnicodeString   expectedValue;
  75     UnicodeString   *c;
  76
  77     c=(UnicodeString *)test1.clone();
  78     test1.insert(24, "good ");
  79     expectedValue = "Now is the time for all good men to come swiftly to the aid of the party.\n";
  80     if (test1 != expectedValue)
  81         errln("insert() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
  82
  83     c->insert(24, "good ");
  84     if(*c != expectedValue) {
  85         errln("clone()->insert() failed:  expected \"" + expectedValue + "\"\n,got \"" + *c + "\"");
  86     }
  87     delete c;
  88
  89     test1.remove(41, 8);
  90     expectedValue = "Now is the time for all good men to come to the aid of the party.\n";
  91     if (test1 != expectedValue)
  92         errln("remove() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
  93
  94     test1.replace(58, 6, "ir country");
  95     expectedValue = "Now is the time for all good men to come to the aid of their country.\n";
  96     if (test1 != expectedValue)
  97         errln("replace() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
  98
  99     UChar     temp[80];
 100     test1.extract(0, 15, temp);
 101
 102     UnicodeString       test2(temp, 15);
 103
 104     expectedValue = "Now is the time";
 105     if (test2 != expectedValue)
 106         errln("extract() failed:  expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
 107
 108     test2 += " for me to go!\n";
 109     expectedValue = "Now is the time for me to go!\n";
 110     if (test2 != expectedValue)
 111         errln("operator+=() failed:  expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
 112
 113     if (test1.length() != 70)
 114         errln(UnicodeString("length() failed: expected 70, got ") + test1.length());
 115     if (test2.length() != 30)
 116         errln(UnicodeString("length() failed: expected 30, got ") + test2.length());
 117
 118     UnicodeString test3;
 119     test3.append((UChar32)0x20402);
 120     if(test3 != CharsToUnicodeString("\\uD841\\uDC02")){
 121         errln((UnicodeString)"append failed for UChar32, expected \"\\\\ud841\\\\udc02\", got " + prettify(test3));
 122     }
 123     if(test3.length() != 2){
 124         errln(UnicodeString("append or length failed for UChar32, expected 2, got ") + test3.length());
 125     }
 126     test3.append((UChar32)0x0074);
 127     if(test3 != CharsToUnicodeString("\\uD841\\uDC02t")){
 128         errln((UnicodeString)"append failed for UChar32, expected \"\\\\uD841\\\\uDC02t\", got " + prettify(test3));
 129     }
 130     if(test3.length() != 3){
 131         errln((UnicodeString)"append or length failed for UChar32, expected 2, got " + test3.length());
 132     }
 133
 134     // test some UChar32 overloads
 135     if( test3.setTo((UChar32)0x10330).length() != 2 ||
 136         test3.insert(0, (UChar32)0x20100).length() != 4 ||
 137         test3.replace(2, 2, (UChar32)0xe0061).length() != 4 ||
 138         (test3 = (UChar32)0x14001).length() != 2
 139     ) {
 140         errln((UnicodeString)"simple UChar32 overloads for replace, insert, setTo or = failed");
 141     }
 142
 143     {
 144         // test moveIndex32()
 145         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
 146
 147         if(
 148             s.moveIndex32(2, -1)!=0 ||
 149             s.moveIndex32(2, 1)!=4 ||
 150             s.moveIndex32(2, 2)!=5 ||
 151             s.moveIndex32(5, -2)!=2 ||
 152             s.moveIndex32(0, -1)!=0 ||
 153             s.moveIndex32(6, 1)!=6
 154         ) {
 155             errln("UnicodeString::moveIndex32() failed");
 156         }
 157
 158         if(s.getChar32Start(1)!=0 || s.getChar32Start(2)!=2) {
 159             errln("UnicodeString::getChar32Start() failed");
 160         }
 161
 162         if(s.getChar32Limit(1)!=2 || s.getChar32Limit(2)!=2) {
 163             errln("UnicodeString::getChar32Limit() failed");
 164         }
 165     }
 166
 167     {
 168         // test new 2.2 constructors and setTo function that parallel Java's substring function.
 169         UnicodeString src("Hello folks how are you?");
 170         UnicodeString target1("how are you?");
 171         if (target1 != UnicodeString(src, 12)) {
 172             errln("UnicodeString(const UnicodeString&, int32_t) failed");
 173         }
 174         UnicodeString target2("folks");
 175         if (target2 != UnicodeString(src, 6, 5)) {
 176             errln("UnicodeString(const UnicodeString&, int32_t, int32_t) failed");
 177         }
 178         if (target1 != target2.setTo(src, 12)) {
 179             errln("UnicodeString::setTo(const UnicodeString&, int32_t) failed");
 180         }
 181     }
 182
 183     {
 184         // op+ is new in ICU 2.8
 185         UnicodeString s=UnicodeString("abc", "")+UnicodeString("def", "")+UnicodeString("ghi", "");
 186         if(s!=UnicodeString("abcdefghi", "")) {
 187             errln("operator+(UniStr, UniStr) failed");
 188         }
 189     }
 190
 191     {
 192         // tests for Jitterbug 2360
 193         // verify that APIs with source pointer + length accept length == -1
 194         // mostly test only where modified, only few functions did not already do this
 195         if(UnicodeString("abc", -1, "")!=UnicodeString("abc", "")) {
 196             errln("UnicodeString(codepageData, dataLength, codepage) does not work with dataLength==-1");
 197         }
 198
 199         UChar buffer[10]={ 0x61, 0x62, 0x20ac, 0xd900, 0xdc05, 0,   0x62, 0xffff, 0xdbff, 0xdfff };
 200         UnicodeString s, t(buffer, -1, UPRV_LENGTHOF(buffer));
 201
 202         if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=u_strlen(buffer)) {
 203             errln("UnicodeString.setTo(buffer, length, capacity) does not work with length==-1");
 204         }
 205         if(t.length()!=u_strlen(buffer)) {
 206             errln("UnicodeString(buffer, length, capacity) does not work with length==-1");
 207         }
 208
 209         if(0!=s.caseCompare(buffer, -1, U_FOLD_CASE_DEFAULT)) {
 210             errln("UnicodeString.caseCompare(const UChar *, length, options) does not work with length==-1");
 211         }
 212         if(0!=s.caseCompare(0, s.length(), buffer, U_FOLD_CASE_DEFAULT)) {
 213             errln("UnicodeString.caseCompare(start, _length, const UChar *, options) does not work");
 214         }
 215
 216         buffer[u_strlen(buffer)]=0xe4;
 217         UnicodeString u(buffer, -1, UPRV_LENGTHOF(buffer));
 218         if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=UPRV_LENGTHOF(buffer)) {
 219             errln("UnicodeString.setTo(buffer without NUL, length, capacity) does not work with length==-1");
 220         }
 221         if(u.length()!=UPRV_LENGTHOF(buffer)) {
 222             errln("UnicodeString(buffer without NUL, length, capacity) does not work with length==-1");
 223         }
 224
 225         static const char cs[]={ 0x61, (char)0xe4, (char)0x85, 0 };
 226         UConverter *cnv;
 227         UErrorCode errorCode=U_ZERO_ERROR;
 228
 229         cnv=ucnv_open("ISO-8859-1", &errorCode);
 230         UnicodeString v(cs, -1, cnv, errorCode);
 231         ucnv_close(cnv);
 232         if(v!=CharsToUnicodeString("a\\xe4\\x85")) {
 233             errln("UnicodeString(const char *, length, cnv, errorCode) does not work with length==-1");
 234         }
 235     }
 236
 237 #if U_CHARSET_IS_UTF8
 238     {
 239         // Test the hardcoded-UTF-8 UnicodeString optimizations.
 240         static const uint8_t utf8[]={ 0x61, 0xC3, 0xA4, 0xC3, 0x9F, 0xE4, 0xB8, 0x80, 0 };
 241         static const UChar utf16[]={ 0x61, 0xE4, 0xDF, 0x4E00 };
 242         UnicodeString from8a = UnicodeString((const char *)utf8);
 243         UnicodeString from8b = UnicodeString((const char *)utf8, (int32_t)sizeof(utf8)-1);
 244         UnicodeString from16(FALSE, utf16, UPRV_LENGTHOF(utf16));
 245         if(from8a != from16 || from8b != from16) {
 246             errln("UnicodeString(const char * U_CHARSET_IS_UTF8) failed");
 247         }
 248         char buffer[16];
 249         int32_t length8=from16.extract(0, 0x7fffffff, buffer, (uint32_t)sizeof(buffer));
 250         if(length8!=((int32_t)sizeof(utf8)-1) || 0!=uprv_memcmp(buffer, utf8, sizeof(utf8))) {
 251             errln("UnicodeString::extract(char * U_CHARSET_IS_UTF8) failed");
 252         }
 253         length8=from16.extract(1, 2, buffer, (uint32_t)sizeof(buffer));
 254         if(length8!=4 || buffer[length8]!=0 || 0!=uprv_memcmp(buffer, utf8+1, length8)) {
 255             errln("UnicodeString::extract(substring to char * U_CHARSET_IS_UTF8) failed");
 256         }
 257     }
 258 #endif
 259 }
 260
 261 void
 262 UnicodeStringTest::TestCompare()
 263 {
 264     UnicodeString   test1("this is a test");
 265     UnicodeString   test2("this is a test");
 266     UnicodeString   test3("this is a test of the emergency broadcast system");
 267     UnicodeString   test4("never say, \"this is a test\"!!");
 268
 269     UnicodeString   test5((UChar)0x5000);
 270     UnicodeString   test6((UChar)0x5100);
 271
 272     UChar         uniChars[] = { 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73,
 273                  0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0 };
 274     char            chars[] = "this is a test";
 275
 276     // test operator== and operator!=
 277     if (test1 != test2 || test1 == test3 || test1 == test4)
 278         errln("operator== or operator!= failed");
 279
 280     // test operator> and operator<
 281     if (test1 > test2 || test1 < test2 || !(test1 < test3) || !(test1 > test4) ||
 282         !(test5 < test6)
 283     ) {
 284         errln("operator> or operator< failed");
 285     }
 286
 287     // test operator>= and operator<=
 288     if (!(test1 >= test2) || !(test1 <= test2) || !(test1 <= test3) || !(test1 >= test4))
 289         errln("operator>= or operator<= failed");
 290
 291     // test compare(UnicodeString)
 292     if (test1.compare(test2) != 0 || test1.compare(test3) >= 0 || test1.compare(test4) <= 0)
 293         errln("compare(UnicodeString) failed");
 294
 295     //test compare(offset, length, UnicodeString)
 296     if(test1.compare(0, 14, test2) != 0 ||
 297         test3.compare(0, 14, test2) != 0 ||
 298         test4.compare(12, 14, test2) != 0 ||
 299         test3.compare(0, 18, test1) <=0  )
 300         errln("compare(offset, length, UnicodeString) failes");
 301
 302     // test compare(UChar*)
 303     if (test2.compare(uniChars) != 0 || test3.compare(uniChars) <= 0 || test4.compare(uniChars) >= 0)
 304         errln("compare(UChar*) failed");
 305
 306     // test compare(char*)
 307     if (test2.compare(chars) != 0 || test3.compare(chars) <= 0 || test4.compare(chars) >= 0)
 308         errln("compare(char*) failed");
 309
 310     // test compare(UChar*, length)
 311     if (test1.compare(uniChars, 4) <= 0 || test1.compare(uniChars, 4) <= 0)
 312         errln("compare(UChar*, length) failed");
 313
 314     // test compare(thisOffset, thisLength, that, thatOffset, thatLength)
 315     if (test1.compare(0, 14, test2, 0, 14) != 0
 316     || test1.compare(0, 14, test3, 0, 14) != 0
 317     || test1.compare(0, 14, test4, 12, 14) != 0)
 318         errln("1. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
 319
 320     if (test1.compare(10, 4, test2, 0, 4) >= 0
 321     || test1.compare(10, 4, test3, 22, 9) <= 0
 322     || test1.compare(10, 4, test4, 22, 4) != 0)
 323         errln("2. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
 324
 325     // test compareBetween
 326     if (test1.compareBetween(0, 14, test2, 0, 14) != 0 || test1.compareBetween(0, 14, test3, 0, 14) != 0
 327                     || test1.compareBetween(0, 14, test4, 12, 26) != 0)
 328         errln("compareBetween failed");
 329
 330     if (test1.compareBetween(10, 14, test2, 0, 4) >= 0 || test1.compareBetween(10, 14, test3, 22, 31) <= 0
 331                     || test1.compareBetween(10, 14, test4, 22, 26) != 0)
 332         errln("compareBetween failed");
 333
 334     // test compare() etc. with strings that share a buffer but are not equal
 335     test2=test1; // share the buffer, length() too large for the stackBuffer
 336     test2.truncate(1); // change only the length, not the buffer
 337     if( test1==test2 || test1<=test2 ||
 338         test1.compare(test2)<=0 ||
 339         test1.compareCodePointOrder(test2)<=0 ||
 340         test1.compareCodePointOrder(0, INT32_MAX, test2)<=0 ||
 341         test1.compareCodePointOrder(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
 342         test1.compareCodePointOrderBetween(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
 343         test1.caseCompare(test2, U_FOLD_CASE_DEFAULT)<=0
 344     ) {
 345         errln("UnicodeStrings that share a buffer but have different lengths compare as equal");
 346     }
 347
 348     /* test compareCodePointOrder() */
 349     {
 350         /* these strings are in ascending order */
 351         static const UChar strings[][4]={
 352             { 0x61, 0 },                    /* U+0061 */
 353             { 0x20ac, 0xd801, 0 },          /* U+20ac U+d801 */
 354             { 0x20ac, 0xd800, 0xdc00, 0 },  /* U+20ac U+10000 */
 355             { 0xd800, 0 },                  /* U+d800 */
 356             { 0xd800, 0xff61, 0 },          /* U+d800 U+ff61 */
 357             { 0xdfff, 0 },                  /* U+dfff */
 358             { 0xff61, 0xdfff, 0 },          /* U+ff61 U+dfff */
 359             { 0xff61, 0xd800, 0xdc02, 0 },  /* U+ff61 U+10002 */
 360             { 0xd800, 0xdc02, 0 },          /* U+10002 */
 361             { 0xd84d, 0xdc56, 0 }           /* U+23456 */
 362         };
 363         UnicodeString u[20]; // must be at least as long as strings[]
 364         int32_t i;
 365
 366         for(i=0; i<UPRV_LENGTHOF(strings); ++i) {
 367             u[i]=UnicodeString(TRUE, strings[i], -1);
 368         }
 369
 370         for(i=0; i<UPRV_LENGTHOF(strings)-1; ++i) {
 371             if(u[i].compareCodePointOrder(u[i+1])>=0 || u[i].compareCodePointOrder(0, INT32_MAX, u[i+1].getBuffer())>=0) {
 372                 errln("error: UnicodeString::compareCodePointOrder() fails for string %d and the following one\n", i);
 373             }
 374         }
 375     }
 376
 377     /* test caseCompare() */
 378     {
 379         static const UChar
 380         _mixed[]=               { 0x61, 0x42, 0x131, 0x3a3, 0xdf,       0x130,       0x49,  0xfb03,           0xd93f, 0xdfff, 0 },
 381         _otherDefault[]=        { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x69, 0x307, 0x69,  0x46, 0x66, 0x49, 0xd93f, 0xdfff, 0 },
 382         _otherExcludeSpecialI[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x69,        0x131, 0x66, 0x46, 0x69, 0xd93f, 0xdfff, 0 },
 383         _different[]=           { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x130,       0x49,  0x46, 0x66, 0x49, 0xd93f, 0xdffd, 0 };
 384
 385         UnicodeString
 386             mixed(TRUE, _mixed, -1),
 387             otherDefault(TRUE, _otherDefault, -1),
 388             otherExcludeSpecialI(TRUE, _otherExcludeSpecialI, -1),
 389             different(TRUE, _different, -1);
 390
 391         int8_t result;
 392
 393         /* test caseCompare() */
 394         result=mixed.caseCompare(otherDefault, U_FOLD_CASE_DEFAULT);
 395         if(result!=0 || 0!=mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_DEFAULT)) {
 396             errln("error: mixed.caseCompare(other, default)=%ld instead of 0\n", result);
 397         }
 398         result=mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
 399         if(result!=0) {
 400             errln("error: mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=%ld instead of 0\n", result);
 401         }
 402         result=mixed.caseCompare(otherDefault, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
 403         if(result==0 || 0==mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
 404             errln("error: mixed.caseCompare(other, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=0 instead of !=0\n");
 405         }
 406
 407         /* test caseCompare() */
 408         result=mixed.caseCompare(different, U_FOLD_CASE_DEFAULT);
 409         if(result<=0) {
 410             errln("error: mixed.caseCompare(different, default)=%ld instead of positive\n", result);
 411         }
 412
 413         /* test caseCompare() - include the folded sharp s (U+00df) with different lengths */
 414         result=mixed.caseCompare(1, 4, different, 1, 5, U_FOLD_CASE_DEFAULT);
 415         if(result!=0 || 0!=mixed.caseCompareBetween(1, 5, different, 1, 6, U_FOLD_CASE_DEFAULT)) {
 416             errln("error: mixed.caseCompare(mixed, 1, 4, different, 1, 5, default)=%ld instead of 0\n", result);
 417         }
 418
 419         /* test caseCompare() - stop in the middle of the sharp s (U+00df) */
 420         result=mixed.caseCompare(1, 4, different, 1, 4, U_FOLD_CASE_DEFAULT);
 421         if(result<=0) {
 422             errln("error: mixed.caseCompare(1, 4, different, 1, 4, default)=%ld instead of positive\n", result);
 423         }
 424     }
 425
 426     // test that srcLength=-1 is handled in functions that
 427     // take input const UChar */int32_t srcLength (j785)
 428     {
 429         static const UChar u[]={ 0x61, 0x308, 0x62, 0 };
 430         UnicodeString s=UNICODE_STRING("a\\u0308b", 8).unescape();
 431
 432         if(s.compare(u, -1)!=0 || s.compare(0, 999, u, 0, -1)!=0) {
 433             errln("error UnicodeString::compare(..., const UChar *, srcLength=-1) does not work");
 434         }
 435
 436         if(s.compareCodePointOrder(u, -1)!=0 || s.compareCodePointOrder(0, 999, u, 0, -1)!=0) {
 437             errln("error UnicodeString::compareCodePointOrder(..., const UChar *, srcLength=-1, ...) does not work");
 438         }
 439
 440         if(s.caseCompare(u, -1, U_FOLD_CASE_DEFAULT)!=0 || s.caseCompare(0, 999, u, 0, -1, U_FOLD_CASE_DEFAULT)!=0) {
 441             errln("error UnicodeString::caseCompare(..., const UChar *, srcLength=-1, ...) does not work");
 442         }
 443
 444         if(s.indexOf(u, 1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0)!=1) {
 445             errln("error UnicodeString::indexOf(const UChar *, srcLength=-1, ...) does not work");
 446         }
 447
 448         if(s.lastIndexOf(u, 1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0)!=1) {
 449             errln("error UnicodeString::lastIndexOf(const UChar *, srcLength=-1, ...) does not work");
 450         }
 451
 452         UnicodeString s2, s3;
 453         s2.replace(0, 0, u+1, -1);
 454         s3.replace(0, 0, u, 1, -1);
 455         if(s.compare(1, 999, s2)!=0 || s2!=s3) {
 456             errln("error UnicodeString::replace(..., const UChar *, srcLength=-1, ...) does not work");
 457         }
 458     }
 459 }
 460
 461 void
 462 UnicodeStringTest::TestExtract()
 463 {
 464     UnicodeString  test1("Now is the time for all good men to come to the aid of their country.", "");
 465     UnicodeString  test2;
 466     UChar          test3[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
 467     char           test4[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
 468     UnicodeString  test5;
 469     char           test6[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
 470
 471     test1.extract(11, 12, test2);
 472     test1.extract(11, 12, test3);
 473     if (test1.extract(11, 12, test4) != 12 || test4[12] != 0) {
 474         errln("UnicodeString.extract(char *) failed to return the correct size of destination buffer.");
 475     }
 476
 477     // test proper pinning in extractBetween()
 478     test1.extractBetween(-3, 7, test5);
 479     if(test5!=UNICODE_STRING("Now is ", 7)) {
 480         errln("UnicodeString.extractBetween(-3, 7) did not pin properly.");
 481     }
 482
 483     test1.extractBetween(11, 23, test5);
 484     if (test1.extract(60, 71, test6) != 9) {
 485         errln("UnicodeString.extract() failed to return the correct size of destination buffer for end of buffer.");
 486     }
 487     if (test1.extract(11, 12, test6) != 12) {
 488         errln("UnicodeString.extract() failed to return the correct size of destination buffer.");
 489     }
 490
 491     // convert test4 back to Unicode for comparison
 492     UnicodeString test4b(test4, 12);
 493
 494     if (test1.extract(11, 12, (char *)NULL) != 12) {
 495         errln("UnicodeString.extract(NULL) failed to return the correct size of destination buffer.");
 496     }
 497     if (test1.extract(11, -1, test6) != 0) {
 498         errln("UnicodeString.extract(-1) failed to stop reading the string.");
 499     }
 500
 501     for (int32_t i = 0; i < 12; i++) {
 502         if (test1.charAt((int32_t)(11 + i)) != test2.charAt(i)) {
 503             errln(UnicodeString("extracting into a UnicodeString failed at position ") + i);
 504             break;
 505         }
 506         if (test1.charAt((int32_t)(11 + i)) != test3[i]) {
 507             errln(UnicodeString("extracting into an array of UChar failed at position ") + i);
 508             break;
 509         }
 510         if (((char)test1.charAt((int32_t)(11 + i))) != test4b.charAt(i)) {
 511             errln(UnicodeString("extracting into an array of char failed at position ") + i);
 512             break;
 513         }
 514         if (test1.charAt((int32_t)(11 + i)) != test5.charAt(i)) {
 515             errln(UnicodeString("extracting with extractBetween failed at position ") + i);
 516             break;
 517         }
 518     }
 519
 520     // test preflighting and overflows with invariant conversion
 521     if (test1.extract(0, 10, (char *)NULL, "") != 10) {
 522         errln("UnicodeString.extract(0, 10, (char *)NULL, \"\") != 10");
 523     }
 524
 525     test4[2] = (char)0xff;
 526     if (test1.extract(0, 10, test4, 2, "") != 10) {
 527         errln("UnicodeString.extract(0, 10, test4, 2, \"\") != 10");
 528     }
 529     if (test4[2] != (char)0xff) {
 530         errln("UnicodeString.extract(0, 10, test4, 2, \"\") overwrote test4[2]");
 531     }
 532
 533     {
 534         // test new, NUL-terminating extract() function
 535         UnicodeString s("terminate", "");
 536         UChar dest[20]={
 537             0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5,
 538             0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5
 539         };
 540         UErrorCode errorCode;
 541         int32_t length;
 542
 543         errorCode=U_ZERO_ERROR;
 544         length=s.extract((UChar *)NULL, 0, errorCode);
 545         if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
 546             errln("UnicodeString.extract(NULL, 0)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)", length, s.length(), u_errorName(errorCode));
 547         }
 548
 549         errorCode=U_ZERO_ERROR;
 550         length=s.extract(dest, s.length()-1, errorCode);
 551         if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
 552             errln("UnicodeString.extract(dest too short)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)",
 553                 length, u_errorName(errorCode), s.length());
 554         }
 555
 556         errorCode=U_ZERO_ERROR;
 557         length=s.extract(dest, s.length(), errorCode);
 558         if(errorCode!=U_STRING_NOT_TERMINATED_WARNING || length!=s.length()) {
 559             errln("UnicodeString.extract(dest just right without NUL)==%d (%s) expected %d (U_STRING_NOT_TERMINATED_WARNING)",
 560                 length, u_errorName(errorCode), s.length());
 561         }
 562         if(dest[length-1]!=s[length-1] || dest[length]!=0xa5) {
 563             errln("UnicodeString.extract(dest just right without NUL) did not extract the string correctly");
 564         }
 565
 566         errorCode=U_ZERO_ERROR;
 567         length=s.extract(dest, s.length()+1, errorCode);
 568         if(errorCode!=U_ZERO_ERROR || length!=s.length()) {
 569             errln("UnicodeString.extract(dest large enough)==%d (%s) expected %d (U_ZERO_ERROR)",
 570                 length, u_errorName(errorCode), s.length());
 571         }
 572         if(dest[length-1]!=s[length-1] || dest[length]!=0 || dest[length+1]!=0xa5) {
 573             errln("UnicodeString.extract(dest large enough) did not extract the string correctly");
 574         }
 575     }
 576
 577     {
 578         // test new UConverter extract() and constructor
 579         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
 580         char buffer[32];
 581         static const char expect[]={
 582             (char)0xf0, (char)0xaf, (char)0xa6, (char)0x99,
 583             (char)0xf0, (char)0x9d, (char)0x85, (char)0x9f,
 584             (char)0xc3, (char)0x84,
 585             (char)0xe1, (char)0xbb, (char)0x90
 586         };
 587         UErrorCode errorCode=U_ZERO_ERROR;
 588         UConverter *cnv=ucnv_open("UTF-8", &errorCode);
 589         int32_t length;
 590
 591         if(U_SUCCESS(errorCode)) {
 592             // test preflighting
 593             if( (length=s.extract(NULL, 0, cnv, errorCode))!=13 ||
 594                 errorCode!=U_BUFFER_OVERFLOW_ERROR
 595             ) {
 596                 errln("UnicodeString::extract(NULL, UConverter) preflighting failed (length=%ld, %s)",
 597                       length, u_errorName(errorCode));
 598             }
 599             errorCode=U_ZERO_ERROR;
 600             if( (length=s.extract(buffer, 2, cnv, errorCode))!=13 ||
 601                 errorCode!=U_BUFFER_OVERFLOW_ERROR
 602             ) {
 603                 errln("UnicodeString::extract(too small, UConverter) preflighting failed (length=%ld, %s)",
 604                       length, u_errorName(errorCode));
 605             }
 606
 607             // try error cases
 608             errorCode=U_ZERO_ERROR;
 609             if( s.extract(NULL, 2, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
 610                 errln("UnicodeString::extract(UConverter) succeeded with an illegal destination");
 611             }
 612             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
 613             if( s.extract(NULL, 0, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
 614                 errln("UnicodeString::extract(UConverter) succeeded with a previous error code");
 615             }
 616             errorCode=U_ZERO_ERROR;
 617
 618             // extract for real
 619             if( (length=s.extract(buffer, sizeof(buffer), cnv, errorCode))!=13 ||
 620                 uprv_memcmp(buffer, expect, 13)!=0 ||
 621                 buffer[13]!=0 ||
 622                 U_FAILURE(errorCode)
 623             ) {
 624                 errln("UnicodeString::extract(UConverter) conversion failed (length=%ld, %s)",
 625                       length, u_errorName(errorCode));
 626             }
 627             // Test again with just the converter name.
 628             if( (length=s.extract(0, s.length(), buffer, sizeof(buffer), "UTF-8"))!=13 ||
 629                 uprv_memcmp(buffer, expect, 13)!=0 ||
 630                 buffer[13]!=0 ||
 631                 U_FAILURE(errorCode)
 632             ) {
 633                 errln("UnicodeString::extract(\"UTF-8\") conversion failed (length=%ld, %s)",
 634                       length, u_errorName(errorCode));
 635             }
 636
 637             // try the constructor
 638             UnicodeString t(expect, sizeof(expect), cnv, errorCode);
 639             if(U_FAILURE(errorCode) || s!=t) {
 640                 errln("UnicodeString(UConverter) conversion failed (%s)",
 641                       u_errorName(errorCode));
 642             }
 643
 644             ucnv_close(cnv);
 645         }
 646     }
 647 }
 648
 649 void
 650 UnicodeStringTest::TestRemoveReplace()
 651 {
 652     UnicodeString   test1("The rain in Spain stays mainly on the plain");
 653     UnicodeString   test2("eat SPAMburgers!");
 654     UChar         test3[] = { 0x53, 0x50, 0x41, 0x4d, 0x4d, 0 };
 655     char            test4[] = "SPAM";
 656     UnicodeString&  test5 = test1;
 657
 658     test1.replace(4, 4, test2, 4, 4);
 659     test1.replace(12, 5, test3, 4);
 660     test3[4] = 0;
 661     test1.replace(17, 4, test3);
 662     test1.replace(23, 4, test4);
 663     test1.replaceBetween(37, 42, test2, 4, 8);
 664
 665     if (test1 != "The SPAM in SPAM SPAMs SPAMly on the SPAM")
 666         errln("One of the replace methods failed:\n"
 667               "  expected \"The SPAM in SPAM SPAMs SPAMly on the SPAM\",\n"
 668               "  got \"" + test1 + "\"");
 669
 670     test1.remove(21, 1);
 671     test1.removeBetween(26, 28);
 672
 673     if (test1 != "The SPAM in SPAM SPAM SPAM on the SPAM")
 674         errln("One of the remove methods failed:\n"
 675               "  expected \"The SPAM in SPAM SPAM SPAM on the SPAM\",\n"
 676               "  got \"" + test1 + "\"");
 677
 678     for (int32_t i = 0; i < test1.length(); i++) {
 679         if (test5[i] != 0x53 && test5[i] != 0x50 && test5[i] != 0x41 && test5[i] != 0x4d && test5[i] != 0x20) {
 680             test1.setCharAt(i, 0x78);
 681         }
 682     }
 683
 684     if (test1 != "xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM")
 685         errln("One of the remove methods failed:\n"
 686               "  expected \"xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM\",\n"
 687               "  got \"" + test1 + "\"");
 688
 689     test1.remove();
 690     if (test1.length() != 0)
 691         errln("Remove() failed: expected empty string, got \"" + test1 + "\"");
 692 }
 693
 694 void
 695 UnicodeStringTest::TestSearching()
 696 {
 697     UnicodeString test1("test test ttest tetest testesteststt");
 698     UnicodeString test2("test");
 699     UChar testChar = 0x74;
 700
 701     UChar32 testChar32 = 0x20402;
 702     UChar testData[]={
 703         //   0       1       2       3       4       5       6       7
 704         0xd841, 0xdc02, 0x0071, 0xdc02, 0xd841, 0x0071, 0xd841, 0xdc02,
 705
 706         //   8       9      10      11      12      13      14      15
 707         0x0071, 0x0072, 0xd841, 0xdc02, 0x0071, 0xd841, 0xdc02, 0x0071,
 708
 709         //  16      17      18      19
 710         0xdc02, 0xd841, 0x0073, 0x0000
 711     };
 712     UnicodeString test3(testData);
 713     UnicodeString test4(testChar32);
 714
 715     uint16_t occurrences = 0;
 716     int32_t startPos = 0;
 717     for ( ;
 718           startPos != -1 && startPos < test1.length();
 719           (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
 720         ;
 721     if (occurrences != 6)
 722         errln(UnicodeString("indexOf failed: expected to find 6 occurrences, found ") + occurrences);
 723
 724     for ( occurrences = 0, startPos = 10;
 725           startPos != -1 && startPos < test1.length();
 726           (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
 727         ;
 728     if (occurrences != 4)
 729         errln(UnicodeString("indexOf with starting offset failed: "
 730                             "expected to find 4 occurrences, found ") + occurrences);
 731
 732     int32_t endPos = 28;
 733     for ( occurrences = 0, startPos = 5;
 734           startPos != -1 && startPos < test1.length();
 735           (startPos = test1.indexOf(test2, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
 736         ;
 737     if (occurrences != 4)
 738         errln(UnicodeString("indexOf with starting and ending offsets failed: "
 739                             "expected to find 4 occurrences, found ") + occurrences);
 740
 741     //using UChar32 string
 742     for ( startPos=0, occurrences=0;
 743           startPos != -1 && startPos < test3.length();
 744           (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
 745         ;
 746     if (occurrences != 4)
 747         errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
 748
 749     for ( startPos=10, occurrences=0;
 750           startPos != -1 && startPos < test3.length();
 751           (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
 752         ;
 753     if (occurrences != 2)
 754         errln(UnicodeString("indexOf failed: expected to find 2 occurrences, found ") + occurrences);
 755     //---
 756
 757     for ( occurrences = 0, startPos = 0;
 758           startPos != -1 && startPos < test1.length();
 759           (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
 760         ;
 761     if (occurrences != 16)
 762         errln(UnicodeString("indexOf with character failed: "
 763                             "expected to find 16 occurrences, found ") + occurrences);
 764
 765     for ( occurrences = 0, startPos = 10;
 766           startPos != -1 && startPos < test1.length();
 767           (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
 768         ;
 769     if (occurrences != 12)
 770         errln(UnicodeString("indexOf with character & start offset failed: "
 771                             "expected to find 12 occurrences, found ") + occurrences);
 772
 773     for ( occurrences = 0, startPos = 5, endPos = 28;
 774           startPos != -1 && startPos < test1.length();
 775           (startPos = test1.indexOf(testChar, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
 776         ;
 777     if (occurrences != 10)
 778         errln(UnicodeString("indexOf with character & start & end offsets failed: "
 779                             "expected to find 10 occurrences, found ") + occurrences);
 780
 781     //testing for UChar32
 782     UnicodeString subString;
 783     for( occurrences =0, startPos=0; startPos < test3.length(); startPos +=1){
 784         subString.append(test3, startPos, test3.length());
 785         if(subString.indexOf(testChar32) != -1 ){
 786              ++occurrences;
 787         }
 788         subString.remove();
 789     }
 790     if (occurrences != 14)
 791         errln((UnicodeString)"indexOf failed: expected to find 14 occurrences, found " + occurrences);
 792
 793     for ( occurrences = 0, startPos = 0;
 794           startPos != -1 && startPos < test3.length();
 795           (startPos = test3.indexOf(testChar32, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
 796         ;
 797     if (occurrences != 4)
 798         errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
 799
 800     endPos=test3.length();
 801     for ( occurrences = 0, startPos = 5;
 802           startPos != -1 && startPos < test3.length();
 803           (startPos = test3.indexOf(testChar32, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
 804         ;
 805     if (occurrences != 3)
 806         errln((UnicodeString)"indexOf with character & start & end offsets failed: expected to find 2 occurrences, found " + occurrences);
 807     //---
 808
 809     if(test1.lastIndexOf(test2)!=29) {
 810         errln("test1.lastIndexOf(test2)!=29");
 811     }
 812
 813     if(test1.lastIndexOf(test2, 15)!=29 || test1.lastIndexOf(test2, 29)!=29 || test1.lastIndexOf(test2, 30)!=-1) {
 814         errln("test1.lastIndexOf(test2, start) failed");
 815     }
 816
 817     for ( occurrences = 0, startPos = 32;
 818           startPos != -1;
 819           (startPos = test1.lastIndexOf(test2, 5, startPos - 5)) != -1 ? ++occurrences : 0)
 820         ;
 821     if (occurrences != 4)
 822         errln(UnicodeString("lastIndexOf with starting and ending offsets failed: "
 823                             "expected to find 4 occurrences, found ") + occurrences);
 824
 825     for ( occurrences = 0, startPos = 32;
 826           startPos != -1;
 827           (startPos = test1.lastIndexOf(testChar, 5, startPos - 5)) != -1 ? ++occurrences : 0)
 828         ;
 829     if (occurrences != 11)
 830         errln(UnicodeString("lastIndexOf with character & start & end offsets failed: "
 831                             "expected to find 11 occurrences, found ") + occurrences);
 832
 833     //testing UChar32
 834     startPos=test3.length();
 835     for ( occurrences = 0;
 836           startPos != -1;
 837           (startPos = test3.lastIndexOf(testChar32, 5, startPos - 5)) != -1 ? ++occurrences : 0)
 838         ;
 839     if (occurrences != 3)
 840         errln((UnicodeString)"lastIndexOf with character & start & end offsets failed: expected to find 3 occurrences, found " + occurrences);
 841
 842
 843     for ( occurrences = 0, endPos = test3.length();  endPos > 0; endPos -= 1){
 844         subString.remove();
 845         subString.append(test3, 0, endPos);
 846         if(subString.lastIndexOf(testChar32) != -1 ){
 847             ++occurrences;
 848         }
 849     }
 850     if (occurrences != 18)
 851         errln((UnicodeString)"indexOf failed: expected to find 18 occurrences, found " + occurrences);
 852     //---
 853
 854     // test that indexOf(UChar32) and lastIndexOf(UChar32)
 855     // do not find surrogate code points when they are part of matched pairs
 856     // (= part of supplementary code points)
 857     // Jitterbug 1542
 858     if(test3.indexOf((UChar32)0xd841) != 4 || test3.indexOf((UChar32)0xdc02) != 3) {
 859         errln("error: UnicodeString::indexOf(UChar32 surrogate) finds a partial supplementary code point");
 860     }
 861     if( UnicodeString(test3, 0, 17).lastIndexOf((UChar)0xd841, 0) != 4 ||
 862         UnicodeString(test3, 0, 17).lastIndexOf((UChar32)0xd841, 2) != 4 ||
 863         test3.lastIndexOf((UChar32)0xd841, 0, 17) != 4 || test3.lastIndexOf((UChar32)0xdc02, 0, 17) != 16
 864     ) {
 865         errln("error: UnicodeString::lastIndexOf(UChar32 surrogate) finds a partial supplementary code point");
 866     }
 867 }
 868
 869 void
 870 UnicodeStringTest::TestSpacePadding()
 871 {
 872     UnicodeString test1("hello");
 873     UnicodeString test2("   there");
 874     UnicodeString test3("Hi!  How ya doin'?  Beautiful day, isn't it?");
 875     UnicodeString test4;
 876     UBool returnVal;
 877     UnicodeString expectedValue;
 878
 879     returnVal = test1.padLeading(15);
 880     expectedValue = "          hello";
 881     if (returnVal == FALSE || test1 != expectedValue)
 882         errln("padLeading() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
 883
 884     returnVal = test2.padTrailing(15);
 885     expectedValue = "   there       ";
 886     if (returnVal == FALSE || test2 != expectedValue)
 887         errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
 888
 889     expectedValue = test3;
 890     returnVal = test3.padTrailing(15);
 891     if (returnVal == TRUE || test3 != expectedValue)
 892         errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
 893
 894     expectedValue = "hello";
 895     test4.setTo(test1).trim();
 896
 897     if (test4 != expectedValue || test1 == expectedValue || test4 != expectedValue)
 898         errln("trim(UnicodeString&) failed");
 899
 900     test1.trim();
 901     if (test1 != expectedValue)
 902         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
 903
 904     test2.trim();
 905     expectedValue = "there";
 906     if (test2 != expectedValue)
 907         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
 908
 909     test3.trim();
 910     expectedValue = "Hi!  How ya doin'?  Beautiful day, isn't it?";
 911     if (test3 != expectedValue)
 912         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
 913
 914     returnVal = test1.truncate(15);
 915     expectedValue = "hello";
 916     if (returnVal == TRUE || test1 != expectedValue)
 917         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
 918
 919     returnVal = test2.truncate(15);
 920     expectedValue = "there";
 921     if (returnVal == TRUE || test2 != expectedValue)
 922         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
 923
 924     returnVal = test3.truncate(15);
 925     expectedValue = "Hi!  How ya doi";
 926     if (returnVal == FALSE || test3 != expectedValue)
 927         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
 928 }
 929
 930 void
 931 UnicodeStringTest::TestPrefixAndSuffix()
 932 {
 933     UnicodeString test1("Now is the time for all good men to come to the aid of their country.");
 934     UnicodeString test2("Now");
 935     UnicodeString test3("country.");
 936     UnicodeString test4("count");
 937
 938     if (!test1.startsWith(test2) || !test1.startsWith(test2, 0, test2.length())) {
 939         errln("startsWith() failed: \"" + test2 + "\" should be a prefix of \"" + test1 + "\".");
 940     }
 941
 942     if (test1.startsWith(test3) ||
 943         test1.startsWith(test3.getBuffer(), test3.length()) ||
 944         test1.startsWith(test3.getTerminatedBuffer(), 0, -1)
 945     ) {
 946         errln("startsWith() failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test1 + "\".");
 947     }
 948
 949     if (test1.endsWith(test2)) {
 950         errln("endsWith() failed: \"" + test2 + "\" shouldn't be a suffix of \"" + test1 + "\".");
 951     }
 952
 953     if (!test1.endsWith(test3)) {
 954         errln("endsWith(test3) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
 955     }
 956     if (!test1.endsWith(test3, 0, INT32_MAX)) {
 957         errln("endsWith(test3, 0, INT32_MAX) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
 958     }
 959
 960     if(!test1.endsWith(test3.getBuffer(), test3.length())) {
 961         errln("endsWith(test3.getBuffer(), test3.length()) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
 962     }
 963     if(!test1.endsWith(test3.getTerminatedBuffer(), 0, -1)) {
 964         errln("endsWith(test3.getTerminatedBuffer(), 0, -1) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
 965     }
 966
 967     if (!test3.startsWith(test4)) {
 968         errln("endsWith(test4) failed: \"" + test4 + "\" should be a prefix of \"" + test3 + "\".");
 969     }
 970
 971     if (test4.startsWith(test3)) {
 972         errln("startsWith(test3) failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test4 + "\".");
 973     }
 974 }
 975
 976 void
 977 UnicodeStringTest::TestStartsWithAndEndsWithNulTerminated() {
 978     UnicodeString test("abcde");
 979     const UChar ab[] = { 0x61, 0x62, 0 };
 980     const UChar de[] = { 0x64, 0x65, 0 };
 981     assertTrue("abcde.startsWith(ab, -1)", test.startsWith(ab, -1));
 982     assertTrue("abcde.startsWith(ab, 0, -1)", test.startsWith(ab, 0, -1));
 983     assertTrue("abcde.endsWith(de, -1)", test.endsWith(de, -1));
 984     assertTrue("abcde.endsWith(de, 0, -1)", test.endsWith(de, 0, -1));
 985 }
 986
 987 void
 988 UnicodeStringTest::TestFindAndReplace()
 989 {
 990     UnicodeString test1("One potato, two potato, three potato, four\n");
 991     UnicodeString test2("potato");
 992     UnicodeString test3("MISSISSIPPI");
 993
 994     UnicodeString expectedValue;
 995
 996     test1.findAndReplace(test2, test3);
 997     expectedValue = "One MISSISSIPPI, two MISSISSIPPI, three MISSISSIPPI, four\n";
 998     if (test1 != expectedValue)
 999         errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
1000     test1.findAndReplace(2, 32, test3, test2);
1001     expectedValue = "One potato, two potato, three MISSISSIPPI, four\n";
1002     if (test1 != expectedValue)
1003         errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
1004 }
1005
1006 void
1007 UnicodeStringTest::TestReverse()
1008 {
1009     UnicodeString test("backwards words say to used I");
1010
1011     test.reverse();
1012     test.reverse(2, 4);
1013     test.reverse(7, 2);
1014     test.reverse(10, 3);
1015     test.reverse(14, 5);
1016     test.reverse(20, 9);
1017
1018     if (test != "I used to say words backwards")
1019         errln("reverse() failed:  Expected \"I used to say words backwards\",\n got \""
1020             + test + "\"");
1021
1022     test=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1023     test.reverse();
1024     if(test.char32At(0)!=0x1ed0 || test.char32At(1)!=0xc4 || test.char32At(2)!=0x1d15f || test.char32At(4)!=0x2f999) {
1025         errln("reverse() failed with supplementary characters");
1026     }
1027
1028     // Test case for ticket #8091:
1029     // UnicodeString::reverse() failed to see a lead surrogate in the middle of
1030     // an odd-length string that contains no other lead surrogates.
1031     test=UNICODE_STRING_SIMPLE("ab\\U0001F4A9e").unescape();
1032     UnicodeString expected=UNICODE_STRING_SIMPLE("e\\U0001F4A9ba").unescape();
1033     test.reverse();
1034     if(test!=expected) {
1035         errln("reverse() failed with only lead surrogate in the middle");
1036     }
1037 }
1038
1039 void
1040 UnicodeStringTest::TestMiscellaneous()
1041 {
1042     UnicodeString   test1("This is a test");
1043     UnicodeString   test2("This is a test");
1044     UnicodeString   test3("Me too!");
1045
1046     // test getBuffer(minCapacity) and releaseBuffer()
1047     test1=UnicodeString(); // make sure that it starts with its stackBuffer
1048     UChar *p=test1.getBuffer(20);
1049     if(test1.getCapacity()<20) {
1050         errln("UnicodeString::getBuffer(20).getCapacity()<20");
1051     }
1052
1053     test1.append((UChar)7); // must not be able to modify the string here
1054     test1.setCharAt(3, 7);
1055     test1.reverse();
1056     if( test1.length()!=0 ||
1057         test1.charAt(0)!=0xffff || test1.charAt(3)!=0xffff ||
1058         test1.getBuffer(10)!=0 || test1.getBuffer()!=0
1059     ) {
1060         errln("UnicodeString::getBuffer(minCapacity) allows read or write access to the UnicodeString");
1061     }
1062
1063     p[0]=1;
1064     p[1]=2;
1065     p[2]=3;
1066     test1.releaseBuffer(3);
1067     test1.append((UChar)4);
1068
1069     if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1070         errln("UnicodeString::releaseBuffer(newLength) does not properly reallow access to the UnicodeString");
1071     }
1072
1073     // test releaseBuffer() without getBuffer(minCapacity) - must not have any effect
1074     test1.releaseBuffer(1);
1075     if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1076         errln("UnicodeString::releaseBuffer(newLength) without getBuffer(minCapacity) changed the UnicodeString");
1077     }
1078
1079     // test getBuffer(const)
1080     const UChar *q=test1.getBuffer(), *r=test1.getBuffer();
1081     if( test1.length()!=4 ||
1082         q[0]!=1 || q[1]!=2 || q[2]!=3 || q[3]!=4 ||
1083         r[0]!=1 || r[1]!=2 || r[2]!=3 || r[3]!=4
1084     ) {
1085         errln("UnicodeString::getBuffer(const) does not return a usable buffer pointer");
1086     }
1087
1088     // test releaseBuffer() with a NUL-terminated buffer
1089     test1.getBuffer(20)[2]=0;
1090     test1.releaseBuffer(); // implicit -1
1091     if(test1.length()!=2 || test1.charAt(0)!=1 || test1.charAt(1) !=2) {
1092         errln("UnicodeString::releaseBuffer(-1) does not properly set the length of the UnicodeString");
1093     }
1094
1095     // test releaseBuffer() with a non-NUL-terminated buffer
1096     p=test1.getBuffer(256);
1097     for(int32_t i=0; i<test1.getCapacity(); ++i) {
1098         p[i]=(UChar)1;      // fill the buffer with all non-NUL code units
1099     }
1100     test1.releaseBuffer();  // implicit -1
1101     if(test1.length()!=test1.getCapacity() || test1.charAt(1)!=1 || test1.charAt(100)!=1 || test1.charAt(test1.getCapacity()-1)!=1) {
1102         errln("UnicodeString::releaseBuffer(-1 but no NUL) does not properly set the length of the UnicodeString");
1103     }
1104
1105     // test getTerminatedBuffer()
1106     test1=UnicodeString("This is another test.", "");
1107     test2=UnicodeString("This is another test.", "");
1108     q=test1.getTerminatedBuffer();
1109     if(q[test1.length()]!=0 || test1!=test2 || test2.compare(q, -1)!=0) {
1110         errln("getTerminatedBuffer()[length]!=0");
1111     }
1112
1113     const UChar u[]={ 5, 6, 7, 8, 0 };
1114     test1.setTo(FALSE, u, 3);
1115     q=test1.getTerminatedBuffer();
1116     if(q==u || q[0]!=5 || q[1]!=6 || q[2]!=7 || q[3]!=0) {
1117         errln("UnicodeString(u[3]).getTerminatedBuffer() returns a bad buffer");
1118     }
1119
1120     test1.setTo(TRUE, u, -1);
1121     q=test1.getTerminatedBuffer();
1122     if(q!=u || test1.length()!=4 || q[3]!=8 || q[4]!=0) {
1123         errln("UnicodeString(u[-1]).getTerminatedBuffer() returns a bad buffer");
1124     }
1125
1126     test1=UNICODE_STRING("la", 2);
1127     test1.append(UNICODE_STRING(" lila", 5).getTerminatedBuffer(), 0, -1);
1128     if(test1!=UNICODE_STRING("la lila", 7)) {
1129         errln("UnicodeString::append(const UChar *, start, length) failed");
1130     }
1131
1132     test1.insert(3, UNICODE_STRING("dudum ", 6), 0, INT32_MAX);
1133     if(test1!=UNICODE_STRING("la dudum lila", 13)) {
1134         errln("UnicodeString::insert(start, const UniStr &, start, length) failed");
1135     }
1136
1137     static const UChar ucs[]={ 0x68, 0x6d, 0x20, 0 };
1138     test1.insert(9, ucs, -1);
1139     if(test1!=UNICODE_STRING("la dudum hm lila", 16)) {
1140         errln("UnicodeString::insert(start, const UChar *, length) failed");
1141     }
1142
1143     test1.replace(9, 2, (UChar)0x2b);
1144     if(test1!=UNICODE_STRING("la dudum + lila", 15)) {
1145         errln("UnicodeString::replace(start, length, UChar) failed");
1146     }
1147
1148     if(test1.hasMetaData() || UnicodeString().hasMetaData()) {
1149         errln("UnicodeString::hasMetaData() returns TRUE");
1150     }
1151
1152     // test getTerminatedBuffer() on a truncated, shared, heap-allocated string
1153     test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1154     test1.truncate(36);  // ensure length()<getCapacity()
1155     test2=test1;  // share the buffer
1156     test1.truncate(5);
1157     if(test1.length()!=5 || test1.getTerminatedBuffer()[5]!=0) {
1158         errln("UnicodeString(shared buffer).truncate() failed");
1159     }
1160     if(test2.length()!=36 || test2[5]!=0x66 || u_strlen(test2.getTerminatedBuffer())!=36) {
1161         errln("UnicodeString(shared buffer).truncate().getTerminatedBuffer() "
1162               "modified another copy of the string!");
1163     }
1164     test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1165     test1.truncate(36);  // ensure length()<getCapacity()
1166     test2=test1;  // share the buffer
1167     test1.remove();
1168     if(test1.length()!=0 || test1.getTerminatedBuffer()[0]!=0) {
1169         errln("UnicodeString(shared buffer).remove() failed");
1170     }
1171     if(test2.length()!=36 || test2[0]!=0x61 || u_strlen(test2.getTerminatedBuffer())!=36) {
1172         errln("UnicodeString(shared buffer).remove().getTerminatedBuffer() "
1173               "modified another copy of the string!");
1174     }
1175
1176     // ticket #9740
1177     test1.setTo(TRUE, ucs, 3);
1178     assertEquals("length of read-only alias", 3, test1.length());
1179     test1.trim();
1180     assertEquals("length of read-only alias after trim()", 2, test1.length());
1181     assertEquals("length of terminated buffer of read-only alias + trim()",
1182                  2, u_strlen(test1.getTerminatedBuffer()));
1183 }
1184
1185 void
1186 UnicodeStringTest::TestStackAllocation()
1187 {
1188     UChar           testString[] ={
1189         0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x72, 0x61, 0x7a, 0x79, 0x20, 0x74, 0x65, 0x73, 0x74, 0x2e, 0 };
1190     UChar           guardWord = 0x4DED;
1191     UnicodeString*  test = 0;
1192
1193     test = new  UnicodeString(testString);
1194     if (*test != "This is a crazy test.")
1195         errln("Test string failed to initialize properly.");
1196     if (guardWord != 0x04DED)
1197         errln("Test string initialization overwrote guard word!");
1198
1199     test->insert(8, "only ");
1200     test->remove(15, 6);
1201     if (*test != "This is only a test.")
1202         errln("Manipulation of test string failed to work right.");
1203     if (guardWord != 0x4DED)
1204         errln("Manipulation of test string overwrote guard word!");
1205
1206     // we have to deinitialize and release the backing store by calling the destructor
1207     // explicitly, since we can't overload operator delete
1208     delete test;
1209
1210     UChar workingBuffer[] = {
1211         0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20,
1212         0x66, 0x6f, 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x6d, 0x65, 0x6e, 0x20, 0x74, 0x6f, 0x20,
1213         0x63, 0x6f, 0x6d, 0x65, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1214         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1215         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1216     UChar guardWord2 = 0x4DED;
1217
1218     test = new UnicodeString(workingBuffer, 35, 100);
1219     if (*test != "Now is the time for all men to come")
1220         errln("Stack-allocated backing store failed to initialize correctly.");
1221     if (guardWord2 != 0x4DED)
1222         errln("Stack-allocated backing store overwrote guard word!");
1223
1224     test->insert(24, "good ");
1225     if (*test != "Now is the time for all good men to come")
1226         errln("insert() on stack-allocated UnicodeString didn't work right");
1227     if (guardWord2 != 0x4DED)
1228         errln("insert() on stack-allocated UnicodeString overwrote guard word!");
1229
1230     if (workingBuffer[24] != 0x67)
1231         errln("insert() on stack-allocated UnicodeString didn't affect backing store");
1232
1233     *test += " to the aid of their country.";
1234     if (*test != "Now is the time for all good men to come to the aid of their country.")
1235         errln("Stack-allocated UnicodeString overflow didn't work");
1236     if (guardWord2 != 0x4DED)
1237         errln("Stack-allocated UnicodeString overflow overwrote guard word!");
1238
1239     *test = "ha!";
1240     if (*test != "ha!")
1241         errln("Assignment to stack-allocated UnicodeString didn't work");
1242     if (workingBuffer[0] != 0x4e)
1243         errln("Change to UnicodeString after overflow are still affecting original buffer");
1244     if (guardWord2 != 0x4DED)
1245         errln("Change to UnicodeString after overflow overwrote guard word!");
1246
1247     // test read-only aliasing with setTo()
1248     workingBuffer[0] = 0x20ac;
1249     workingBuffer[1] = 0x125;
1250     workingBuffer[2] = 0;
1251     test->setTo(TRUE, workingBuffer, 2);
1252     if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x125) {
1253         errln("UnicodeString.setTo(readonly alias) does not alias correctly");
1254     }
1255
1256     UnicodeString *c=(UnicodeString *)test->clone();
1257
1258     workingBuffer[1] = 0x109;
1259     if(test->charAt(1) != 0x109) {
1260         errln("UnicodeString.setTo(readonly alias) made a copy: did not see change in buffer");
1261     }
1262
1263     if(c->length() != 2 || c->charAt(1) != 0x125) {
1264         errln("clone(alias) did not copy the buffer");
1265     }
1266     delete c;
1267
1268     test->setTo(TRUE, workingBuffer, -1);
1269     if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x109) {
1270         errln("UnicodeString.setTo(readonly alias, length -1) does not alias correctly");
1271     }
1272
1273     test->setTo(FALSE, workingBuffer, -1);
1274     if(!test->isBogus()) {
1275         errln("UnicodeString.setTo(unterminated readonly alias, length -1) does not result in isBogus()");
1276     }
1277
1278     delete test;
1279
1280     test=new UnicodeString();
1281     UChar buffer[]={0x0061, 0x0062, 0x20ac, 0x0043, 0x0042, 0x0000};
1282     test->setTo(buffer, 4, 10);
1283     if(test->length() !=4 || test->charAt(0) != 0x0061 || test->charAt(1) != 0x0062 ||
1284         test->charAt(2) != 0x20ac || test->charAt(3) != 0x0043){
1285         errln((UnicodeString)"UnicodeString.setTo(UChar*, length, capacity) does not work correctly\n" + prettify(*test));
1286     }
1287     delete test;
1288
1289
1290     // test the UChar32 constructor
1291     UnicodeString c32Test((UChar32)0x10ff2a);
1292     if( c32Test.length() != U16_LENGTH(0x10ff2a) ||
1293         c32Test.char32At(c32Test.length() - 1) != 0x10ff2a
1294     ) {
1295         errln("The UnicodeString(UChar32) constructor does not work with a 0x10ff2a filler");
1296     }
1297
1298     // test the (new) capacity constructor
1299     UnicodeString capTest(5, (UChar32)0x2a, 5);
1300     if( capTest.length() != 5 * U16_LENGTH(0x2a) ||
1301         capTest.char32At(0) != 0x2a ||
1302         capTest.char32At(4) != 0x2a
1303     ) {
1304         errln("The UnicodeString capacity constructor does not work with an ASCII filler");
1305     }
1306
1307     capTest = UnicodeString(5, (UChar32)0x10ff2a, 5);
1308     if( capTest.length() != 5 * U16_LENGTH(0x10ff2a) ||
1309         capTest.char32At(0) != 0x10ff2a ||
1310         capTest.char32At(4) != 0x10ff2a
1311     ) {
1312         errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1313     }
1314
1315     capTest = UnicodeString(5, (UChar32)0, 0);
1316     if(capTest.length() != 0) {
1317         errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1318     }
1319 }
1320
1321 /**
1322  * Test the unescape() function.
1323  */
1324 void UnicodeStringTest::TestUnescape(void) {
1325     UnicodeString IN("abc\\u4567 \\n\\r \\U00101234xyz\\x1\\x{5289}\\x1b", -1, US_INV);
1326     UnicodeString OUT("abc");
1327     OUT.append((UChar)0x4567);
1328     OUT.append(" ");
1329     OUT.append((UChar)0xA);
1330     OUT.append((UChar)0xD);
1331     OUT.append(" ");
1332     OUT.append((UChar32)0x00101234);
1333     OUT.append("xyz");
1334     OUT.append((UChar32)1).append((UChar32)0x5289).append((UChar)0x1b);
1335     UnicodeString result = IN.unescape();
1336     if (result != OUT) {
1337         errln("FAIL: " + prettify(IN) + ".unescape() -> " +
1338               prettify(result) + ", expected " +
1339               prettify(OUT));
1340     }
1341
1342     // test that an empty string is returned in case of an error
1343     if (!UNICODE_STRING("wrong \\u sequence", 17).unescape().isEmpty()) {
1344         errln("FAIL: unescaping of a string with an illegal escape sequence did not return an empty string");
1345     }
1346 }
1347
1348 /* test code point counting functions --------------------------------------- */
1349
1350 /* reference implementation of UnicodeString::hasMoreChar32Than() */
1351 static int32_t
1352 _refUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1353     int32_t count=s.countChar32(start, length);
1354     return count>number;
1355 }
1356
1357 /* compare the real function against the reference */
1358 void
1359 UnicodeStringTest::_testUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1360     if(s.hasMoreChar32Than(start, length, number)!=_refUnicodeStringHasMoreChar32Than(s, start, length, number)) {
1361         errln("hasMoreChar32Than(%d, %d, %d)=%hd is wrong\n",
1362                 start, length, number, s.hasMoreChar32Than(start, length, number));
1363     }
1364 }
1365
1366 void
1367 UnicodeStringTest::TestCountChar32(void) {
1368     {
1369         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1370
1371         // test countChar32()
1372         // note that this also calls and tests u_countChar32(length>=0)
1373         if(
1374             s.countChar32()!=4 ||
1375             s.countChar32(1)!=4 ||
1376             s.countChar32(2)!=3 ||
1377             s.countChar32(2, 3)!=2 ||
1378             s.countChar32(2, 0)!=0
1379         ) {
1380             errln("UnicodeString::countChar32() failed");
1381         }
1382
1383         // NUL-terminate the string buffer and test u_countChar32(length=-1)
1384         const UChar *buffer=s.getTerminatedBuffer();
1385         if(
1386             u_countChar32(buffer, -1)!=4 ||
1387             u_countChar32(buffer+1, -1)!=4 ||
1388             u_countChar32(buffer+2, -1)!=3 ||
1389             u_countChar32(buffer+3, -1)!=3 ||
1390             u_countChar32(buffer+4, -1)!=2 ||
1391             u_countChar32(buffer+5, -1)!=1 ||
1392             u_countChar32(buffer+6, -1)!=0
1393         ) {
1394             errln("u_countChar32(length=-1) failed");
1395         }
1396
1397         // test u_countChar32() with bad input
1398         if(u_countChar32(NULL, 5)!=0 || u_countChar32(buffer, -2)!=0) {
1399             errln("u_countChar32(bad input) failed (returned non-zero counts)");
1400         }
1401     }
1402
1403     /* test data and variables for hasMoreChar32Than() */
1404     static const UChar str[]={
1405         0x61, 0x62, 0xd800, 0xdc00,
1406         0xd801, 0xdc01, 0x63, 0xd802,
1407         0x64, 0xdc03, 0x65, 0x66,
1408         0xd804, 0xdc04, 0xd805, 0xdc05,
1409         0x67
1410     };
1411     UnicodeString string(str, UPRV_LENGTHOF(str));
1412     int32_t start, length, number;
1413
1414     /* test hasMoreChar32Than() */
1415     for(length=string.length(); length>=0; --length) {
1416         for(start=0; start<=length; ++start) {
1417             for(number=-1; number<=((length-start)+2); ++number) {
1418                 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1419             }
1420         }
1421     }
1422
1423     /* test hasMoreChar32Than() with pinning */
1424     for(start=-1; start<=string.length()+1; ++start) {
1425         for(number=-1; number<=((string.length()-start)+2); ++number) {
1426             _testUnicodeStringHasMoreChar32Than(string, start, 0x7fffffff, number);
1427         }
1428     }
1429
1430     /* test hasMoreChar32Than() with a bogus string */
1431     string.setToBogus();
1432     for(length=-1; length<=1; ++length) {
1433         for(start=-1; start<=length; ++start) {
1434             for(number=-1; number<=((length-start)+2); ++number) {
1435                 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1436             }
1437         }
1438     }
1439 }
1440
1441 void
1442 UnicodeStringTest::TestBogus() {
1443     UnicodeString   test1("This is a test");
1444     UnicodeString   test2("This is a test");
1445     UnicodeString   test3("Me too!");
1446
1447     // test isBogus() and setToBogus()
1448     if (test1.isBogus() || test2.isBogus() || test3.isBogus()) {
1449         errln("A string returned TRUE for isBogus()!");
1450     }
1451
1452     // NULL pointers are treated like empty strings
1453     // use other illegal arguments to make a bogus string
1454     test3.setTo(FALSE, test1.getBuffer(), -2);
1455     if(!test3.isBogus()) {
1456         errln("A bogus string returned FALSE for isBogus()!");
1457     }
1458     if (test1.hashCode() != test2.hashCode() || test1.hashCode() == test3.hashCode()) {
1459         errln("hashCode() failed");
1460     }
1461     if(test3.getBuffer()!=0 || test3.getBuffer(20)!=0 || test3.getTerminatedBuffer()!=0) {
1462         errln("bogus.getBuffer()!=0");
1463     }
1464     if (test1.indexOf(test3) != -1) {
1465         errln("bogus.indexOf() != -1");
1466     }
1467     if (test1.lastIndexOf(test3) != -1) {
1468         errln("bogus.lastIndexOf() != -1");
1469     }
1470     if (test1.caseCompare(test3, U_FOLD_CASE_DEFAULT) != 1 || test3.caseCompare(test1, U_FOLD_CASE_DEFAULT) != -1) {
1471         errln("caseCompare() doesn't work with bogus strings");
1472     }
1473     if (test1.compareCodePointOrder(test3) != 1 || test3.compareCodePointOrder(test1) != -1) {
1474         errln("compareCodePointOrder() doesn't work with bogus strings");
1475     }
1476
1477     // verify that non-assignment modifications fail and do not revive a bogus string
1478     test3.setToBogus();
1479     test3.append((UChar)0x61);
1480     if(!test3.isBogus() || test3.getBuffer()!=0) {
1481         errln("bogus.append('a') worked but must not");
1482     }
1483
1484     test3.setToBogus();
1485     test3.findAndReplace(UnicodeString((UChar)0x61), test2);
1486     if(!test3.isBogus() || test3.getBuffer()!=0) {
1487         errln("bogus.findAndReplace() worked but must not");
1488     }
1489
1490     test3.setToBogus();
1491     test3.trim();
1492     if(!test3.isBogus() || test3.getBuffer()!=0) {
1493         errln("bogus.trim() revived bogus but must not");
1494     }
1495
1496     test3.setToBogus();
1497     test3.remove(1);
1498     if(!test3.isBogus() || test3.getBuffer()!=0) {
1499         errln("bogus.remove(1) revived bogus but must not");
1500     }
1501
1502     test3.setToBogus();
1503     if(!test3.setCharAt(0, 0x62).isBogus() || !test3.isEmpty()) {
1504         errln("bogus.setCharAt(0, 'b') worked but must not");
1505     }
1506
1507     test3.setToBogus();
1508     if(test3.truncate(1) || !test3.isBogus() || !test3.isEmpty()) {
1509         errln("bogus.truncate(1) revived bogus but must not");
1510     }
1511
1512     // verify that assignments revive a bogus string
1513     test3.setToBogus();
1514     if(!test3.isBogus() || (test3=test1).isBogus() || test3!=test1) {
1515         errln("bogus.operator=() failed");
1516     }
1517
1518     test3.setToBogus();
1519     if(!test3.isBogus() || test3.fastCopyFrom(test1).isBogus() || test3!=test1) {
1520         errln("bogus.fastCopyFrom() failed");
1521     }
1522
1523     test3.setToBogus();
1524     if(!test3.isBogus() || test3.setTo(test1).isBogus() || test3!=test1) {
1525         errln("bogus.setTo(UniStr) failed");
1526     }
1527
1528     test3.setToBogus();
1529     if(!test3.isBogus() || test3.setTo(test1, 0).isBogus() || test3!=test1) {
1530         errln("bogus.setTo(UniStr, 0) failed");
1531     }
1532
1533     test3.setToBogus();
1534     if(!test3.isBogus() || test3.setTo(test1, 0, 0x7fffffff).isBogus() || test3!=test1) {
1535         errln("bogus.setTo(UniStr, 0, len) failed");
1536     }
1537
1538     test3.setToBogus();
1539     if(!test3.isBogus() || test3.setTo(test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
1540         errln("bogus.setTo(const UChar *, len) failed");
1541     }
1542
1543     test3.setToBogus();
1544     if(!test3.isBogus() || test3.setTo((UChar)0x2028).isBogus() || test3!=UnicodeString((UChar)0x2028)) {
1545         errln("bogus.setTo(UChar) failed");
1546     }
1547
1548     test3.setToBogus();
1549     if(!test3.isBogus() || test3.setTo((UChar32)0x1d157).isBogus() || test3!=UnicodeString((UChar32)0x1d157)) {
1550         errln("bogus.setTo(UChar32) failed");
1551     }
1552
1553     test3.setToBogus();
1554     if(!test3.isBogus() || test3.setTo(FALSE, test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
1555         errln("bogus.setTo(readonly alias) failed");
1556     }
1557
1558     // writable alias to another string's buffer: very bad idea, just convenient for this test
1559     test3.setToBogus();
1560     if(!test3.isBogus() ||
1561             test3.setTo(const_cast<UChar *>(test1.getBuffer()),
1562                         test1.length(), test1.getCapacity()).isBogus() ||
1563             test3!=test1) {
1564         errln("bogus.setTo(writable alias) failed");
1565     }
1566
1567     // verify simple, documented ways to turn a bogus string into an empty one
1568     test3.setToBogus();
1569     if(!test3.isBogus() || (test3=UnicodeString()).isBogus() || !test3.isEmpty()) {
1570         errln("bogus.operator=(UnicodeString()) failed");
1571     }
1572
1573     test3.setToBogus();
1574     if(!test3.isBogus() || test3.setTo(UnicodeString()).isBogus() || !test3.isEmpty()) {
1575         errln("bogus.setTo(UnicodeString()) failed");
1576     }
1577
1578     test3.setToBogus();
1579     if(test3.remove().isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
1580         errln("bogus.remove() failed");
1581     }
1582
1583     test3.setToBogus();
1584     if(test3.remove(0, INT32_MAX).isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
1585         errln("bogus.remove(0, INT32_MAX) failed");
1586     }
1587
1588     test3.setToBogus();
1589     if(test3.truncate(0) || test3.isBogus() || !test3.isEmpty()) {
1590         errln("bogus.truncate(0) failed");
1591     }
1592
1593     test3.setToBogus();
1594     if(!test3.isBogus() || test3.setTo((UChar32)-1).isBogus() || !test3.isEmpty()) {
1595         errln("bogus.setTo((UChar32)-1) failed");
1596     }
1597
1598     static const UChar nul=0;
1599
1600     test3.setToBogus();
1601     if(!test3.isBogus() || test3.setTo(&nul, 0).isBogus() || !test3.isEmpty()) {
1602         errln("bogus.setTo(&nul, 0) failed");
1603     }
1604
1605     test3.setToBogus();
1606     if(!test3.isBogus() || test3.getBuffer()!=0) {
1607         errln("setToBogus() failed to make a string bogus");
1608     }
1609
1610     test3.setToBogus();
1611     if(test1.isBogus() || !(test1=test3).isBogus()) {
1612         errln("normal=bogus failed to make the left string bogus");
1613     }
1614
1615     // test that NULL primitive input string values are treated like
1616     // empty strings, not errors (bogus)
1617     test2.setTo((UChar32)0x10005);
1618     if(test2.insert(1, nullptr, 1).length()!=2) {
1619         errln("UniStr.insert(...nullptr...) should not modify the string but does");
1620     }
1621
1622     UErrorCode errorCode=U_ZERO_ERROR;
1623     UnicodeString
1624         test4((const UChar *)NULL),
1625         test5(TRUE, (const UChar *)NULL, 1),
1626         test6((UChar *)NULL, 5, 5),
1627         test7((const char *)NULL, 3, NULL, errorCode);
1628     if(test4.isBogus() || test5.isBogus() || test6.isBogus() || test7.isBogus()) {
1629         errln("a constructor set to bogus for a NULL input string, should be empty");
1630     }
1631
1632     test4.setTo(NULL, 3);
1633     test5.setTo(TRUE, (const UChar *)NULL, 1);
1634     test6.setTo((UChar *)NULL, 5, 5);
1635     if(test4.isBogus() || test5.isBogus() || test6.isBogus()) {
1636         errln("a setTo() set to bogus for a NULL input string, should be empty");
1637     }
1638
1639     // test that bogus==bogus<any
1640     if(test1!=test3 || test1.compare(test3)!=0) {
1641         errln("bogus==bogus failed");
1642     }
1643
1644     test2.remove();
1645     if(test1>=test2 || !(test2>test1) || test1.compare(test2)>=0 || !(test2.compare(test1)>0)) {
1646         errln("bogus<empty failed");
1647     }
1648 }
1649
1650 // StringEnumeration ------------------------------------------------------- ***
1651 // most of StringEnumeration is tested elsewhere
1652 // this test improves code coverage
1653
// Strings handed out by TestEnumeration, in order:
// three one-character strings followed by two long ones.
static const char *const testEnumStrings[]={
    "a", "b", "c",
    "this is a long string which helps us test some buffer limits",
    "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"
};
1662
1663 class TestEnumeration : public StringEnumeration {
1664 public:
1665     TestEnumeration() : i(0) {}
1666
1667     virtual int32_t count(UErrorCode& /*status*/) const {
1668         return UPRV_LENGTHOF(testEnumStrings);
1669     }
1670
1671     virtual const UnicodeString *snext(UErrorCode &status) {
1672         if(U_SUCCESS(status) && i<UPRV_LENGTHOF(testEnumStrings)) {
1673             unistr=UnicodeString(testEnumStrings[i++], "");
1674             return &unistr;
1675         }
1676
1677         return NULL;
1678     }
1679
1680     virtual void reset(UErrorCode& /*status*/) {
1681         i=0;
1682     }
1683
1684     static inline UClassID getStaticClassID() {
1685         return (UClassID)&fgClassID;
1686     }
1687     virtual UClassID getDynamicClassID() const {
1688         return getStaticClassID();
1689     }
1690
1691 private:
1692     static const char fgClassID;
1693
1694     int32_t i;
1695 };
1696
1697 const char TestEnumeration::fgClassID=0;
1698
1699 void
1700 UnicodeStringTest::TestStringEnumeration() {
1701     UnicodeString s;
1702     TestEnumeration ten;
1703     int32_t i, length;
1704     UErrorCode status;
1705
1706     const UChar *pu;
1707     const char *pc;
1708
1709     // test the next() default implementation and ensureCharsCapacity()
1710     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1711         status=U_ZERO_ERROR;
1712         pc=ten.next(&length, status);
1713         s=UnicodeString(testEnumStrings[i], "");
1714         if(U_FAILURE(status) || pc==NULL || length!=s.length() || UnicodeString(pc, length, "")!=s) {
1715             errln("StringEnumeration.next(%d) failed", i);
1716         }
1717     }
1718     status=U_ZERO_ERROR;
1719     if(ten.next(&length, status)!=NULL) {
1720         errln("StringEnumeration.next(done)!=NULL");
1721     }
1722
1723     // test the unext() default implementation
1724     ten.reset(status);
1725     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1726         status=U_ZERO_ERROR;
1727         pu=ten.unext(&length, status);
1728         s=UnicodeString(testEnumStrings[i], "");
1729         if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
1730             errln("StringEnumeration.unext(%d) failed", i);
1731         }
1732     }
1733     status=U_ZERO_ERROR;
1734     if(ten.unext(&length, status)!=NULL) {
1735         errln("StringEnumeration.unext(done)!=NULL");
1736     }
1737
1738     // test that the default clone() implementation works, and returns NULL
1739     if(ten.clone()!=NULL) {
1740         errln("StringEnumeration.clone()!=NULL");
1741     }
1742
1743     // test that uenum_openFromStringEnumeration() works
1744     // Need a heap allocated string enumeration because it is adopted by the UEnumeration.
1745     StringEnumeration *newTen = new TestEnumeration;
1746     status=U_ZERO_ERROR;
1747     UEnumeration *uten = uenum_openFromStringEnumeration(newTen, &status);
1748     if (uten==NULL || U_FAILURE(status)) {
1749         errln("fail at file %s, line %d, UErrorCode is %s\n", __FILE__, __LINE__, u_errorName(status));
1750         return;
1751     }
1752
1753     // test  uenum_next()
1754     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1755         status=U_ZERO_ERROR;
1756         pc=uenum_next(uten, &length, &status);
1757         if(U_FAILURE(status) || pc==NULL || strcmp(pc, testEnumStrings[i]) != 0) {
1758             errln("File %s, line %d, StringEnumeration.next(%d) failed", __FILE__, __LINE__, i);
1759         }
1760     }
1761     status=U_ZERO_ERROR;
1762     if(uenum_next(uten, &length, &status)!=NULL) {
1763         errln("File %s, line %d, uenum_next(done)!=NULL");
1764     }
1765
1766     // test the uenum_unext()
1767     uenum_reset(uten, &status);
1768     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1769         status=U_ZERO_ERROR;
1770         pu=uenum_unext(uten, &length, &status);
1771         s=UnicodeString(testEnumStrings[i], "");
1772         if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
1773             errln("File %s, Line %d, uenum_unext(%d) failed", __FILE__, __LINE__, i);
1774         }
1775     }
1776     status=U_ZERO_ERROR;
1777     if(uenum_unext(uten, &length, &status)!=NULL) {
1778         errln("File %s, Line %d, uenum_unext(done)!=NULL" __FILE__, __LINE__);
1779     }
1780
1781     uenum_close(uten);
1782 }
1783
1784 /*
1785  * Namespace test, to make sure that macros like UNICODE_STRING include the
1786  * namespace qualifier.
1787  *
1788  * Define a (bogus) UnicodeString class in another namespace and check for ambiguity.
1789  */
1790 namespace bogus {
1791     class UnicodeString {
1792     public:
1793         enum EInvariant { kInvariant };
1794         UnicodeString() : i(1) {}
1795         UnicodeString(UBool /*isTerminated*/, const UChar * /*text*/, int32_t textLength) : i(textLength) {(void)i;}
1796         UnicodeString(const char * /*src*/, int32_t length, enum EInvariant /*inv*/
1797 ) : i(length) {}
1798     private:
1799         int32_t i;
1800     };
1801 }
1802
1803 void
1804 UnicodeStringTest::TestNameSpace() {
1805     // Provoke name collision unless the UnicodeString macros properly
1806     // qualify the icu::UnicodeString class.
1807     using namespace bogus;
1808
1809     // Use all UnicodeString macros from unistr.h.
1810     icu::UnicodeString s1=icu::UnicodeString("abc", 3, US_INV);
1811     icu::UnicodeString s2=UNICODE_STRING("def", 3);
1812     icu::UnicodeString s3=UNICODE_STRING_SIMPLE("ghi");
1813
1814     // Make sure the compiler does not optimize away instantiation of s1, s2, s3.
1815     icu::UnicodeString s4=s1+s2+s3;
1816     if(s4.length()!=9) {
1817         errln("Something wrong with UnicodeString::operator+().");
1818     }
1819 }
1820
1821 void
1822 UnicodeStringTest::TestUTF32() {
1823     // Input string length US_STACKBUF_SIZE to cause overflow of the
1824     // initially chosen fStackBuffer due to supplementary characters.
1825     static const UChar32 utf32[] = {
1826         0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a,
1827         0x10000, 0x20000, 0xe0000, 0x10ffff
1828     };
1829     static const UChar expected_utf16[] = {
1830         0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a,
1831         0xd800, 0xdc00, 0xd840, 0xdc00, 0xdb40, 0xdc00, 0xdbff, 0xdfff
1832     };
1833     UnicodeString from32 = UnicodeString::fromUTF32(utf32, UPRV_LENGTHOF(utf32));
1834     UnicodeString expected(FALSE, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1835     if(from32 != expected) {
1836         errln("UnicodeString::fromUTF32() did not create the expected string.");
1837     }
1838
1839     static const UChar utf16[] = {
1840         0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1841     };
1842     static const UChar32 expected_utf32[] = {
1843         0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0x10000, 0x10ffff
1844     };
1845     UChar32 result32[16];
1846     UErrorCode errorCode = U_ZERO_ERROR;
1847     int32_t length32 =
1848         UnicodeString(FALSE, utf16, UPRV_LENGTHOF(utf16)).
1849         toUTF32(result32, UPRV_LENGTHOF(result32), errorCode);
1850     if( length32 != UPRV_LENGTHOF(expected_utf32) ||
1851         0 != uprv_memcmp(result32, expected_utf32, length32*4) ||
1852         result32[length32] != 0
1853     ) {
1854         errln("UnicodeString::toUTF32() did not create the expected string.");
1855     }
1856 }
1857
1858 class TestCheckedArrayByteSink : public CheckedArrayByteSink {
1859 public:
1860     TestCheckedArrayByteSink(char* outbuf, int32_t capacity)
1861             : CheckedArrayByteSink(outbuf, capacity), calledFlush(FALSE) {}
1862     virtual void Flush() { calledFlush = TRUE; }
1863     UBool calledFlush;
1864 };
1865
1866 void
1867 UnicodeStringTest::TestUTF8() {
1868     static const uint8_t utf8[] = {
1869         // Code points:
1870         // 0x41, 0xd900,
1871         // 0x61, 0xdc00,
1872         // 0x110000, 0x5a,
1873         // 0x50000, 0x7a,
1874         // 0x10000, 0x20000,
1875         // 0xe0000, 0x10ffff
1876         0x41, 0xed, 0xa4, 0x80,
1877         0x61, 0xed, 0xb0, 0x80,
1878         0xf4, 0x90, 0x80, 0x80, 0x5a,
1879         0xf1, 0x90, 0x80, 0x80, 0x7a,
1880         0xf0, 0x90, 0x80, 0x80, 0xf0, 0xa0, 0x80, 0x80,
1881         0xf3, 0xa0, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1882     };
1883     static const UChar expected_utf16[] = {
1884         0x41, 0xfffd, 0xfffd, 0xfffd,
1885         0x61, 0xfffd, 0xfffd, 0xfffd,
1886         0xfffd,  0xfffd, 0xfffd, 0xfffd,0x5a,
1887         0xd900, 0xdc00, 0x7a,
1888         0xd800, 0xdc00, 0xd840, 0xdc00,
1889         0xdb40, 0xdc00, 0xdbff, 0xdfff
1890     };
1891     UnicodeString from8 = UnicodeString::fromUTF8(StringPiece((const char *)utf8, (int32_t)sizeof(utf8)));
1892     UnicodeString expected(FALSE, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1893
1894     if(from8 != expected) {
1895         errln("UnicodeString::fromUTF8(StringPiece) did not create the expected string.");
1896     }
1897     std::string utf8_string((const char *)utf8, sizeof(utf8));
1898     UnicodeString from8b = UnicodeString::fromUTF8(utf8_string);
1899     if(from8b != expected) {
1900         errln("UnicodeString::fromUTF8(std::string) did not create the expected string.");
1901     }
1902
1903     static const UChar utf16[] = {
1904         0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1905     };
1906     static const uint8_t expected_utf8[] = {
1907         0x41, 0xef, 0xbf, 0xbd, 0x61, 0xef, 0xbf, 0xbd, 0x5a, 0xf1, 0x90, 0x80, 0x80, 0x7a,
1908         0xf0, 0x90, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1909     };
1910     UnicodeString us(FALSE, utf16, UPRV_LENGTHOF(utf16));
1911
1912     char buffer[64];
1913     TestCheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer));
1914     us.toUTF8(sink);
1915     if( sink.NumberOfBytesWritten() != (int32_t)sizeof(expected_utf8) ||
1916         0 != uprv_memcmp(buffer, expected_utf8, sizeof(expected_utf8))
1917     ) {
1918         errln("UnicodeString::toUTF8() did not create the expected string.");
1919     }
1920     if(!sink.calledFlush) {
1921         errln("UnicodeString::toUTF8(sink) did not sink.Flush().");
1922     }
1923     // Initial contents for testing that toUTF8String() appends.
1924     std::string result8 = "-->";
1925     std::string expected8 = "-->" + std::string((const char *)expected_utf8, sizeof(expected_utf8));
1926     // Use the return value just for testing.
1927     std::string &result8r = us.toUTF8String(result8);
1928     if(result8r != expected8 || &result8r != &result8) {
1929         errln("UnicodeString::toUTF8String() did not create the expected string.");
1930     }
1931 }
1932
1933 // Test if this compiler supports Return Value Optimization of unnamed temporary objects.
1934 static UnicodeString wrapUChars(const UChar *uchars) {
1935     return UnicodeString(TRUE, uchars, -1);
1936 }
1937
1938 void
1939 UnicodeStringTest::TestReadOnlyAlias() {
1940     UChar uchars[]={ 0x61, 0x62, 0 };
1941     UnicodeString alias(TRUE, uchars, 2);
1942     if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
1943         errln("UnicodeString read-only-aliasing constructor does not behave as expected.");
1944         return;
1945     }
1946     alias.truncate(1);
1947     if(alias.length()!=1 || alias.getBuffer()!=uchars) {
1948         errln("UnicodeString(read-only-alias).truncate() did not preserve aliasing as expected.");
1949     }
1950     if(alias.getTerminatedBuffer()==uchars) {
1951         errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1952               "did not allocate and copy as expected.");
1953     }
1954     if(uchars[1]!=0x62) {
1955         errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1956               "modified the original buffer.");
1957     }
1958     if(1!=u_strlen(alias.getTerminatedBuffer())) {
1959         errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1960               "does not return a buffer terminated at the proper length.");
1961     }
1962
1963     alias.setTo(TRUE, uchars, 2);
1964     if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
1965         errln("UnicodeString read-only-aliasing setTo() does not behave as expected.");
1966         return;
1967     }
1968     alias.remove();
1969     if(alias.length()!=0) {
1970         errln("UnicodeString(read-only-alias).remove() did not work.");
1971     }
1972     if(alias.getTerminatedBuffer()==uchars) {
1973         errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
1974               "did not un-alias as expected.");
1975     }
1976     if(uchars[0]!=0x61) {
1977         errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
1978               "modified the original buffer.");
1979     }
1980     if(0!=u_strlen(alias.getTerminatedBuffer())) {
1981         errln("UnicodeString.setTo(read-only-alias).remove().getTerminatedBuffer() "
1982               "does not return a buffer terminated at length 0.");
1983     }
1984
1985     UnicodeString longString=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789");
1986     alias.setTo(FALSE, longString.getBuffer(), longString.length());
1987     alias.remove(0, 10);
1988     if(longString.compare(10, INT32_MAX, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+10) {
1989         errln("UnicodeString.setTo(read-only-alias).remove(0, 10) did not preserve aliasing as expected.");
1990     }
1991     alias.setTo(FALSE, longString.getBuffer(), longString.length());
1992     alias.remove(27, 99);
1993     if(longString.compare(0, 27, alias)!=0 || alias.getBuffer()!=longString.getBuffer()) {
1994         errln("UnicodeString.setTo(read-only-alias).remove(27, 99) did not preserve aliasing as expected.");
1995     }
1996     alias.setTo(FALSE, longString.getBuffer(), longString.length());
1997     alias.retainBetween(6, 30);
1998     if(longString.compare(6, 24, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+6) {
1999         errln("UnicodeString.setTo(read-only-alias).retainBetween(6, 30) did not preserve aliasing as expected.");
2000     }
2001
2002     UChar abc[]={ 0x61, 0x62, 0x63, 0 };
2003     UBool hasRVO= wrapUChars(abc).getBuffer()==abc;
2004
2005     UnicodeString temp;
2006     temp.fastCopyFrom(longString.tempSubString());
2007     if(temp!=longString || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
2008         errln("UnicodeString.tempSubString() failed");
2009     }
2010     temp.fastCopyFrom(longString.tempSubString(-3, 5));
2011     if(longString.compare(0, 5, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
2012         errln("UnicodeString.tempSubString(-3, 5) failed");
2013     }
2014     temp.fastCopyFrom(longString.tempSubString(17));
2015     if(longString.compare(17, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+17)) {
2016         errln("UnicodeString.tempSubString(17) failed");
2017     }
2018     temp.fastCopyFrom(longString.tempSubString(99));
2019     if(!temp.isEmpty()) {
2020         errln("UnicodeString.tempSubString(99) failed");
2021     }
2022     temp.fastCopyFrom(longString.tempSubStringBetween(6));
2023     if(longString.compare(6, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+6)) {
2024         errln("UnicodeString.tempSubStringBetween(6) failed");
2025     }
2026     temp.fastCopyFrom(longString.tempSubStringBetween(8, 18));
2027     if(longString.compare(8, 10, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+8)) {
2028         errln("UnicodeString.tempSubStringBetween(8, 18) failed");
2029     }
2030     UnicodeString bogusString;
2031     bogusString.setToBogus();
2032     temp.fastCopyFrom(bogusString.tempSubStringBetween(8, 18));
2033     if(!temp.isBogus()) {
2034         errln("UnicodeString.setToBogus().tempSubStringBetween(8, 18) failed");
2035     }
2036 }
2037
2038 void
2039 UnicodeStringTest::doTestAppendable(UnicodeString &dest, Appendable &app) {
2040     static const UChar cde[3]={ 0x63, 0x64, 0x65 };
2041     static const UChar fg[3]={ 0x66, 0x67, 0 };
2042     if(!app.reserveAppendCapacity(12)) {
2043         errln("Appendable.reserve(12) failed");
2044     }
2045     app.appendCodeUnit(0x61);
2046     app.appendCodePoint(0x62);
2047     app.appendCodePoint(0x50000);
2048     app.appendString(cde, 3);
2049     app.appendString(fg, -1);
2050     UChar scratch[3];
2051     int32_t capacity=-1;
2052     UChar *buffer=app.getAppendBuffer(3, 3, scratch, 3, &capacity);
2053     if(capacity<3) {
2054         errln("Appendable.getAppendBuffer(min=3) returned capacity=%d<3", (int)capacity);
2055         return;
2056     }
2057     static const UChar hij[3]={ 0x68, 0x69, 0x6a };
2058     u_memcpy(buffer, hij, 3);
2059     app.appendString(buffer, 3);
2060     if(dest!=UNICODE_STRING_SIMPLE("ab\\U00050000cdefghij").unescape()) {
2061         errln("Appendable.append(...) failed");
2062     }
2063     buffer=app.getAppendBuffer(0, 3, scratch, 3, &capacity);
2064     if(buffer!=NULL || capacity!=0) {
2065         errln("Appendable.getAppendBuffer(min=0) failed");
2066     }
2067     capacity=1;
2068     buffer=app.getAppendBuffer(3, 3, scratch, 2, &capacity);
2069     if(buffer!=NULL || capacity!=0) {
2070         errln("Appendable.getAppendBuffer(scratch<min) failed");
2071     }
2072 }
2073
2074 class SimpleAppendable : public Appendable {
2075 public:
2076     explicit SimpleAppendable(UnicodeString &dest) : str(dest) {}
2077     virtual UBool appendCodeUnit(UChar c) { str.append(c); return TRUE; }
2078     SimpleAppendable &reset() { str.remove(); return *this; }
2079 private:
2080     UnicodeString &str;
2081 };
2082
2083 void
2084 UnicodeStringTest::TestAppendable() {
2085     UnicodeString dest;
2086     SimpleAppendable app(dest);
2087     doTestAppendable(dest, app);
2088 }
2089
2090 void
2091 UnicodeStringTest::TestUnicodeStringImplementsAppendable() {
2092     UnicodeString dest;
2093     UnicodeStringAppendable app(dest);
2094     doTestAppendable(dest, app);
2095 }
2096
2097 void
2098 UnicodeStringTest::TestSizeofUnicodeString() {
2099     // See the comments in unistr.h near the declaration of UnicodeString's fields.
2100     // See the API comments for UNISTR_OBJECT_SIZE.
2101     size_t sizeofUniStr=sizeof(UnicodeString);
2102     size_t expected=UNISTR_OBJECT_SIZE;
2103     if(expected!=sizeofUniStr) {
2104         // Possible cause: UNISTR_OBJECT_SIZE may not be a multiple of sizeof(pointer),
2105         // of the compiler might add more internal padding than expected.
2106         errln("sizeof(UnicodeString)=%d, expected UNISTR_OBJECT_SIZE=%d",
2107               (int)sizeofUniStr, (int)expected);
2108     }
2109     if(sizeofUniStr<32) {
2110         errln("sizeof(UnicodeString)=%d < 32, probably too small", (int)sizeofUniStr);
2111     }
2112     // We assume that the entire UnicodeString object,
2113     // minus the vtable pointer and 2 bytes for flags and short length,
2114     // is available for internal storage of UChars.
2115     int32_t expectedStackBufferLength=((int32_t)UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR;
2116     UnicodeString s;
2117     const UChar *emptyBuffer=s.getBuffer();
2118     for(int32_t i=0; i<expectedStackBufferLength; ++i) {
2119         s.append((UChar)0x2e);
2120     }
2121     const UChar *fullBuffer=s.getBuffer();
2122     if(fullBuffer!=emptyBuffer) {
2123         errln("unexpected reallocation when filling with assumed stack buffer size of %d",
2124               expectedStackBufferLength);
2125     }
2126     const UChar *terminatedBuffer=s.getTerminatedBuffer();
2127     if(terminatedBuffer==emptyBuffer) {
2128         errln("unexpected keeping stack buffer when overfilling assumed stack buffer size of %d",
2129               expectedStackBufferLength);
2130     }
2131 }
2132
2133 void
2134 UnicodeStringTest::TestMoveSwap() {
2135     static const UChar abc[3] = { 0x61, 0x62, 0x63 };  // "abc"
2136     UnicodeString s1(FALSE, abc, UPRV_LENGTHOF(abc));  // read-only alias
2137     UnicodeString s2(100, 0x7a, 100);  // 100 * 'z' should be on the heap
2138     UnicodeString s3("defg", 4, US_INV);  // in stack buffer
2139     const UChar *p = s2.getBuffer();
2140     s1.swap(s2);
2141     if(s1.getBuffer() != p || s1.length() != 100 || s2.getBuffer() != abc || s2.length() != 3) {
2142         errln("UnicodeString.swap() did not swap");
2143     }
2144     swap(s2, s3);
2145     if(s2 != UNICODE_STRING_SIMPLE("defg") || s3.getBuffer() != abc || s3.length() != 3) {
2146         errln("swap(UnicodeString) did not swap back");
2147     }
2148     UnicodeString s4;
2149     s4.moveFrom(s1);
2150     if(s4.getBuffer() != p || s4.length() != 100 || !s1.isBogus()) {
2151         errln("UnicodeString.moveFrom(heap) did not move");
2152     }
2153     UnicodeString s5;
2154     s5.moveFrom(s2);
2155     if(s5 != UNICODE_STRING_SIMPLE("defg")) {
2156         errln("UnicodeString.moveFrom(stack) did not move");
2157     }
2158     UnicodeString s6;
2159     s6.moveFrom(s3);
2160     if(s6.getBuffer() != abc || s6.length() != 3) {
2161         errln("UnicodeString.moveFrom(alias) did not move");
2162     }
2163     infoln("TestMoveSwap() with rvalue references");
2164     s1 = static_cast<UnicodeString &&>(s6);
2165     if(s1.getBuffer() != abc || s1.length() != 3) {
2166         errln("UnicodeString move assignment operator did not move");
2167     }
2168     UnicodeString s7(static_cast<UnicodeString &&>(s4));
2169     if(s7.getBuffer() != p || s7.length() != 100 || !s4.isBogus()) {
2170         errln("UnicodeString move constructor did not move");
2171     }
2172
2173     // Move self assignment leaves the object valid but in an undefined state.
2174     // Do it to make sure there is no crash,
2175     // but do not check for any particular resulting value.
2176     s1.moveFrom(s1);
2177     s2.moveFrom(s2);
2178     s3.moveFrom(s3);
2179     s4.moveFrom(s4);
2180     s5.moveFrom(s5);
2181     s6.moveFrom(s6);
2182     s7.moveFrom(s7);
2183     // Simple copy assignment must work.
2184     UnicodeString simple = UNICODE_STRING_SIMPLE("simple");
2185     s1 = s6 = s4 = s7 = simple;
2186     if(s1 != simple || s4 != simple || s6 != simple || s7 != simple) {
2187         errln("UnicodeString copy after self-move did not work");
2188     }
2189 }
2190
2191 void
2192 UnicodeStringTest::TestUInt16Pointers() {
2193     static const uint16_t carr[] = { 0x61, 0x62, 0x63, 0 };
2194     uint16_t arr[4];
2195
2196     UnicodeString expected(u"abc");
2197     assertEquals("abc from pointer", expected, UnicodeString(carr));
2198     assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
2199     assertEquals("abc from read-only-alias pointer", expected, UnicodeString(TRUE, carr, 3));
2200
2201     UnicodeString alias(arr, 0, 4);
2202     alias.append(u'a').append(u'b').append(u'c');
2203     assertEquals("abc from writable alias", expected, alias);
2204     assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
2205
2206     UErrorCode errorCode = U_ZERO_ERROR;
2207     int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
2208     TEST_ASSERT_STATUS(errorCode);
2209     assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));
2210 }
2211
2212 void
2213 UnicodeStringTest::TestWCharPointers() {
2214 #if U_SIZEOF_WCHAR_T==2
2215     static const wchar_t carr[] = { 0x61, 0x62, 0x63, 0 };
2216     wchar_t arr[4];
2217
2218     UnicodeString expected(u"abc");
2219     assertEquals("abc from pointer", expected, UnicodeString(carr));
2220     assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
2221     assertEquals("abc from read-only-alias pointer", expected, UnicodeString(TRUE, carr, 3));
2222
2223     UnicodeString alias(arr, 0, 4);
2224     alias.append(u'a').append(u'b').append(u'c');
2225     assertEquals("abc from writable alias", expected, alias);
2226     assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
2227
2228     UErrorCode errorCode = U_ZERO_ERROR;
2229     int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
2230     TEST_ASSERT_STATUS(errorCode);
2231     assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));
2232 #endif
2233 }
2234
2235 void
2236 UnicodeStringTest::TestNullPointers() {
2237     assertTrue("empty from nullptr", UnicodeString(nullptr).isEmpty());
2238     assertTrue("empty from nullptr+length", UnicodeString(nullptr, 2).isEmpty());
2239     assertTrue("empty from read-only-alias nullptr", UnicodeString(TRUE, nullptr, 3).isEmpty());
2240
2241     UnicodeString alias(nullptr, 4, 4);  // empty, no alias
2242     assertTrue("empty from writable alias", alias.isEmpty());
2243     alias.append(u'a').append(u'b').append(u'c');
2244     UnicodeString expected(u"abc");
2245     assertEquals("abc from writable alias", expected, alias);
2246
2247     UErrorCode errorCode = U_ZERO_ERROR;
2248     UnicodeString(u"def").extract(nullptr, 0, errorCode);
2249     assertEquals("buffer overflow extracting to nullptr", U_BUFFER_OVERFLOW_ERROR, errorCode);
2250 }