icuSources/test/intltest/ustrtest.cpp

   1 // © 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /********************************************************************
   4  * COPYRIGHT:
   5  * Copyright (c) 1997-2016, International Business Machines Corporation and
   6  * others. All Rights Reserved.
   7  ********************************************************************/
   8
   9 #include <utility>
  10
  11 #include "ustrtest.h"
  12 #include "unicode/appendable.h"
  13 #include "unicode/std_string.h"
  14 #include "unicode/unistr.h"
  15 #include "unicode/uchar.h"
  16 #include "unicode/ustring.h"
  17 #include "unicode/locid.h"
  18 #include "unicode/strenum.h"
  19 #include "unicode/ucnv.h"
  20 #include "unicode/uenum.h"
  21 #include "unicode/utf16.h"
  22 #include "cmemory.h"
  23 #include "charstr.h"
  24
  25 #if 0
  26 #include "unicode/ustream.h"
  27
  28 #include <iostream>
  29 using namespace std;
  30
  31 #endif
  32
  33 UnicodeStringTest::~UnicodeStringTest() = default;  // out-of-line definition with no statements of its own
  34
  35 extern IntlTest *createStringCaseTest();
     // Declaration only: takes no arguments and returns an IntlTest*; the definition is
     // not in the visible part of this file.
     // NOTE(review): presumably the factory that TESTCASE_AUTO_CREATE_CLASS(StringCaseTest)
     // expands to a call of -- confirm against the macro definition.
  36
  37 void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *par)
  38 {
         // Dispatcher for this test suite: logs a banner when `exec` is set, then lists
         // one TESTCASE_AUTO* entry per test method, in the order shown below.
         // NOTE(review): the TESTCASE_AUTO* macros are defined outside this file;
         // presumably they match `index` against the entry position, set `name`, and run
         // the test when `exec` is true (which would make the order significant) --
         // confirm against the IntlTest headers.
         // `par` is not referenced directly in this body; it may be used inside the macros.
  39     if (exec) logln("TestSuite UnicodeStringTest: ");
  40     TESTCASE_AUTO_BEGIN;
  41     TESTCASE_AUTO_CREATE_CLASS(StringCaseTest);
  42     TESTCASE_AUTO(TestBasicManipulation);
  43     TESTCASE_AUTO(TestCompare);
  44     TESTCASE_AUTO(TestExtract);
  45     TESTCASE_AUTO(TestRemoveReplace);
  46     TESTCASE_AUTO(TestSearching);
  47     TESTCASE_AUTO(TestSpacePadding);
  48     TESTCASE_AUTO(TestPrefixAndSuffix);
  49     TESTCASE_AUTO(TestFindAndReplace);
  50     TESTCASE_AUTO(TestBogus);
  51     TESTCASE_AUTO(TestReverse);
  52     TESTCASE_AUTO(TestMiscellaneous);
  53     TESTCASE_AUTO(TestStackAllocation);
  54     TESTCASE_AUTO(TestUnescape);
  55     TESTCASE_AUTO(TestCountChar32);
  56     TESTCASE_AUTO(TestStringEnumeration);
  57     TESTCASE_AUTO(TestNameSpace);
  58     TESTCASE_AUTO(TestUTF32);
  59     TESTCASE_AUTO(TestUTF8);
  60     TESTCASE_AUTO(TestReadOnlyAlias);
  61     TESTCASE_AUTO(TestAppendable);
  62     TESTCASE_AUTO(TestUnicodeStringImplementsAppendable);
  63     TESTCASE_AUTO(TestSizeofUnicodeString);
  64     TESTCASE_AUTO(TestStartsWithAndEndsWithNulTerminated);
  65     TESTCASE_AUTO(TestMoveSwap);
  66     TESTCASE_AUTO(TestUInt16Pointers);
  67     TESTCASE_AUTO(TestWCharPointers);
  68     TESTCASE_AUTO(TestNullPointers);
  69     TESTCASE_AUTO(TestUnicodeStringInsertAppendToSelf);
  70     TESTCASE_AUTO_END;
  71 }
  72
  73 void
  74 UnicodeStringTest::TestBasicManipulation()
  75 {
         // Exercises basic UnicodeString editing and construction and reports every
         // mismatch through errln():
         //  - clone(), insert(), remove(), replace(), extract(), operator+=, length()
         //  - UChar32 overloads of append(), setTo(), insert(), replace(), operator=
         //  - moveIndex32(), getChar32Start(), getChar32Limit()
         //  - substring constructors / setTo(), operator+
         //  - source pointer + length APIs called with length == -1
         //  - (only when U_CHARSET_IS_UTF8) char* construction and extraction
  76     UnicodeString   test1("Now is the time for all men to come swiftly to the aid of the party.\n");
  77     UnicodeString   expectedValue;
  78     UnicodeString   *c;
  79
         // Clone before editing so the copy can receive the same insert() independently.
  80     c=test1.clone();
  81     test1.insert(24, "good ");
  82     expectedValue = "Now is the time for all good men to come swiftly to the aid of the party.\n";
  83     if (test1 != expectedValue)
  84         errln("insert() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
  85
  86     c->insert(24, "good ");
  87     if(*c != expectedValue) {
  88         errln("clone()->insert() failed:  expected \"" + expectedValue + "\"\n,got \"" + *c + "\"");
  89     }
  90     delete c;
  91
  92     test1.remove(41, 8);
  93     expectedValue = "Now is the time for all good men to come to the aid of the party.\n";
  94     if (test1 != expectedValue)
  95         errln("remove() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
  96
  97     test1.replace(58, 6, "ir country");
  98     expectedValue = "Now is the time for all good men to come to the aid of their country.\n";
  99     if (test1 != expectedValue)
 100         errln("replace() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
 101
 102     UChar     temp[80];
 103     test1.extract(0, 15, temp);
 104
 105     UnicodeString       test2(temp, 15);
 106
 107     expectedValue = "Now is the time";
 108     if (test2 != expectedValue)
 109         errln("extract() failed:  expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
 110
 111     test2 += " for me to go!\n";
 112     expectedValue = "Now is the time for me to go!\n";
 113     if (test2 != expectedValue)
 114         errln("operator+=() failed:  expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
 115
 116     if (test1.length() != 70)
 117         errln(UnicodeString("length() failed: expected 70, got ") + test1.length());
 118     if (test2.length() != 30)
 119         errln(UnicodeString("length() failed: expected 30, got ") + test2.length());
 120
         // U+20402 is expected as the surrogate pair D841 DC02, i.e. two code units.
 121     UnicodeString test3;
 122     test3.append((UChar32)0x20402);
 123     if(test3 != CharsToUnicodeString("\\uD841\\uDC02")){
 124         errln((UnicodeString)"append failed for UChar32, expected \"\\\\ud841\\\\udc02\", got " + prettify(test3));
 125     }
 126     if(test3.length() != 2){
 127         errln(UnicodeString("append or length failed for UChar32, expected 2, got ") + test3.length());
 128     }
 129     test3.append((UChar32)0x0074);
 130     if(test3 != CharsToUnicodeString("\\uD841\\uDC02t")){
 131         errln((UnicodeString)"append failed for UChar32, expected \"\\\\uD841\\\\uDC02t\", got " + prettify(test3));
 132     }
         // Two units for the pair plus one for 't'; the message states the checked value 3.
 133     if(test3.length() != 3){
 134         errln((UnicodeString)"append or length failed for UChar32, expected 3, got " + test3.length());
 135     }
 136
 137     // test some UChar32 overloads
 138     if( test3.setTo((UChar32)0x10330).length() != 2 ||
 139         test3.insert(0, (UChar32)0x20100).length() != 4 ||
 140         test3.replace(2, 2, (UChar32)0xe0061).length() != 4 ||
 141         (test3 = (UChar32)0x14001).length() != 2
 142     ) {
 143         errln((UnicodeString)"simple UChar32 overloads for replace, insert, setTo or = failed");
 144     }
 145
 146     {
 147         // test moveIndex32()
 148         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
 149
 150         if(
 151             s.moveIndex32(2, -1)!=0 ||
 152             s.moveIndex32(2, 1)!=4 ||
 153             s.moveIndex32(2, 2)!=5 ||
 154             s.moveIndex32(5, -2)!=2 ||
 155             s.moveIndex32(0, -1)!=0 ||
 156             s.moveIndex32(6, 1)!=6
 157         ) {
 158             errln("UnicodeString::moveIndex32() failed");
 159         }
 160
 161         if(s.getChar32Start(1)!=0 || s.getChar32Start(2)!=2) {
 162             errln("UnicodeString::getChar32Start() failed");
 163         }
 164
 165         if(s.getChar32Limit(1)!=2 || s.getChar32Limit(2)!=2) {
 166             errln("UnicodeString::getChar32Limit() failed");
 167         }
 168     }
 169
 170     {
 171         // test new 2.2 constructors and setTo function that parallel Java's substring function.
 172         UnicodeString src("Hello folks how are you?");
 173         UnicodeString target1("how are you?");
 174         if (target1 != UnicodeString(src, 12)) {
 175             errln("UnicodeString(const UnicodeString&, int32_t) failed");
 176         }
 177         UnicodeString target2("folks");
 178         if (target2 != UnicodeString(src, 6, 5)) {
 179             errln("UnicodeString(const UnicodeString&, int32_t, int32_t) failed");
 180         }
 181         if (target1 != target2.setTo(src, 12)) {
 182             errln("UnicodeString::setTo(const UnicodeString&, int32_t) failed");
 183         }
 184     }
 185
 186     {
 187         // op+ is new in ICU 2.8
 188         UnicodeString s=UnicodeString("abc", "")+UnicodeString("def", "")+UnicodeString("ghi", "");
 189         if(s!=UnicodeString("abcdefghi", "")) {
 190             errln("operator+(UniStr, UniStr) failed");
 191         }
 192     }
 193
 194     {
 195         // tests for Jitterbug 2360
 196         // verify that APIs with source pointer + length accept length == -1
 197         // mostly test only where modified, only few functions did not already do this
 198         if(UnicodeString("abc", -1, "")!=UnicodeString("abc", "")) {
 199             errln("UnicodeString(codepageData, dataLength, codepage) does not work with dataLength==-1");
 200         }
 201
             // buffer[5] is the only zero element, so the NUL-terminated prefix is 5 units long.
 202         UChar buffer[10]={ 0x61, 0x62, 0x20ac, 0xd900, 0xdc05, 0,   0x62, 0xffff, 0xdbff, 0xdfff };
 203         UnicodeString s, t(buffer, -1, UPRV_LENGTHOF(buffer));
 204
 205         if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=u_strlen(buffer)) {
 206             errln("UnicodeString.setTo(buffer, length, capacity) does not work with length==-1");
 207         }
 208         if(t.length()!=u_strlen(buffer)) {
 209             errln("UnicodeString(buffer, length, capacity) does not work with length==-1");
 210         }
 211
 212         if(0!=s.caseCompare(buffer, -1, U_FOLD_CASE_DEFAULT)) {
 213             errln("UnicodeString.caseCompare(const UChar *, length, options) does not work with length==-1");
 214         }
 215         if(0!=s.caseCompare(0, s.length(), buffer, U_FOLD_CASE_DEFAULT)) {
 216             errln("UnicodeString.caseCompare(start, _length, const UChar *, options) does not work");
 217         }
 218
             // Overwrite the zero element so the buffer has no terminator within its
             // capacity; the length is then expected to equal the full capacity.
 219         buffer[u_strlen(buffer)]=0xe4;
 220         UnicodeString u(buffer, -1, UPRV_LENGTHOF(buffer));
 221         if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=UPRV_LENGTHOF(buffer)) {
 222             errln("UnicodeString.setTo(buffer without NUL, length, capacity) does not work with length==-1");
 223         }
 224         if(u.length()!=UPRV_LENGTHOF(buffer)) {
 225             errln("UnicodeString(buffer without NUL, length, capacity) does not work with length==-1");
 226         }
 227
 228         static const char cs[]={ 0x61, (char)0xe4, (char)0x85, 0 };
 229         UConverter *cnv;
 230         UErrorCode errorCode=U_ZERO_ERROR;
 231
             // The converter is only needed while constructing v, so it is closed right after.
 232         cnv=ucnv_open("ISO-8859-1", &errorCode);
 233         UnicodeString v(cs, -1, cnv, errorCode);
 234         ucnv_close(cnv);
 235         if(v!=CharsToUnicodeString("a\\xe4\\x85")) {
 236             errln("UnicodeString(const char *, length, cnv, errorCode) does not work with length==-1");
 237         }
 238     }
 239
 240 #if U_CHARSET_IS_UTF8
 241     {
 242         // Test the hardcoded-UTF-8 UnicodeString optimizations.
 243         static const uint8_t utf8[]={ 0x61, 0xC3, 0xA4, 0xC3, 0x9F, 0xE4, 0xB8, 0x80, 0 };
 244         static const UChar utf16[]={ 0x61, 0xE4, 0xDF, 0x4E00 };
 245         UnicodeString from8a = UnicodeString((const char *)utf8);
 246         UnicodeString from8b = UnicodeString((const char *)utf8, (int32_t)sizeof(utf8)-1);
 247         UnicodeString from16(FALSE, utf16, UPRV_LENGTHOF(utf16));
 248         if(from8a != from16 || from8b != from16) {
 249             errln("UnicodeString(const char * U_CHARSET_IS_UTF8) failed");
 250         }
 251         char buffer[16];
             // sizeof(utf8) includes the trailing 0, so the memcmp also checks NUL termination.
 252         int32_t length8=from16.extract(0, 0x7fffffff, buffer, (uint32_t)sizeof(buffer));
 253         if(length8!=((int32_t)sizeof(utf8)-1) || 0!=uprv_memcmp(buffer, utf8, sizeof(utf8))) {
 254             errln("UnicodeString::extract(char * U_CHARSET_IS_UTF8) failed");
 255         }
 256         length8=from16.extract(1, 2, buffer, (uint32_t)sizeof(buffer));
 257         if(length8!=4 || buffer[length8]!=0 || 0!=uprv_memcmp(buffer, utf8+1, length8)) {
 258             errln("UnicodeString::extract(substring to char * U_CHARSET_IS_UTF8) failed");
 259         }
 260     }
 261 #endif
 262 }
 263
 264 void
 265 UnicodeStringTest::TestCompare()
 266 {
         // Exercises UnicodeString comparison and reports every mismatch through errln():
         //  - relational operators and the compare()/compareBetween() overloads
         //  - comparisons between two strings after one was assigned from the other and truncated
         //  - compareCodePointOrder() on a table of strings listed in ascending order
         //  - caseCompare()/caseCompareBetween() with default and EXCLUDE_SPECIAL_I folding
         //  - srcLength == -1 in the const UChar * overloads (compare, indexOf, lastIndexOf, replace)
 267     UnicodeString   test1("this is a test");
 268     UnicodeString   test2("this is a test");
 269     UnicodeString   test3("this is a test of the emergency broadcast system");
 270     UnicodeString   test4("never say, \"this is a test\"!!");
 271
 272     UnicodeString   test5((UChar)0x5000);
 273     UnicodeString   test6((UChar)0x5100);
 274
 275     UChar         uniChars[] = { 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73,
 276                  0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0 };
 277     char            chars[] = "this is a test";
 278
 279     // test operator== and operator!=
 280     if (test1 != test2 || test1 == test3 || test1 == test4)
 281         errln("operator== or operator!= failed");
 282
 283     // test operator> and operator<
 284     if (test1 > test2 || test1 < test2 || !(test1 < test3) || !(test1 > test4) ||
 285         !(test5 < test6)
 286     ) {
 287         errln("operator> or operator< failed");
 288     }
 289
 290     // test operator>= and operator<=
 291     if (!(test1 >= test2) || !(test1 <= test2) || !(test1 <= test3) || !(test1 >= test4))
 292         errln("operator>= or operator<= failed");
 293
 294     // test compare(UnicodeString)
 295     if (test1.compare(test2) != 0 || test1.compare(test3) >= 0 || test1.compare(test4) <= 0)
 296         errln("compare(UnicodeString) failed");
 297
 298     //test compare(offset, length, UnicodeString)
 299     if(test1.compare(0, 14, test2) != 0 ||
 300         test3.compare(0, 14, test2) != 0 ||
 301         test4.compare(12, 14, test2) != 0 ||
 302         test3.compare(0, 18, test1) <=0  )
 303         errln("compare(offset, length, UnicodeString) failed");
 304
 305     // test compare(UChar*)
 306     if (test2.compare(uniChars) != 0 || test3.compare(uniChars) <= 0 || test4.compare(uniChars) >= 0)
 307         errln("compare(UChar*) failed");
 308
 309     // test compare(char*)
 310     if (test2.compare(chars) != 0 || test3.compare(chars) <= 0 || test4.compare(chars) >= 0)
 311         errln("compare(char*) failed");
 312
 313     // test compare(UChar*, length)
         // Only the first 4 units ("this") of uniChars are compared: test1 is longer with the
         // same prefix (expected > 0), test4 starts with 'n' < 't' (expected < 0).
 314     if (test1.compare(uniChars, 4) <= 0 || test4.compare(uniChars, 4) >= 0)
 315         errln("compare(UChar*, length) failed");
 316
 317     // test compare(thisOffset, thisLength, that, thatOffset, thatLength)
 318     if (test1.compare(0, 14, test2, 0, 14) != 0
 319     || test1.compare(0, 14, test3, 0, 14) != 0
 320     || test1.compare(0, 14, test4, 12, 14) != 0)
 321         errln("1. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
 322
 323     if (test1.compare(10, 4, test2, 0, 4) >= 0
 324     || test1.compare(10, 4, test3, 22, 9) <= 0
 325     || test1.compare(10, 4, test4, 22, 4) != 0)
 326         errln("2. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
 327
 328     // test compareBetween
 329     if (test1.compareBetween(0, 14, test2, 0, 14) != 0 || test1.compareBetween(0, 14, test3, 0, 14) != 0
 330                     || test1.compareBetween(0, 14, test4, 12, 26) != 0)
 331         errln("compareBetween failed");
 332
 333     if (test1.compareBetween(10, 14, test2, 0, 4) >= 0 || test1.compareBetween(10, 14, test3, 22, 31) <= 0
 334                     || test1.compareBetween(10, 14, test4, 22, 26) != 0)
 335         errln("compareBetween failed");
 336
 337     // test compare() etc. with strings that share a buffer but are not equal
 338     test2=test1; // share the buffer, length() too large for the stackBuffer
 339     test2.truncate(1); // change only the length, not the buffer
 340     if( test1==test2 || test1<=test2 ||
 341         test1.compare(test2)<=0 ||
 342         test1.compareCodePointOrder(test2)<=0 ||
 343         test1.compareCodePointOrder(0, INT32_MAX, test2)<=0 ||
 344         test1.compareCodePointOrder(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
 345         test1.compareCodePointOrderBetween(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
 346         test1.caseCompare(test2, U_FOLD_CASE_DEFAULT)<=0
 347     ) {
 348         errln("UnicodeStrings that share a buffer but have different lengths compare as equal");
 349     }
 350
 351     /* test compareCodePointOrder() */
 352     {
 353         /* these strings are in ascending order */
 354         static const UChar strings[][4]={
 355             { 0x61, 0 },                    /* U+0061 */
 356             { 0x20ac, 0xd801, 0 },          /* U+20ac U+d801 */
 357             { 0x20ac, 0xd800, 0xdc00, 0 },  /* U+20ac U+10000 */
 358             { 0xd800, 0 },                  /* U+d800 */
 359             { 0xd800, 0xff61, 0 },          /* U+d800 U+ff61 */
 360             { 0xdfff, 0 },                  /* U+dfff */
 361             { 0xff61, 0xdfff, 0 },          /* U+ff61 U+dfff */
 362             { 0xff61, 0xd800, 0xdc02, 0 },  /* U+ff61 U+10002 */
 363             { 0xd800, 0xdc02, 0 },          /* U+10002 */
 364             { 0xd84d, 0xdc56, 0 }           /* U+23456 */
 365         };
 366         UnicodeString u[20]; // must be at least as long as strings[]
 367         int32_t i;
 368
 369         for(i=0; i<UPRV_LENGTHOF(strings); ++i) {
 370             u[i]=UnicodeString(TRUE, strings[i], -1);
 371         }
 372
             // Each string must sort strictly before its successor, through both overloads.
 373         for(i=0; i<UPRV_LENGTHOF(strings)-1; ++i) {
 374             if(u[i].compareCodePointOrder(u[i+1])>=0 || u[i].compareCodePointOrder(0, INT32_MAX, u[i+1].getBuffer())>=0) {
 375                 errln("error: UnicodeString::compareCodePointOrder() fails for string %d and the following one\n", i);
 376             }
 377         }
 378     }
 379
 380     /* test caseCompare() */
 381     {
 382         static const UChar
 383         _mixed[]=               { 0x61, 0x42, 0x131, 0x3a3, 0xdf,       0x130,       0x49,  0xfb03,           0xd93f, 0xdfff, 0 },
 384         _otherDefault[]=        { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x69, 0x307, 0x69,  0x46, 0x66, 0x49, 0xd93f, 0xdfff, 0 },
 385         _otherExcludeSpecialI[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x69,        0x131, 0x66, 0x46, 0x69, 0xd93f, 0xdfff, 0 },
 386         _different[]=           { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x130,       0x49,  0x46, 0x66, 0x49, 0xd93f, 0xdffd, 0 };
 387
 388         UnicodeString
 389             mixed(TRUE, _mixed, -1),
 390             otherDefault(TRUE, _otherDefault, -1),
 391             otherExcludeSpecialI(TRUE, _otherExcludeSpecialI, -1),
 392             different(TRUE, _different, -1);
 393
             // result is an int8_t; it is promoted to int when passed through the variadic
             // errln(), so the messages below format it with %d.
 394         int8_t result;
 395
 396         /* test caseCompare() */
 397         result=mixed.caseCompare(otherDefault, U_FOLD_CASE_DEFAULT);
 398         if(result!=0 || 0!=mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_DEFAULT)) {
 399             errln("error: mixed.caseCompare(other, default)=%d instead of 0\n", result);
 400         }
 401         result=mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
 402         if(result!=0) {
 403             errln("error: mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=%d instead of 0\n", result);
 404         }
 405         result=mixed.caseCompare(otherDefault, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
 406         if(result==0 || 0==mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
 407             errln("error: mixed.caseCompare(other, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=0 instead of !=0\n");
 408         }
 409
 410         /* test caseCompare() */
 411         result=mixed.caseCompare(different, U_FOLD_CASE_DEFAULT);
 412         if(result<=0) {
 413             errln("error: mixed.caseCompare(different, default)=%d instead of positive\n", result);
 414         }
 415
 416         /* test caseCompare() - include the folded sharp s (U+00df) with different lengths */
 417         result=mixed.caseCompare(1, 4, different, 1, 5, U_FOLD_CASE_DEFAULT);
 418         if(result!=0 || 0!=mixed.caseCompareBetween(1, 5, different, 1, 6, U_FOLD_CASE_DEFAULT)) {
 419             errln("error: mixed.caseCompare(mixed, 1, 4, different, 1, 5, default)=%d instead of 0\n", result);
 420         }
 421
 422         /* test caseCompare() - stop in the middle of the sharp s (U+00df) */
 423         result=mixed.caseCompare(1, 4, different, 1, 4, U_FOLD_CASE_DEFAULT);
 424         if(result<=0) {
 425             errln("error: mixed.caseCompare(1, 4, different, 1, 4, default)=%d instead of positive\n", result);
 426         }
 427     }
 428
 429     // test that srcLength=-1 is handled in functions that
 430     // take input const UChar */int32_t srcLength (j785)
 431     {
 432         static const UChar u[]={ 0x61, 0x308, 0x62, 0 };
 433         UnicodeString s=UNICODE_STRING("a\\u0308b", 8).unescape();
 434
 435         if(s.compare(u, -1)!=0 || s.compare(0, 999, u, 0, -1)!=0) {
 436             errln("error UnicodeString::compare(..., const UChar *, srcLength=-1) does not work");
 437         }
 438
 439         if(s.compareCodePointOrder(u, -1)!=0 || s.compareCodePointOrder(0, 999, u, 0, -1)!=0) {
 440             errln("error UnicodeString::compareCodePointOrder(..., const UChar *, srcLength=-1, ...) does not work");
 441         }
 442
 443         if(s.caseCompare(u, -1, U_FOLD_CASE_DEFAULT)!=0 || s.caseCompare(0, 999, u, 0, -1, U_FOLD_CASE_DEFAULT)!=0) {
 444             errln("error UnicodeString::caseCompare(..., const UChar *, srcLength=-1, ...) does not work");
 445         }
 446
 447         if(s.indexOf(u, 1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0)!=1) {
 448             errln("error UnicodeString::indexOf(const UChar *, srcLength=-1, ...) does not work");
 449         }
 450
 451         if(s.lastIndexOf(u, 1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0)!=1) {
 452             errln("error UnicodeString::lastIndexOf(const UChar *, srcLength=-1, ...) does not work");
 453         }
 454
             // Both replace() forms insert u from index 1 onward, so s2 and s3 must match
             // each other and the tail of s.
 455         UnicodeString s2, s3;
 456         s2.replace(0, 0, u+1, -1);
 457         s3.replace(0, 0, u, 1, -1);
 458         if(s.compare(1, 999, s2)!=0 || s2!=s3) {
 459             errln("error UnicodeString::replace(..., const UChar *, srcLength=-1, ...) does not work");
 460         }
 461     }
 462 }
 463
 464 void
 465 UnicodeStringTest::TestExtract()
 466 {
         // Exercises UnicodeString::extract()/extractBetween() and reports every mismatch
         // through errln():
         //  - extraction into a UnicodeString, a UChar array and a char array
         //  - pinning of out-of-range indexes, NULL destinations, negative lengths
         //  - preflighting and overflow with the invariant ("") codepage
         //  - the NUL-terminating extract(UChar *, capacity, errorCode)
         //  - extract() and the constructor that take a UConverter
 467     UnicodeString  test1("Now is the time for all good men to come to the aid of their country.", "");
 468     UnicodeString  test2;
 469     UChar          test3[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
 470     char           test4[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
 471     UnicodeString  test5;
 472     char           test6[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
 473
         // The same 12 units starting at index 11 go into three destination kinds;
         // they are compared unit by unit in the loop further down.
 474     test1.extract(11, 12, test2);
 475     test1.extract(11, 12, test3);
 476     if (test1.extract(11, 12, test4) != 12 || test4[12] != 0) {
 477         errln("UnicodeString.extract(char *) failed to return the correct size of destination buffer.");
 478     }
 479
 480     // test proper pinning in extractBetween()
 481     test1.extractBetween(-3, 7, test5);
 482     if(test5!=UNICODE_STRING("Now is ", 7)) {
 483         errln("UnicodeString.extractBetween(-3, 7) did not pin properly.");
 484     }
 485
 486     test1.extractBetween(11, 23, test5);
 487     if (test1.extract(60, 71, test6) != 9) {
 488         errln("UnicodeString.extract() failed to return the correct size of destination buffer for end of buffer.");
 489     }
 490     if (test1.extract(11, 12, test6) != 12) {
 491         errln("UnicodeString.extract() failed to return the correct size of destination buffer.");
 492     }
 493
 494     // convert test4 back to Unicode for comparison
 495     UnicodeString test4b(test4, 12);
 496
 497     if (test1.extract(11, 12, (char *)NULL) != 12) {
 498         errln("UnicodeString.extract(NULL) failed to return the correct size of destination buffer.");
 499     }
 500     if (test1.extract(11, -1, test6) != 0) {
 501         errln("UnicodeString.extract(-1) failed to stop reading the string.");
 502     }
 503
         // Stop at the first mismatch so a single broken extraction yields one message.
 504     for (int32_t i = 0; i < 12; i++) {
 505         if (test1.charAt((int32_t)(11 + i)) != test2.charAt(i)) {
 506             errln(UnicodeString("extracting into a UnicodeString failed at position ") + i);
 507             break;
 508         }
 509         if (test1.charAt((int32_t)(11 + i)) != test3[i]) {
 510             errln(UnicodeString("extracting into an array of UChar failed at position ") + i);
 511             break;
 512         }
 513         if (((char)test1.charAt((int32_t)(11 + i))) != test4b.charAt(i)) {
 514             errln(UnicodeString("extracting into an array of char failed at position ") + i);
 515             break;
 516         }
 517         if (test1.charAt((int32_t)(11 + i)) != test5.charAt(i)) {
 518             errln(UnicodeString("extracting with extractBetween failed at position ") + i);
 519             break;
 520         }
 521     }
 522
 523     // test preflighting and overflows with invariant conversion
 524     if (test1.extract(0, 10, (char *)NULL, "") != 10) {
 525         errln("UnicodeString.extract(0, 10, (char *)NULL, \"\") != 10");
 526     }
 527
         // Sentinel just past the 2-byte capacity passed below; it must survive the call.
 528     test4[2] = (char)0xff;
 529     if (test1.extract(0, 10, test4, 2, "") != 10) {
 530         errln("UnicodeString.extract(0, 10, test4, 2, \"\") != 10");
 531     }
 532     if (test4[2] != (char)0xff) {
 533         errln("UnicodeString.extract(0, 10, test4, 2, \"\") overwrote test4[2]");
 534     }
 535
 536     {
 537         // test new, NUL-terminating extract() function
 538         UnicodeString s("terminate", "");
             // dest is pre-filled with 0xa5 so untouched elements can be detected afterwards.
 539         UChar dest[20]={
 540             0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5,
 541             0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5
 542         };
 543         UErrorCode errorCode;
 544         int32_t length;
 545
 546         errorCode=U_ZERO_ERROR;
 547         length=s.extract((UChar *)NULL, 0, errorCode);
 548         if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
                 // Argument order must follow the "%d (%s) expected %d" placeholders.
 549             errln("UnicodeString.extract(NULL, 0)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)", length, u_errorName(errorCode), s.length());
 550         }
 551
 552         errorCode=U_ZERO_ERROR;
 553         length=s.extract(dest, s.length()-1, errorCode);
 554         if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
 555             errln("UnicodeString.extract(dest too short)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)",
 556                 length, u_errorName(errorCode), s.length());
 557         }
 558
 559         errorCode=U_ZERO_ERROR;
 560         length=s.extract(dest, s.length(), errorCode);
 561         if(errorCode!=U_STRING_NOT_TERMINATED_WARNING || length!=s.length()) {
 562             errln("UnicodeString.extract(dest just right without NUL)==%d (%s) expected %d (U_STRING_NOT_TERMINATED_WARNING)",
 563                 length, u_errorName(errorCode), s.length());
 564         }
 565         if(dest[length-1]!=s[length-1] || dest[length]!=0xa5) {
 566             errln("UnicodeString.extract(dest just right without NUL) did not extract the string correctly");
 567         }
 568
 569         errorCode=U_ZERO_ERROR;
 570         length=s.extract(dest, s.length()+1, errorCode);
 571         if(errorCode!=U_ZERO_ERROR || length!=s.length()) {
 572             errln("UnicodeString.extract(dest large enough)==%d (%s) expected %d (U_ZERO_ERROR)",
 573                 length, u_errorName(errorCode), s.length());
 574         }
 575         if(dest[length-1]!=s[length-1] || dest[length]!=0 || dest[length+1]!=0xa5) {
 576             errln("UnicodeString.extract(dest large enough) did not extract the string correctly");
 577         }
 578     }
 579
 580     {
 581         // test new UConverter extract() and constructor
 582         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
 583         char buffer[32];
             // Expected UTF-8 bytes for s: 4 + 4 + 2 + 3 = 13 bytes.
 584         static const char expect[]={
 585             (char)0xf0, (char)0xaf, (char)0xa6, (char)0x99,
 586             (char)0xf0, (char)0x9d, (char)0x85, (char)0x9f,
 587             (char)0xc3, (char)0x84,
 588             (char)0xe1, (char)0xbb, (char)0x90
 589         };
 590         UErrorCode errorCode=U_ZERO_ERROR;
 591         UConverter *cnv=ucnv_open("UTF-8", &errorCode);
 592         int32_t length;
 593
             // The whole section is skipped when the converter could not be opened.
             // length is an int32_t and is formatted with %d, as in the messages above.
 594         if(U_SUCCESS(errorCode)) {
 595             // test preflighting
 596             if( (length=s.extract(NULL, 0, cnv, errorCode))!=13 ||
 597                 errorCode!=U_BUFFER_OVERFLOW_ERROR
 598             ) {
 599                 errln("UnicodeString::extract(NULL, UConverter) preflighting failed (length=%d, %s)",
 600                       length, u_errorName(errorCode));
 601             }
 602             errorCode=U_ZERO_ERROR;
 603             if( (length=s.extract(buffer, 2, cnv, errorCode))!=13 ||
 604                 errorCode!=U_BUFFER_OVERFLOW_ERROR
 605             ) {
 606                 errln("UnicodeString::extract(too small, UConverter) preflighting failed (length=%d, %s)",
 607                       length, u_errorName(errorCode));
 608             }
 609
 610             // try error cases
 611             errorCode=U_ZERO_ERROR;
 612             if( s.extract(NULL, 2, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
 613                 errln("UnicodeString::extract(UConverter) succeeded with an illegal destination");
 614             }
                 // Enter with a failure already set; the call is expected not to succeed.
 615             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
 616             if( s.extract(NULL, 0, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
 617                 errln("UnicodeString::extract(UConverter) succeeded with a previous error code");
 618             }
 619             errorCode=U_ZERO_ERROR;
 620
 621             // extract for real
 622             if( (length=s.extract(buffer, sizeof(buffer), cnv, errorCode))!=13 ||
 623                 uprv_memcmp(buffer, expect, 13)!=0 ||
 624                 buffer[13]!=0 ||
 625                 U_FAILURE(errorCode)
 626             ) {
 627                 errln("UnicodeString::extract(UConverter) conversion failed (length=%d, %s)",
 628                       length, u_errorName(errorCode));
 629             }
 630             // Test again with just the converter name.
                 // This overload takes no error code, so errorCode still holds the value
                 // from the previous call.
 631             if( (length=s.extract(0, s.length(), buffer, sizeof(buffer), "UTF-8"))!=13 ||
 632                 uprv_memcmp(buffer, expect, 13)!=0 ||
 633                 buffer[13]!=0 ||
 634                 U_FAILURE(errorCode)
 635             ) {
 636                 errln("UnicodeString::extract(\"UTF-8\") conversion failed (length=%d, %s)",
 637                       length, u_errorName(errorCode));
 638             }
 639
 640             // try the constructor
 641             UnicodeString t(expect, sizeof(expect), cnv, errorCode);
 642             if(U_FAILURE(errorCode) || s!=t) {
 643                 errln("UnicodeString(UConverter) conversion failed (%s)",
 644                       u_errorName(errorCode));
 645             }
 646
 647             ucnv_close(cnv);
 648         }
 649     }
 650 }
 651
 652 void
 653 UnicodeStringTest::TestRemoveReplace()
 654 {
         // Exercises replace()/replaceBetween() with UnicodeString, UChar* and char*
         // sources, then remove()/removeBetween(), setCharAt() and the no-argument
         // remove(); every mismatch is reported through errln().
 655     UnicodeString   test1("The rain in Spain stays mainly on the plain");
 656     UnicodeString   test2("eat SPAMburgers!");
 657     UChar         test3[] = { 0x53, 0x50, 0x41, 0x4d, 0x4d, 0 };
 658     char            test4[] = "SPAM";
         // test5 is a reference to test1, so reads through test5 see every edit to test1.
 659     UnicodeString&  test5 = test1;
 660
 661     test1.replace(4, 4, test2, 4, 4);
 662     test1.replace(12, 5, test3, 4);
         // Shorten test3 to 4 units so the length-less overload below sees "SPAM".
 663     test3[4] = 0;
 664     test1.replace(17, 4, test3);
 665     test1.replace(23, 4, test4);
 666     test1.replaceBetween(37, 42, test2, 4, 8);
 667
 668     if (test1 != "The SPAM in SPAM SPAMs SPAMly on the SPAM")
 669         errln("One of the replace methods failed:\n"
 670               "  expected \"The SPAM in SPAM SPAMs SPAMly on the SPAM\",\n"
 671               "  got \"" + test1 + "\"");
 672
 673     test1.remove(21, 1);
 674     test1.removeBetween(26, 28);
 675
 676     if (test1 != "The SPAM in SPAM SPAM SPAM on the SPAM")
 677         errln("One of the remove methods failed:\n"
 678               "  expected \"The SPAM in SPAM SPAM SPAM on the SPAM\",\n"
 679               "  got \"" + test1 + "\"");
 680
         // Replace every unit other than 'S', 'P', 'A', 'M' and space with 'x' (0x78).
 681     for (int32_t i = 0; i < test1.length(); i++) {
 682         if (test5[i] != 0x53 && test5[i] != 0x50 && test5[i] != 0x41 && test5[i] != 0x4d && test5[i] != 0x20) {
 683             test1.setCharAt(i, 0x78);
 684         }
 685     }
 686
         // This check verifies the setCharAt() loop above, so the message names setCharAt().
 687     if (test1 != "xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM")
 688         errln("setCharAt() failed:\n"
 689               "  expected \"xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM\",\n"
 690               "  got \"" + test1 + "\"");
 691
 692     test1.remove();
 693     if (test1.length() != 0)
 694         errln("Remove() failed: expected empty string, got \"" + test1 + "\"");
 695 }
 696
 697 void
 698 UnicodeStringTest::TestSearching()
 699 {
 700     UnicodeString test1("test test ttest tetest testesteststt");
 701     UnicodeString test2("test");
 702     UChar testChar = 0x74;
 703
 704     UChar32 testChar32 = 0x20402;
 705     UChar testData[]={
 706         //   0       1       2       3       4       5       6       7
 707         0xd841, 0xdc02, 0x0071, 0xdc02, 0xd841, 0x0071, 0xd841, 0xdc02,
 708
 709         //   8       9      10      11      12      13      14      15
 710         0x0071, 0x0072, 0xd841, 0xdc02, 0x0071, 0xd841, 0xdc02, 0x0071,
 711
 712         //  16      17      18      19
 713         0xdc02, 0xd841, 0x0073, 0x0000
 714     };
 715     UnicodeString test3(testData);
 716     UnicodeString test4(testChar32);
 717
 718     uint16_t occurrences = 0;
 719     int32_t startPos = 0;
 720     for ( ;
 721           startPos != -1 && startPos < test1.length();
 722           (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
 723         ;
 724     if (occurrences != 6)
 725         errln(UnicodeString("indexOf failed: expected to find 6 occurrences, found ") + occurrences);
 726
 727     for ( occurrences = 0, startPos = 10;
 728           startPos != -1 && startPos < test1.length();
 729           (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
 730         ;
 731     if (occurrences != 4)
 732         errln(UnicodeString("indexOf with starting offset failed: "
 733                             "expected to find 4 occurrences, found ") + occurrences);
 734
 735     int32_t endPos = 28;
 736     for ( occurrences = 0, startPos = 5;
 737           startPos != -1 && startPos < test1.length();
 738           (startPos = test1.indexOf(test2, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
 739         ;
 740     if (occurrences != 4)
 741         errln(UnicodeString("indexOf with starting and ending offsets failed: "
 742                             "expected to find 4 occurrences, found ") + occurrences);
 743
 744     //using UChar32 string
 745     for ( startPos=0, occurrences=0;
 746           startPos != -1 && startPos < test3.length();
 747           (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
 748         ;
 749     if (occurrences != 4)
 750         errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
 751
 752     for ( startPos=10, occurrences=0;
 753           startPos != -1 && startPos < test3.length();
 754           (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
 755         ;
 756     if (occurrences != 2)
 757         errln(UnicodeString("indexOf failed: expected to find 2 occurrences, found ") + occurrences);
 758     //---
 759
 760     for ( occurrences = 0, startPos = 0;
 761           startPos != -1 && startPos < test1.length();
 762           (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
 763         ;
 764     if (occurrences != 16)
 765         errln(UnicodeString("indexOf with character failed: "
 766                             "expected to find 16 occurrences, found ") + occurrences);
 767
 768     for ( occurrences = 0, startPos = 10;
 769           startPos != -1 && startPos < test1.length();
 770           (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
 771         ;
 772     if (occurrences != 12)
 773         errln(UnicodeString("indexOf with character & start offset failed: "
 774                             "expected to find 12 occurrences, found ") + occurrences);
 775
 776     for ( occurrences = 0, startPos = 5, endPos = 28;
 777           startPos != -1 && startPos < test1.length();
 778           (startPos = test1.indexOf(testChar, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
 779         ;
 780     if (occurrences != 10)
 781         errln(UnicodeString("indexOf with character & start & end offsets failed: "
 782                             "expected to find 10 occurrences, found ") + occurrences);
 783
 784     //testing for UChar32
 785     UnicodeString subString;
 786     for( occurrences =0, startPos=0; startPos < test3.length(); startPos +=1){
 787         subString.append(test3, startPos, test3.length());
 788         if(subString.indexOf(testChar32) != -1 ){
 789              ++occurrences;
 790         }
 791         subString.remove();
 792     }
 793     if (occurrences != 14)
 794         errln((UnicodeString)"indexOf failed: expected to find 14 occurrences, found " + occurrences);
 795
 796     for ( occurrences = 0, startPos = 0;
 797           startPos != -1 && startPos < test3.length();
 798           (startPos = test3.indexOf(testChar32, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
 799         ;
 800     if (occurrences != 4)
 801         errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
 802
 803     endPos=test3.length();
 804     for ( occurrences = 0, startPos = 5;
 805           startPos != -1 && startPos < test3.length();
 806           (startPos = test3.indexOf(testChar32, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
 807         ;
 808     if (occurrences != 3)
 809         errln((UnicodeString)"indexOf with character & start & end offsets failed: expected to find 2 occurrences, found " + occurrences);
 810     //---
 811
 812     if(test1.lastIndexOf(test2)!=29) {
 813         errln("test1.lastIndexOf(test2)!=29");
 814     }
 815
 816     if(test1.lastIndexOf(test2, 15)!=29 || test1.lastIndexOf(test2, 29)!=29 || test1.lastIndexOf(test2, 30)!=-1) {
 817         errln("test1.lastIndexOf(test2, start) failed");
 818     }
 819
 820     for ( occurrences = 0, startPos = 32;
 821           startPos != -1;
 822           (startPos = test1.lastIndexOf(test2, 5, startPos - 5)) != -1 ? ++occurrences : 0)
 823         ;
 824     if (occurrences != 4)
 825         errln(UnicodeString("lastIndexOf with starting and ending offsets failed: "
 826                             "expected to find 4 occurrences, found ") + occurrences);
 827
 828     for ( occurrences = 0, startPos = 32;
 829           startPos != -1;
 830           (startPos = test1.lastIndexOf(testChar, 5, startPos - 5)) != -1 ? ++occurrences : 0)
 831         ;
 832     if (occurrences != 11)
 833         errln(UnicodeString("lastIndexOf with character & start & end offsets failed: "
 834                             "expected to find 11 occurrences, found ") + occurrences);
 835
 836     //testing UChar32
 837     startPos=test3.length();
 838     for ( occurrences = 0;
 839           startPos != -1;
 840           (startPos = test3.lastIndexOf(testChar32, 5, startPos - 5)) != -1 ? ++occurrences : 0)
 841         ;
 842     if (occurrences != 3)
 843         errln((UnicodeString)"lastIndexOf with character & start & end offsets failed: expected to find 3 occurrences, found " + occurrences);
 844
 845
 846     for ( occurrences = 0, endPos = test3.length();  endPos > 0; endPos -= 1){
 847         subString.remove();
 848         subString.append(test3, 0, endPos);
 849         if(subString.lastIndexOf(testChar32) != -1 ){
 850             ++occurrences;
 851         }
 852     }
 853     if (occurrences != 18)
 854         errln((UnicodeString)"indexOf failed: expected to find 18 occurrences, found " + occurrences);
 855     //---
 856
 857     // test that indexOf(UChar32) and lastIndexOf(UChar32)
 858     // do not find surrogate code points when they are part of matched pairs
 859     // (= part of supplementary code points)
 860     // Jitterbug 1542
 861     if(test3.indexOf((UChar32)0xd841) != 4 || test3.indexOf((UChar32)0xdc02) != 3) {
 862         errln("error: UnicodeString::indexOf(UChar32 surrogate) finds a partial supplementary code point");
 863     }
 864     if( UnicodeString(test3, 0, 17).lastIndexOf((UChar)0xd841, 0) != 4 ||
 865         UnicodeString(test3, 0, 17).lastIndexOf((UChar32)0xd841, 2) != 4 ||
 866         test3.lastIndexOf((UChar32)0xd841, 0, 17) != 4 || test3.lastIndexOf((UChar32)0xdc02, 0, 17) != 16
 867     ) {
 868         errln("error: UnicodeString::lastIndexOf(UChar32 surrogate) finds a partial supplementary code point");
 869     }
 870 }
 871
 872 void
 873 UnicodeStringTest::TestSpacePadding()
 874 {
 875     UnicodeString test1("hello");
 876     UnicodeString test2("   there");
 877     UnicodeString test3("Hi!  How ya doin'?  Beautiful day, isn't it?");
 878     UnicodeString test4;
 879     UBool returnVal;
 880     UnicodeString expectedValue;
 881
 882     returnVal = test1.padLeading(15);
 883     expectedValue = "          hello";
 884     if (returnVal == FALSE || test1 != expectedValue)
 885         errln("padLeading() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
 886
 887     returnVal = test2.padTrailing(15);
 888     expectedValue = "   there       ";
 889     if (returnVal == FALSE || test2 != expectedValue)
 890         errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
 891
 892     expectedValue = test3;
 893     returnVal = test3.padTrailing(15);
 894     if (returnVal == TRUE || test3 != expectedValue)
 895         errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
 896
 897     expectedValue = "hello";
 898     test4.setTo(test1).trim();
 899
 900     if (test4 != expectedValue || test1 == expectedValue || test4 != expectedValue)
 901         errln("trim(UnicodeString&) failed");
 902
 903     test1.trim();
 904     if (test1 != expectedValue)
 905         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
 906
 907     test2.trim();
 908     expectedValue = "there";
 909     if (test2 != expectedValue)
 910         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
 911
 912     test3.trim();
 913     expectedValue = "Hi!  How ya doin'?  Beautiful day, isn't it?";
 914     if (test3 != expectedValue)
 915         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
 916
 917     returnVal = test1.truncate(15);
 918     expectedValue = "hello";
 919     if (returnVal == TRUE || test1 != expectedValue)
 920         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
 921
 922     returnVal = test2.truncate(15);
 923     expectedValue = "there";
 924     if (returnVal == TRUE || test2 != expectedValue)
 925         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
 926
 927     returnVal = test3.truncate(15);
 928     expectedValue = "Hi!  How ya doi";
 929     if (returnVal == FALSE || test3 != expectedValue)
 930         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
 931 }
 932
 933 void
 934 UnicodeStringTest::TestPrefixAndSuffix()
 935 {
 936     UnicodeString test1("Now is the time for all good men to come to the aid of their country.");
 937     UnicodeString test2("Now");
 938     UnicodeString test3("country.");
 939     UnicodeString test4("count");
 940
 941     if (!test1.startsWith(test2) || !test1.startsWith(test2, 0, test2.length())) {
 942         errln("startsWith() failed: \"" + test2 + "\" should be a prefix of \"" + test1 + "\".");
 943     }
 944
 945     if (test1.startsWith(test3) ||
 946         test1.startsWith(test3.getBuffer(), test3.length()) ||
 947         test1.startsWith(test3.getTerminatedBuffer(), 0, -1)
 948     ) {
 949         errln("startsWith() failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test1 + "\".");
 950     }
 951
 952     if (test1.endsWith(test2)) {
 953         errln("endsWith() failed: \"" + test2 + "\" shouldn't be a suffix of \"" + test1 + "\".");
 954     }
 955
 956     if (!test1.endsWith(test3)) {
 957         errln("endsWith(test3) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
 958     }
 959     if (!test1.endsWith(test3, 0, INT32_MAX)) {
 960         errln("endsWith(test3, 0, INT32_MAX) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
 961     }
 962
 963     if(!test1.endsWith(test3.getBuffer(), test3.length())) {
 964         errln("endsWith(test3.getBuffer(), test3.length()) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
 965     }
 966     if(!test1.endsWith(test3.getTerminatedBuffer(), 0, -1)) {
 967         errln("endsWith(test3.getTerminatedBuffer(), 0, -1) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
 968     }
 969
 970     if (!test3.startsWith(test4)) {
 971         errln("endsWith(test4) failed: \"" + test4 + "\" should be a prefix of \"" + test3 + "\".");
 972     }
 973
 974     if (test4.startsWith(test3)) {
 975         errln("startsWith(test3) failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test4 + "\".");
 976     }
 977 }
 978
 979 void
 980 UnicodeStringTest::TestStartsWithAndEndsWithNulTerminated() {
 981     UnicodeString test("abcde");
 982     const UChar ab[] = { 0x61, 0x62, 0 };
 983     const UChar de[] = { 0x64, 0x65, 0 };
 984     assertTrue("abcde.startsWith(ab, -1)", test.startsWith(ab, -1));
 985     assertTrue("abcde.startsWith(ab, 0, -1)", test.startsWith(ab, 0, -1));
 986     assertTrue("abcde.endsWith(de, -1)", test.endsWith(de, -1));
 987     assertTrue("abcde.endsWith(de, 0, -1)", test.endsWith(de, 0, -1));
 988 }
 989
 990 void
 991 UnicodeStringTest::TestFindAndReplace()
 992 {
 993     UnicodeString test1("One potato, two potato, three potato, four\n");
 994     UnicodeString test2("potato");
 995     UnicodeString test3("MISSISSIPPI");
 996
 997     UnicodeString expectedValue;
 998
 999     test1.findAndReplace(test2, test3);
1000     expectedValue = "One MISSISSIPPI, two MISSISSIPPI, three MISSISSIPPI, four\n";
1001     if (test1 != expectedValue)
1002         errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
1003     test1.findAndReplace(2, 32, test3, test2);
1004     expectedValue = "One potato, two potato, three MISSISSIPPI, four\n";
1005     if (test1 != expectedValue)
1006         errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
1007 }
1008
1009 void
1010 UnicodeStringTest::TestReverse()
1011 {
1012     UnicodeString test("backwards words say to used I");
1013
1014     test.reverse();
1015     test.reverse(2, 4);
1016     test.reverse(7, 2);
1017     test.reverse(10, 3);
1018     test.reverse(14, 5);
1019     test.reverse(20, 9);
1020
1021     if (test != "I used to say words backwards")
1022         errln("reverse() failed:  Expected \"I used to say words backwards\",\n got \""
1023             + test + "\"");
1024
1025     test=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1026     test.reverse();
1027     if(test.char32At(0)!=0x1ed0 || test.char32At(1)!=0xc4 || test.char32At(2)!=0x1d15f || test.char32At(4)!=0x2f999) {
1028         errln("reverse() failed with supplementary characters");
1029     }
1030
1031     // Test case for ticket #8091:
1032     // UnicodeString::reverse() failed to see a lead surrogate in the middle of
1033     // an odd-length string that contains no other lead surrogates.
1034     test=UNICODE_STRING_SIMPLE("ab\\U0001F4A9e").unescape();
1035     UnicodeString expected=UNICODE_STRING_SIMPLE("e\\U0001F4A9ba").unescape();
1036     test.reverse();
1037     if(test!=expected) {
1038         errln("reverse() failed with only lead surrogate in the middle");
1039     }
1040 }
1041
1042 void
1043 UnicodeStringTest::TestMiscellaneous()
1044 {
1045     UnicodeString   test1("This is a test");
1046     UnicodeString   test2("This is a test");
1047     UnicodeString   test3("Me too!");
1048
1049     // test getBuffer(minCapacity) and releaseBuffer()
1050     test1=UnicodeString(); // make sure that it starts with its stackBuffer
1051     UChar *p=test1.getBuffer(20);
1052     if(test1.getCapacity()<20) {
1053         errln("UnicodeString::getBuffer(20).getCapacity()<20");
1054     }
1055
1056     test1.append((UChar)7); // must not be able to modify the string here
1057     test1.setCharAt(3, 7);
1058     test1.reverse();
1059     if( test1.length()!=0 ||
1060         test1.charAt(0)!=0xffff || test1.charAt(3)!=0xffff ||
1061         test1.getBuffer(10)!=0 || test1.getBuffer()!=0
1062     ) {
1063         errln("UnicodeString::getBuffer(minCapacity) allows read or write access to the UnicodeString");
1064     }
1065
1066     p[0]=1;
1067     p[1]=2;
1068     p[2]=3;
1069     test1.releaseBuffer(3);
1070     test1.append((UChar)4);
1071
1072     if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1073         errln("UnicodeString::releaseBuffer(newLength) does not properly reallow access to the UnicodeString");
1074     }
1075
1076     // test releaseBuffer() without getBuffer(minCapacity) - must not have any effect
1077     test1.releaseBuffer(1);
1078     if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1079         errln("UnicodeString::releaseBuffer(newLength) without getBuffer(minCapacity) changed the UnicodeString");
1080     }
1081
1082     // test getBuffer(const)
1083     const UChar *q=test1.getBuffer(), *r=test1.getBuffer();
1084     if( test1.length()!=4 ||
1085         q[0]!=1 || q[1]!=2 || q[2]!=3 || q[3]!=4 ||
1086         r[0]!=1 || r[1]!=2 || r[2]!=3 || r[3]!=4
1087     ) {
1088         errln("UnicodeString::getBuffer(const) does not return a usable buffer pointer");
1089     }
1090
1091     // test releaseBuffer() with a NUL-terminated buffer
1092     test1.getBuffer(20)[2]=0;
1093     test1.releaseBuffer(); // implicit -1
1094     if(test1.length()!=2 || test1.charAt(0)!=1 || test1.charAt(1) !=2) {
1095         errln("UnicodeString::releaseBuffer(-1) does not properly set the length of the UnicodeString");
1096     }
1097
1098     // test releaseBuffer() with a non-NUL-terminated buffer
1099     p=test1.getBuffer(256);
1100     for(int32_t i=0; i<test1.getCapacity(); ++i) {
1101         p[i]=(UChar)1;      // fill the buffer with all non-NUL code units
1102     }
1103     test1.releaseBuffer();  // implicit -1
1104     if(test1.length()!=test1.getCapacity() || test1.charAt(1)!=1 || test1.charAt(100)!=1 || test1.charAt(test1.getCapacity()-1)!=1) {
1105         errln("UnicodeString::releaseBuffer(-1 but no NUL) does not properly set the length of the UnicodeString");
1106     }
1107
1108     // test getTerminatedBuffer()
1109     test1=UnicodeString("This is another test.", "");
1110     test2=UnicodeString("This is another test.", "");
1111     q=test1.getTerminatedBuffer();
1112     if(q[test1.length()]!=0 || test1!=test2 || test2.compare(q, -1)!=0) {
1113         errln("getTerminatedBuffer()[length]!=0");
1114     }
1115
1116     const UChar u[]={ 5, 6, 7, 8, 0 };
1117     test1.setTo(FALSE, u, 3);
1118     q=test1.getTerminatedBuffer();
1119     if(q==u || q[0]!=5 || q[1]!=6 || q[2]!=7 || q[3]!=0) {
1120         errln("UnicodeString(u[3]).getTerminatedBuffer() returns a bad buffer");
1121     }
1122
1123     test1.setTo(TRUE, u, -1);
1124     q=test1.getTerminatedBuffer();
1125     if(q!=u || test1.length()!=4 || q[3]!=8 || q[4]!=0) {
1126         errln("UnicodeString(u[-1]).getTerminatedBuffer() returns a bad buffer");
1127     }
1128
1129     // NOTE: Some compilers will optimize u"la" to point to the same static memory
1130     // as u" lila", offset by 3 code units
1131     test1=UnicodeString(TRUE, u"la", 2);
1132     test1.append(UnicodeString(TRUE, u" lila", 5).getTerminatedBuffer(), 0, -1);
1133     assertEquals("UnicodeString::append(const UChar *, start, length) failed",
1134         u"la lila", test1);
1135
1136     test1.insert(3, UnicodeString(TRUE, u"dudum ", 6), 0, INT32_MAX);
1137     assertEquals("UnicodeString::insert(start, const UniStr &, start, length) failed",
1138         u"la dudum lila", test1);
1139
1140     static const UChar ucs[]={ 0x68, 0x6d, 0x20, 0 };
1141     test1.insert(9, ucs, -1);
1142     assertEquals("UnicodeString::insert(start, const UChar *, length) failed",
1143         u"la dudum hm lila", test1);
1144
1145     test1.replace(9, 2, (UChar)0x2b);
1146     assertEquals("UnicodeString::replace(start, length, UChar) failed",
1147         u"la dudum + lila", test1);
1148
1149     if(test1.hasMetaData() || UnicodeString().hasMetaData()) {
1150         errln("UnicodeString::hasMetaData() returns TRUE");
1151     }
1152
1153     // test getTerminatedBuffer() on a truncated, shared, heap-allocated string
1154     test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1155     test1.truncate(36);  // ensure length()<getCapacity()
1156     test2=test1;  // share the buffer
1157     test1.truncate(5);
1158     if(test1.length()!=5 || test1.getTerminatedBuffer()[5]!=0) {
1159         errln("UnicodeString(shared buffer).truncate() failed");
1160     }
1161     if(test2.length()!=36 || test2[5]!=0x66 || u_strlen(test2.getTerminatedBuffer())!=36) {
1162         errln("UnicodeString(shared buffer).truncate().getTerminatedBuffer() "
1163               "modified another copy of the string!");
1164     }
1165     test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1166     test1.truncate(36);  // ensure length()<getCapacity()
1167     test2=test1;  // share the buffer
1168     test1.remove();
1169     if(test1.length()!=0 || test1.getTerminatedBuffer()[0]!=0) {
1170         errln("UnicodeString(shared buffer).remove() failed");
1171     }
1172     if(test2.length()!=36 || test2[0]!=0x61 || u_strlen(test2.getTerminatedBuffer())!=36) {
1173         errln("UnicodeString(shared buffer).remove().getTerminatedBuffer() "
1174               "modified another copy of the string!");
1175     }
1176
1177     // ticket #9740
1178     test1.setTo(TRUE, ucs, 3);
1179     assertEquals("length of read-only alias", 3, test1.length());
1180     test1.trim();
1181     assertEquals("length of read-only alias after trim()", 2, test1.length());
1182     assertEquals("length of terminated buffer of read-only alias + trim()",
1183                  2, u_strlen(test1.getTerminatedBuffer()));
1184 }
1185
1186 void
1187 UnicodeStringTest::TestStackAllocation()
1188 {
1189     UChar           testString[] ={
1190         0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x72, 0x61, 0x7a, 0x79, 0x20, 0x74, 0x65, 0x73, 0x74, 0x2e, 0 };
1191     UChar           guardWord = 0x4DED;
1192     UnicodeString*  test = 0;
1193
1194     test = new  UnicodeString(testString);
1195     if (*test != "This is a crazy test.")
1196         errln("Test string failed to initialize properly.");
1197     if (guardWord != 0x04DED)
1198         errln("Test string initialization overwrote guard word!");
1199
1200     test->insert(8, "only ");
1201     test->remove(15, 6);
1202     if (*test != "This is only a test.")
1203         errln("Manipulation of test string failed to work right.");
1204     if (guardWord != 0x4DED)
1205         errln("Manipulation of test string overwrote guard word!");
1206
1207     // we have to deinitialize and release the backing store by calling the destructor
1208     // explicitly, since we can't overload operator delete
1209     delete test;
1210
1211     UChar workingBuffer[] = {
1212         0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20,
1213         0x66, 0x6f, 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x6d, 0x65, 0x6e, 0x20, 0x74, 0x6f, 0x20,
1214         0x63, 0x6f, 0x6d, 0x65, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1215         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1216         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1217     UChar guardWord2 = 0x4DED;
1218
1219     test = new UnicodeString(workingBuffer, 35, 100);
1220     if (*test != "Now is the time for all men to come")
1221         errln("Stack-allocated backing store failed to initialize correctly.");
1222     if (guardWord2 != 0x4DED)
1223         errln("Stack-allocated backing store overwrote guard word!");
1224
1225     test->insert(24, "good ");
1226     if (*test != "Now is the time for all good men to come")
1227         errln("insert() on stack-allocated UnicodeString didn't work right");
1228     if (guardWord2 != 0x4DED)
1229         errln("insert() on stack-allocated UnicodeString overwrote guard word!");
1230
1231     if (workingBuffer[24] != 0x67)
1232         errln("insert() on stack-allocated UnicodeString didn't affect backing store");
1233
1234     *test += " to the aid of their country.";
1235     if (*test != "Now is the time for all good men to come to the aid of their country.")
1236         errln("Stack-allocated UnicodeString overflow didn't work");
1237     if (guardWord2 != 0x4DED)
1238         errln("Stack-allocated UnicodeString overflow overwrote guard word!");
1239
1240     *test = "ha!";
1241     if (*test != "ha!")
1242         errln("Assignment to stack-allocated UnicodeString didn't work");
1243     if (workingBuffer[0] != 0x4e)
1244         errln("Change to UnicodeString after overflow are still affecting original buffer");
1245     if (guardWord2 != 0x4DED)
1246         errln("Change to UnicodeString after overflow overwrote guard word!");
1247
1248     // test read-only aliasing with setTo()
1249     workingBuffer[0] = 0x20ac;
1250     workingBuffer[1] = 0x125;
1251     workingBuffer[2] = 0;
1252     test->setTo(TRUE, workingBuffer, 2);
1253     if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x125) {
1254         errln("UnicodeString.setTo(readonly alias) does not alias correctly");
1255     }
1256
1257     UnicodeString *c=test->clone();
1258
1259     workingBuffer[1] = 0x109;
1260     if(test->charAt(1) != 0x109) {
1261         errln("UnicodeString.setTo(readonly alias) made a copy: did not see change in buffer");
1262     }
1263
1264     if(c->length() != 2 || c->charAt(1) != 0x125) {
1265         errln("clone(alias) did not copy the buffer");
1266     }
1267     delete c;
1268
1269     test->setTo(TRUE, workingBuffer, -1);
1270     if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x109) {
1271         errln("UnicodeString.setTo(readonly alias, length -1) does not alias correctly");
1272     }
1273
1274     test->setTo(FALSE, workingBuffer, -1);
1275     if(!test->isBogus()) {
1276         errln("UnicodeString.setTo(unterminated readonly alias, length -1) does not result in isBogus()");
1277     }
1278
1279     delete test;
1280
1281     test=new UnicodeString();
1282     UChar buffer[]={0x0061, 0x0062, 0x20ac, 0x0043, 0x0042, 0x0000};
1283     test->setTo(buffer, 4, 10);
1284     if(test->length() !=4 || test->charAt(0) != 0x0061 || test->charAt(1) != 0x0062 ||
1285         test->charAt(2) != 0x20ac || test->charAt(3) != 0x0043){
1286         errln((UnicodeString)"UnicodeString.setTo(UChar*, length, capacity) does not work correctly\n" + prettify(*test));
1287     }
1288     delete test;
1289
1290
1291     // test the UChar32 constructor
1292     UnicodeString c32Test((UChar32)0x10ff2a);
1293     if( c32Test.length() != U16_LENGTH(0x10ff2a) ||
1294         c32Test.char32At(c32Test.length() - 1) != 0x10ff2a
1295     ) {
1296         errln("The UnicodeString(UChar32) constructor does not work with a 0x10ff2a filler");
1297     }
1298
1299     // test the (new) capacity constructor
1300     UnicodeString capTest(5, (UChar32)0x2a, 5);
1301     if( capTest.length() != 5 * U16_LENGTH(0x2a) ||
1302         capTest.char32At(0) != 0x2a ||
1303         capTest.char32At(4) != 0x2a
1304     ) {
1305         errln("The UnicodeString capacity constructor does not work with an ASCII filler");
1306     }
1307
1308     capTest = UnicodeString(5, (UChar32)0x10ff2a, 5);
1309     if( capTest.length() != 5 * U16_LENGTH(0x10ff2a) ||
1310         capTest.char32At(0) != 0x10ff2a ||
1311         capTest.char32At(4) != 0x10ff2a
1312     ) {
1313         errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1314     }
1315
1316     capTest = UnicodeString(5, (UChar32)0, 0);
1317     if(capTest.length() != 0) {
1318         errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1319     }
1320 }
1321
1322 /**
1323  * Test the unescape() function.
1324  */
1325 void UnicodeStringTest::TestUnescape(void) {
1326     UnicodeString IN("abc\\u4567 \\n\\r \\U00101234xyz\\x1\\x{5289}\\x1b", -1, US_INV);
1327     UnicodeString OUT("abc");
1328     OUT.append((UChar)0x4567);
1329     OUT.append(" ");
1330     OUT.append((UChar)0xA);
1331     OUT.append((UChar)0xD);
1332     OUT.append(" ");
1333     OUT.append((UChar32)0x00101234);
1334     OUT.append("xyz");
1335     OUT.append((UChar32)1).append((UChar32)0x5289).append((UChar)0x1b);
1336     UnicodeString result = IN.unescape();
1337     if (result != OUT) {
1338         errln("FAIL: " + prettify(IN) + ".unescape() -> " +
1339               prettify(result) + ", expected " +
1340               prettify(OUT));
1341     }
1342
1343     // test that an empty string is returned in case of an error
1344     if (!UNICODE_STRING("wrong \\u sequence", 17).unescape().isEmpty()) {
1345         errln("FAIL: unescaping of a string with an illegal escape sequence did not return an empty string");
1346     }
1347 }
1348
1349 /* test code point counting functions --------------------------------------- */
1350
1351 /* reference implementation of UnicodeString::hasMoreChar32Than() */
1352 static int32_t
1353 _refUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1354     int32_t count=s.countChar32(start, length);
1355     return count>number;
1356 }
1357
1358 /* compare the real function against the reference */
1359 void
1360 UnicodeStringTest::_testUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1361     if(s.hasMoreChar32Than(start, length, number)!=_refUnicodeStringHasMoreChar32Than(s, start, length, number)) {
1362         errln("hasMoreChar32Than(%d, %d, %d)=%hd is wrong\n",
1363                 start, length, number, s.hasMoreChar32Than(start, length, number));
1364     }
1365 }
1366
1367 void
1368 UnicodeStringTest::TestCountChar32(void) {
1369     {
1370         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1371
1372         // test countChar32()
1373         // note that this also calls and tests u_countChar32(length>=0)
1374         if(
1375             s.countChar32()!=4 ||
1376             s.countChar32(1)!=4 ||
1377             s.countChar32(2)!=3 ||
1378             s.countChar32(2, 3)!=2 ||
1379             s.countChar32(2, 0)!=0
1380         ) {
1381             errln("UnicodeString::countChar32() failed");
1382         }
1383
1384         // NUL-terminate the string buffer and test u_countChar32(length=-1)
1385         const UChar *buffer=s.getTerminatedBuffer();
1386         if(
1387             u_countChar32(buffer, -1)!=4 ||
1388             u_countChar32(buffer+1, -1)!=4 ||
1389             u_countChar32(buffer+2, -1)!=3 ||
1390             u_countChar32(buffer+3, -1)!=3 ||
1391             u_countChar32(buffer+4, -1)!=2 ||
1392             u_countChar32(buffer+5, -1)!=1 ||
1393             u_countChar32(buffer+6, -1)!=0
1394         ) {
1395             errln("u_countChar32(length=-1) failed");
1396         }
1397
1398         // test u_countChar32() with bad input
1399         if(u_countChar32(NULL, 5)!=0 || u_countChar32(buffer, -2)!=0) {
1400             errln("u_countChar32(bad input) failed (returned non-zero counts)");
1401         }
1402     }
1403
1404     /* test data and variables for hasMoreChar32Than() */
1405     static const UChar str[]={
1406         0x61, 0x62, 0xd800, 0xdc00,
1407         0xd801, 0xdc01, 0x63, 0xd802,
1408         0x64, 0xdc03, 0x65, 0x66,
1409         0xd804, 0xdc04, 0xd805, 0xdc05,
1410         0x67
1411     };
1412     UnicodeString string(str, UPRV_LENGTHOF(str));
1413     int32_t start, length, number;
1414
1415     /* test hasMoreChar32Than() */
1416     for(length=string.length(); length>=0; --length) {
1417         for(start=0; start<=length; ++start) {
1418             for(number=-1; number<=((length-start)+2); ++number) {
1419                 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1420             }
1421         }
1422     }
1423
1424     /* test hasMoreChar32Than() with pinning */
1425     for(start=-1; start<=string.length()+1; ++start) {
1426         for(number=-1; number<=((string.length()-start)+2); ++number) {
1427             _testUnicodeStringHasMoreChar32Than(string, start, 0x7fffffff, number);
1428         }
1429     }
1430
1431     /* test hasMoreChar32Than() with a bogus string */
1432     string.setToBogus();
1433     for(length=-1; length<=1; ++length) {
1434         for(start=-1; start<=length; ++start) {
1435             for(number=-1; number<=((length-start)+2); ++number) {
1436                 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1437             }
1438         }
1439     }
1440 }
1441
/*
 * Tests the "bogus" state of UnicodeString:
 * - how a string becomes bogus (illegal setTo() arguments, setToBogus(),
 *   assignment from a bogus string),
 * - which operations leave a bogus string bogus,
 * - which operations turn a bogus string back into a normal one,
 * - that NULL input pointers yield empty rather than bogus strings,
 * - how bogus strings compare with each other and with normal strings.
 *
 * The steps share test1, test2 and test3 and depend on the state left behind
 * by the preceding steps, so their order matters.
 */
void
UnicodeStringTest::TestBogus() {
    UnicodeString   test1("This is a test");
    UnicodeString   test2("This is a test");
    UnicodeString   test3("Me too!");

    // test isBogus() and setToBogus()
    if (test1.isBogus() || test2.isBogus() || test3.isBogus()) {
        errln("A string returned TRUE for isBogus()!");
    }

    // NULL pointers are treated like empty strings
    // use other illegal arguments to make a bogus string
    test3.setTo(FALSE, test1.getBuffer(), -2);
    if(!test3.isBogus()) {
        errln("A bogus string returned FALSE for isBogus()!");
    }
    // equal strings must hash equally; the bogus string must hash differently from test1
    if (test1.hashCode() != test2.hashCode() || test1.hashCode() == test3.hashCode()) {
        errln("hashCode() failed");
    }
    // all buffer accessors must return 0 for a bogus string
    if(test3.getBuffer()!=0 || test3.getBuffer(20)!=0 || test3.getTerminatedBuffer()!=0) {
        errln("bogus.getBuffer()!=0");
    }
    // searching for a bogus string never finds anything
    if (test1.indexOf(test3) != -1) {
        errln("bogus.indexOf() != -1");
    }
    if (test1.lastIndexOf(test3) != -1) {
        errln("bogus.lastIndexOf() != -1");
    }
    // a bogus string must compare less than a normal string
    if (test1.caseCompare(test3, U_FOLD_CASE_DEFAULT) != 1 || test3.caseCompare(test1, U_FOLD_CASE_DEFAULT) != -1) {
        errln("caseCompare() doesn't work with bogus strings");
    }
    if (test1.compareCodePointOrder(test3) != 1 || test3.compareCodePointOrder(test1) != -1) {
        errln("compareCodePointOrder() doesn't work with bogus strings");
    }

    // verify that non-assignment modifications fail and do not revive a bogus string
    test3.setToBogus();
    test3.append((UChar)0x61);
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.append('a') worked but must not");
    }

    test3.setToBogus();
    test3.findAndReplace(UnicodeString((UChar)0x61), test2);
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.findAndReplace() worked but must not");
    }

    test3.setToBogus();
    test3.trim();
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.trim() revived bogus but must not");
    }

    test3.setToBogus();
    test3.remove(1);
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.remove(1) revived bogus but must not");
    }

    test3.setToBogus();
    if(!test3.setCharAt(0, 0x62).isBogus() || !test3.isEmpty()) {
        errln("bogus.setCharAt(0, 'b') worked but must not");
    }

    test3.setToBogus();
    if(test3.truncate(1) || !test3.isBogus() || !test3.isEmpty()) {
        errln("bogus.truncate(1) revived bogus but must not");
    }

    // verify that assignments revive a bogus string
    // (each check: bogus before, not bogus after, contents equal to the source)
    test3.setToBogus();
    if(!test3.isBogus() || (test3=test1).isBogus() || test3!=test1) {
        errln("bogus.operator=() failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.fastCopyFrom(test1).isBogus() || test3!=test1) {
        errln("bogus.fastCopyFrom() failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1).isBogus() || test3!=test1) {
        errln("bogus.setTo(UniStr) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1, 0).isBogus() || test3!=test1) {
        errln("bogus.setTo(UniStr, 0) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1, 0, 0x7fffffff).isBogus() || test3!=test1) {
        errln("bogus.setTo(UniStr, 0, len) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
        errln("bogus.setTo(const UChar *, len) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo((UChar)0x2028).isBogus() || test3!=UnicodeString((UChar)0x2028)) {
        errln("bogus.setTo(UChar) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo((UChar32)0x1d157).isBogus() || test3!=UnicodeString((UChar32)0x1d157)) {
        errln("bogus.setTo(UChar32) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(FALSE, test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
        errln("bogus.setTo(readonly alias) failed");
    }

    // writable alias to another string's buffer: very bad idea, just convenient for this test
    test3.setToBogus();
    if(!test3.isBogus() ||
            test3.setTo(const_cast<UChar *>(test1.getBuffer()),
                        test1.length(), test1.getCapacity()).isBogus() ||
            test3!=test1) {
        errln("bogus.setTo(writable alias) failed");
    }

    // verify simple, documented ways to turn a bogus string into an empty one
    test3.setToBogus();
    if(!test3.isBogus() || (test3=UnicodeString()).isBogus() || !test3.isEmpty()) {
        errln("bogus.operator=(UnicodeString()) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(UnicodeString()).isBogus() || !test3.isEmpty()) {
        errln("bogus.setTo(UnicodeString()) failed");
    }

    test3.setToBogus();
    if(test3.remove().isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
        errln("bogus.remove() failed");
    }

    test3.setToBogus();
    if(test3.remove(0, INT32_MAX).isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
        errln("bogus.remove(0, INT32_MAX) failed");
    }

    test3.setToBogus();
    if(test3.truncate(0) || test3.isBogus() || !test3.isEmpty()) {
        errln("bogus.truncate(0) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo((UChar32)-1).isBogus() || !test3.isEmpty()) {
        errln("bogus.setTo((UChar32)-1) failed");
    }

    static const UChar nul=0;

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(&nul, 0).isBogus() || !test3.isEmpty()) {
        errln("bogus.setTo(&nul, 0) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("setToBogus() failed to make a string bogus");
    }

    // assigning a bogus string makes the destination bogus too;
    // from here on test1 and test3 are both bogus
    test3.setToBogus();
    if(test1.isBogus() || !(test1=test3).isBogus()) {
        errln("normal=bogus failed to make the left string bogus");
    }

    // test that NULL primitive input string values are treated like
    // empty strings, not errors (bogus)
    test2.setTo((UChar32)0x10005);
    if(test2.insert(1, nullptr, 1).length()!=2) {
        errln("UniStr.insert(...nullptr...) should not modify the string but does");
    }

    UErrorCode errorCode=U_ZERO_ERROR;
    UnicodeString
        test4((const UChar *)NULL),
        test5(TRUE, (const UChar *)NULL, 1),
        test6((UChar *)NULL, 5, 5),
        test7((const char *)NULL, 3, NULL, errorCode);
    if(test4.isBogus() || test5.isBogus() || test6.isBogus() || test7.isBogus()) {
        errln("a constructor set to bogus for a NULL input string, should be empty");
    }

    test4.setTo(NULL, 3);
    test5.setTo(TRUE, (const UChar *)NULL, 1);
    test6.setTo((UChar *)NULL, 5, 5);
    if(test4.isBogus() || test5.isBogus() || test6.isBogus()) {
        errln("a setTo() set to bogus for a NULL input string, should be empty");
    }

    // test that bogus==bogus<any
    // (test1 and test3 are both bogus at this point)
    if(test1!=test3 || test1.compare(test3)!=0) {
        errln("bogus==bogus failed");
    }

    // a bogus string (test1) must sort before even an empty string (test2)
    test2.remove();
    if(test1>=test2 || !(test2>test1) || test1.compare(test2)>=0 || !(test2.compare(test1)>0)) {
        errln("bogus<empty failed");
    }
}
1650
1651 // StringEnumeration ------------------------------------------------------- ***
1652 // most of StringEnumeration is tested elsewhere
1653 // this test improves code coverage
1654
// Strings returned, in this order, by TestEnumeration below.
// The last two entries are much longer than the first three.
static const char *const
testEnumStrings[]={
    "a",
    "b",
    "c",
    "this is a long string which helps us test some buffer limits",
    "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"
};
1663
1664 class TestEnumeration : public StringEnumeration {
1665 public:
1666     TestEnumeration() : i(0) {}
1667
1668     virtual int32_t count(UErrorCode& /*status*/) const {
1669         return UPRV_LENGTHOF(testEnumStrings);
1670     }
1671
1672     virtual const UnicodeString *snext(UErrorCode &status) {
1673         if(U_SUCCESS(status) && i<UPRV_LENGTHOF(testEnumStrings)) {
1674             unistr=UnicodeString(testEnumStrings[i++], "");
1675             return &unistr;
1676         }
1677
1678         return NULL;
1679     }
1680
1681     virtual void reset(UErrorCode& /*status*/) {
1682         i=0;
1683     }
1684
1685     static inline UClassID getStaticClassID() {
1686         return (UClassID)&fgClassID;
1687     }
1688     virtual UClassID getDynamicClassID() const {
1689         return getStaticClassID();
1690     }
1691
1692 private:
1693     static const char fgClassID;
1694
1695     int32_t i;
1696 };
1697
1698 const char TestEnumeration::fgClassID=0;
1699
1700 void
1701 UnicodeStringTest::TestStringEnumeration() {
1702     UnicodeString s;
1703     TestEnumeration ten;
1704     int32_t i, length;
1705     UErrorCode status;
1706
1707     const UChar *pu;
1708     const char *pc;
1709
1710     // test the next() default implementation and ensureCharsCapacity()
1711     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1712         status=U_ZERO_ERROR;
1713         pc=ten.next(&length, status);
1714         s=UnicodeString(testEnumStrings[i], "");
1715         if(U_FAILURE(status) || pc==NULL || length!=s.length() || UnicodeString(pc, length, "")!=s) {
1716             errln("StringEnumeration.next(%d) failed", i);
1717         }
1718     }
1719     status=U_ZERO_ERROR;
1720     if(ten.next(&length, status)!=NULL) {
1721         errln("StringEnumeration.next(done)!=NULL");
1722     }
1723
1724     // test the unext() default implementation
1725     ten.reset(status);
1726     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1727         status=U_ZERO_ERROR;
1728         pu=ten.unext(&length, status);
1729         s=UnicodeString(testEnumStrings[i], "");
1730         if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
1731             errln("StringEnumeration.unext(%d) failed", i);
1732         }
1733     }
1734     status=U_ZERO_ERROR;
1735     if(ten.unext(&length, status)!=NULL) {
1736         errln("StringEnumeration.unext(done)!=NULL");
1737     }
1738
1739     // test that the default clone() implementation works, and returns NULL
1740     if(ten.clone()!=NULL) {
1741         errln("StringEnumeration.clone()!=NULL");
1742     }
1743
1744     // test that uenum_openFromStringEnumeration() works
1745     // Need a heap allocated string enumeration because it is adopted by the UEnumeration.
1746     StringEnumeration *newTen = new TestEnumeration;
1747     status=U_ZERO_ERROR;
1748     UEnumeration *uten = uenum_openFromStringEnumeration(newTen, &status);
1749     if (uten==NULL || U_FAILURE(status)) {
1750         errln("fail at file %s, line %d, UErrorCode is %s\n", __FILE__, __LINE__, u_errorName(status));
1751         return;
1752     }
1753
1754     // test  uenum_next()
1755     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1756         status=U_ZERO_ERROR;
1757         pc=uenum_next(uten, &length, &status);
1758         if(U_FAILURE(status) || pc==NULL || strcmp(pc, testEnumStrings[i]) != 0) {
1759             errln("File %s, line %d, StringEnumeration.next(%d) failed", __FILE__, __LINE__, i);
1760         }
1761     }
1762     status=U_ZERO_ERROR;
1763     if(uenum_next(uten, &length, &status)!=NULL) {
1764         errln("File %s, line %d, uenum_next(done)!=NULL");
1765     }
1766
1767     // test the uenum_unext()
1768     uenum_reset(uten, &status);
1769     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1770         status=U_ZERO_ERROR;
1771         pu=uenum_unext(uten, &length, &status);
1772         s=UnicodeString(testEnumStrings[i], "");
1773         if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
1774             errln("File %s, Line %d, uenum_unext(%d) failed", __FILE__, __LINE__, i);
1775         }
1776     }
1777     status=U_ZERO_ERROR;
1778     if(uenum_unext(uten, &length, &status)!=NULL) {
1779         errln("File %s, Line %d, uenum_unext(done)!=NULL" __FILE__, __LINE__);
1780     }
1781
1782     uenum_close(uten);
1783 }
1784
1785 /*
1786  * Namespace test, to make sure that macros like UNICODE_STRING include the
1787  * namespace qualifier.
1788  *
1789  * Define a (bogus) UnicodeString class in another namespace and check for ambiguity.
1790  */
1791 namespace bogus {
1792     class UnicodeString {
1793     public:
1794         enum EInvariant { kInvariant };
1795         UnicodeString() : i(1) {}
1796         UnicodeString(UBool /*isTerminated*/, const UChar * /*text*/, int32_t textLength) : i(textLength) {(void)i;}
1797         UnicodeString(const char * /*src*/, int32_t length, enum EInvariant /*inv*/
1798 ) : i(length) {}
1799     private:
1800         int32_t i;
1801     };
1802 }
1803
1804 void
1805 UnicodeStringTest::TestNameSpace() {
1806     // Provoke name collision unless the UnicodeString macros properly
1807     // qualify the icu::UnicodeString class.
1808     using namespace bogus;
1809
1810     // Use all UnicodeString macros from unistr.h.
1811     icu::UnicodeString s1=icu::UnicodeString("abc", 3, US_INV);
1812     icu::UnicodeString s2=UNICODE_STRING("def", 3);
1813     icu::UnicodeString s3=UNICODE_STRING_SIMPLE("ghi");
1814
1815     // Make sure the compiler does not optimize away instantiation of s1, s2, s3.
1816     icu::UnicodeString s4=s1+s2+s3;
1817     if(s4.length()!=9) {
1818         errln("Something wrong with UnicodeString::operator+().");
1819     }
1820 }
1821
1822 void
1823 UnicodeStringTest::TestUTF32() {
1824     // Input string length US_STACKBUF_SIZE to cause overflow of the
1825     // initially chosen fStackBuffer due to supplementary characters.
1826     static const UChar32 utf32[] = {
1827         0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a,
1828         0x10000, 0x20000, 0xe0000, 0x10ffff
1829     };
1830     static const UChar expected_utf16[] = {
1831         0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a,
1832         0xd800, 0xdc00, 0xd840, 0xdc00, 0xdb40, 0xdc00, 0xdbff, 0xdfff
1833     };
1834     UnicodeString from32 = UnicodeString::fromUTF32(utf32, UPRV_LENGTHOF(utf32));
1835     UnicodeString expected(FALSE, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1836     if(from32 != expected) {
1837         errln("UnicodeString::fromUTF32() did not create the expected string.");
1838     }
1839
1840     static const UChar utf16[] = {
1841         0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1842     };
1843     static const UChar32 expected_utf32[] = {
1844         0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0x10000, 0x10ffff
1845     };
1846     UChar32 result32[16];
1847     UErrorCode errorCode = U_ZERO_ERROR;
1848     int32_t length32 =
1849         UnicodeString(FALSE, utf16, UPRV_LENGTHOF(utf16)).
1850         toUTF32(result32, UPRV_LENGTHOF(result32), errorCode);
1851     if( length32 != UPRV_LENGTHOF(expected_utf32) ||
1852         0 != uprv_memcmp(result32, expected_utf32, length32*4) ||
1853         result32[length32] != 0
1854     ) {
1855         errln("UnicodeString::toUTF32() did not create the expected string.");
1856     }
1857 }
1858
1859 class TestCheckedArrayByteSink : public CheckedArrayByteSink {
1860 public:
1861     TestCheckedArrayByteSink(char* outbuf, int32_t capacity)
1862             : CheckedArrayByteSink(outbuf, capacity), calledFlush(FALSE) {}
1863     virtual void Flush() { calledFlush = TRUE; }
1864     UBool calledFlush;
1865 };
1866
1867 void
1868 UnicodeStringTest::TestUTF8() {
1869     static const uint8_t utf8[] = {
1870         // Code points:
1871         // 0x41, 0xd900,
1872         // 0x61, 0xdc00,
1873         // 0x110000, 0x5a,
1874         // 0x50000, 0x7a,
1875         // 0x10000, 0x20000,
1876         // 0xe0000, 0x10ffff
1877         0x41, 0xed, 0xa4, 0x80,
1878         0x61, 0xed, 0xb0, 0x80,
1879         0xf4, 0x90, 0x80, 0x80, 0x5a,
1880         0xf1, 0x90, 0x80, 0x80, 0x7a,
1881         0xf0, 0x90, 0x80, 0x80, 0xf0, 0xa0, 0x80, 0x80,
1882         0xf3, 0xa0, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1883     };
1884     static const UChar expected_utf16[] = {
1885         0x41, 0xfffd, 0xfffd, 0xfffd,
1886         0x61, 0xfffd, 0xfffd, 0xfffd,
1887         0xfffd,  0xfffd, 0xfffd, 0xfffd,0x5a,
1888         0xd900, 0xdc00, 0x7a,
1889         0xd800, 0xdc00, 0xd840, 0xdc00,
1890         0xdb40, 0xdc00, 0xdbff, 0xdfff
1891     };
1892     UnicodeString from8 = UnicodeString::fromUTF8(StringPiece((const char *)utf8, (int32_t)sizeof(utf8)));
1893     UnicodeString expected(FALSE, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1894
1895     if(from8 != expected) {
1896         errln("UnicodeString::fromUTF8(StringPiece) did not create the expected string.");
1897     }
1898     std::string utf8_string((const char *)utf8, sizeof(utf8));
1899     UnicodeString from8b = UnicodeString::fromUTF8(utf8_string);
1900     if(from8b != expected) {
1901         errln("UnicodeString::fromUTF8(std::string) did not create the expected string.");
1902     }
1903
1904     static const UChar utf16[] = {
1905         0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1906     };
1907     static const uint8_t expected_utf8[] = {
1908         0x41, 0xef, 0xbf, 0xbd, 0x61, 0xef, 0xbf, 0xbd, 0x5a, 0xf1, 0x90, 0x80, 0x80, 0x7a,
1909         0xf0, 0x90, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1910     };
1911     UnicodeString us(FALSE, utf16, UPRV_LENGTHOF(utf16));
1912
1913     char buffer[64];
1914     TestCheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer));
1915     us.toUTF8(sink);
1916     if( sink.NumberOfBytesWritten() != (int32_t)sizeof(expected_utf8) ||
1917         0 != uprv_memcmp(buffer, expected_utf8, sizeof(expected_utf8))
1918     ) {
1919         errln("UnicodeString::toUTF8() did not create the expected string.");
1920     }
1921     if(!sink.calledFlush) {
1922         errln("UnicodeString::toUTF8(sink) did not sink.Flush().");
1923     }
1924     // Initial contents for testing that toUTF8String() appends.
1925     std::string result8 = "-->";
1926     std::string expected8 = "-->" + std::string((const char *)expected_utf8, sizeof(expected_utf8));
1927     // Use the return value just for testing.
1928     std::string &result8r = us.toUTF8String(result8);
1929     if(result8r != expected8 || &result8r != &result8) {
1930         errln("UnicodeString::toUTF8String() did not create the expected string.");
1931     }
1932 }
1933
// Test if this compiler supports Return Value Optimization of unnamed temporary objects.
// Returns, by value, a read-only alias of the NUL-terminated string at uchars
// (length -1). The return statement deliberately constructs an unnamed
// temporary; do not introduce a named local here.
static UnicodeString wrapUChars(const UChar *uchars) {
    return UnicodeString(TRUE, uchars, -1);
}
1938
/*
 * Tests read-only aliasing of UChar buffers:
 * - the aliasing constructor and setTo(),
 * - that truncate(), remove() at either end and retainBetween() keep
 *   pointing into the original buffer,
 * - that getTerminatedBuffer() stops returning the original buffer after
 *   truncate()/remove() and does not modify it,
 * - tempSubString() and tempSubStringBetween().
 *
 * The checks mutate 'alias' and 'temp' step by step, so their order matters.
 */
void
UnicodeStringTest::TestReadOnlyAlias() {
    UChar uchars[]={ 0x61, 0x62, 0 };
    UnicodeString alias(TRUE, uchars, 2);
    if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
        errln("UnicodeString read-only-aliasing constructor does not behave as expected.");
        return;
    }
    // Shortening keeps the alias, but the buffer is no longer NUL-terminated
    // at the string's length (uchars[1] is still 'b').
    alias.truncate(1);
    if(alias.length()!=1 || alias.getBuffer()!=uchars) {
        errln("UnicodeString(read-only-alias).truncate() did not preserve aliasing as expected.");
    }
    if(alias.getTerminatedBuffer()==uchars) {
        errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
              "did not allocate and copy as expected.");
    }
    if(uchars[1]!=0x62) {
        errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
              "modified the original buffer.");
    }
    if(1!=u_strlen(alias.getTerminatedBuffer())) {
        errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
              "does not return a buffer terminated at the proper length.");
    }

    // Same checks again, this time aliasing via setTo() and emptying via remove().
    alias.setTo(TRUE, uchars, 2);
    if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
        errln("UnicodeString read-only-aliasing setTo() does not behave as expected.");
        return;
    }
    alias.remove();
    if(alias.length()!=0) {
        errln("UnicodeString(read-only-alias).remove() did not work.");
    }
    if(alias.getTerminatedBuffer()==uchars) {
        errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
              "did not un-alias as expected.");
    }
    if(uchars[0]!=0x61) {
        errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
              "modified the original buffer.");
    }
    if(0!=u_strlen(alias.getTerminatedBuffer())) {
        errln("UnicodeString.setTo(read-only-alias).remove().getTerminatedBuffer() "
              "does not return a buffer terminated at length 0.");
    }

    // Removing a prefix, a suffix, or both must only move the alias within
    // longString's buffer.
    UnicodeString longString=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789");
    alias.setTo(FALSE, longString.getBuffer(), longString.length());
    alias.remove(0, 10);
    if(longString.compare(10, INT32_MAX, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+10) {
        errln("UnicodeString.setTo(read-only-alias).remove(0, 10) did not preserve aliasing as expected.");
    }
    alias.setTo(FALSE, longString.getBuffer(), longString.length());
    alias.remove(27, 99);
    if(longString.compare(0, 27, alias)!=0 || alias.getBuffer()!=longString.getBuffer()) {
        errln("UnicodeString.setTo(read-only-alias).remove(27, 99) did not preserve aliasing as expected.");
    }
    alias.setTo(FALSE, longString.getBuffer(), longString.length());
    alias.retainBetween(6, 30);
    if(longString.compare(6, 24, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+6) {
        errln("UnicodeString.setTo(read-only-alias).retainBetween(6, 30) did not preserve aliasing as expected.");
    }

    // hasRVO is TRUE when the string returned by value from wrapUChars() still
    // points at abc; the buffer-pointer checks below are made only in that case.
    UChar abc[]={ 0x61, 0x62, 0x63, 0 };
    UBool hasRVO= wrapUChars(abc).getBuffer()==abc;

    UnicodeString temp;
    temp.fastCopyFrom(longString.tempSubString());
    if(temp!=longString || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
        errln("UnicodeString.tempSubString() failed");
    }
    // an out-of-range start (-3) is expected to behave like 0
    temp.fastCopyFrom(longString.tempSubString(-3, 5));
    if(longString.compare(0, 5, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
        errln("UnicodeString.tempSubString(-3, 5) failed");
    }
    temp.fastCopyFrom(longString.tempSubString(17));
    if(longString.compare(17, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+17)) {
        errln("UnicodeString.tempSubString(17) failed");
    }
    // a start beyond the end must give an empty string
    temp.fastCopyFrom(longString.tempSubString(99));
    if(!temp.isEmpty()) {
        errln("UnicodeString.tempSubString(99) failed");
    }
    temp.fastCopyFrom(longString.tempSubStringBetween(6));
    if(longString.compare(6, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+6)) {
        errln("UnicodeString.tempSubStringBetween(6) failed");
    }
    temp.fastCopyFrom(longString.tempSubStringBetween(8, 18));
    if(longString.compare(8, 10, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+8)) {
        errln("UnicodeString.tempSubStringBetween(8, 18) failed");
    }
    // a substring of a bogus string must itself be bogus
    UnicodeString bogusString;
    bogusString.setToBogus();
    temp.fastCopyFrom(bogusString.tempSubStringBetween(8, 18));
    if(!temp.isBogus()) {
        errln("UnicodeString.setToBogus().tempSubStringBetween(8, 18) failed");
    }
}
2038
2039 void
2040 UnicodeStringTest::doTestAppendable(UnicodeString &dest, Appendable &app) {
2041     static const UChar cde[3]={ 0x63, 0x64, 0x65 };
2042     static const UChar fg[3]={ 0x66, 0x67, 0 };
2043     if(!app.reserveAppendCapacity(12)) {
2044         errln("Appendable.reserve(12) failed");
2045     }
2046     app.appendCodeUnit(0x61);
2047     app.appendCodePoint(0x62);
2048     app.appendCodePoint(0x50000);
2049     app.appendString(cde, 3);
2050     app.appendString(fg, -1);
2051     UChar scratch[3];
2052     int32_t capacity=-1;
2053     UChar *buffer=app.getAppendBuffer(3, 3, scratch, 3, &capacity);
2054     if(capacity<3) {
2055         errln("Appendable.getAppendBuffer(min=3) returned capacity=%d<3", (int)capacity);
2056         return;
2057     }
2058     static const UChar hij[3]={ 0x68, 0x69, 0x6a };
2059     u_memcpy(buffer, hij, 3);
2060     app.appendString(buffer, 3);
2061     if(dest!=UNICODE_STRING_SIMPLE("ab\\U00050000cdefghij").unescape()) {
2062         errln("Appendable.append(...) failed");
2063     }
2064     buffer=app.getAppendBuffer(0, 3, scratch, 3, &capacity);
2065     if(buffer!=NULL || capacity!=0) {
2066         errln("Appendable.getAppendBuffer(min=0) failed");
2067     }
2068     capacity=1;
2069     buffer=app.getAppendBuffer(3, 3, scratch, 2, &capacity);
2070     if(buffer!=NULL || capacity!=0) {
2071         errln("Appendable.getAppendBuffer(scratch<min) failed");
2072     }
2073 }
2074
2075 class SimpleAppendable : public Appendable {
2076 public:
2077     explicit SimpleAppendable(UnicodeString &dest) : str(dest) {}
2078     virtual UBool appendCodeUnit(UChar c) { str.append(c); return TRUE; }
2079     SimpleAppendable &reset() { str.remove(); return *this; }
2080 private:
2081     UnicodeString &str;
2082 };
2083
2084 void
2085 UnicodeStringTest::TestAppendable() {
2086     UnicodeString dest;
2087     SimpleAppendable app(dest);
2088     doTestAppendable(dest, app);
2089 }
2090
2091 void
2092 UnicodeStringTest::TestUnicodeStringImplementsAppendable() {
2093     UnicodeString dest;
2094     UnicodeStringAppendable app(dest);
2095     doTestAppendable(dest, app);
2096 }
2097
2098 void
2099 UnicodeStringTest::TestSizeofUnicodeString() {
2100     // See the comments in unistr.h near the declaration of UnicodeString's fields.
2101     // See the API comments for UNISTR_OBJECT_SIZE.
2102     size_t sizeofUniStr=sizeof(UnicodeString);
2103     size_t expected=UNISTR_OBJECT_SIZE;
2104     if(expected!=sizeofUniStr) {
2105         // Possible cause: UNISTR_OBJECT_SIZE may not be a multiple of sizeof(pointer),
2106         // of the compiler might add more internal padding than expected.
2107         errln("sizeof(UnicodeString)=%d, expected UNISTR_OBJECT_SIZE=%d",
2108               (int)sizeofUniStr, (int)expected);
2109     }
2110     if(sizeofUniStr<32) {
2111         errln("sizeof(UnicodeString)=%d < 32, probably too small", (int)sizeofUniStr);
2112     }
2113     // We assume that the entire UnicodeString object,
2114     // minus the vtable pointer and 2 bytes for flags and short length,
2115     // is available for internal storage of UChars.
2116     int32_t expectedStackBufferLength=((int32_t)UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR;
2117     UnicodeString s;
2118     const UChar *emptyBuffer=s.getBuffer();
2119     for(int32_t i=0; i<expectedStackBufferLength; ++i) {
2120         s.append((UChar)0x2e);
2121     }
2122     const UChar *fullBuffer=s.getBuffer();
2123     if(fullBuffer!=emptyBuffer) {
2124         errln("unexpected reallocation when filling with assumed stack buffer size of %d",
2125               expectedStackBufferLength);
2126     }
2127     const UChar *terminatedBuffer=s.getTerminatedBuffer();
2128     if(terminatedBuffer==emptyBuffer) {
2129         errln("unexpected keeping stack buffer when overfilling assumed stack buffer size of %d",
2130               expectedStackBufferLength);
2131     }
2132 }
2133
2134 // Try to avoid clang -Wself-move warnings from s1 = std::move(s1);
2135 void moveFrom(UnicodeString &dest, UnicodeString &src) {
2136     dest = std::move(src);
2137 }
2138
/*
 * Tests swap() and move construction/assignment of UnicodeString for three
 * kinds of strings: a read-only alias (s1), a 100-unit string expected to be
 * on the heap (s2) and a short string expected to be in the stack buffer (s3).
 * Buffer pointers are compared to see that contents were moved, not copied.
 *
 * Each step depends on which string currently holds which contents,
 * so the order of the steps matters.
 */
void
UnicodeStringTest::TestMoveSwap() {
    static const UChar abc[3] = { 0x61, 0x62, 0x63 };  // "abc"
    UnicodeString s1(FALSE, abc, UPRV_LENGTHOF(abc));  // read-only alias
    UnicodeString s2(100, 0x7a, 100);  // 100 * 'z' should be on the heap
    UnicodeString s3("defg", 4, US_INV);  // in stack buffer
    const UChar *p = s2.getBuffer();  // remembered to recognize the 100 * 'z' buffer later
    // after this swap: s1 = 100 * 'z', s2 = alias of abc
    s1.swap(s2);
    if(s1.getBuffer() != p || s1.length() != 100 || s2.getBuffer() != abc || s2.length() != 3) {
        errln("UnicodeString.swap() did not swap");
    }
    // after this swap: s2 = "defg", s3 = alias of abc
    swap(s2, s3);
    if(s2 != UNICODE_STRING_SIMPLE("defg") || s3.getBuffer() != abc || s3.length() != 3) {
        errln("swap(UnicodeString) did not swap back");
    }
    // moving the 100 * 'z' string must hand over the buffer and leave the source bogus
    UnicodeString s4;
    s4 = std::move(s1);
    if(s4.getBuffer() != p || s4.length() != 100 || !s1.isBogus()) {
        errln("UnicodeString = std::move(heap) did not move");
    }
    // for "defg" only the contents are checked, not the buffer pointer
    UnicodeString s5;
    s5 = std::move(s2);
    if(s5 != UNICODE_STRING_SIMPLE("defg")) {
        errln("UnicodeString = std::move(stack) did not move");
    }
    // moving an alias must keep pointing at the aliased array
    UnicodeString s6;
    s6 = std::move(s3);
    if(s6.getBuffer() != abc || s6.length() != 3) {
        errln("UnicodeString = std::move(alias) did not move");
    }
    infoln("TestMoveSwap() with rvalue references");
    // same operations with an explicit rvalue-reference cast instead of std::move()
    s1 = static_cast<UnicodeString &&>(s6);
    if(s1.getBuffer() != abc || s1.length() != 3) {
        errln("UnicodeString move assignment operator did not move");
    }
    UnicodeString s7(static_cast<UnicodeString &&>(s4));
    if(s7.getBuffer() != p || s7.length() != 100 || !s4.isBogus()) {
        errln("UnicodeString move constructor did not move");
    }

    // Move self assignment leaves the object valid but in an undefined state.
    // Do it to make sure there is no crash,
    // but do not check for any particular resulting value.
    moveFrom(s1, s1);
    moveFrom(s2, s2);
    moveFrom(s3, s3);
    moveFrom(s4, s4);
    moveFrom(s5, s5);
    moveFrom(s6, s6);
    moveFrom(s7, s7);
    // Simple copy assignment must work.
    UnicodeString simple = UNICODE_STRING_SIMPLE("simple");
    s1 = s6 = s4 = s7 = simple;
    if(s1 != simple || s4 != simple || s6 != simple || s7 != simple) {
        errln("UnicodeString copy after self-move did not work");
    }
}
2196
2197 void
2198 UnicodeStringTest::TestUInt16Pointers() {
2199     static const uint16_t carr[] = { 0x61, 0x62, 0x63, 0 };
2200     uint16_t arr[4];
2201
2202     UnicodeString expected(u"abc");
2203     assertEquals("abc from pointer", expected, UnicodeString(carr));
2204     assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
2205     assertEquals("abc from read-only-alias pointer", expected, UnicodeString(TRUE, carr, 3));
2206
2207     UnicodeString alias(arr, 0, 4);
2208     alias.append(u'a').append(u'b').append(u'c');
2209     assertEquals("abc from writable alias", expected, alias);
2210     assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
2211
2212     UErrorCode errorCode = U_ZERO_ERROR;
2213     int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
2214     assertSuccess(WHERE, errorCode);
2215     assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));
2216 }
2217
2218 void
2219 UnicodeStringTest::TestWCharPointers() {
2220 #if U_SIZEOF_WCHAR_T==2
2221     static const wchar_t carr[] = { 0x61, 0x62, 0x63, 0 };
2222     wchar_t arr[4];
2223
2224     UnicodeString expected(u"abc");
2225     assertEquals("abc from pointer", expected, UnicodeString(carr));
2226     assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
2227     assertEquals("abc from read-only-alias pointer", expected, UnicodeString(TRUE, carr, 3));
2228
2229     UnicodeString alias(arr, 0, 4);
2230     alias.append(u'a').append(u'b').append(u'c');
2231     assertEquals("abc from writable alias", expected, alias);
2232     assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
2233
2234     UErrorCode errorCode = U_ZERO_ERROR;
2235     int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
2236     assertSuccess(WHERE, errorCode);
2237     assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));
2238 #endif
2239 }
2240
2241 void
2242 UnicodeStringTest::TestNullPointers() {
2243     assertTrue("empty from nullptr", UnicodeString(nullptr).isEmpty());
2244     assertTrue("empty from nullptr+length", UnicodeString(nullptr, 2).isEmpty());
2245     assertTrue("empty from read-only-alias nullptr", UnicodeString(TRUE, nullptr, 3).isEmpty());
2246
2247     UnicodeString alias(nullptr, 4, 4);  // empty, no alias
2248     assertTrue("empty from writable alias", alias.isEmpty());
2249     alias.append(u'a').append(u'b').append(u'c');
2250     UnicodeString expected(u"abc");
2251     assertEquals("abc from writable alias", expected, alias);
2252
2253     UErrorCode errorCode = U_ZERO_ERROR;
2254     UnicodeString(u"def").extract(nullptr, 0, errorCode);
2255     assertEquals("buffer overflow extracting to nullptr", U_BUFFER_OVERFLOW_ERROR, errorCode);
2256 }
2257
2258 void UnicodeStringTest::TestUnicodeStringInsertAppendToSelf() {
2259     IcuTestErrorCode status(*this, "TestUnicodeStringAppendToSelf");
2260
2261     // Test append operation
2262     UnicodeString str(u"foo ");
2263     str.append(str);
2264     str.append(str);
2265     str.append(str);
2266     assertEquals("", u"foo foo foo foo foo foo foo foo ", str);
2267
2268     // Test append operation with readonly alias to start
2269     str = UnicodeString(TRUE, u"foo ", 4);
2270     str.append(str);
2271     str.append(str);
2272     str.append(str);
2273     assertEquals("", u"foo foo foo foo foo foo foo foo ", str);
2274
2275     // Test append operation with aliased substring
2276     str = u"abcde";
2277     UnicodeString sub = str.tempSubString(1, 2);
2278     str.append(sub);
2279     assertEquals("", u"abcdebc", str);
2280
2281     // Test append operation with double-aliased substring
2282     str = UnicodeString(TRUE, u"abcde", 5);
2283     sub = str.tempSubString(1, 2);
2284     str.append(sub);
2285     assertEquals("", u"abcdebc", str);
2286
2287     // Test insert operation
2288     str = u"a-*b";
2289     str.insert(2, str);
2290     str.insert(4, str);
2291     str.insert(8, str);
2292     assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str);
2293
2294     // Test insert operation with readonly alias to start
2295     str = UnicodeString(TRUE, u"a-*b", 4);
2296     str.insert(2, str);
2297     str.insert(4, str);
2298     str.insert(8, str);
2299     assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str);
2300
2301     // Test insert operation with aliased substring
2302     str = u"abcde";
2303     sub = str.tempSubString(1, 3);
2304     str.insert(2, sub);
2305     assertEquals("", u"abbcdcde", str);
2306
2307     // Test insert operation with double-aliased substring
2308     str = UnicodeString(TRUE, u"abcde", 5);
2309     sub = str.tempSubString(1, 3);
2310     str.insert(2, sub);
2311     assertEquals("", u"abbcdcde", str);
2312 }