icuSources/test/intltest/ustrtest.cpp

   1 /********************************************************************
   2  * COPYRIGHT:
   3  * Copyright (c) 1997-2012, International Business Machines Corporation and
   4  * others. All Rights Reserved.
   5  ********************************************************************/
   6
   7 #include "ustrtest.h"
   8 #include "unicode/appendable.h"
   9 #include "unicode/std_string.h"
  10 #include "unicode/unistr.h"
  11 #include "unicode/uchar.h"
  12 #include "unicode/ustring.h"
  13 #include "unicode/locid.h"
  14 #include "unicode/ucnv.h"
  15 #include "unicode/uenum.h"
  16 #include "unicode/utf16.h"
  17 #include "cmemory.h"
  18 #include "charstr.h"
  19
  20 #if 0
  21 #include "unicode/ustream.h"
  22
  23 #include <iostream>
  24 using namespace std;
  25
  26 #endif
  27
  28 #define LENGTHOF(array) (int32_t)((sizeof(array)/sizeof((array)[0])))
  29
  30 UnicodeStringTest::~UnicodeStringTest() {}
  31
  32 void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *par)
  33 {
  34     if (exec) logln("TestSuite UnicodeStringTest: ");
  35     switch (index) {
  36         case 0:
  37             name = "StringCaseTest";
  38             if (exec) {
  39                 logln("StringCaseTest---"); logln("");
  40                 StringCaseTest test;
  41                 callTest(test, par);
  42             }
  43             break;
  44         case 1: name = "TestBasicManipulation"; if (exec) TestBasicManipulation(); break;
  45         case 2: name = "TestCompare"; if (exec) TestCompare(); break;
  46         case 3: name = "TestExtract"; if (exec) TestExtract(); break;
  47         case 4: name = "TestRemoveReplace"; if (exec) TestRemoveReplace(); break;
  48         case 5: name = "TestSearching"; if (exec) TestSearching(); break;
  49         case 6: name = "TestSpacePadding"; if (exec) TestSpacePadding(); break;
  50         case 7: name = "TestPrefixAndSuffix"; if (exec) TestPrefixAndSuffix(); break;
  51         case 8: name = "TestFindAndReplace"; if (exec) TestFindAndReplace(); break;
  52         case 9: name = "TestBogus"; if (exec) TestBogus(); break;
  53         case 10: name = "TestReverse"; if (exec) TestReverse(); break;
  54         case 11: name = "TestMiscellaneous"; if (exec) TestMiscellaneous(); break;
  55         case 12: name = "TestStackAllocation"; if (exec) TestStackAllocation(); break;
  56         case 13: name = "TestUnescape"; if (exec) TestUnescape(); break;
  57         case 14: name = "TestCountChar32"; if (exec) TestCountChar32(); break;
  58         case 15: name = "TestStringEnumeration"; if (exec) TestStringEnumeration(); break;
  59         case 16: name = "TestNameSpace"; if (exec) TestNameSpace(); break;
  60         case 17: name = "TestUTF32"; if (exec) TestUTF32(); break;
  61         case 18: name = "TestUTF8"; if (exec) TestUTF8(); break;
  62         case 19: name = "TestReadOnlyAlias"; if (exec) TestReadOnlyAlias(); break;
  63         case 20: name = "TestAppendable"; if (exec) TestAppendable(); break;
  64         case 21: name = "TestUnicodeStringImplementsAppendable"; if (exec) TestUnicodeStringImplementsAppendable(); break;
  65         case 22: name = "TestSizeofUnicodeString"; if (exec) TestSizeofUnicodeString(); break;
  66         case 23: name = "TestStartsWithAndEndsWithNulTerminated"; if (exec) TestStartsWithAndEndsWithNulTerminated(); break;
  67
  68         default: name = ""; break; //needed to end loop
  69     }
  70 }
  71
  72 void
  73 UnicodeStringTest::TestBasicManipulation()
  74 {
  75     UnicodeString   test1("Now is the time for all men to come swiftly to the aid of the party.\n");
  76     UnicodeString   expectedValue;
  77     UnicodeString   *c;
  78
  79     c=(UnicodeString *)test1.clone();
  80     test1.insert(24, "good ");
  81     expectedValue = "Now is the time for all good men to come swiftly to the aid of the party.\n";
  82     if (test1 != expectedValue)
  83         errln("insert() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
  84
  85     c->insert(24, "good ");
  86     if(*c != expectedValue) {
  87         errln("clone()->insert() failed:  expected \"" + expectedValue + "\"\n,got \"" + *c + "\"");
  88     }
  89     delete c;
  90
  91     test1.remove(41, 8);
  92     expectedValue = "Now is the time for all good men to come to the aid of the party.\n";
  93     if (test1 != expectedValue)
  94         errln("remove() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
  95
  96     test1.replace(58, 6, "ir country");
  97     expectedValue = "Now is the time for all good men to come to the aid of their country.\n";
  98     if (test1 != expectedValue)
  99         errln("replace() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
 100
 101     UChar     temp[80];
 102     test1.extract(0, 15, temp);
 103
 104     UnicodeString       test2(temp, 15);
 105
 106     expectedValue = "Now is the time";
 107     if (test2 != expectedValue)
 108         errln("extract() failed:  expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
 109
 110     test2 += " for me to go!\n";
 111     expectedValue = "Now is the time for me to go!\n";
 112     if (test2 != expectedValue)
 113         errln("operator+=() failed:  expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
 114
 115     if (test1.length() != 70)
 116         errln(UnicodeString("length() failed: expected 70, got ") + test1.length());
 117     if (test2.length() != 30)
 118         errln(UnicodeString("length() failed: expected 30, got ") + test2.length());
 119
 120     UnicodeString test3;
 121     test3.append((UChar32)0x20402);
 122     if(test3 != CharsToUnicodeString("\\uD841\\uDC02")){
 123         errln((UnicodeString)"append failed for UChar32, expected \"\\\\ud841\\\\udc02\", got " + prettify(test3));
 124     }
 125     if(test3.length() != 2){
 126         errln(UnicodeString("append or length failed for UChar32, expected 2, got ") + test3.length());
 127     }
 128     test3.append((UChar32)0x0074);
 129     if(test3 != CharsToUnicodeString("\\uD841\\uDC02t")){
 130         errln((UnicodeString)"append failed for UChar32, expected \"\\\\uD841\\\\uDC02t\", got " + prettify(test3));
 131     }
 132     if(test3.length() != 3){
 133         errln((UnicodeString)"append or length failed for UChar32, expected 2, got " + test3.length());
 134     }
 135
 136     // test some UChar32 overloads
 137     if( test3.setTo((UChar32)0x10330).length() != 2 ||
 138         test3.insert(0, (UChar32)0x20100).length() != 4 ||
 139         test3.replace(2, 2, (UChar32)0xe0061).length() != 4 ||
 140         (test3 = (UChar32)0x14001).length() != 2
 141     ) {
 142         errln((UnicodeString)"simple UChar32 overloads for replace, insert, setTo or = failed");
 143     }
 144
 145     {
 146         // test moveIndex32()
 147         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
 148
 149         if(
 150             s.moveIndex32(2, -1)!=0 ||
 151             s.moveIndex32(2, 1)!=4 ||
 152             s.moveIndex32(2, 2)!=5 ||
 153             s.moveIndex32(5, -2)!=2 ||
 154             s.moveIndex32(0, -1)!=0 ||
 155             s.moveIndex32(6, 1)!=6
 156         ) {
 157             errln("UnicodeString::moveIndex32() failed");
 158         }
 159
 160         if(s.getChar32Start(1)!=0 || s.getChar32Start(2)!=2) {
 161             errln("UnicodeString::getChar32Start() failed");
 162         }
 163
 164         if(s.getChar32Limit(1)!=2 || s.getChar32Limit(2)!=2) {
 165             errln("UnicodeString::getChar32Limit() failed");
 166         }
 167     }
 168
 169     {
 170         // test new 2.2 constructors and setTo function that parallel Java's substring function.
 171         UnicodeString src("Hello folks how are you?");
 172         UnicodeString target1("how are you?");
 173         if (target1 != UnicodeString(src, 12)) {
 174             errln("UnicodeString(const UnicodeString&, int32_t) failed");
 175         }
 176         UnicodeString target2("folks");
 177         if (target2 != UnicodeString(src, 6, 5)) {
 178             errln("UnicodeString(const UnicodeString&, int32_t, int32_t) failed");
 179         }
 180         if (target1 != target2.setTo(src, 12)) {
 181             errln("UnicodeString::setTo(const UnicodeString&, int32_t) failed");
 182         }
 183     }
 184
 185     {
 186         // op+ is new in ICU 2.8
 187         UnicodeString s=UnicodeString("abc", "")+UnicodeString("def", "")+UnicodeString("ghi", "");
 188         if(s!=UnicodeString("abcdefghi", "")) {
 189             errln("operator+(UniStr, UniStr) failed");
 190         }
 191     }
 192
 193     {
 194         // tests for Jitterbug 2360
 195         // verify that APIs with source pointer + length accept length == -1
 196         // mostly test only where modified, only few functions did not already do this
 197         if(UnicodeString("abc", -1, "")!=UnicodeString("abc", "")) {
 198             errln("UnicodeString(codepageData, dataLength, codepage) does not work with dataLength==-1");
 199         }
 200
 201         UChar buffer[10]={ 0x61, 0x62, 0x20ac, 0xd900, 0xdc05, 0,   0x62, 0xffff, 0xdbff, 0xdfff };
 202         UnicodeString s, t(buffer, -1, LENGTHOF(buffer));
 203
 204         if(s.setTo(buffer, -1, LENGTHOF(buffer)).length()!=u_strlen(buffer)) {
 205             errln("UnicodeString.setTo(buffer, length, capacity) does not work with length==-1");
 206         }
 207         if(t.length()!=u_strlen(buffer)) {
 208             errln("UnicodeString(buffer, length, capacity) does not work with length==-1");
 209         }
 210
 211         if(0!=s.caseCompare(buffer, -1, U_FOLD_CASE_DEFAULT)) {
 212             errln("UnicodeString.caseCompare(const UChar *, length, options) does not work with length==-1");
 213         }
 214         if(0!=s.caseCompare(0, s.length(), buffer, U_FOLD_CASE_DEFAULT)) {
 215             errln("UnicodeString.caseCompare(start, _length, const UChar *, options) does not work");
 216         }
 217
 218         buffer[u_strlen(buffer)]=0xe4;
 219         UnicodeString u(buffer, -1, LENGTHOF(buffer));
 220         if(s.setTo(buffer, -1, LENGTHOF(buffer)).length()!=LENGTHOF(buffer)) {
 221             errln("UnicodeString.setTo(buffer without NUL, length, capacity) does not work with length==-1");
 222         }
 223         if(u.length()!=LENGTHOF(buffer)) {
 224             errln("UnicodeString(buffer without NUL, length, capacity) does not work with length==-1");
 225         }
 226
 227         static const char cs[]={ 0x61, (char)0xe4, (char)0x85, 0 };
 228         UConverter *cnv;
 229         UErrorCode errorCode=U_ZERO_ERROR;
 230
 231         cnv=ucnv_open("ISO-8859-1", &errorCode);
 232         UnicodeString v(cs, -1, cnv, errorCode);
 233         ucnv_close(cnv);
 234         if(v!=CharsToUnicodeString("a\\xe4\\x85")) {
 235             errln("UnicodeString(const char *, length, cnv, errorCode) does not work with length==-1");
 236         }
 237     }
 238
 239 #if U_CHARSET_IS_UTF8
 240     {
 241         // Test the hardcoded-UTF-8 UnicodeString optimizations.
 242         static const uint8_t utf8[]={ 0x61, 0xC3, 0xA4, 0xC3, 0x9F, 0xE4, 0xB8, 0x80, 0 };
 243         static const UChar utf16[]={ 0x61, 0xE4, 0xDF, 0x4E00 };
 244         UnicodeString from8a = UnicodeString((const char *)utf8);
 245         UnicodeString from8b = UnicodeString((const char *)utf8, (int32_t)sizeof(utf8)-1);
 246         UnicodeString from16(FALSE, utf16, LENGTHOF(utf16));
 247         if(from8a != from16 || from8b != from16) {
 248             errln("UnicodeString(const char * U_CHARSET_IS_UTF8) failed");
 249         }
 250         char buffer[16];
 251         int32_t length8=from16.extract(0, 0x7fffffff, buffer, (uint32_t)sizeof(buffer));
 252         if(length8!=((int32_t)sizeof(utf8)-1) || 0!=uprv_memcmp(buffer, utf8, sizeof(utf8))) {
 253             errln("UnicodeString::extract(char * U_CHARSET_IS_UTF8) failed");
 254         }
 255         length8=from16.extract(1, 2, buffer, (uint32_t)sizeof(buffer));
 256         if(length8!=4 || buffer[length8]!=0 || 0!=uprv_memcmp(buffer, utf8+1, length8)) {
 257             errln("UnicodeString::extract(substring to char * U_CHARSET_IS_UTF8) failed");
 258         }
 259     }
 260 #endif
 261 }
 262
 263 void
 264 UnicodeStringTest::TestCompare()
 265 {
 266     UnicodeString   test1("this is a test");
 267     UnicodeString   test2("this is a test");
 268     UnicodeString   test3("this is a test of the emergency broadcast system");
 269     UnicodeString   test4("never say, \"this is a test\"!!");
 270
 271     UnicodeString   test5((UChar)0x5000);
 272     UnicodeString   test6((UChar)0x5100);
 273
 274     UChar         uniChars[] = { 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73,
 275                  0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0 };
 276     char            chars[] = "this is a test";
 277
 278     // test operator== and operator!=
 279     if (test1 != test2 || test1 == test3 || test1 == test4)
 280         errln("operator== or operator!= failed");
 281
 282     // test operator> and operator<
 283     if (test1 > test2 || test1 < test2 || !(test1 < test3) || !(test1 > test4) ||
 284         !(test5 < test6)
 285     ) {
 286         errln("operator> or operator< failed");
 287     }
 288
 289     // test operator>= and operator<=
 290     if (!(test1 >= test2) || !(test1 <= test2) || !(test1 <= test3) || !(test1 >= test4))
 291         errln("operator>= or operator<= failed");
 292
 293     // test compare(UnicodeString)
 294     if (test1.compare(test2) != 0 || test1.compare(test3) >= 0 || test1.compare(test4) <= 0)
 295         errln("compare(UnicodeString) failed");
 296
 297     //test compare(offset, length, UnicodeString)
 298     if(test1.compare(0, 14, test2) != 0 ||
 299         test3.compare(0, 14, test2) != 0 ||
 300         test4.compare(12, 14, test2) != 0 ||
 301         test3.compare(0, 18, test1) <=0  )
 302         errln("compare(offset, length, UnicodeString) failes");
 303
 304     // test compare(UChar*)
 305     if (test2.compare(uniChars) != 0 || test3.compare(uniChars) <= 0 || test4.compare(uniChars) >= 0)
 306         errln("compare(UChar*) failed");
 307
 308     // test compare(char*)
 309     if (test2.compare(chars) != 0 || test3.compare(chars) <= 0 || test4.compare(chars) >= 0)
 310         errln("compare(char*) failed");
 311
 312     // test compare(UChar*, length)
 313     if (test1.compare(uniChars, 4) <= 0 || test1.compare(uniChars, 4) <= 0)
 314         errln("compare(UChar*, length) failed");
 315
 316     // test compare(thisOffset, thisLength, that, thatOffset, thatLength)
 317     if (test1.compare(0, 14, test2, 0, 14) != 0
 318     || test1.compare(0, 14, test3, 0, 14) != 0
 319     || test1.compare(0, 14, test4, 12, 14) != 0)
 320         errln("1. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
 321
 322     if (test1.compare(10, 4, test2, 0, 4) >= 0
 323     || test1.compare(10, 4, test3, 22, 9) <= 0
 324     || test1.compare(10, 4, test4, 22, 4) != 0)
 325         errln("2. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
 326
 327     // test compareBetween
 328     if (test1.compareBetween(0, 14, test2, 0, 14) != 0 || test1.compareBetween(0, 14, test3, 0, 14) != 0
 329                     || test1.compareBetween(0, 14, test4, 12, 26) != 0)
 330         errln("compareBetween failed");
 331
 332     if (test1.compareBetween(10, 14, test2, 0, 4) >= 0 || test1.compareBetween(10, 14, test3, 22, 31) <= 0
 333                     || test1.compareBetween(10, 14, test4, 22, 26) != 0)
 334         errln("compareBetween failed");
 335
 336     // test compare() etc. with strings that share a buffer but are not equal
 337     test2=test1; // share the buffer, length() too large for the stackBuffer
 338     test2.truncate(1); // change only the length, not the buffer
 339     if( test1==test2 || test1<=test2 ||
 340         test1.compare(test2)<=0 ||
 341         test1.compareCodePointOrder(test2)<=0 ||
 342         test1.compareCodePointOrder(0, INT32_MAX, test2)<=0 ||
 343         test1.compareCodePointOrder(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
 344         test1.compareCodePointOrderBetween(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
 345         test1.caseCompare(test2, U_FOLD_CASE_DEFAULT)<=0
 346     ) {
 347         errln("UnicodeStrings that share a buffer but have different lengths compare as equal");
 348     }
 349
 350     /* test compareCodePointOrder() */
 351     {
 352         /* these strings are in ascending order */
 353         static const UChar strings[][4]={
 354             { 0x61, 0 },                    /* U+0061 */
 355             { 0x20ac, 0xd801, 0 },          /* U+20ac U+d801 */
 356             { 0x20ac, 0xd800, 0xdc00, 0 },  /* U+20ac U+10000 */
 357             { 0xd800, 0 },                  /* U+d800 */
 358             { 0xd800, 0xff61, 0 },          /* U+d800 U+ff61 */
 359             { 0xdfff, 0 },                  /* U+dfff */
 360             { 0xff61, 0xdfff, 0 },          /* U+ff61 U+dfff */
 361             { 0xff61, 0xd800, 0xdc02, 0 },  /* U+ff61 U+10002 */
 362             { 0xd800, 0xdc02, 0 },          /* U+10002 */
 363             { 0xd84d, 0xdc56, 0 }           /* U+23456 */
 364         };
 365         UnicodeString u[20]; // must be at least as long as strings[]
 366         int32_t i;
 367
 368         for(i=0; i<(int32_t)(sizeof(strings)/sizeof(strings[0])); ++i) {
 369             u[i]=UnicodeString(TRUE, strings[i], -1);
 370         }
 371
 372         for(i=0; i<(int32_t)(sizeof(strings)/sizeof(strings[0])-1); ++i) {
 373             if(u[i].compareCodePointOrder(u[i+1])>=0 || u[i].compareCodePointOrder(0, INT32_MAX, u[i+1].getBuffer())>=0) {
 374                 errln("error: UnicodeString::compareCodePointOrder() fails for string %d and the following one\n", i);
 375             }
 376         }
 377     }
 378
 379     /* test caseCompare() */
 380     {
 381         static const UChar
 382         _mixed[]=               { 0x61, 0x42, 0x131, 0x3a3, 0xdf,       0x130,       0x49,  0xfb03,           0xd93f, 0xdfff, 0 },
 383         _otherDefault[]=        { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x69, 0x307, 0x69,  0x46, 0x66, 0x49, 0xd93f, 0xdfff, 0 },
 384         _otherExcludeSpecialI[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x69,        0x131, 0x66, 0x46, 0x69, 0xd93f, 0xdfff, 0 },
 385         _different[]=           { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x130,       0x49,  0x46, 0x66, 0x49, 0xd93f, 0xdffd, 0 };
 386
 387         UnicodeString
 388             mixed(TRUE, _mixed, -1),
 389             otherDefault(TRUE, _otherDefault, -1),
 390             otherExcludeSpecialI(TRUE, _otherExcludeSpecialI, -1),
 391             different(TRUE, _different, -1);
 392
 393         int8_t result;
 394
 395         /* test caseCompare() */
 396         result=mixed.caseCompare(otherDefault, U_FOLD_CASE_DEFAULT);
 397         if(result!=0 || 0!=mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_DEFAULT)) {
 398             errln("error: mixed.caseCompare(other, default)=%ld instead of 0\n", result);
 399         }
 400         result=mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
 401         if(result!=0) {
 402             errln("error: mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=%ld instead of 0\n", result);
 403         }
 404         result=mixed.caseCompare(otherDefault, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
 405         if(result==0 || 0==mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
 406             errln("error: mixed.caseCompare(other, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=0 instead of !=0\n");
 407         }
 408
 409         /* test caseCompare() */
 410         result=mixed.caseCompare(different, U_FOLD_CASE_DEFAULT);
 411         if(result<=0) {
 412             errln("error: mixed.caseCompare(different, default)=%ld instead of positive\n", result);
 413         }
 414
 415         /* test caseCompare() - include the folded sharp s (U+00df) with different lengths */
 416         result=mixed.caseCompare(1, 4, different, 1, 5, U_FOLD_CASE_DEFAULT);
 417         if(result!=0 || 0!=mixed.caseCompareBetween(1, 5, different, 1, 6, U_FOLD_CASE_DEFAULT)) {
 418             errln("error: mixed.caseCompare(mixed, 1, 4, different, 1, 5, default)=%ld instead of 0\n", result);
 419         }
 420
 421         /* test caseCompare() - stop in the middle of the sharp s (U+00df) */
 422         result=mixed.caseCompare(1, 4, different, 1, 4, U_FOLD_CASE_DEFAULT);
 423         if(result<=0) {
 424             errln("error: mixed.caseCompare(1, 4, different, 1, 4, default)=%ld instead of positive\n", result);
 425         }
 426     }
 427
 428     // test that srcLength=-1 is handled in functions that
 429     // take input const UChar */int32_t srcLength (j785)
 430     {
 431         static const UChar u[]={ 0x61, 0x308, 0x62, 0 };
 432         UnicodeString s=UNICODE_STRING("a\\u0308b", 8).unescape();
 433
 434         if(s.compare(u, -1)!=0 || s.compare(0, 999, u, 0, -1)!=0) {
 435             errln("error UnicodeString::compare(..., const UChar *, srcLength=-1) does not work");
 436         }
 437
 438         if(s.compareCodePointOrder(u, -1)!=0 || s.compareCodePointOrder(0, 999, u, 0, -1)!=0) {
 439             errln("error UnicodeString::compareCodePointOrder(..., const UChar *, srcLength=-1, ...) does not work");
 440         }
 441
 442         if(s.caseCompare(u, -1, U_FOLD_CASE_DEFAULT)!=0 || s.caseCompare(0, 999, u, 0, -1, U_FOLD_CASE_DEFAULT)!=0) {
 443             errln("error UnicodeString::caseCompare(..., const UChar *, srcLength=-1, ...) does not work");
 444         }
 445
 446         if(s.indexOf(u, 1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0)!=1) {
 447             errln("error UnicodeString::indexOf(const UChar *, srcLength=-1, ...) does not work");
 448         }
 449
 450         if(s.lastIndexOf(u, 1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0)!=1) {
 451             errln("error UnicodeString::lastIndexOf(const UChar *, srcLength=-1, ...) does not work");
 452         }
 453
 454         UnicodeString s2, s3;
 455         s2.replace(0, 0, u+1, -1);
 456         s3.replace(0, 0, u, 1, -1);
 457         if(s.compare(1, 999, s2)!=0 || s2!=s3) {
 458             errln("error UnicodeString::replace(..., const UChar *, srcLength=-1, ...) does not work");
 459         }
 460     }
 461 }
 462
 463 void
 464 UnicodeStringTest::TestExtract()
 465 {
 466     UnicodeString  test1("Now is the time for all good men to come to the aid of their country.", "");
 467     UnicodeString  test2;
 468     UChar          test3[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
 469     char           test4[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
 470     UnicodeString  test5;
 471     char           test6[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
 472
 473     test1.extract(11, 12, test2);
 474     test1.extract(11, 12, test3);
 475     if (test1.extract(11, 12, test4) != 12 || test4[12] != 0) {
 476         errln("UnicodeString.extract(char *) failed to return the correct size of destination buffer.");
 477     }
 478
 479     // test proper pinning in extractBetween()
 480     test1.extractBetween(-3, 7, test5);
 481     if(test5!=UNICODE_STRING("Now is ", 7)) {
 482         errln("UnicodeString.extractBetween(-3, 7) did not pin properly.");
 483     }
 484
 485     test1.extractBetween(11, 23, test5);
 486     if (test1.extract(60, 71, test6) != 9) {
 487         errln("UnicodeString.extract() failed to return the correct size of destination buffer for end of buffer.");
 488     }
 489     if (test1.extract(11, 12, test6) != 12) {
 490         errln("UnicodeString.extract() failed to return the correct size of destination buffer.");
 491     }
 492
 493     // convert test4 back to Unicode for comparison
 494     UnicodeString test4b(test4, 12);
 495
 496     if (test1.extract(11, 12, (char *)NULL) != 12) {
 497         errln("UnicodeString.extract(NULL) failed to return the correct size of destination buffer.");
 498     }
 499     if (test1.extract(11, -1, test6) != 0) {
 500         errln("UnicodeString.extract(-1) failed to stop reading the string.");
 501     }
 502
 503     for (int32_t i = 0; i < 12; i++) {
 504         if (test1.charAt((int32_t)(11 + i)) != test2.charAt(i)) {
 505             errln(UnicodeString("extracting into a UnicodeString failed at position ") + i);
 506             break;
 507         }
 508         if (test1.charAt((int32_t)(11 + i)) != test3[i]) {
 509             errln(UnicodeString("extracting into an array of UChar failed at position ") + i);
 510             break;
 511         }
 512         if (((char)test1.charAt((int32_t)(11 + i))) != test4b.charAt(i)) {
 513             errln(UnicodeString("extracting into an array of char failed at position ") + i);
 514             break;
 515         }
 516         if (test1.charAt((int32_t)(11 + i)) != test5.charAt(i)) {
 517             errln(UnicodeString("extracting with extractBetween failed at position ") + i);
 518             break;
 519         }
 520     }
 521
 522     // test preflighting and overflows with invariant conversion
 523     if (test1.extract(0, 10, (char *)NULL, "") != 10) {
 524         errln("UnicodeString.extract(0, 10, (char *)NULL, \"\") != 10");
 525     }
 526
 527     test4[2] = (char)0xff;
 528     if (test1.extract(0, 10, test4, 2, "") != 10) {
 529         errln("UnicodeString.extract(0, 10, test4, 2, \"\") != 10");
 530     }
 531     if (test4[2] != (char)0xff) {
 532         errln("UnicodeString.extract(0, 10, test4, 2, \"\") overwrote test4[2]");
 533     }
 534
 535     {
 536         // test new, NUL-terminating extract() function
 537         UnicodeString s("terminate", "");
 538         UChar dest[20]={
 539             0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5,
 540             0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5
 541         };
 542         UErrorCode errorCode;
 543         int32_t length;
 544
 545         errorCode=U_ZERO_ERROR;
 546         length=s.extract((UChar *)NULL, 0, errorCode);
 547         if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
 548             errln("UnicodeString.extract(NULL, 0)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)", length, s.length(), u_errorName(errorCode));
 549         }
 550
 551         errorCode=U_ZERO_ERROR;
 552         length=s.extract(dest, s.length()-1, errorCode);
 553         if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
 554             errln("UnicodeString.extract(dest too short)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)",
 555                 length, u_errorName(errorCode), s.length());
 556         }
 557
 558         errorCode=U_ZERO_ERROR;
 559         length=s.extract(dest, s.length(), errorCode);
 560         if(errorCode!=U_STRING_NOT_TERMINATED_WARNING || length!=s.length()) {
 561             errln("UnicodeString.extract(dest just right without NUL)==%d (%s) expected %d (U_STRING_NOT_TERMINATED_WARNING)",
 562                 length, u_errorName(errorCode), s.length());
 563         }
 564         if(dest[length-1]!=s[length-1] || dest[length]!=0xa5) {
 565             errln("UnicodeString.extract(dest just right without NUL) did not extract the string correctly");
 566         }
 567
 568         errorCode=U_ZERO_ERROR;
 569         length=s.extract(dest, s.length()+1, errorCode);
 570         if(errorCode!=U_ZERO_ERROR || length!=s.length()) {
 571             errln("UnicodeString.extract(dest large enough)==%d (%s) expected %d (U_ZERO_ERROR)",
 572                 length, u_errorName(errorCode), s.length());
 573         }
 574         if(dest[length-1]!=s[length-1] || dest[length]!=0 || dest[length+1]!=0xa5) {
 575             errln("UnicodeString.extract(dest large enough) did not extract the string correctly");
 576         }
 577     }
 578
 579     {
 580         // test new UConverter extract() and constructor
 581         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
 582         char buffer[32];
 583         static const char expect[]={
 584             (char)0xf0, (char)0xaf, (char)0xa6, (char)0x99,
 585             (char)0xf0, (char)0x9d, (char)0x85, (char)0x9f,
 586             (char)0xc3, (char)0x84,
 587             (char)0xe1, (char)0xbb, (char)0x90
 588         };
 589         UErrorCode errorCode=U_ZERO_ERROR;
 590         UConverter *cnv=ucnv_open("UTF-8", &errorCode);
 591         int32_t length;
 592
 593         if(U_SUCCESS(errorCode)) {
 594             // test preflighting
 595             if( (length=s.extract(NULL, 0, cnv, errorCode))!=13 ||
 596                 errorCode!=U_BUFFER_OVERFLOW_ERROR
 597             ) {
 598                 errln("UnicodeString::extract(NULL, UConverter) preflighting failed (length=%ld, %s)",
 599                       length, u_errorName(errorCode));
 600             }
 601             errorCode=U_ZERO_ERROR;
 602             if( (length=s.extract(buffer, 2, cnv, errorCode))!=13 ||
 603                 errorCode!=U_BUFFER_OVERFLOW_ERROR
 604             ) {
 605                 errln("UnicodeString::extract(too small, UConverter) preflighting failed (length=%ld, %s)",
 606                       length, u_errorName(errorCode));
 607             }
 608
 609             // try error cases
 610             errorCode=U_ZERO_ERROR;
 611             if( s.extract(NULL, 2, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
 612                 errln("UnicodeString::extract(UConverter) succeeded with an illegal destination");
 613             }
 614             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
 615             if( s.extract(NULL, 0, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
 616                 errln("UnicodeString::extract(UConverter) succeeded with a previous error code");
 617             }
 618             errorCode=U_ZERO_ERROR;
 619
 620             // extract for real
 621             if( (length=s.extract(buffer, sizeof(buffer), cnv, errorCode))!=13 ||
 622                 uprv_memcmp(buffer, expect, 13)!=0 ||
 623                 buffer[13]!=0 ||
 624                 U_FAILURE(errorCode)
 625             ) {
 626                 errln("UnicodeString::extract(UConverter) conversion failed (length=%ld, %s)",
 627                       length, u_errorName(errorCode));
 628             }
 629             // Test again with just the converter name.
 630             if( (length=s.extract(0, s.length(), buffer, sizeof(buffer), "UTF-8"))!=13 ||
 631                 uprv_memcmp(buffer, expect, 13)!=0 ||
 632                 buffer[13]!=0 ||
 633                 U_FAILURE(errorCode)
 634             ) {
 635                 errln("UnicodeString::extract(\"UTF-8\") conversion failed (length=%ld, %s)",
 636                       length, u_errorName(errorCode));
 637             }
 638
 639             // try the constructor
 640             UnicodeString t(expect, sizeof(expect), cnv, errorCode);
 641             if(U_FAILURE(errorCode) || s!=t) {
 642                 errln("UnicodeString(UConverter) conversion failed (%s)",
 643                       u_errorName(errorCode));
 644             }
 645
 646             ucnv_close(cnv);
 647         }
 648     }
 649 }
 650
 651 void
 652 UnicodeStringTest::TestRemoveReplace()
 653 {
 654     UnicodeString   test1("The rain in Spain stays mainly on the plain");
 655     UnicodeString   test2("eat SPAMburgers!");
 656     UChar         test3[] = { 0x53, 0x50, 0x41, 0x4d, 0x4d, 0 };
 657     char            test4[] = "SPAM";
 658     UnicodeString&  test5 = test1;
 659
 660     test1.replace(4, 4, test2, 4, 4);
 661     test1.replace(12, 5, test3, 4);
 662     test3[4] = 0;
 663     test1.replace(17, 4, test3);
 664     test1.replace(23, 4, test4);
 665     test1.replaceBetween(37, 42, test2, 4, 8);
 666
 667     if (test1 != "The SPAM in SPAM SPAMs SPAMly on the SPAM")
 668         errln("One of the replace methods failed:\n"
 669               "  expected \"The SPAM in SPAM SPAMs SPAMly on the SPAM\",\n"
 670               "  got \"" + test1 + "\"");
 671
 672     test1.remove(21, 1);
 673     test1.removeBetween(26, 28);
 674
 675     if (test1 != "The SPAM in SPAM SPAM SPAM on the SPAM")
 676         errln("One of the remove methods failed:\n"
 677               "  expected \"The SPAM in SPAM SPAM SPAM on the SPAM\",\n"
 678               "  got \"" + test1 + "\"");
 679
 680     for (int32_t i = 0; i < test1.length(); i++) {
 681         if (test5[i] != 0x53 && test5[i] != 0x50 && test5[i] != 0x41 && test5[i] != 0x4d && test5[i] != 0x20) {
 682             test1.setCharAt(i, 0x78);
 683         }
 684     }
 685
 686     if (test1 != "xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM")
 687         errln("One of the remove methods failed:\n"
 688               "  expected \"xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM\",\n"
 689               "  got \"" + test1 + "\"");
 690
 691     test1.remove();
 692     if (test1.length() != 0)
 693         errln("Remove() failed: expected empty string, got \"" + test1 + "\"");
 694 }
 695
 696 void
 697 UnicodeStringTest::TestSearching()
 698 {
 699     UnicodeString test1("test test ttest tetest testesteststt");
 700     UnicodeString test2("test");
 701     UChar testChar = 0x74;
 702
 703     UChar32 testChar32 = 0x20402;
 704     UChar testData[]={
 705         //   0       1       2       3       4       5       6       7
 706         0xd841, 0xdc02, 0x0071, 0xdc02, 0xd841, 0x0071, 0xd841, 0xdc02,
 707
 708         //   8       9      10      11      12      13      14      15
 709         0x0071, 0x0072, 0xd841, 0xdc02, 0x0071, 0xd841, 0xdc02, 0x0071,
 710
 711         //  16      17      18      19
 712         0xdc02, 0xd841, 0x0073, 0x0000
 713     };
 714     UnicodeString test3(testData);
 715     UnicodeString test4(testChar32);
 716
 717     uint16_t occurrences = 0;
 718     int32_t startPos = 0;
 719     for ( ;
 720           startPos != -1 && startPos < test1.length();
 721           (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
 722         ;
 723     if (occurrences != 6)
 724         errln(UnicodeString("indexOf failed: expected to find 6 occurrences, found ") + occurrences);
 725
 726     for ( occurrences = 0, startPos = 10;
 727           startPos != -1 && startPos < test1.length();
 728           (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
 729         ;
 730     if (occurrences != 4)
 731         errln(UnicodeString("indexOf with starting offset failed: "
 732                             "expected to find 4 occurrences, found ") + occurrences);
 733
 734     int32_t endPos = 28;
 735     for ( occurrences = 0, startPos = 5;
 736           startPos != -1 && startPos < test1.length();
 737           (startPos = test1.indexOf(test2, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
 738         ;
 739     if (occurrences != 4)
 740         errln(UnicodeString("indexOf with starting and ending offsets failed: "
 741                             "expected to find 4 occurrences, found ") + occurrences);
 742
 743     //using UChar32 string
 744     for ( startPos=0, occurrences=0;
 745           startPos != -1 && startPos < test3.length();
 746           (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
 747         ;
 748     if (occurrences != 4)
 749         errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
 750
 751     for ( startPos=10, occurrences=0;
 752           startPos != -1 && startPos < test3.length();
 753           (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
 754         ;
 755     if (occurrences != 2)
 756         errln(UnicodeString("indexOf failed: expected to find 2 occurrences, found ") + occurrences);
 757     //---
 758
 759     for ( occurrences = 0, startPos = 0;
 760           startPos != -1 && startPos < test1.length();
 761           (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
 762         ;
 763     if (occurrences != 16)
 764         errln(UnicodeString("indexOf with character failed: "
 765                             "expected to find 16 occurrences, found ") + occurrences);
 766
 767     for ( occurrences = 0, startPos = 10;
 768           startPos != -1 && startPos < test1.length();
 769           (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
 770         ;
 771     if (occurrences != 12)
 772         errln(UnicodeString("indexOf with character & start offset failed: "
 773                             "expected to find 12 occurrences, found ") + occurrences);
 774
 775     for ( occurrences = 0, startPos = 5, endPos = 28;
 776           startPos != -1 && startPos < test1.length();
 777           (startPos = test1.indexOf(testChar, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
 778         ;
 779     if (occurrences != 10)
 780         errln(UnicodeString("indexOf with character & start & end offsets failed: "
 781                             "expected to find 10 occurrences, found ") + occurrences);
 782
 783     //testing for UChar32
 784     UnicodeString subString;
 785     for( occurrences =0, startPos=0; startPos < test3.length(); startPos +=1){
 786         subString.append(test3, startPos, test3.length());
 787         if(subString.indexOf(testChar32) != -1 ){
 788              ++occurrences;
 789         }
 790         subString.remove();
 791     }
 792     if (occurrences != 14)
 793         errln((UnicodeString)"indexOf failed: expected to find 14 occurrences, found " + occurrences);
 794
 795     for ( occurrences = 0, startPos = 0;
 796           startPos != -1 && startPos < test3.length();
 797           (startPos = test3.indexOf(testChar32, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
 798         ;
 799     if (occurrences != 4)
 800         errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
 801
 802     endPos=test3.length();
 803     for ( occurrences = 0, startPos = 5;
 804           startPos != -1 && startPos < test3.length();
 805           (startPos = test3.indexOf(testChar32, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
 806         ;
 807     if (occurrences != 3)
 808         errln((UnicodeString)"indexOf with character & start & end offsets failed: expected to find 2 occurrences, found " + occurrences);
 809     //---
 810
 811     if(test1.lastIndexOf(test2)!=29) {
 812         errln("test1.lastIndexOf(test2)!=29");
 813     }
 814
 815     if(test1.lastIndexOf(test2, 15)!=29 || test1.lastIndexOf(test2, 29)!=29 || test1.lastIndexOf(test2, 30)!=-1) {
 816         errln("test1.lastIndexOf(test2, start) failed");
 817     }
 818
 819     for ( occurrences = 0, startPos = 32;
 820           startPos != -1;
 821           (startPos = test1.lastIndexOf(test2, 5, startPos - 5)) != -1 ? ++occurrences : 0)
 822         ;
 823     if (occurrences != 4)
 824         errln(UnicodeString("lastIndexOf with starting and ending offsets failed: "
 825                             "expected to find 4 occurrences, found ") + occurrences);
 826
 827     for ( occurrences = 0, startPos = 32;
 828           startPos != -1;
 829           (startPos = test1.lastIndexOf(testChar, 5, startPos - 5)) != -1 ? ++occurrences : 0)
 830         ;
 831     if (occurrences != 11)
 832         errln(UnicodeString("lastIndexOf with character & start & end offsets failed: "
 833                             "expected to find 11 occurrences, found ") + occurrences);
 834
 835     //testing UChar32
 836     startPos=test3.length();
 837     for ( occurrences = 0;
 838           startPos != -1;
 839           (startPos = test3.lastIndexOf(testChar32, 5, startPos - 5)) != -1 ? ++occurrences : 0)
 840         ;
 841     if (occurrences != 3)
 842         errln((UnicodeString)"lastIndexOf with character & start & end offsets failed: expected to find 3 occurrences, found " + occurrences);
 843
 844
 845     for ( occurrences = 0, endPos = test3.length();  endPos > 0; endPos -= 1){
 846         subString.remove();
 847         subString.append(test3, 0, endPos);
 848         if(subString.lastIndexOf(testChar32) != -1 ){
 849             ++occurrences;
 850         }
 851     }
 852     if (occurrences != 18)
 853         errln((UnicodeString)"indexOf failed: expected to find 18 occurrences, found " + occurrences);
 854     //---
 855
 856     // test that indexOf(UChar32) and lastIndexOf(UChar32)
 857     // do not find surrogate code points when they are part of matched pairs
 858     // (= part of supplementary code points)
 859     // Jitterbug 1542
 860     if(test3.indexOf((UChar32)0xd841) != 4 || test3.indexOf((UChar32)0xdc02) != 3) {
 861         errln("error: UnicodeString::indexOf(UChar32 surrogate) finds a partial supplementary code point");
 862     }
 863     if( UnicodeString(test3, 0, 17).lastIndexOf((UChar)0xd841, 0) != 4 ||
 864         UnicodeString(test3, 0, 17).lastIndexOf((UChar32)0xd841, 2) != 4 ||
 865         test3.lastIndexOf((UChar32)0xd841, 0, 17) != 4 || test3.lastIndexOf((UChar32)0xdc02, 0, 17) != 16
 866     ) {
 867         errln("error: UnicodeString::lastIndexOf(UChar32 surrogate) finds a partial supplementary code point");
 868     }
 869 }
 870
 871 void
 872 UnicodeStringTest::TestSpacePadding()
 873 {
 874     UnicodeString test1("hello");
 875     UnicodeString test2("   there");
 876     UnicodeString test3("Hi!  How ya doin'?  Beautiful day, isn't it?");
 877     UnicodeString test4;
 878     UBool returnVal;
 879     UnicodeString expectedValue;
 880
 881     returnVal = test1.padLeading(15);
 882     expectedValue = "          hello";
 883     if (returnVal == FALSE || test1 != expectedValue)
 884         errln("padLeading() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
 885
 886     returnVal = test2.padTrailing(15);
 887     expectedValue = "   there       ";
 888     if (returnVal == FALSE || test2 != expectedValue)
 889         errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
 890
 891     expectedValue = test3;
 892     returnVal = test3.padTrailing(15);
 893     if (returnVal == TRUE || test3 != expectedValue)
 894         errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
 895
 896     expectedValue = "hello";
 897     test4.setTo(test1).trim();
 898
 899     if (test4 != expectedValue || test1 == expectedValue || test4 != expectedValue)
 900         errln("trim(UnicodeString&) failed");
 901
 902     test1.trim();
 903     if (test1 != expectedValue)
 904         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
 905
 906     test2.trim();
 907     expectedValue = "there";
 908     if (test2 != expectedValue)
 909         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
 910
 911     test3.trim();
 912     expectedValue = "Hi!  How ya doin'?  Beautiful day, isn't it?";
 913     if (test3 != expectedValue)
 914         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
 915
 916     returnVal = test1.truncate(15);
 917     expectedValue = "hello";
 918     if (returnVal == TRUE || test1 != expectedValue)
 919         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
 920
 921     returnVal = test2.truncate(15);
 922     expectedValue = "there";
 923     if (returnVal == TRUE || test2 != expectedValue)
 924         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
 925
 926     returnVal = test3.truncate(15);
 927     expectedValue = "Hi!  How ya doi";
 928     if (returnVal == FALSE || test3 != expectedValue)
 929         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
 930 }
 931
 932 void
 933 UnicodeStringTest::TestPrefixAndSuffix()
 934 {
 935     UnicodeString test1("Now is the time for all good men to come to the aid of their country.");
 936     UnicodeString test2("Now");
 937     UnicodeString test3("country.");
 938     UnicodeString test4("count");
 939
 940     if (!test1.startsWith(test2) || !test1.startsWith(test2, 0, test2.length())) {
 941         errln("startsWith() failed: \"" + test2 + "\" should be a prefix of \"" + test1 + "\".");
 942     }
 943
 944     if (test1.startsWith(test3) ||
 945         test1.startsWith(test3.getBuffer(), test3.length()) ||
 946         test1.startsWith(test3.getTerminatedBuffer(), 0, -1)
 947     ) {
 948         errln("startsWith() failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test1 + "\".");
 949     }
 950
 951     if (test1.endsWith(test2)) {
 952         errln("endsWith() failed: \"" + test2 + "\" shouldn't be a suffix of \"" + test1 + "\".");
 953     }
 954
 955     if (!test1.endsWith(test3)) {
 956         errln("endsWith(test3) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
 957     }
 958     if (!test1.endsWith(test3, 0, INT32_MAX)) {
 959         errln("endsWith(test3, 0, INT32_MAX) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
 960     }
 961
 962     if(!test1.endsWith(test3.getBuffer(), test3.length())) {
 963         errln("endsWith(test3.getBuffer(), test3.length()) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
 964     }
 965     if(!test1.endsWith(test3.getTerminatedBuffer(), 0, -1)) {
 966         errln("endsWith(test3.getTerminatedBuffer(), 0, -1) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
 967     }
 968
 969     if (!test3.startsWith(test4)) {
 970         errln("endsWith(test4) failed: \"" + test4 + "\" should be a prefix of \"" + test3 + "\".");
 971     }
 972
 973     if (test4.startsWith(test3)) {
 974         errln("startsWith(test3) failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test4 + "\".");
 975     }
 976 }
 977
 978 void
 979 UnicodeStringTest::TestStartsWithAndEndsWithNulTerminated() {
 980     UnicodeString test("abcde");
 981     const UChar ab[] = { 0x61, 0x62, 0 };
 982     const UChar de[] = { 0x64, 0x65, 0 };
 983     assertTrue("abcde.startsWith(ab, -1)", test.startsWith(ab, -1));
 984     assertTrue("abcde.startsWith(ab, 0, -1)", test.startsWith(ab, 0, -1));
 985     assertTrue("abcde.endsWith(de, -1)", test.endsWith(de, -1));
 986     assertTrue("abcde.endsWith(de, 0, -1)", test.endsWith(de, 0, -1));
 987 }
 988
 989 void
 990 UnicodeStringTest::TestFindAndReplace()
 991 {
 992     UnicodeString test1("One potato, two potato, three potato, four\n");
 993     UnicodeString test2("potato");
 994     UnicodeString test3("MISSISSIPPI");
 995
 996     UnicodeString expectedValue;
 997
 998     test1.findAndReplace(test2, test3);
 999     expectedValue = "One MISSISSIPPI, two MISSISSIPPI, three MISSISSIPPI, four\n";
1000     if (test1 != expectedValue)
1001         errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
1002     test1.findAndReplace(2, 32, test3, test2);
1003     expectedValue = "One potato, two potato, three MISSISSIPPI, four\n";
1004     if (test1 != expectedValue)
1005         errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
1006 }
1007
1008 void
1009 UnicodeStringTest::TestReverse()
1010 {
1011     UnicodeString test("backwards words say to used I");
1012
1013     test.reverse();
1014     test.reverse(2, 4);
1015     test.reverse(7, 2);
1016     test.reverse(10, 3);
1017     test.reverse(14, 5);
1018     test.reverse(20, 9);
1019
1020     if (test != "I used to say words backwards")
1021         errln("reverse() failed:  Expected \"I used to say words backwards\",\n got \""
1022             + test + "\"");
1023
1024     test=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1025     test.reverse();
1026     if(test.char32At(0)!=0x1ed0 || test.char32At(1)!=0xc4 || test.char32At(2)!=0x1d15f || test.char32At(4)!=0x2f999) {
1027         errln("reverse() failed with supplementary characters");
1028     }
1029
1030     // Test case for ticket #8091:
1031     // UnicodeString::reverse() failed to see a lead surrogate in the middle of
1032     // an odd-length string that contains no other lead surrogates.
1033     test=UNICODE_STRING_SIMPLE("ab\\U0001F4A9e").unescape();
1034     UnicodeString expected=UNICODE_STRING_SIMPLE("e\\U0001F4A9ba").unescape();
1035     test.reverse();
1036     if(test!=expected) {
1037         errln("reverse() failed with only lead surrogate in the middle");
1038     }
1039 }
1040
1041 void
1042 UnicodeStringTest::TestMiscellaneous()
1043 {
1044     UnicodeString   test1("This is a test");
1045     UnicodeString   test2("This is a test");
1046     UnicodeString   test3("Me too!");
1047
1048     // test getBuffer(minCapacity) and releaseBuffer()
1049     test1=UnicodeString(); // make sure that it starts with its stackBuffer
1050     UChar *p=test1.getBuffer(20);
1051     if(test1.getCapacity()<20) {
1052         errln("UnicodeString::getBuffer(20).getCapacity()<20");
1053     }
1054
1055     test1.append((UChar)7); // must not be able to modify the string here
1056     test1.setCharAt(3, 7);
1057     test1.reverse();
1058     if( test1.length()!=0 ||
1059         test1.charAt(0)!=0xffff || test1.charAt(3)!=0xffff ||
1060         test1.getBuffer(10)!=0 || test1.getBuffer()!=0
1061     ) {
1062         errln("UnicodeString::getBuffer(minCapacity) allows read or write access to the UnicodeString");
1063     }
1064
1065     p[0]=1;
1066     p[1]=2;
1067     p[2]=3;
1068     test1.releaseBuffer(3);
1069     test1.append((UChar)4);
1070
1071     if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1072         errln("UnicodeString::releaseBuffer(newLength) does not properly reallow access to the UnicodeString");
1073     }
1074
1075     // test releaseBuffer() without getBuffer(minCapacity) - must not have any effect
1076     test1.releaseBuffer(1);
1077     if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1078         errln("UnicodeString::releaseBuffer(newLength) without getBuffer(minCapacity) changed the UnicodeString");
1079     }
1080
1081     // test getBuffer(const)
1082     const UChar *q=test1.getBuffer(), *r=test1.getBuffer();
1083     if( test1.length()!=4 ||
1084         q[0]!=1 || q[1]!=2 || q[2]!=3 || q[3]!=4 ||
1085         r[0]!=1 || r[1]!=2 || r[2]!=3 || r[3]!=4
1086     ) {
1087         errln("UnicodeString::getBuffer(const) does not return a usable buffer pointer");
1088     }
1089
1090     // test releaseBuffer() with a NUL-terminated buffer
1091     test1.getBuffer(20)[2]=0;
1092     test1.releaseBuffer(); // implicit -1
1093     if(test1.length()!=2 || test1.charAt(0)!=1 || test1.charAt(1) !=2) {
1094         errln("UnicodeString::releaseBuffer(-1) does not properly set the length of the UnicodeString");
1095     }
1096
1097     // test releaseBuffer() with a non-NUL-terminated buffer
1098     p=test1.getBuffer(256);
1099     for(int32_t i=0; i<test1.getCapacity(); ++i) {
1100         p[i]=(UChar)1;      // fill the buffer with all non-NUL code units
1101     }
1102     test1.releaseBuffer();  // implicit -1
1103     if(test1.length()!=test1.getCapacity() || test1.charAt(1)!=1 || test1.charAt(100)!=1 || test1.charAt(test1.getCapacity()-1)!=1) {
1104         errln("UnicodeString::releaseBuffer(-1 but no NUL) does not properly set the length of the UnicodeString");
1105     }
1106
1107     // test getTerminatedBuffer()
1108     test1=UnicodeString("This is another test.", "");
1109     test2=UnicodeString("This is another test.", "");
1110     q=test1.getTerminatedBuffer();
1111     if(q[test1.length()]!=0 || test1!=test2 || test2.compare(q, -1)!=0) {
1112         errln("getTerminatedBuffer()[length]!=0");
1113     }
1114
1115     const UChar u[]={ 5, 6, 7, 8, 0 };
1116     test1.setTo(FALSE, u, 3);
1117     q=test1.getTerminatedBuffer();
1118     if(q==u || q[0]!=5 || q[1]!=6 || q[2]!=7 || q[3]!=0) {
1119         errln("UnicodeString(u[3]).getTerminatedBuffer() returns a bad buffer");
1120     }
1121
1122     test1.setTo(TRUE, u, -1);
1123     q=test1.getTerminatedBuffer();
1124     if(q!=u || test1.length()!=4 || q[3]!=8 || q[4]!=0) {
1125         errln("UnicodeString(u[-1]).getTerminatedBuffer() returns a bad buffer");
1126     }
1127
1128     test1=UNICODE_STRING("la", 2);
1129     test1.append(UNICODE_STRING(" lila", 5).getTerminatedBuffer(), 0, -1);
1130     if(test1!=UNICODE_STRING("la lila", 7)) {
1131         errln("UnicodeString::append(const UChar *, start, length) failed");
1132     }
1133
1134     test1.insert(3, UNICODE_STRING("dudum ", 6), 0, INT32_MAX);
1135     if(test1!=UNICODE_STRING("la dudum lila", 13)) {
1136         errln("UnicodeString::insert(start, const UniStr &, start, length) failed");
1137     }
1138
1139     static const UChar ucs[]={ 0x68, 0x6d, 0x20, 0 };
1140     test1.insert(9, ucs, -1);
1141     if(test1!=UNICODE_STRING("la dudum hm lila", 16)) {
1142         errln("UnicodeString::insert(start, const UChar *, length) failed");
1143     }
1144
1145     test1.replace(9, 2, (UChar)0x2b);
1146     if(test1!=UNICODE_STRING("la dudum + lila", 15)) {
1147         errln("UnicodeString::replace(start, length, UChar) failed");
1148     }
1149
1150     if(test1.hasMetaData() || UnicodeString().hasMetaData()) {
1151         errln("UnicodeString::hasMetaData() returns TRUE");
1152     }
1153
1154     // test getTerminatedBuffer() on a truncated, shared, heap-allocated string
1155     test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1156     test1.truncate(36);  // ensure length()<getCapacity()
1157     test2=test1;  // share the buffer
1158     test1.truncate(5);
1159     if(test1.length()!=5 || test1.getTerminatedBuffer()[5]!=0) {
1160         errln("UnicodeString(shared buffer).truncate() failed");
1161     }
1162     if(test2.length()!=36 || test2[5]!=0x66 || u_strlen(test2.getTerminatedBuffer())!=36) {
1163         errln("UnicodeString(shared buffer).truncate().getTerminatedBuffer() "
1164               "modified another copy of the string!");
1165     }
1166     test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1167     test1.truncate(36);  // ensure length()<getCapacity()
1168     test2=test1;  // share the buffer
1169     test1.remove();
1170     if(test1.length()!=0 || test1.getTerminatedBuffer()[0]!=0) {
1171         errln("UnicodeString(shared buffer).remove() failed");
1172     }
1173     if(test2.length()!=36 || test2[0]!=0x61 || u_strlen(test2.getTerminatedBuffer())!=36) {
1174         errln("UnicodeString(shared buffer).remove().getTerminatedBuffer() "
1175               "modified another copy of the string!");
1176     }
1177 }
1178
1179 void
1180 UnicodeStringTest::TestStackAllocation()
1181 {
1182     UChar           testString[] ={
1183         0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x72, 0x61, 0x7a, 0x79, 0x20, 0x74, 0x65, 0x73, 0x74, 0x2e, 0 };
1184     UChar           guardWord = 0x4DED;
1185     UnicodeString*  test = 0;
1186
1187     test = new  UnicodeString(testString);
1188     if (*test != "This is a crazy test.")
1189         errln("Test string failed to initialize properly.");
1190     if (guardWord != 0x04DED)
1191         errln("Test string initialization overwrote guard word!");
1192
1193     test->insert(8, "only ");
1194     test->remove(15, 6);
1195     if (*test != "This is only a test.")
1196         errln("Manipulation of test string failed to work right.");
1197     if (guardWord != 0x4DED)
1198         errln("Manipulation of test string overwrote guard word!");
1199
1200     // we have to deinitialize and release the backing store by calling the destructor
1201     // explicitly, since we can't overload operator delete
1202     delete test;
1203
1204     UChar workingBuffer[] = {
1205         0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20,
1206         0x66, 0x6f, 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x6d, 0x65, 0x6e, 0x20, 0x74, 0x6f, 0x20,
1207         0x63, 0x6f, 0x6d, 0x65, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1208         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1209         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1210     UChar guardWord2 = 0x4DED;
1211
1212     test = new UnicodeString(workingBuffer, 35, 100);
1213     if (*test != "Now is the time for all men to come")
1214         errln("Stack-allocated backing store failed to initialize correctly.");
1215     if (guardWord2 != 0x4DED)
1216         errln("Stack-allocated backing store overwrote guard word!");
1217
1218     test->insert(24, "good ");
1219     if (*test != "Now is the time for all good men to come")
1220         errln("insert() on stack-allocated UnicodeString didn't work right");
1221     if (guardWord2 != 0x4DED)
1222         errln("insert() on stack-allocated UnicodeString overwrote guard word!");
1223
1224     if (workingBuffer[24] != 0x67)
1225         errln("insert() on stack-allocated UnicodeString didn't affect backing store");
1226
1227     *test += " to the aid of their country.";
1228     if (*test != "Now is the time for all good men to come to the aid of their country.")
1229         errln("Stack-allocated UnicodeString overflow didn't work");
1230     if (guardWord2 != 0x4DED)
1231         errln("Stack-allocated UnicodeString overflow overwrote guard word!");
1232
1233     *test = "ha!";
1234     if (*test != "ha!")
1235         errln("Assignment to stack-allocated UnicodeString didn't work");
1236     if (workingBuffer[0] != 0x4e)
1237         errln("Change to UnicodeString after overflow are still affecting original buffer");
1238     if (guardWord2 != 0x4DED)
1239         errln("Change to UnicodeString after overflow overwrote guard word!");
1240
1241     // test read-only aliasing with setTo()
1242     workingBuffer[0] = 0x20ac;
1243     workingBuffer[1] = 0x125;
1244     workingBuffer[2] = 0;
1245     test->setTo(TRUE, workingBuffer, 2);
1246     if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x125) {
1247         errln("UnicodeString.setTo(readonly alias) does not alias correctly");
1248     }
1249
1250     UnicodeString *c=(UnicodeString *)test->clone();
1251
1252     workingBuffer[1] = 0x109;
1253     if(test->charAt(1) != 0x109) {
1254         errln("UnicodeString.setTo(readonly alias) made a copy: did not see change in buffer");
1255     }
1256
1257     if(c->length() != 2 || c->charAt(1) != 0x125) {
1258         errln("clone(alias) did not copy the buffer");
1259     }
1260     delete c;
1261
1262     test->setTo(TRUE, workingBuffer, -1);
1263     if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x109) {
1264         errln("UnicodeString.setTo(readonly alias, length -1) does not alias correctly");
1265     }
1266
1267     test->setTo(FALSE, workingBuffer, -1);
1268     if(!test->isBogus()) {
1269         errln("UnicodeString.setTo(unterminated readonly alias, length -1) does not result in isBogus()");
1270     }
1271
1272     delete test;
1273
1274     test=new UnicodeString();
1275     UChar buffer[]={0x0061, 0x0062, 0x20ac, 0x0043, 0x0042, 0x0000};
1276     test->setTo(buffer, 4, 10);
1277     if(test->length() !=4 || test->charAt(0) != 0x0061 || test->charAt(1) != 0x0062 ||
1278         test->charAt(2) != 0x20ac || test->charAt(3) != 0x0043){
1279         errln((UnicodeString)"UnicodeString.setTo(UChar*, length, capacity) does not work correctly\n" + prettify(*test));
1280     }
1281     delete test;
1282
1283
1284     // test the UChar32 constructor
1285     UnicodeString c32Test((UChar32)0x10ff2a);
1286     if( c32Test.length() != U16_LENGTH(0x10ff2a) ||
1287         c32Test.char32At(c32Test.length() - 1) != 0x10ff2a
1288     ) {
1289         errln("The UnicodeString(UChar32) constructor does not work with a 0x10ff2a filler");
1290     }
1291
1292     // test the (new) capacity constructor
1293     UnicodeString capTest(5, (UChar32)0x2a, 5);
1294     if( capTest.length() != 5 * U16_LENGTH(0x2a) ||
1295         capTest.char32At(0) != 0x2a ||
1296         capTest.char32At(4) != 0x2a
1297     ) {
1298         errln("The UnicodeString capacity constructor does not work with an ASCII filler");
1299     }
1300
1301     capTest = UnicodeString(5, (UChar32)0x10ff2a, 5);
1302     if( capTest.length() != 5 * U16_LENGTH(0x10ff2a) ||
1303         capTest.char32At(0) != 0x10ff2a ||
1304         capTest.char32At(4) != 0x10ff2a
1305     ) {
1306         errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1307     }
1308
1309     capTest = UnicodeString(5, (UChar32)0, 0);
1310     if(capTest.length() != 0) {
1311         errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1312     }
1313 }
1314
1315 /**
1316  * Test the unescape() function.
1317  */
1318 void UnicodeStringTest::TestUnescape(void) {
1319     UnicodeString IN("abc\\u4567 \\n\\r \\U00101234xyz\\x1\\x{5289}\\x1b", -1, US_INV);
1320     UnicodeString OUT("abc");
1321     OUT.append((UChar)0x4567);
1322     OUT.append(" ");
1323     OUT.append((UChar)0xA);
1324     OUT.append((UChar)0xD);
1325     OUT.append(" ");
1326     OUT.append((UChar32)0x00101234);
1327     OUT.append("xyz");
1328     OUT.append((UChar32)1).append((UChar32)0x5289).append((UChar)0x1b);
1329     UnicodeString result = IN.unescape();
1330     if (result != OUT) {
1331         errln("FAIL: " + prettify(IN) + ".unescape() -> " +
1332               prettify(result) + ", expected " +
1333               prettify(OUT));
1334     }
1335
1336     // test that an empty string is returned in case of an error
1337     if (!UNICODE_STRING("wrong \\u sequence", 17).unescape().isEmpty()) {
1338         errln("FAIL: unescaping of a string with an illegal escape sequence did not return an empty string");
1339     }
1340 }
1341
1342 /* test code point counting functions --------------------------------------- */
1343
1344 /* reference implementation of UnicodeString::hasMoreChar32Than() */
1345 static int32_t
1346 _refUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1347     int32_t count=s.countChar32(start, length);
1348     return count>number;
1349 }
1350
1351 /* compare the real function against the reference */
1352 void
1353 UnicodeStringTest::_testUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1354     if(s.hasMoreChar32Than(start, length, number)!=_refUnicodeStringHasMoreChar32Than(s, start, length, number)) {
1355         errln("hasMoreChar32Than(%d, %d, %d)=%hd is wrong\n",
1356                 start, length, number, s.hasMoreChar32Than(start, length, number));
1357     }
1358 }
1359
1360 void
1361 UnicodeStringTest::TestCountChar32(void) {
1362     {
1363         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1364
1365         // test countChar32()
1366         // note that this also calls and tests u_countChar32(length>=0)
1367         if(
1368             s.countChar32()!=4 ||
1369             s.countChar32(1)!=4 ||
1370             s.countChar32(2)!=3 ||
1371             s.countChar32(2, 3)!=2 ||
1372             s.countChar32(2, 0)!=0
1373         ) {
1374             errln("UnicodeString::countChar32() failed");
1375         }
1376
1377         // NUL-terminate the string buffer and test u_countChar32(length=-1)
1378         const UChar *buffer=s.getTerminatedBuffer();
1379         if(
1380             u_countChar32(buffer, -1)!=4 ||
1381             u_countChar32(buffer+1, -1)!=4 ||
1382             u_countChar32(buffer+2, -1)!=3 ||
1383             u_countChar32(buffer+3, -1)!=3 ||
1384             u_countChar32(buffer+4, -1)!=2 ||
1385             u_countChar32(buffer+5, -1)!=1 ||
1386             u_countChar32(buffer+6, -1)!=0
1387         ) {
1388             errln("u_countChar32(length=-1) failed");
1389         }
1390
1391         // test u_countChar32() with bad input
1392         if(u_countChar32(NULL, 5)!=0 || u_countChar32(buffer, -2)!=0) {
1393             errln("u_countChar32(bad input) failed (returned non-zero counts)");
1394         }
1395     }
1396
1397     /* test data and variables for hasMoreChar32Than() */
1398     static const UChar str[]={
1399         0x61, 0x62, 0xd800, 0xdc00,
1400         0xd801, 0xdc01, 0x63, 0xd802,
1401         0x64, 0xdc03, 0x65, 0x66,
1402         0xd804, 0xdc04, 0xd805, 0xdc05,
1403         0x67
1404     };
1405     UnicodeString string(str, LENGTHOF(str));
1406     int32_t start, length, number;
1407
1408     /* test hasMoreChar32Than() */
1409     for(length=string.length(); length>=0; --length) {
1410         for(start=0; start<=length; ++start) {
1411             for(number=-1; number<=((length-start)+2); ++number) {
1412                 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1413             }
1414         }
1415     }
1416
1417     /* test hasMoreChar32Than() with pinning */
1418     for(start=-1; start<=string.length()+1; ++start) {
1419         for(number=-1; number<=((string.length()-start)+2); ++number) {
1420             _testUnicodeStringHasMoreChar32Than(string, start, 0x7fffffff, number);
1421         }
1422     }
1423
1424     /* test hasMoreChar32Than() with a bogus string */
1425     string.setToBogus();
1426     for(length=-1; length<=1; ++length) {
1427         for(start=-1; start<=length; ++start) {
1428             for(number=-1; number<=((length-start)+2); ++number) {
1429                 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1430             }
1431         }
1432     }
1433 }
1434
1435 void
1436 UnicodeStringTest::TestBogus() {
1437     UnicodeString   test1("This is a test");
1438     UnicodeString   test2("This is a test");
1439     UnicodeString   test3("Me too!");
1440
1441     // test isBogus() and setToBogus()
1442     if (test1.isBogus() || test2.isBogus() || test3.isBogus()) {
1443         errln("A string returned TRUE for isBogus()!");
1444     }
1445
1446     // NULL pointers are treated like empty strings
1447     // use other illegal arguments to make a bogus string
1448     test3.setTo(FALSE, test1.getBuffer(), -2);
1449     if(!test3.isBogus()) {
1450         errln("A bogus string returned FALSE for isBogus()!");
1451     }
1452     if (test1.hashCode() != test2.hashCode() || test1.hashCode() == test3.hashCode()) {
1453         errln("hashCode() failed");
1454     }
1455     if(test3.getBuffer()!=0 || test3.getBuffer(20)!=0 || test3.getTerminatedBuffer()!=0) {
1456         errln("bogus.getBuffer()!=0");
1457     }
1458     if (test1.indexOf(test3) != -1) {
1459         errln("bogus.indexOf() != -1");
1460     }
1461     if (test1.lastIndexOf(test3) != -1) {
1462         errln("bogus.lastIndexOf() != -1");
1463     }
1464     if (test1.caseCompare(test3, U_FOLD_CASE_DEFAULT) != 1 || test3.caseCompare(test1, U_FOLD_CASE_DEFAULT) != -1) {
1465         errln("caseCompare() doesn't work with bogus strings");
1466     }
1467     if (test1.compareCodePointOrder(test3) != 1 || test3.compareCodePointOrder(test1) != -1) {
1468         errln("compareCodePointOrder() doesn't work with bogus strings");
1469     }
1470
1471     // verify that non-assignment modifications fail and do not revive a bogus string
1472     test3.setToBogus();
1473     test3.append((UChar)0x61);
1474     if(!test3.isBogus() || test3.getBuffer()!=0) {
1475         errln("bogus.append('a') worked but must not");
1476     }
1477
1478     test3.setToBogus();
1479     test3.findAndReplace(UnicodeString((UChar)0x61), test2);
1480     if(!test3.isBogus() || test3.getBuffer()!=0) {
1481         errln("bogus.findAndReplace() worked but must not");
1482     }
1483
1484     test3.setToBogus();
1485     test3.trim();
1486     if(!test3.isBogus() || test3.getBuffer()!=0) {
1487         errln("bogus.trim() revived bogus but must not");
1488     }
1489
1490     test3.setToBogus();
1491     test3.remove(1);
1492     if(!test3.isBogus() || test3.getBuffer()!=0) {
1493         errln("bogus.remove(1) revived bogus but must not");
1494     }
1495
1496     test3.setToBogus();
1497     if(!test3.setCharAt(0, 0x62).isBogus() || !test3.isEmpty()) {
1498         errln("bogus.setCharAt(0, 'b') worked but must not");
1499     }
1500
1501     test3.setToBogus();
1502     if(test3.truncate(1) || !test3.isBogus() || !test3.isEmpty()) {
1503         errln("bogus.truncate(1) revived bogus but must not");
1504     }
1505
1506     // verify that assignments revive a bogus string
1507     test3.setToBogus();
1508     if(!test3.isBogus() || (test3=test1).isBogus() || test3!=test1) {
1509         errln("bogus.operator=() failed");
1510     }
1511
1512     test3.setToBogus();
1513     if(!test3.isBogus() || test3.fastCopyFrom(test1).isBogus() || test3!=test1) {
1514         errln("bogus.fastCopyFrom() failed");
1515     }
1516
1517     test3.setToBogus();
1518     if(!test3.isBogus() || test3.setTo(test1).isBogus() || test3!=test1) {
1519         errln("bogus.setTo(UniStr) failed");
1520     }
1521
1522     test3.setToBogus();
1523     if(!test3.isBogus() || test3.setTo(test1, 0).isBogus() || test3!=test1) {
1524         errln("bogus.setTo(UniStr, 0) failed");
1525     }
1526
1527     test3.setToBogus();
1528     if(!test3.isBogus() || test3.setTo(test1, 0, 0x7fffffff).isBogus() || test3!=test1) {
1529         errln("bogus.setTo(UniStr, 0, len) failed");
1530     }
1531
1532     test3.setToBogus();
1533     if(!test3.isBogus() || test3.setTo(test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
1534         errln("bogus.setTo(const UChar *, len) failed");
1535     }
1536
1537     test3.setToBogus();
1538     if(!test3.isBogus() || test3.setTo((UChar)0x2028).isBogus() || test3!=UnicodeString((UChar)0x2028)) {
1539         errln("bogus.setTo(UChar) failed");
1540     }
1541
1542     test3.setToBogus();
1543     if(!test3.isBogus() || test3.setTo((UChar32)0x1d157).isBogus() || test3!=UnicodeString((UChar32)0x1d157)) {
1544         errln("bogus.setTo(UChar32) failed");
1545     }
1546
1547     test3.setToBogus();
1548     if(!test3.isBogus() || test3.setTo(FALSE, test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
1549         errln("bogus.setTo(readonly alias) failed");
1550     }
1551
1552     // writable alias to another string's buffer: very bad idea, just convenient for this test
1553     test3.setToBogus();
1554     if(!test3.isBogus() || test3.setTo((UChar *)test1.getBuffer(), test1.length(), test1.getCapacity()).isBogus() || test3!=test1) {
1555         errln("bogus.setTo(writable alias) failed");
1556     }
1557
1558     // verify simple, documented ways to turn a bogus string into an empty one
1559     test3.setToBogus();
1560     if(!test3.isBogus() || (test3=UnicodeString()).isBogus() || !test3.isEmpty()) {
1561         errln("bogus.operator=(UnicodeString()) failed");
1562     }
1563
1564     test3.setToBogus();
1565     if(!test3.isBogus() || test3.setTo(UnicodeString()).isBogus() || !test3.isEmpty()) {
1566         errln("bogus.setTo(UnicodeString()) failed");
1567     }
1568
1569     test3.setToBogus();
1570     if(test3.remove().isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
1571         errln("bogus.remove() failed");
1572     }
1573
1574     test3.setToBogus();
1575     if(test3.remove(0, INT32_MAX).isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
1576         errln("bogus.remove(0, INT32_MAX) failed");
1577     }
1578
1579     test3.setToBogus();
1580     if(test3.truncate(0) || test3.isBogus() || !test3.isEmpty()) {
1581         errln("bogus.truncate(0) failed");
1582     }
1583
1584     test3.setToBogus();
1585     if(!test3.isBogus() || test3.setTo((UChar32)-1).isBogus() || !test3.isEmpty()) {
1586         errln("bogus.setTo((UChar32)-1) failed");
1587     }
1588
1589     static const UChar nul=0;
1590
1591     test3.setToBogus();
1592     if(!test3.isBogus() || test3.setTo(&nul, 0).isBogus() || !test3.isEmpty()) {
1593         errln("bogus.setTo(&nul, 0) failed");
1594     }
1595
1596     test3.setToBogus();
1597     if(!test3.isBogus() || test3.getBuffer()!=0) {
1598         errln("setToBogus() failed to make a string bogus");
1599     }
1600
1601     test3.setToBogus();
1602     if(test1.isBogus() || !(test1=test3).isBogus()) {
1603         errln("normal=bogus failed to make the left string bogus");
1604     }
1605
1606     // test that NULL primitive input string values are treated like
1607     // empty strings, not errors (bogus)
1608     test2.setTo((UChar32)0x10005);
1609     if(test2.insert(1, NULL, 1).length()!=2) {
1610         errln("UniStr.insert(...NULL...) should not modify the string but does");
1611     }
1612
1613     UErrorCode errorCode=U_ZERO_ERROR;
1614     UnicodeString
1615         test4((const UChar *)NULL),
1616         test5(TRUE, (const UChar *)NULL, 1),
1617         test6((UChar *)NULL, 5, 5),
1618         test7((const char *)NULL, 3, NULL, errorCode);
1619     if(test4.isBogus() || test5.isBogus() || test6.isBogus() || test7.isBogus()) {
1620         errln("a constructor set to bogus for a NULL input string, should be empty");
1621     }
1622
1623     test4.setTo(NULL, 3);
1624     test5.setTo(TRUE, (const UChar *)NULL, 1);
1625     test6.setTo((UChar *)NULL, 5, 5);
1626     if(test4.isBogus() || test5.isBogus() || test6.isBogus()) {
1627         errln("a setTo() set to bogus for a NULL input string, should be empty");
1628     }
1629
1630     // test that bogus==bogus<any
1631     if(test1!=test3 || test1.compare(test3)!=0) {
1632         errln("bogus==bogus failed");
1633     }
1634
1635     test2.remove();
1636     if(test1>=test2 || !(test2>test1) || test1.compare(test2)>=0 || !(test2.compare(test1)>0)) {
1637         errln("bogus<empty failed");
1638     }
1639 }
1640
1641 // StringEnumeration ------------------------------------------------------- ***
1642 // most of StringEnumeration is tested elsewhere
1643 // this test improves code coverage
1644
// Strings served, in this order, by the test enumeration:
// three one-character entries followed by two long ones.
static const char *const testEnumStrings[] = {
    "a",
    "b",
    "c",
    "this is a long string which helps us test some buffer limits",
    "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"
};
1653
1654 class TestEnumeration : public StringEnumeration {
1655 public:
1656     TestEnumeration() : i(0) {}
1657
1658     virtual int32_t count(UErrorCode& /*status*/) const {
1659         return LENGTHOF(testEnumStrings);
1660     }
1661
1662     virtual const UnicodeString *snext(UErrorCode &status) {
1663         if(U_SUCCESS(status) && i<LENGTHOF(testEnumStrings)) {
1664             unistr=UnicodeString(testEnumStrings[i++], "");
1665             return &unistr;
1666         }
1667
1668         return NULL;
1669     }
1670
1671     virtual void reset(UErrorCode& /*status*/) {
1672         i=0;
1673     }
1674
1675     static inline UClassID getStaticClassID() {
1676         return (UClassID)&fgClassID;
1677     }
1678     virtual UClassID getDynamicClassID() const {
1679         return getStaticClassID();
1680     }
1681
1682 private:
1683     static const char fgClassID;
1684
1685     int32_t i, length;
1686 };
1687
1688 const char TestEnumeration::fgClassID=0;
1689
1690 void
1691 UnicodeStringTest::TestStringEnumeration() {
1692     UnicodeString s;
1693     TestEnumeration ten;
1694     int32_t i, length;
1695     UErrorCode status;
1696
1697     const UChar *pu;
1698     const char *pc;
1699
1700     // test the next() default implementation and ensureCharsCapacity()
1701     for(i=0; i<LENGTHOF(testEnumStrings); ++i) {
1702         status=U_ZERO_ERROR;
1703         pc=ten.next(&length, status);
1704         s=UnicodeString(testEnumStrings[i], "");
1705         if(U_FAILURE(status) || pc==NULL || length!=s.length() || UnicodeString(pc, length, "")!=s) {
1706             errln("StringEnumeration.next(%d) failed", i);
1707         }
1708     }
1709     status=U_ZERO_ERROR;
1710     if(ten.next(&length, status)!=NULL) {
1711         errln("StringEnumeration.next(done)!=NULL");
1712     }
1713
1714     // test the unext() default implementation
1715     ten.reset(status);
1716     for(i=0; i<LENGTHOF(testEnumStrings); ++i) {
1717         status=U_ZERO_ERROR;
1718         pu=ten.unext(&length, status);
1719         s=UnicodeString(testEnumStrings[i], "");
1720         if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
1721             errln("StringEnumeration.unext(%d) failed", i);
1722         }
1723     }
1724     status=U_ZERO_ERROR;
1725     if(ten.unext(&length, status)!=NULL) {
1726         errln("StringEnumeration.unext(done)!=NULL");
1727     }
1728
1729     // test that the default clone() implementation works, and returns NULL
1730     if(ten.clone()!=NULL) {
1731         errln("StringEnumeration.clone()!=NULL");
1732     }
1733
1734     // test that uenum_openFromStringEnumeration() works
1735     // Need a heap allocated string enumeration because it is adopted by the UEnumeration.
1736     StringEnumeration *newTen = new TestEnumeration;
1737     status=U_ZERO_ERROR;
1738     UEnumeration *uten = uenum_openFromStringEnumeration(newTen, &status);
1739     if (uten==NULL || U_FAILURE(status)) {
1740         errln("fail at file %s, line %d, UErrorCode is %s\n", __FILE__, __LINE__, u_errorName(status));
1741         return;
1742     }
1743
1744     // test  uenum_next()
1745     for(i=0; i<LENGTHOF(testEnumStrings); ++i) {
1746         status=U_ZERO_ERROR;
1747         pc=uenum_next(uten, &length, &status);
1748         if(U_FAILURE(status) || pc==NULL || strcmp(pc, testEnumStrings[i]) != 0) {
1749             errln("File %s, line %d, StringEnumeration.next(%d) failed", __FILE__, __LINE__, i);
1750         }
1751     }
1752     status=U_ZERO_ERROR;
1753     if(uenum_next(uten, &length, &status)!=NULL) {
1754         errln("File %s, line %d, uenum_next(done)!=NULL");
1755     }
1756
1757     // test the uenum_unext()
1758     uenum_reset(uten, &status);
1759     for(i=0; i<LENGTHOF(testEnumStrings); ++i) {
1760         status=U_ZERO_ERROR;
1761         pu=uenum_unext(uten, &length, &status);
1762         s=UnicodeString(testEnumStrings[i], "");
1763         if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
1764             errln("File %s, Line %d, uenum_unext(%d) failed", __FILE__, __LINE__, i);
1765         }
1766     }
1767     status=U_ZERO_ERROR;
1768     if(uenum_unext(uten, &length, &status)!=NULL) {
1769         errln("File %s, Line %d, uenum_unext(done)!=NULL" __FILE__, __LINE__);
1770     }
1771
1772     uenum_close(uten);
1773 }
1774
1775 /*
1776  * Namespace test, to make sure that macros like UNICODE_STRING include the
1777  * namespace qualifier.
1778  *
1779  * Define a (bogus) UnicodeString class in another namespace and check for ambiguity.
1780  */
1781 namespace bogus {
1782     class UnicodeString {
1783     public:
1784         enum EInvariant { kInvariant };
1785         UnicodeString() : i(1) {}
1786         UnicodeString(UBool /*isTerminated*/, const UChar * /*text*/, int32_t textLength) : i(textLength) {}
1787         UnicodeString(const char * /*src*/, int32_t length, enum EInvariant /*inv*/
1788 ) : i(length) {}
1789     private:
1790         int32_t i;
1791     };
1792 }
1793
1794 void
1795 UnicodeStringTest::TestNameSpace() {
1796     // Provoke name collision unless the UnicodeString macros properly
1797     // qualify the icu::UnicodeString class.
1798     using namespace bogus;
1799
1800     // Use all UnicodeString macros from unistr.h.
1801     icu::UnicodeString s1=icu::UnicodeString("abc", 3, US_INV);
1802     icu::UnicodeString s2=UNICODE_STRING("def", 3);
1803     icu::UnicodeString s3=UNICODE_STRING_SIMPLE("ghi");
1804
1805     // Make sure the compiler does not optimize away instantiation of s1, s2, s3.
1806     icu::UnicodeString s4=s1+s2+s3;
1807     if(s4.length()!=9) {
1808         errln("Something wrong with UnicodeString::operator+().");
1809     }
1810 }
1811
1812 void
1813 UnicodeStringTest::TestUTF32() {
1814     // Input string length US_STACKBUF_SIZE to cause overflow of the
1815     // initially chosen fStackBuffer due to supplementary characters.
1816     static const UChar32 utf32[] = {
1817         0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a,
1818         0x10000, 0x20000, 0xe0000, 0x10ffff
1819     };
1820     static const UChar expected_utf16[] = {
1821         0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a,
1822         0xd800, 0xdc00, 0xd840, 0xdc00, 0xdb40, 0xdc00, 0xdbff, 0xdfff
1823     };
1824     UnicodeString from32 = UnicodeString::fromUTF32(utf32, LENGTHOF(utf32));
1825     UnicodeString expected(FALSE, expected_utf16, LENGTHOF(expected_utf16));
1826     if(from32 != expected) {
1827         errln("UnicodeString::fromUTF32() did not create the expected string.");
1828     }
1829
1830     static const UChar utf16[] = {
1831         0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1832     };
1833     static const UChar32 expected_utf32[] = {
1834         0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0x10000, 0x10ffff
1835     };
1836     UChar32 result32[16];
1837     UErrorCode errorCode = U_ZERO_ERROR;
1838     int32_t length32 =
1839         UnicodeString(FALSE, utf16, LENGTHOF(utf16)).
1840         toUTF32(result32, LENGTHOF(result32), errorCode);
1841     if( length32 != LENGTHOF(expected_utf32) ||
1842         0 != uprv_memcmp(result32, expected_utf32, length32*4) ||
1843         result32[length32] != 0
1844     ) {
1845         errln("UnicodeString::toUTF32() did not create the expected string.");
1846     }
1847 }
1848
1849 class TestCheckedArrayByteSink : public CheckedArrayByteSink {
1850 public:
1851     TestCheckedArrayByteSink(char* outbuf, int32_t capacity)
1852             : CheckedArrayByteSink(outbuf, capacity), calledFlush(FALSE) {}
1853     virtual void Flush() { calledFlush = TRUE; }
1854     UBool calledFlush;
1855 };
1856
1857 void
1858 UnicodeStringTest::TestUTF8() {
1859     static const uint8_t utf8[] = {
1860         // Code points:
1861         // 0x41, 0xd900,
1862         // 0x61, 0xdc00,
1863         // 0x110000, 0x5a,
1864         // 0x50000, 0x7a,
1865         // 0x10000, 0x20000,
1866         // 0xe0000, 0x10ffff
1867         0x41, 0xed, 0xa4, 0x80,
1868         0x61, 0xed, 0xb0, 0x80,
1869         0xf4, 0x90, 0x80, 0x80, 0x5a,
1870         0xf1, 0x90, 0x80, 0x80, 0x7a,
1871         0xf0, 0x90, 0x80, 0x80, 0xf0, 0xa0, 0x80, 0x80,
1872         0xf3, 0xa0, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1873     };
1874     static const UChar expected_utf16[] = {
1875         0x41, 0xfffd,
1876         0x61, 0xfffd,
1877         0xfffd, 0x5a,
1878         0xd900, 0xdc00, 0x7a,
1879         0xd800, 0xdc00, 0xd840, 0xdc00,
1880         0xdb40, 0xdc00, 0xdbff, 0xdfff
1881     };
1882     UnicodeString from8 = UnicodeString::fromUTF8(StringPiece((const char *)utf8, (int32_t)sizeof(utf8)));
1883     UnicodeString expected(FALSE, expected_utf16, LENGTHOF(expected_utf16));
1884
1885     if(from8 != expected) {
1886         errln("UnicodeString::fromUTF8(StringPiece) did not create the expected string.");
1887     }
1888 #if U_HAVE_STD_STRING
1889     std::string utf8_string((const char *)utf8, sizeof(utf8));
1890     UnicodeString from8b = UnicodeString::fromUTF8(utf8_string);
1891     if(from8b != expected) {
1892         errln("UnicodeString::fromUTF8(std::string) did not create the expected string.");
1893     }
1894 #endif
1895
1896     static const UChar utf16[] = {
1897         0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1898     };
1899     static const uint8_t expected_utf8[] = {
1900         0x41, 0xef, 0xbf, 0xbd, 0x61, 0xef, 0xbf, 0xbd, 0x5a, 0xf1, 0x90, 0x80, 0x80, 0x7a,
1901         0xf0, 0x90, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1902     };
1903     UnicodeString us(FALSE, utf16, LENGTHOF(utf16));
1904
1905     char buffer[64];
1906     TestCheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer));
1907     us.toUTF8(sink);
1908     if( sink.NumberOfBytesWritten() != (int32_t)sizeof(expected_utf8) ||
1909         0 != uprv_memcmp(buffer, expected_utf8, sizeof(expected_utf8))
1910     ) {
1911         errln("UnicodeString::toUTF8() did not create the expected string.");
1912     }
1913     if(!sink.calledFlush) {
1914         errln("UnicodeString::toUTF8(sink) did not sink.Flush().");
1915     }
1916 #if U_HAVE_STD_STRING
1917     // Initial contents for testing that toUTF8String() appends.
1918     std::string result8 = "-->";
1919     std::string expected8 = "-->" + std::string((const char *)expected_utf8, sizeof(expected_utf8));
1920     // Use the return value just for testing.
1921     std::string &result8r = us.toUTF8String(result8);
1922     if(result8r != expected8 || &result8r != &result8) {
1923         errln("UnicodeString::toUTF8String() did not create the expected string.");
1924     }
1925 #endif
1926 }
1927
1928 // Test if this compiler supports Return Value Optimization of unnamed temporary objects.
1929 static UnicodeString wrapUChars(const UChar *uchars) {
1930     return UnicodeString(TRUE, uchars, -1);
1931 }
1932
1933 void
1934 UnicodeStringTest::TestReadOnlyAlias() {
1935     UChar uchars[]={ 0x61, 0x62, 0 };
1936     UnicodeString alias(TRUE, uchars, 2);
1937     if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
1938         errln("UnicodeString read-only-aliasing constructor does not behave as expected.");
1939         return;
1940     }
1941     alias.truncate(1);
1942     if(alias.length()!=1 || alias.getBuffer()!=uchars) {
1943         errln("UnicodeString(read-only-alias).truncate() did not preserve aliasing as expected.");
1944     }
1945     if(alias.getTerminatedBuffer()==uchars) {
1946         errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1947               "did not allocate and copy as expected.");
1948     }
1949     if(uchars[1]!=0x62) {
1950         errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1951               "modified the original buffer.");
1952     }
1953     if(1!=u_strlen(alias.getTerminatedBuffer())) {
1954         errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1955               "does not return a buffer terminated at the proper length.");
1956     }
1957
1958     alias.setTo(TRUE, uchars, 2);
1959     if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
1960         errln("UnicodeString read-only-aliasing setTo() does not behave as expected.");
1961         return;
1962     }
1963     alias.remove();
1964     if(alias.length()!=0) {
1965         errln("UnicodeString(read-only-alias).remove() did not work.");
1966     }
1967     if(alias.getTerminatedBuffer()==uchars) {
1968         errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
1969               "did not un-alias as expected.");
1970     }
1971     if(uchars[0]!=0x61) {
1972         errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
1973               "modified the original buffer.");
1974     }
1975     if(0!=u_strlen(alias.getTerminatedBuffer())) {
1976         errln("UnicodeString.setTo(read-only-alias).remove().getTerminatedBuffer() "
1977               "does not return a buffer terminated at length 0.");
1978     }
1979
1980     UnicodeString longString=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789");
1981     alias.setTo(FALSE, longString.getBuffer(), longString.length());
1982     alias.remove(0, 10);
1983     if(longString.compare(10, INT32_MAX, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+10) {
1984         errln("UnicodeString.setTo(read-only-alias).remove(0, 10) did not preserve aliasing as expected.");
1985     }
1986     alias.setTo(FALSE, longString.getBuffer(), longString.length());
1987     alias.remove(27, 99);
1988     if(longString.compare(0, 27, alias)!=0 || alias.getBuffer()!=longString.getBuffer()) {
1989         errln("UnicodeString.setTo(read-only-alias).remove(27, 99) did not preserve aliasing as expected.");
1990     }
1991     alias.setTo(FALSE, longString.getBuffer(), longString.length());
1992     alias.retainBetween(6, 30);
1993     if(longString.compare(6, 24, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+6) {
1994         errln("UnicodeString.setTo(read-only-alias).retainBetween(6, 30) did not preserve aliasing as expected.");
1995     }
1996
1997     UChar abc[]={ 0x61, 0x62, 0x63, 0 };
1998     UBool hasRVO= wrapUChars(abc).getBuffer()==abc;
1999
2000     UnicodeString temp;
2001     temp.fastCopyFrom(longString.tempSubString());
2002     if(temp!=longString || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
2003         errln("UnicodeString.tempSubString() failed");
2004     }
2005     temp.fastCopyFrom(longString.tempSubString(-3, 5));
2006     if(longString.compare(0, 5, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
2007         errln("UnicodeString.tempSubString(-3, 5) failed");
2008     }
2009     temp.fastCopyFrom(longString.tempSubString(17));
2010     if(longString.compare(17, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+17)) {
2011         errln("UnicodeString.tempSubString(17) failed");
2012     }
2013     temp.fastCopyFrom(longString.tempSubString(99));
2014     if(!temp.isEmpty()) {
2015         errln("UnicodeString.tempSubString(99) failed");
2016     }
2017     temp.fastCopyFrom(longString.tempSubStringBetween(6));
2018     if(longString.compare(6, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+6)) {
2019         errln("UnicodeString.tempSubStringBetween(6) failed");
2020     }
2021     temp.fastCopyFrom(longString.tempSubStringBetween(8, 18));
2022     if(longString.compare(8, 10, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+8)) {
2023         errln("UnicodeString.tempSubStringBetween(8, 18) failed");
2024     }
2025     UnicodeString bogusString;
2026     bogusString.setToBogus();
2027     temp.fastCopyFrom(bogusString.tempSubStringBetween(8, 18));
2028     if(!temp.isBogus()) {
2029         errln("UnicodeString.setToBogus().tempSubStringBetween(8, 18) failed");
2030     }
2031 }
2032
2033 void
2034 UnicodeStringTest::doTestAppendable(UnicodeString &dest, Appendable &app) {
2035     static const UChar cde[3]={ 0x63, 0x64, 0x65 };
2036     static const UChar fg[3]={ 0x66, 0x67, 0 };
2037     if(!app.reserveAppendCapacity(12)) {
2038         errln("Appendable.reserve(12) failed");
2039     }
2040     app.appendCodeUnit(0x61);
2041     app.appendCodePoint(0x62);
2042     app.appendCodePoint(0x50000);
2043     app.appendString(cde, 3);
2044     app.appendString(fg, -1);
2045     UChar scratch[3];
2046     int32_t capacity=-1;
2047     UChar *buffer=app.getAppendBuffer(3, 3, scratch, 3, &capacity);
2048     if(capacity<3) {
2049         errln("Appendable.getAppendBuffer(min=3) returned capacity=%d<3", (int)capacity);
2050         return;
2051     }
2052     static const UChar hij[3]={ 0x68, 0x69, 0x6a };
2053     u_memcpy(buffer, hij, 3);
2054     app.appendString(buffer, 3);
2055     if(dest!=UNICODE_STRING_SIMPLE("ab\\U00050000cdefghij").unescape()) {
2056         errln("Appendable.append(...) failed");
2057     }
2058     buffer=app.getAppendBuffer(0, 3, scratch, 3, &capacity);
2059     if(buffer!=NULL || capacity!=0) {
2060         errln("Appendable.getAppendBuffer(min=0) failed");
2061     }
2062     capacity=1;
2063     buffer=app.getAppendBuffer(3, 3, scratch, 2, &capacity);
2064     if(buffer!=NULL || capacity!=0) {
2065         errln("Appendable.getAppendBuffer(scratch<min) failed");
2066     }
2067 }
2068
2069 class SimpleAppendable : public Appendable {
2070 public:
2071     explicit SimpleAppendable(UnicodeString &dest) : str(dest) {}
2072     virtual UBool appendCodeUnit(UChar c) { str.append(c); return TRUE; }
2073     SimpleAppendable &reset() { str.remove(); return *this; }
2074 private:
2075     UnicodeString &str;
2076 };
2077
2078 void
2079 UnicodeStringTest::TestAppendable() {
2080     UnicodeString dest;
2081     SimpleAppendable app(dest);
2082     doTestAppendable(dest, app);
2083 }
2084
2085 void
2086 UnicodeStringTest::TestUnicodeStringImplementsAppendable() {
2087     UnicodeString dest;
2088     UnicodeStringAppendable app(dest);
2089     doTestAppendable(dest, app);
2090 }
2091
2092 void
2093 UnicodeStringTest::TestSizeofUnicodeString() {
2094     // See the comments in unistr.h near the declaration of UnicodeString's fields.
2095     size_t sizeofUniStr=sizeof(UnicodeString);
2096     size_t expected;
2097     switch(sizeof(void *)) {
2098     case 4:
2099         expected=32;
2100         break;
2101     case 8:
2102         expected=40;
2103         break;
2104     default:
2105         logln("This platform has neither 32-bit nor 64-bit pointers.");
2106         return;
2107     }
2108     if(expected!=sizeofUniStr) {
2109         errln("sizeof(UnicodeString)=%d, expected %d", (int)sizeofUniStr, (int)expected);
2110     }
2111 }