icuSources/test/intltest/strcase.cpp

   1 // © 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /*
   4 *******************************************************************************
   5 *
   6 *   Copyright (C) 2002-2016, International Business Machines
   7 *   Corporation and others.  All Rights Reserved.
   8 *
   9 *******************************************************************************
  10 *   file name:  strcase.cpp
  11 *   encoding:   UTF-8
  12 *   tab size:   8 (not used)
  13 *   indentation:4
  14 *
  15 *   created on: 2002mar12
  16 *   created by: Markus W. Scherer
  17 *
  18 *   Test file for string casing C++ API functions.
  19 */
  20
  21 #include "unicode/std_string.h"
  22 #include "unicode/casemap.h"
  23 #include "unicode/edits.h"
  24 #include "unicode/uchar.h"
  25 #include "unicode/ures.h"
  26 #include "unicode/uloc.h"
  27 #include "unicode/locid.h"
  28 #include "unicode/ubrk.h"
  29 #include "unicode/unistr.h"
  30 #include "unicode/ucasemap.h"
  31 #include "ucase.h"
  32 #include "ustrtest.h"
  33 #include "unicode/tstdtmod.h"
  34 #include "cmemory.h"
  35
     /**
      * One expected step of an Edits iteration. Arrays of these are passed to
      * StringCaseTest::checkEditsIter() as its expected[] parameter.
      */
  36 struct EditChange {
         // NOTE(review): presumably TRUE for a changed span and FALSE for an
         // unchanged one -- confirm against checkEditsIter().
  37     UBool change;
         // NOTE(review): presumably the span length before (oldLength) and
         // after (newLength) the edit -- confirm against checkEditsIter().
  38     int32_t oldLength, newLength;
  39 };
  40
     /**
      * Test suite for the string casing C++ API functions.
      * Derives from IntlTest; the individual Test*() methods are registered in
      * runIndexedTest().
      */
  41 class StringCaseTest: public IntlTest {
  42 public:
  43     StringCaseTest();
  44     virtual ~StringCaseTest();
  45
         // Test dispatcher; the par argument is unused by the implementation.
  46     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0);
  47
  48     void TestCaseConversion();
  49
         // Checks one (input, expected output) pair for the operation selected by
         // whichCase (TEST_LOWER, TEST_UPPER, TEST_TITLE or TEST_FOLD).
         // iter is an untyped break iterator pointer used for titlecasing; may be NULL.
  50     void TestCasingImpl(const UnicodeString &input,
  51                         const UnicodeString &output,
  52                         int32_t whichCase,
  53                         void *iter, const char *localeID, uint32_t options);
  54     void TestCasing();
  55     void TestFullCaseFoldingIterator();
  56     void TestGreekUpper();
  57     void TestLongUpper();
  58     void TestMalformedUTF8();
  59     void TestBufferOverflow();
  60     void TestEdits();
  61     void TestCaseMapWithEdits();
  62     void TestCaseMapUTF8WithEdits();
  63     void TestLongUnicodeString();
  64     void TestBug13127();
  65
  66 private:
         // Helper for TestGreekUpper(): uppercases s for Greek through several APIs
         // and compares each result with expected.
  67     void assertGreekUpper(const char16_t *s, const char16_t *expected);
         // Helper that compares Edits iterators against the expected[] array of
         // expLength EditChange items.
  68     void checkEditsIter(
  69         const UnicodeString &name, Edits::Iterator ei1, Edits::Iterator ei2,  // two equal iterators
  70         const EditChange expected[], int32_t expLength, UBool withUnchanged,
  71         UErrorCode &errorCode);
  72
         // Greek locale ("el"), set by the constructor and used by assertGreekUpper().
  73     Locale GREEK_LOCALE_;
  74 };
  75
     // Constructs the test object; GREEK_LOCALE_ is initialized to Locale("el").
  76 StringCaseTest::StringCaseTest() : GREEK_LOCALE_("el") {}
  77
     // Empty destructor body: nothing is released explicitly here.
  78 StringCaseTest::~StringCaseTest() {}
  79
     /**
      * Factory with external linkage: returns a newly allocated StringCaseTest
      * as an IntlTest pointer.
      * NOTE(review): the object is created with new, so the caller presumably
      * takes ownership and deletes it -- confirm at the call site.
      */
  80 extern IntlTest *createStringCaseTest() {
  81     return new StringCaseTest();
  82 }
  83
     /**
      * Test dispatcher for this suite.
      *
      * Logs a suite banner when exec is true, then lists every test method
      * between TESTCASE_AUTO_BEGIN and TESTCASE_AUTO_END.
      * NOTE(review): the TESTCASE_AUTO* macros are defined outside this file;
      * presumably they map index to the matching test, set name, and run the
      * test when exec is true -- confirm in the IntlTest headers.
      *
      * @param index selects a test
      * @param exec  whether to execute (as opposed to only naming) the test
      * @param name  output: test name
      * The fourth parameter (par) is unused.
      */
  84 void
  85 StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
  86     if(exec) {
  87         logln("TestSuite StringCaseTest: ");
  88     }
  89     TESTCASE_AUTO_BEGIN;
  90     TESTCASE_AUTO(TestCaseConversion);
         // TestCasing and TestBug13127 are only registered when break iteration,
         // file I/O and legacy conversion are all enabled in this build.
  91 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
  92     TESTCASE_AUTO(TestCasing);
  93     TESTCASE_AUTO(TestBug13127);
  94 #endif
  95     TESTCASE_AUTO(TestFullCaseFoldingIterator);
  96     TESTCASE_AUTO(TestGreekUpper);
  97     TESTCASE_AUTO(TestLongUpper);
  98     TESTCASE_AUTO(TestMalformedUTF8);
  99     TESTCASE_AUTO(TestBufferOverflow);
 100     TESTCASE_AUTO(TestEdits);
 101     TESTCASE_AUTO(TestCaseMapWithEdits);
 102     TESTCASE_AUTO(TestCaseMapUTF8WithEdits);
 103     TESTCASE_AUTO(TestLongUnicodeString);
 104     TESTCASE_AUTO_END;
 105 }
 106
     /**
      * Exercises UnicodeString::toLower(), toUpper() and foldCase() with
      * hard-coded inputs and expected results, reporting each mismatch via errln().
      *
      * Covered, in order: Turkish dotted/dotless i, German sharp s, Greek sigma,
      * UChar-array tests for root and Turkish locales, supplementary (Deseret)
      * characters, LJ digraphs, and case folding. When normalization is compiled
      * in, it also covers context-sensitive SpecialCasing for sigma,
      * Turkish/Azerbaijani dots and Lithuanian dots.
      */
 107 void
 108 StringCaseTest::TestCaseConversion()
 109 {
 110     static const UChar uppercaseGreek[] =
 111         { 0x399, 0x395, 0x3a3, 0x3a5, 0x3a3, 0x20, 0x03a7, 0x3a1, 0x399, 0x3a3, 0x3a4,
 112         0x39f, 0x3a3, 0 };
 113         // "IESUS CHRISTOS"
 114
 115     static const UChar lowercaseGreek[] =
 116         { 0x3b9, 0x3b5, 0x3c3, 0x3c5, 0x3c2, 0x20, 0x03c7, 0x3c1, 0x3b9, 0x3c3, 0x3c4,
 117         0x3bf, 0x3c2, 0 };
 118         // "iesus christos"
 119
         // ASCII text except for U+0131 (dotless i) in "constant\u0131nople".
 120     static const UChar lowercaseTurkish[] =
 121         { 0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0x2c, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x63, 0x6f,
 122         0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x0131, 0x6e, 0x6f, 0x70, 0x6c, 0x65, 0x21, 0 };
 123
         // ASCII text except for U+0130 (dotted I) starting the last word.
 124     static const UChar uppercaseTurkish[] =
 125         { 0x54, 0x4f, 0x50, 0x4b, 0x41, 0x50, 0x49, 0x20, 0x50, 0x41, 0x4c, 0x41, 0x43, 0x45, 0x2c, 0x20,
 126         0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4c, 0 };
 127
 128     UnicodeString expectedResult;
 129     UnicodeString   test3;
 130
         // Input for the lowercasing checks: U+0130 followed by uppercase ASCII.
 131     test3 += (UChar32)0x0130;
 132     test3 += "STANBUL, NOT CONSTANTINOPLE!";
 133
         // 1. Lowercase with the empty ("") locale: U+0130 is expected to become i + U+0307.
 134     UnicodeString   test4(test3);
 135     test4.toLower(Locale(""));
 136     expectedResult = UnicodeString("i\\u0307stanbul, not constantinople!", "").unescape();
 137     if (test4 != expectedResult)
 138         errln("1. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
 139
         // 2. Lowercase with the Turkish locale: compared against lowercaseTurkish.
 140     test4 = test3;
 141     test4.toLower(Locale("tr", "TR"));
 142     expectedResult = lowercaseTurkish;
 143     if (test4 != expectedResult)
 144         errln("2. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
 145
         // Input for the uppercasing checks: contains U+0131 (dotless i).
 146     test3 = "topkap";
 147     test3 += (UChar32)0x0131;
 148     test3 += " palace, istanbul";
 149     test4 = test3;
 150
 151     test4.toUpper(Locale(""));
 152     expectedResult = "TOPKAPI PALACE, ISTANBUL";
 153     if (test4 != expectedResult)
 154         errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
 155
 156     test4 = test3;
 157     test4.toUpper(Locale("tr", "TR"));
 158     expectedResult = uppercaseTurkish;
 159     if (test4 != expectedResult)
 160         errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
 161
         // German: each U+00DF (sharp s) is expected to uppercase to "SS".
 162     test3 = CharsToUnicodeString("S\\u00FC\\u00DFmayrstra\\u00DFe");
 163
 164     test3.toUpper(Locale("de", "DE"));
 165     expectedResult = CharsToUnicodeString("S\\u00DCSSMAYRSTRASSE");
 166     if (test3 != expectedResult)
 167         errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test3 + "\".");
 168
         // Greek round trip between uppercaseGreek and lowercaseGreek.
 169     test4.replace(0, test4.length(), uppercaseGreek);
 170
 171     test4.toLower(Locale("el", "GR"));
 172     expectedResult = lowercaseGreek;
 173     if (test4 != expectedResult)
 174         errln("toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
 175
 176     test4.replace(0, test4.length(), lowercaseGreek);
 177
         // No locale argument here, unlike the other toUpper() calls above.
 178     test4.toUpper();
 179     expectedResult = uppercaseGreek;
 180     if (test4 != expectedResult)
 181         errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
 182
 183     // more string case mapping tests with the new implementation
 184     {
             // Non-terminated UChar arrays; each ends with the surrogate pair
             // 0xd93f 0xdfff, which every expected result keeps unchanged.
 185         static const UChar
 186
 187         beforeLower[]= { 0x61, 0x42, 0x49,  0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff },
 188         lowerRoot[]=   { 0x61, 0x62, 0x69,  0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
 189         lowerTurkish[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
 190
 191         beforeUpper[]= { 0x61, 0x42, 0x69,  0x3c2, 0xdf,       0x3c3, 0x2f, 0xfb03,           0xfb03,           0xfb03,           0xd93f, 0xdfff },
 192         upperRoot[]=   { 0x41, 0x42, 0x49,  0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
 193         upperTurkish[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
 194
 195         beforeMiniUpper[]=  { 0xdf, 0x61 },
 196         miniUpper[]=        { 0x53, 0x53, 0x41 };
 197
 198         UnicodeString s;
 199
             // Each check below first compares lengths, then contents.
             // NOTE(review): some variants call setCharAt(0, x) with the value that
             // is already at index 0; presumably this forces the string built with
             // UnicodeString(FALSE, ...) to stop sharing the static array before
             // case mapping -- confirm against the UnicodeString documentation.
 200         /* lowercase with root locale */
 201         s=UnicodeString(FALSE, beforeLower, UPRV_LENGTHOF(beforeLower));
 202         s.toLower("");
 203         if( s.length()!=UPRV_LENGTHOF(lowerRoot) ||
 204             s!=UnicodeString(FALSE, lowerRoot, s.length())
 205         ) {
 206             errln("error in toLower(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerRoot, UPRV_LENGTHOF(lowerRoot)) + "\"");
 207         }
 208
 209         /* lowercase with turkish locale */
 210         s=UnicodeString(FALSE, beforeLower, UPRV_LENGTHOF(beforeLower));
 211         s.setCharAt(0, beforeLower[0]).toLower(Locale("tr"));
 212         if( s.length()!=UPRV_LENGTHOF(lowerTurkish) ||
 213             s!=UnicodeString(FALSE, lowerTurkish, s.length())
 214         ) {
 215             errln("error in toLower(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerTurkish, UPRV_LENGTHOF(lowerTurkish)) + "\"");
 216         }
 217
 218         /* uppercase with root locale */
 219         s=UnicodeString(FALSE, beforeUpper, UPRV_LENGTHOF(beforeUpper));
 220         s.setCharAt(0, beforeUpper[0]).toUpper(Locale(""));
 221         if( s.length()!=UPRV_LENGTHOF(upperRoot) ||
 222             s!=UnicodeString(FALSE, upperRoot, s.length())
 223         ) {
 224             errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperRoot, UPRV_LENGTHOF(upperRoot)) + "\"");
 225         }
 226
 227         /* uppercase with turkish locale */
 228         s=UnicodeString(FALSE, beforeUpper, UPRV_LENGTHOF(beforeUpper));
 229         s.toUpper(Locale("tr"));
 230         if( s.length()!=UPRV_LENGTHOF(upperTurkish) ||
 231             s!=UnicodeString(FALSE, upperTurkish, s.length())
 232         ) {
 233             errln("error in toUpper(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperTurkish, UPRV_LENGTHOF(upperTurkish)) + "\"");
 234         }
 235
 236         /* uppercase a short string with root locale */
 237         s=UnicodeString(FALSE, beforeMiniUpper, UPRV_LENGTHOF(beforeMiniUpper));
 238         s.setCharAt(0, beforeMiniUpper[0]).toUpper("");
 239         if( s.length()!=UPRV_LENGTHOF(miniUpper) ||
 240             s!=UnicodeString(FALSE, miniUpper, s.length())
 241         ) {
 242             errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, miniUpper, UPRV_LENGTHOF(miniUpper)) + "\"");
 243         }
 244     }
 245
 246     // test some supplementary characters (>= Unicode 3.1)
 247     {
 248         UnicodeString t;
 249
             // Input pairs U+1043C with U+10414; the expected results show them
             // to be the lowercase and uppercase forms of the same letter.
 250         UnicodeString
 251             deseretInput=UnicodeString("\\U0001043C\\U00010414", "").unescape(),
 252             deseretLower=UnicodeString("\\U0001043C\\U0001043C", "").unescape(),
 253             deseretUpper=UnicodeString("\\U00010414\\U00010414", "").unescape();
 254         (t=deseretInput).toLower();
 255         if(t!=deseretLower) {
 256             errln("error lowercasing Deseret (plane 1) characters");
 257         }
 258         (t=deseretInput).toUpper();
 259         if(t!=deseretUpper) {
 260             errln("error uppercasing Deseret (plane 1) characters");
 261         }
 262     }
 263
 264     // test some more cases that looked like problems
 265     {
 266         UnicodeString t;
 267
             // Mixed input: ASCII with an apostrophe, the U+FB00 ligature, the four
             // i variants, U+01C7..U+01C9 (LJ), and the Deseret pair from above.
 268         UnicodeString
 269             ljInput=UnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 \\U0001043C\\U00010414", "").unescape(),
 270             ljLower=UnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 \\U0001043C\\U0001043C", "").unescape(),
 271             ljUpper=UnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 \\U00010414\\U00010414", "").unescape();
 272         (t=ljInput).toLower("en");
 273         if(t!=ljLower) {
 274             errln("error lowercasing LJ characters");
 275         }
 276         (t=ljInput).toUpper("en");
 277         if(t!=ljUpper) {
 278             errln("error uppercasing LJ characters");
 279         }
 280     }
 281
 282 #if !UCONFIG_NO_NORMALIZATION
 283     // some context-sensitive casing depends on normalization data being present
 284
 285     // Unicode 3.1.1 SpecialCasing tests
 286     {
 287         UnicodeString t;
 288
 289         // sigmas preceded and/or followed by cased letters
             // In sigmasLower only the third sigma becomes final sigma (U+03C2);
             // the other three become U+03C3.
 290         UnicodeString
 291             sigmas=UnicodeString("i\\u0307\\u03a3\\u0308j \\u0307\\u03a3\\u0308j i\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(),
 292             sigmasLower=UnicodeString("i\\u0307\\u03c3\\u0308j \\u0307\\u03c3\\u0308j i\\u00ad\\u03c2\\u0308 \\u0307\\u03c3\\u0308 ", "").unescape(),
 293             sigmasUpper=UnicodeString("I\\u0307\\u03a3\\u0308J \\u0307\\u03a3\\u0308J I\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape();
 294
 295         (t=sigmas).toLower();
 296         if(t!=sigmasLower) {
 297             errln("error in sigmas.toLower()=\"" + t + "\" expected \"" + sigmasLower + "\"");
 298         }
 299
 300         (t=sigmas).toUpper(Locale(""));
 301         if(t!=sigmasUpper) {
 302             errln("error in sigmas.toUpper()=\"" + t + "\" expected \"" + sigmasUpper + "\"");
 303         }
 304
 305         // turkish & azerbaijani dotless i & dotted I
 306         // remove dot above if there was a capital I before and there are no more accents above
 307         UnicodeString
 308             dots=UnicodeString("I \\u0130 I\\u0307 I\\u0327\\u0307 I\\u0301\\u0307 I\\u0327\\u0307\\u0301", "").unescape(),
 309             dotsTurkish=UnicodeString("\\u0131 i i i\\u0327 \\u0131\\u0301\\u0307 i\\u0327\\u0301", "").unescape(),
 310             dotsDefault=UnicodeString("i i\\u0307 i\\u0307 i\\u0327\\u0307 i\\u0301\\u0307 i\\u0327\\u0307\\u0301", "").unescape();
 311
 312         (t=dots).toLower("tr");
 313         if(t!=dotsTurkish) {
 314             errln("error in dots.toLower(tr)=\"" + t + "\" expected \"" + dotsTurkish + "\"");
 315         }
 316
             // "de" is used here as a locale expected to give the default mapping.
 317         (t=dots).toLower("de");
 318         if(t!=dotsDefault) {
 319             errln("error in dots.toLower(de)=\"" + t + "\" expected \"" + dotsDefault + "\"");
 320         }
 321     }
 322
 323     // more Unicode 3.1.1 tests
 324     {
 325         UnicodeString t;
 326
 327         // lithuanian dot above in uppercasing
 328         UnicodeString
 329             dots=UnicodeString("a\\u0307 \\u0307 i\\u0307 j\\u0327\\u0307 j\\u0301\\u0307", "").unescape(),
 330             dotsLithuanian=UnicodeString("A\\u0307 \\u0307 I J\\u0327 J\\u0301\\u0307", "").unescape(),
 331             dotsDefault=UnicodeString("A\\u0307 \\u0307 I\\u0307 J\\u0327\\u0307 J\\u0301\\u0307", "").unescape();
 332
 333         (t=dots).toUpper("lt");
 334         if(t!=dotsLithuanian) {
 335             errln("error in dots.toUpper(lt)=\"" + t + "\" expected \"" + dotsLithuanian + "\"");
 336         }
 337
 338         (t=dots).toUpper("de");
 339         if(t!=dotsDefault) {
 340             errln("error in dots.toUpper(de)=\"" + t + "\" expected \"" + dotsDefault + "\"");
 341         }
 342
 343         // lithuanian adds dot above to i in lowercasing if there are more above accents
 344         UnicodeString
 345             i=UnicodeString("I I\\u0301 J J\\u0301 \\u012e \\u012e\\u0301 \\u00cc\\u00cd\\u0128", "").unescape(),
 346             iLithuanian=UnicodeString("i i\\u0307\\u0301 j j\\u0307\\u0301 \\u012f \\u012f\\u0307\\u0301 i\\u0307\\u0300i\\u0307\\u0301i\\u0307\\u0303", "").unescape(),
 347             iDefault=UnicodeString("i i\\u0301 j j\\u0301 \\u012f \\u012f\\u0301 \\u00ec\\u00ed\\u0129", "").unescape();
 348
 349         (t=i).toLower("lt");
 350         if(t!=iLithuanian) {
 351             errln("error in i.toLower(lt)=\"" + t + "\" expected \"" + iLithuanian + "\"");
 352         }
 353
 354         (t=i).toLower("de");
 355         if(t!=iDefault) {
 356             errln("error in i.toLower(de)=\"" + t + "\" expected \"" + iDefault + "\"");
 357         }
 358     }
 359
 360 #endif
 361
 362     // test case folding
 363     {
             // s is the input; f is the expected default folding and g the expected
             // folding with U_FOLD_CASE_EXCLUDE_SPECIAL_I. They differ only in how
             // U+0130 folds: "i\u0307" in f versus "i" in g.
 364         UnicodeString
 365             s=UnicodeString("A\\u00df\\u00b5\\ufb03\\U0001040c\\u0130\\u0131", "").unescape(),
 366             f=UnicodeString("ass\\u03bcffi\\U00010434i\\u0307\\u0131", "").unescape(),
 367             g=UnicodeString("ass\\u03bcffi\\U00010434i\\u0131", "").unescape(),
 368             t;
 369
 370         (t=s).foldCase();
 371         if(f!=t) {
 372             errln("error in foldCase(\"" + s + "\", default)=\"" + t + "\" but expected \"" + f + "\"");
 373         }
 374
 375         // alternate handling for dotted I/dotless i (U+0130, U+0131)
 376         (t=s).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I);
 377         if(g!=t) {
 378             errln("error in foldCase(\"" + s + "\", U_FOLD_CASE_EXCLUDE_SPECIAL_I)=\"" + t + "\" but expected \"" + g + "\"");
 379         }
 380     }
 381 }
 382
 383 // data-driven case mapping tests ------------------------------------------ ***
 384
     // Selects the case mapping operation under test. The values index dataNames[]
     // and are passed as whichCase to TestCasingImpl(); TEST_COUNT is the number
     // of operations and serves as the loop bound in TestCasing().
 385 enum {
 386     TEST_LOWER,
 387     TEST_UPPER,
 388     TEST_TITLE,
 389     TEST_FOLD,
 390     TEST_COUNT
 391 };
 392
 393 // names of TestData children in casing.txt
     // Indexed by the TEST_* enum values; the extra last entry is an empty string.
 394 static const char *const dataNames[TEST_COUNT+1]={
 395     "lowercasing",
 396     "uppercasing",
 397     "titlecasing",
 398     "casefolding",
 399     ""
 400 };
 401
     /**
      * Checks one data-driven test case through two API families:
      * first the UnicodeString member functions, then the UTF-8 ucasemap_utf8*()
      * functions, converting input to UTF-8 and the result back to UTF-16.
      *
      * @param input     string to be case-mapped
      * @param output    expected result
      * @param whichCase TEST_LOWER, TEST_UPPER, TEST_TITLE or TEST_FOLD
      * @param iter      break iterator for titlecasing, or NULL; it is cast to
      *                  BreakIterator * for UnicodeString::toTitle() and to
      *                  UBreakIterator * for ubrk_safeClone()
      * @param localeID  locale ID used to build the Locale and to open the UCaseMap
      * @param options   passed to toTitle(), foldCase() and ucasemap_open()
      */
 402 void
 403 StringCaseTest::TestCasingImpl(const UnicodeString &input,
 404                                const UnicodeString &output,
 405                                int32_t whichCase,
 406                                void *iter, const char *localeID, uint32_t options) {
 407     // UnicodeString
 408     UnicodeString result;
 409     const char *name;
 410     Locale locale(localeID);
 411
 412     result=input;
 413     switch(whichCase) {
 414     case TEST_LOWER:
 415         name="toLower";
 416         result.toLower(locale);
 417         break;
 418     case TEST_UPPER:
 419         name="toUpper";
 420         result.toUpper(locale);
 421         break;
 422 #if !UCONFIG_NO_BREAK_ITERATION
 423     case TEST_TITLE:
 424         name="toTitle";
 425         result.toTitle((BreakIterator *)iter, locale, options);
 426         break;
 427 #endif
 428     case TEST_FOLD:
 429         name="foldCase";
 430         result.foldCase(options);
 431         break;
 432     default:
 433         name="";
 434         break; // won't happen
 435     }
 436     if(result!=output) {
 437         dataerrln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name);
 438     }
 439 #if !UCONFIG_NO_BREAK_ITERATION
         // With options==0, also exercise the toTitle() overload that takes no
         // options argument; it must produce the same expected output.
 440     if(whichCase==TEST_TITLE && options==0) {
 441         result=input;
 442         result.toTitle((BreakIterator *)iter, locale);
 443         if(result!=output) {
 444             dataerrln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res");
 445         }
 446     }
 447 #endif
 448
 449     // UTF-8
         // Fixed 100-byte buffers for the UTF-8 input and output.
         // NOTE(review): a test case that does not fit presumably surfaces as a
         // failure in errorCode, which is reported at the end -- confirm.
 450     char utf8In[100], utf8Out[100];
 451     int32_t utf8InLength, utf8OutLength, resultLength;
 452     UChar *buffer;
 453
 454     IcuTestErrorCode errorCode(*this, "TestCasingImpl");
 455     LocalUCaseMapPointer csm(ucasemap_open(localeID, options, errorCode));
 456 #if !UCONFIG_NO_BREAK_ITERATION
 457     if(iter!=NULL) {
 458         // Clone the break iterator so that the UCaseMap can safely adopt it.
 459         UBreakIterator *clone=ubrk_safeClone((UBreakIterator *)iter, NULL, NULL, errorCode);
 460         ucasemap_setBreakIterator(csm.getAlias(), clone, errorCode);
 461     }
 462 #endif
 463
 464     u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), errorCode);
 465     switch(whichCase) {
 466     case TEST_LOWER:
 467         name="ucasemap_utf8ToLower";
 468         utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(),
 469                     utf8Out, (int32_t)sizeof(utf8Out),
 470                     utf8In, utf8InLength, errorCode);
 471         break;
 472     case TEST_UPPER:
 473         name="ucasemap_utf8ToUpper";
 474         utf8OutLength=ucasemap_utf8ToUpper(csm.getAlias(),
 475                     utf8Out, (int32_t)sizeof(utf8Out),
 476                     utf8In, utf8InLength, errorCode);
 477         break;
 478 #if !UCONFIG_NO_BREAK_ITERATION
 479     case TEST_TITLE:
 480         name="ucasemap_utf8ToTitle";
 481         utf8OutLength=ucasemap_utf8ToTitle(csm.getAlias(),
 482                     utf8Out, (int32_t)sizeof(utf8Out),
 483                     utf8In, utf8InLength, errorCode);
 484         break;
 485 #endif
 486     case TEST_FOLD:
 487         name="ucasemap_utf8FoldCase";
 488         utf8OutLength=ucasemap_utf8FoldCase(csm.getAlias(),
 489                     utf8Out, (int32_t)sizeof(utf8Out),
 490                     utf8In, utf8InLength, errorCode);
 491         break;
 492     default:
 493         name="";
 494         utf8OutLength=0;
 495         break; // won't happen
 496     }
         // Convert the UTF-8 result back to UTF-16 directly into result's buffer.
         // On failure the buffer is released with length 0, leaving result empty.
 497     buffer=result.getBuffer(utf8OutLength);
 498     u_strFromUTF8(buffer, result.getCapacity(), &resultLength, utf8Out, utf8OutLength, errorCode);
 499     result.releaseBuffer(errorCode.isSuccess() ? resultLength : 0);
 500
 501     if(errorCode.isFailure()) {
 502         errcheckln(errorCode, "error: %s() got an error for a test case from casing.res - %s", name, u_errorName(errorCode));
             // Clear the error after reporting it here.
 503         errorCode.reset();
 504     } else if(result!=output) {
 505         errln("error: %s() got a wrong result for a test case from casing.res", name);
 506         errln("expected \"" + output + "\" got \"" + result + "\"" );
 507     }
 508 }
 509
     /**
      * Data-driven case mapping test.
      *
      * Loads the "casing" test data module and, for each operation in
      * dataNames[], iterates its test cases. For each case it reads "Input" and
      * "Output", plus "Locale" (all but case folding), "Type" (titlecasing) and
      * "Options" (titlecasing and case folding), then calls TestCasingImpl().
      * When break iteration is available, it finishes with an API coverage check
      * of UnicodeString::toTitle(NULL).
      */
 510 void
 511 StringCaseTest::TestCasing() {
 512     UErrorCode status = U_ZERO_ERROR;
 513 #if !UCONFIG_NO_BREAK_ITERATION
 514     LocalUBreakIteratorPointer iter;
 515 #endif
 516     char cLocaleID[100];
 517     UnicodeString locale, input, output, optionsString, result;
 518     uint32_t options;
 519     int32_t whichCase, type;
 520     LocalPointer<TestDataModule> driver(TestDataModule::getTestDataModule("casing", *this, status));
 521     if(U_SUCCESS(status)) {
 522         for(whichCase=0; whichCase<TEST_COUNT; ++whichCase) {
 523 #if UCONFIG_NO_BREAK_ITERATION
                 // Titlecasing is skipped when break iteration is compiled out.
 524             if(whichCase==TEST_TITLE) {
 525                 continue;
 526             }
 527 #endif
 528             LocalPointer<TestData> casingTest(driver->createTestData(dataNames[whichCase], status));
 529             if(U_FAILURE(status)) {
 530                 errln("TestCasing failed to createTestData(%s) - %s", dataNames[whichCase], u_errorName(status));
 531                 break;
 532             }
 533             const DataMap *myCase = NULL;
 534             while(casingTest->nextCase(myCase, status)) {
 535                 input = myCase->getString("Input", status);
 536                 output = myCase->getString("Output", status);
 537
                     // NOTE(review): case folding data has no "Locale" read here, so
                     // locale keeps whatever value the previous test case left in it,
                     // and that value is still extracted into cLocaleID and passed to
                     // TestCasingImpl() -- confirm this is intended.
 538                 if(whichCase!=TEST_FOLD) {
 539                     locale = myCase->getString("Locale", status);
 540                 }
 541                 locale.extract(0, 0x7fffffff, cLocaleID, sizeof(cLocaleID), "");
 542
 543 #if !UCONFIG_NO_BREAK_ITERATION
 544                 if(whichCase==TEST_TITLE) {
                         // "Type" selects the break iterator: a value >=0 is used as a
                         // UBreakIteratorType, -2 opens the rule-based iterator below,
                         // and any other value leaves iter empty.
 545                     type = myCase->getInt("Type", status);
 546                     if(type>=0) {
 547                         iter.adoptInstead(ubrk_open((UBreakIteratorType)type, cLocaleID, NULL, 0, &status));
 548                     } else if(type==-2) {
 549                         // Open a trivial break iterator that only delivers { 0, length }
 550                         // or even just { 0 } as boundaries.
 551                         static const UChar rules[] = { 0x2e, 0x2a, 0x3b };  // ".*;"
 552                         UParseError parseError;
 553                         iter.adoptInstead(ubrk_openRules(rules, UPRV_LENGTHOF(rules), NULL, 0, &parseError, &status));
 554                     }
 555                 }
 556 #endif
                     // Each letter found in the "Options" string sets one option bit.
 557                 options = 0;
 558                 if(whichCase==TEST_TITLE || whichCase==TEST_FOLD) {
 559                     optionsString = myCase->getString("Options", status);
 560                     if(optionsString.indexOf((UChar)0x54)>=0) {  // T
 561                         options|=U_FOLD_CASE_EXCLUDE_SPECIAL_I;
 562                     }
 563                     if(optionsString.indexOf((UChar)0x4c)>=0) {  // L
 564                         options|=U_TITLECASE_NO_LOWERCASE;
 565                     }
 566                     if(optionsString.indexOf((UChar)0x41)>=0) {  // A
 567                         options|=U_TITLECASE_NO_BREAK_ADJUSTMENT;
 568                     }
 569                 }
 570
 571                 if(U_FAILURE(status)) {
 572                     dataerrln("error: TestCasing() setup failed for %s test case from casing.res: %s", dataNames[whichCase],  u_errorName(status));
                         // Reset so that the next test case starts without a pending error.
 573                     status = U_ZERO_ERROR;
 574                 } else {
 575 #if UCONFIG_NO_BREAK_ITERATION
                         // Without break iteration there is no iter declared above;
                         // this empty stand-in lets the call below compile and pass NULL.
 576                     LocalPointer<UMemory> iter;
 577 #endif
 578                     TestCasingImpl(input, output, whichCase, iter.getAlias(), cLocaleID, options);
 579                 }
 580
 581 #if !UCONFIG_NO_BREAK_ITERATION
                     // Drop this case's break iterator so the next case starts with none.
 582                 iter.adoptInstead(NULL);
 583 #endif
 584             }
 585         }
 586     }
 587
 588 #if !UCONFIG_NO_BREAK_ITERATION
 589     // more tests for API coverage
 590     status=U_ZERO_ERROR;
 591     input=UNICODE_STRING_SIMPLE("sTrA\\u00dfE").unescape();
 592     (result=input).toTitle(NULL);
 593     if(result!=UNICODE_STRING_SIMPLE("Stra\\u00dfe").unescape()) {
 594         dataerrln("UnicodeString::toTitle(NULL) failed.");
 595     }
 596 #endif
 597 }
 598
     /**
      * Iterates a FullCaseFoldingIterator and checks every (code point, full
      * folding) pair it yields:
      * - the full folding has more than one code point;
      * - it equals UnicodeString(c).foldCase().
      * It also counts three specific pairs (U+FB03 -> "ffi", U+00DF -> "ss",
      * U+1E9E -> "ss") and requires at least 70 pairs overall.
      */
 599 void
 600 StringCaseTest::TestFullCaseFoldingIterator() {
 601     UnicodeString ffi=UNICODE_STRING_SIMPLE("ffi");
 602     UnicodeString ss=UNICODE_STRING_SIMPLE("ss");
 603     FullCaseFoldingIterator iter;
 604     int32_t count=0;
 605     int32_t countSpecific=0;
 606     UChar32 c;
 607     UnicodeString full;
         // The loop ends when next() returns a negative value.
 608     while((c=iter.next(full))>=0) {
 609         ++count;
 610         // Check that the full Case_Folding has more than 1 code point.
 611         if(!full.hasMoreChar32Than(0, 0x7fffffff, 1)) {
 612             errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding has at most 1 code point", (long)c);
 613             continue;
 614         }
 615         // Check that full == Case_Folding(c).
 616         UnicodeString cf(c);
 617         cf.foldCase();
 618         if(full!=cf) {
 619             errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding != cf(c)", (long)c);
 620             continue;
 621         }
 622         // Spot-check a couple of specific cases.
 623         if((full==ffi && c==0xfb03) || (full==ss && (c==0xdf || c==0x1e9e))) {
 624             ++countSpecific;
 625         }
 626     }
         // Exactly the three spot-checked pairs above must have been seen.
 627     if(countSpecific!=3) {
 628         errln("error: FullCaseFoldingIterator did not yield exactly the expected specific cases");
 629     }
         // Lower bound on the number of pairs the iterator must yield.
 630     if(count<70) {
 631         errln("error: FullCaseFoldingIterator yielded only %d (cp, full) pairs", (int)count);
 632     }
 633 }
 634
     /**
      * Uppercases s for Greek through three APIs and checks each against expected:
      * 1. UnicodeString::toUpper() with GREEK_LOCALE_;
      * 2. u_strToUpper() with locale "el", at several destination capacities;
      * 3. ucasemap_utf8ToUpper() with a UCaseMap opened for "el", first into a
      *    large buffer and then at several destination capacities.
      *
      * For each capacity it checks the returned length and the error code:
      * U_BUFFER_OVERFLOW_ERROR if the capacity is too small,
      * U_STRING_NOT_TERMINATED_WARNING if it equals the result length, and
      * U_ZERO_ERROR plus a terminating NUL otherwise.
      *
      * @param s        NUL-terminated UTF-16 input
      * @param expected NUL-terminated UTF-16 expected uppercase result
      */
 635 void
 636 StringCaseTest::assertGreekUpper(const char16_t *s, const char16_t *expected) {
 637     UnicodeString s16(s);
 638     UnicodeString expected16(expected);
 639     UnicodeString msg = UnicodeString("UnicodeString::toUpper/Greek(\"") + s16 + "\")";
 640     UnicodeString result16(s16);
 641     result16.toUpper(GREEK_LOCALE_);
 642     assertEquals(msg, expected16, result16);
 643
 644     msg = UnicodeString("u_strToUpper/Greek(\"") + s16 + "\") cap=";
 645     int32_t length = expected16.length();
 646     int32_t capacities[] = {
 647         // Keep in sync with the UTF-8 capacities near the bottom of this function.
 648         0, length / 2, length - 1, length, length + 1
 649     };
 650     for (int32_t i = 0; i < UPRV_LENGTHOF(capacities); ++i) {
 651         int32_t cap = capacities[i];
 652         UChar *dest16 = result16.getBuffer(expected16.length() + 1);
             // Pre-fill with a non-zero pattern so that the NUL check below is
             // only satisfied if u_strToUpper() actually wrote the terminator.
 653         u_memset(dest16, 0x55AA, result16.getCapacity());
 654         UErrorCode errorCode = U_ZERO_ERROR;
             // length is reused from here on to hold the returned result length.
 655         length = u_strToUpper(dest16, cap, s16.getBuffer(), s16.length(), "el", &errorCode);
 656         assertEquals(msg + cap, expected16.length(), length);
 657         UErrorCode expectedErrorCode;
 658         if (cap < expected16.length()) {
 659             expectedErrorCode = U_BUFFER_OVERFLOW_ERROR;
 660         } else if (cap == expected16.length()) {
 661             expectedErrorCode = U_STRING_NOT_TERMINATED_WARNING;
 662         } else {
 663             expectedErrorCode = U_ZERO_ERROR;
 664             assertEquals(msg + cap + " NUL", 0, dest16[length]);
 665         }
 666         assertEquals(msg + cap + " errorCode", expectedErrorCode, errorCode);
 667         result16.releaseBuffer(length);
             // The contents are only compared when the whole result fit.
 668         if (cap >= expected16.length()) {
 669             assertEquals(msg + cap, expected16, result16);
 670         }
 671     }
 672
         // UTF-8: first a reference conversion into a large buffer (dest8).
 673     UErrorCode errorCode = U_ZERO_ERROR;
 674     LocalUCaseMapPointer csm(ucasemap_open("el", 0, &errorCode));
 675     assertSuccess("ucasemap_open", errorCode);
 676     std::string s8;
 677     s16.toUTF8String(s8);
 678     msg = UnicodeString("ucasemap_utf8ToUpper/Greek(\"") + s16 + "\")";
 679     char dest8[1000];
 680     length = ucasemap_utf8ToUpper(csm.getAlias(), dest8, UPRV_LENGTHOF(dest8),
 681                                   s8.data(), s8.length(), &errorCode);
 682     assertSuccess("ucasemap_utf8ToUpper", errorCode);
 683     StringPiece result8(dest8, length);
 684     UnicodeString result16From8 = UnicodeString::fromUTF8(result8);
 685     assertEquals(msg, expected16, result16From8);
 686
 687     msg += " cap=";
         // Recompute the capacities from the UTF-8 result length;
         // capacities[0] stays 0 from the initialization above.
 688     capacities[1] = length / 2;
 689     capacities[2] = length - 1;
 690     capacities[3] = length;
 691     capacities[4] = length + 1;
 692     char dest8b[1000];
 693     int32_t expected8Length = length;  // Assuming the previous call worked.
 694     for (int32_t i = 0; i < UPRV_LENGTHOF(capacities); ++i) {
 695         int32_t cap = capacities[i];
             // Non-zero fill, for the same reason as in the UTF-16 loop.
 696         memset(dest8b, 0x5A, UPRV_LENGTHOF(dest8b));
             // This per-iteration errorCode shadows the outer errorCode declared above.
 697         UErrorCode errorCode = U_ZERO_ERROR;
 698         length = ucasemap_utf8ToUpper(csm.getAlias(), dest8b, cap,
 699                                       s8.data(), s8.length(), &errorCode);
 700         assertEquals(msg + cap, expected8Length, length);
 701         UErrorCode expectedErrorCode;
 702         if (cap < expected8Length) {
 703             expectedErrorCode = U_BUFFER_OVERFLOW_ERROR;
 704         } else if (cap == expected8Length) {
 705             expectedErrorCode = U_STRING_NOT_TERMINATED_WARNING;
 706         } else {
 707             expectedErrorCode = U_ZERO_ERROR;
 708             // Casts to int32_t to avoid matching UBool.
 709             assertEquals(msg + cap + " NUL", (int32_t)0, (int32_t)dest8b[length]);
 710         }
 711         assertEquals(msg + cap + " errorCode", expectedErrorCode, errorCode);
             // Compare against the reference output from the large-buffer call.
 712         if (cap >= expected8Length) {
 713             assertEquals(msg + cap + " (memcmp)", 0, memcmp(dest8, dest8b, expected8Length));
 714         }
 715     }
 716 }
 717
     /**
      * Runs assertGreekUpper() on a list of Greek sample strings, each paired
      * with its expected uppercase form. The comments give the source of the
      * samples that follow them.
      */
 718 void
 719 StringCaseTest::TestGreekUpper() {
 720     // http://bugs.icu-project.org/trac/ticket/5456
 721     assertGreekUpper(u"άδικος, κείμενο, ίριδα", u"ΑΔΙΚΟΣ, ΚΕΙΜΕΝΟ, ΙΡΙΔΑ");
 722     // https://bugzilla.mozilla.org/show_bug.cgi?id=307039
 723     // https://bug307039.bmoattachments.org/attachment.cgi?id=194893
 724     assertGreekUpper(u"Πατάτα", u"ΠΑΤΑΤΑ");
 725     assertGreekUpper(u"Αέρας, Μυστήριο, Ωραίο", u"ΑΕΡΑΣ, ΜΥΣΤΗΡΙΟ, ΩΡΑΙΟ");
 726     assertGreekUpper(u"Μαΐου, Πόρος, Ρύθμιση", u"ΜΑΪΟΥ, ΠΟΡΟΣ, ΡΥΘΜΙΣΗ");
 727     assertGreekUpper(u"ΰ, Τηρώ, Μάιος", u"Ϋ, ΤΗΡΩ, ΜΑΪΟΣ");
 728     assertGreekUpper(u"άυλος", u"ΑΫΛΟΣ");
         // Input that is already uppercase is expected to stay unchanged.
 729     assertGreekUpper(u"ΑΫΛΟΣ", u"ΑΫΛΟΣ");
 730     assertGreekUpper(u"Άκλιτα ρήματα ή άκλιτες μετοχές", u"ΑΚΛΙΤΑ ΡΗΜΑΤΑ Ή ΑΚΛΙΤΕΣ ΜΕΤΟΧΕΣ");
 731     // http://www.unicode.org/udhr/d/udhr_ell_monotonic.html
 732     assertGreekUpper(u"Επειδή η αναγνώριση της αξιοπρέπειας", u"ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ ΤΗΣ ΑΞΙΟΠΡΕΠΕΙΑΣ");
 733     assertGreekUpper(u"νομικού ή διεθνούς", u"ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ");
 734     // http://unicode.org/udhr/d/udhr_ell_polytonic.html
 735     assertGreekUpper(u"Ἐπειδὴ ἡ ἀναγνώριση", u"ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ");
 736     assertGreekUpper(u"νομικοῦ ἢ διεθνοῦς", u"ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ");
 737     // From Google bug report
 738     assertGreekUpper(u"Νέο, Δημιουργία", u"ΝΕΟ, ΔΗΜΙΟΥΡΓΙΑ");
 739     // http://crbug.com/234797
 740     assertGreekUpper(u"Ελάτε να φάτε τα καλύτερα παϊδάκια!", u"ΕΛΑΤΕ ΝΑ ΦΑΤΕ ΤΑ ΚΑΛΥΤΕΡΑ ΠΑΪΔΑΚΙΑ!");
 741     assertGreekUpper(u"Μαΐου, τρόλεϊ", u"ΜΑΪΟΥ, ΤΡΟΛΕΪ");
 742     assertGreekUpper(u"Το ένα ή το άλλο.", u"ΤΟ ΕΝΑ Ή ΤΟ ΑΛΛΟ.");
 743     // http://multilingualtypesetting.co.uk/blog/greek-typesetting-tips/
 744     assertGreekUpper(u"ρωμέικα", u"ΡΩΜΕΪΚΑ");
 745 }
 746
 747 void
 748 StringCaseTest::TestLongUpper() {
 749     if (quick) {
 750         logln("not exhaustive mode: skipping this test");
 751         return;
 752     }
 753     // Ticket #12663, crash with an extremely long string where
 754     // U+0390 maps to 0399 0308 0301 so that the result is three times as long
 755     // and overflows an int32_t.
 756     int32_t length = 0x40000004;  // more than 1G UChars
 757     UnicodeString s(length, (UChar32)0x390, length);
 758     UnicodeString result;
 759     UChar *dest = result.getBuffer(length + 1);
 760     if (s.isBogus() || dest == NULL) {
 761         logln("Out of memory, unable to run this test on this machine.");
 762         return;
 763     }
 764     IcuTestErrorCode errorCode(*this, "TestLongUpper");
 765     int32_t destLength = u_strToUpper(dest, result.getCapacity(),
 766                                       s.getBuffer(), s.length(), "", errorCode);
 767     result.releaseBuffer(destLength);
 768     if (errorCode.reset() != U_INDEX_OUTOFBOUNDS_ERROR) {
 769         errln("expected U_INDEX_OUTOFBOUNDS_ERROR, got %s (destLength is undefined, got %ld)",
 770               errorCode.errorName(), (long)destLength);
 771     }
 772 }
 773
 774 void StringCaseTest::TestMalformedUTF8() {
 775     // ticket #12639
 776     IcuTestErrorCode errorCode(*this, "TestMalformedUTF8");
 777     LocalUCaseMapPointer csm(ucasemap_open("en", U_TITLECASE_NO_BREAK_ADJUSTMENT, errorCode));
 778     if (errorCode.isFailure()) {
 779         errln("ucasemap_open(English) failed - %s", errorCode.errorName());
 780         return;
 781     }
 782     char src[1] = { (char)0x85 };  // malformed UTF-8
 783     char dest[3] = { 0, 0, 0 };
 784     int32_t destLength;
 785 #if !UCONFIG_NO_BREAK_ITERATION
 786     destLength = ucasemap_utf8ToTitle(csm.getAlias(), dest, 3, src, 1, errorCode);
 787     if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
 788         errln("ucasemap_utf8ToTitle(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
 789               errorCode.errorName(), (int)destLength, dest[0]);
 790     }
 791 #endif
 792
 793     errorCode.reset();
 794     dest[0] = 0;
 795     destLength = ucasemap_utf8ToLower(csm.getAlias(), dest, 3, src, 1, errorCode);
 796     if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
 797         errln("ucasemap_utf8ToLower(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
 798               errorCode.errorName(), (int)destLength, dest[0]);
 799     }
 800
 801     errorCode.reset();
 802     dest[0] = 0;
 803     destLength = ucasemap_utf8ToUpper(csm.getAlias(), dest, 3, src, 1, errorCode);
 804     if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
 805         errln("ucasemap_utf8ToUpper(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
 806               errorCode.errorName(), (int)destLength, dest[0]);
 807     }
 808
 809     errorCode.reset();
 810     dest[0] = 0;
 811     destLength = ucasemap_utf8FoldCase(csm.getAlias(), dest, 3, src, 1, errorCode);
 812     if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
 813         errln("ucasemap_utf8FoldCase(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
 814               errorCode.errorName(), (int)destLength, dest[0]);
 815     }
 816 }
 817
 818 void StringCaseTest::TestBufferOverflow() {
 819     // Ticket #12849, incorrect result from Title Case preflight operation,
 820     // when buffer overflow error is expected.
 821     IcuTestErrorCode errorCode(*this, "TestBufferOverflow");
 822     LocalUCaseMapPointer csm(ucasemap_open("en", 0, errorCode));
 823     if (errorCode.isFailure()) {
 824         errln("ucasemap_open(English) failed - %s", errorCode.errorName());
 825         return;
 826     }
 827
 828     UnicodeString data("hello world");
 829     int32_t result;
 830 #if !UCONFIG_NO_BREAK_ITERATION
 831     result = ucasemap_toTitle(csm.getAlias(), NULL, 0, data.getBuffer(), data.length(), errorCode);
 832     if (errorCode.get() != U_BUFFER_OVERFLOW_ERROR || result != data.length()) {
 833         errln("%s:%d ucasemap_toTitle(\"hello world\") failed: "
 834               "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)",
 835               __FILE__, __LINE__, data.length(), errorCode.errorName(), result);
 836     }
 837 #endif
 838     errorCode.reset();
 839
 840     std::string data_utf8;
 841     data.toUTF8String(data_utf8);
 842 #if !UCONFIG_NO_BREAK_ITERATION
 843     result = ucasemap_utf8ToTitle(csm.getAlias(), NULL, 0, data_utf8.c_str(), data_utf8.length(), errorCode);
 844     if (errorCode.get() != U_BUFFER_OVERFLOW_ERROR || result != (int32_t)data_utf8.length()) {
 845         errln("%s:%d ucasemap_toTitle(\"hello world\") failed: "
 846               "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)",
 847               __FILE__, __LINE__, data_utf8.length(), errorCode.errorName(), result);
 848     }
 849 #endif
 850     errorCode.reset();
 851 }
 852
 853 void StringCaseTest::checkEditsIter(
 854         const UnicodeString &name,
 855         Edits::Iterator ei1, Edits::Iterator ei2,  // two equal iterators
 856         const EditChange expected[], int32_t expLength, UBool withUnchanged,
 857         UErrorCode &errorCode) {
 858     assertFalse(name, ei2.findSourceIndex(-1, errorCode));
 859
 860     int32_t expSrcIndex = 0;
 861     int32_t expDestIndex = 0;
 862     int32_t expReplIndex = 0;
 863     for (int32_t expIndex = 0; expIndex < expLength; ++expIndex) {
 864         const EditChange &expect = expected[expIndex];
 865         UnicodeString msg = UnicodeString(name).append(u' ') + expIndex;
 866         if (withUnchanged || expect.change) {
 867             assertTrue(msg, ei1.next(errorCode));
 868             assertEquals(msg, expect.change, ei1.hasChange());
 869             assertEquals(msg, expect.oldLength, ei1.oldLength());
 870             assertEquals(msg, expect.newLength, ei1.newLength());
 871             assertEquals(msg, expSrcIndex, ei1.sourceIndex());
 872             assertEquals(msg, expDestIndex, ei1.destinationIndex());
 873             assertEquals(msg, expReplIndex, ei1.replacementIndex());
 874         }
 875
 876         if (expect.oldLength > 0) {
 877             assertTrue(msg, ei2.findSourceIndex(expSrcIndex, errorCode));
 878             assertEquals(msg, expect.change, ei2.hasChange());
 879             assertEquals(msg, expect.oldLength, ei2.oldLength());
 880             assertEquals(msg, expect.newLength, ei2.newLength());
 881             assertEquals(msg, expSrcIndex, ei2.sourceIndex());
 882             assertEquals(msg, expDestIndex, ei2.destinationIndex());
 883             assertEquals(msg, expReplIndex, ei2.replacementIndex());
 884             if (!withUnchanged) {
 885                 // For some iterators, move past the current range
 886                 // so that findSourceIndex() has to look before the current index.
 887                 ei2.next(errorCode);
 888                 ei2.next(errorCode);
 889             }
 890         }
 891
 892         expSrcIndex += expect.oldLength;
 893         expDestIndex += expect.newLength;
 894         if (expect.change) {
 895             expReplIndex += expect.newLength;
 896         }
 897     }
 898     // TODO: remove casts from u"" when merging into trunk
 899     UnicodeString msg = UnicodeString(name).append(u" end");
 900     assertFalse(msg, ei1.next(errorCode));
 901     assertFalse(msg, ei1.hasChange());
 902     assertEquals(msg, 0, ei1.oldLength());
 903     assertEquals(msg, 0, ei1.newLength());
 904     assertEquals(msg, expSrcIndex, ei1.sourceIndex());
 905     assertEquals(msg, expDestIndex, ei1.destinationIndex());
 906     assertEquals(msg, expReplIndex, ei1.replacementIndex());
 907
 908     assertFalse(name, ei2.findSourceIndex(expSrcIndex, errorCode));
 909 }
 910
 911 void StringCaseTest::TestEdits() {
 912     IcuTestErrorCode errorCode(*this, "TestEdits");
 913     Edits edits;
 914     assertFalse("new Edits", edits.hasChanges());
 915     assertEquals("new Edits", 0, edits.lengthDelta());
 916     edits.addUnchanged(1);  // multiple unchanged ranges are combined
 917     edits.addUnchanged(10000);  // too long, and they are split
 918     edits.addReplace(0, 0);
 919     edits.addUnchanged(2);
 920     assertFalse("unchanged 10003", edits.hasChanges());
 921     assertEquals("unchanged 10003", 0, edits.lengthDelta());
 922     edits.addReplace(1, 1);  // multiple short equal-length edits are compressed
 923     edits.addUnchanged(0);
 924     edits.addReplace(1, 1);
 925     edits.addReplace(1, 1);
 926     edits.addReplace(0, 10);
 927     edits.addReplace(100, 0);
 928     edits.addReplace(3000, 4000);  // variable-length encoding
 929     edits.addReplace(100000, 100000);
 930     assertTrue("some edits", edits.hasChanges());
 931     assertEquals("some edits", 10 - 100 + 1000, edits.lengthDelta());
 932     UErrorCode outErrorCode = U_ZERO_ERROR;
 933     assertFalse("edits done: copyErrorTo", edits.copyErrorTo(outErrorCode));
 934
 935     static const EditChange coarseExpectedChanges[] = {
 936             { FALSE, 10003, 10003 },
 937             { TRUE, 103103, 104013 }
 938     };
 939     checkEditsIter(u"coarse",
 940             edits.getCoarseIterator(), edits.getCoarseIterator(),
 941             coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), TRUE, errorCode);
 942     checkEditsIter(u"coarse changes",
 943             edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(),
 944             coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), FALSE, errorCode);
 945
 946     static const EditChange fineExpectedChanges[] = {
 947             { FALSE, 10003, 10003 },
 948             { TRUE, 1, 1 },
 949             { TRUE, 1, 1 },
 950             { TRUE, 1, 1 },
 951             { TRUE, 0, 10 },
 952             { TRUE, 100, 0 },
 953             { TRUE, 3000, 4000 },
 954             { TRUE, 100000, 100000 }
 955     };
 956     checkEditsIter(u"fine",
 957             edits.getFineIterator(), edits.getFineIterator(),
 958             fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), TRUE, errorCode);
 959     checkEditsIter(u"fine changes",
 960             edits.getFineChangesIterator(), edits.getFineChangesIterator(),
 961             fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), FALSE, errorCode);
 962
 963     edits.reset();
 964     assertFalse("reset", edits.hasChanges());
 965     assertEquals("reset", 0, edits.lengthDelta());
 966     Edits::Iterator ei = edits.getCoarseChangesIterator();
 967     assertFalse("reset then iterator", ei.next(errorCode));
 968 }
 969
 970 void StringCaseTest::TestCaseMapWithEdits() {
 971     IcuTestErrorCode errorCode(*this, "TestEdits");
 972     UChar dest[20];
 973     Edits edits;
 974
 975     int32_t length = CaseMap::toLower("tr", UCASEMAP_OMIT_UNCHANGED_TEXT,
 976                                       u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
 977     assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"), UnicodeString(TRUE, dest, length));
 978     static const EditChange lowerExpectedChanges[] = {
 979             { TRUE, 1, 1 },
 980             { FALSE, 4, 4 },
 981             { TRUE, 1, 1 },
 982             { FALSE, 2, 2 }
 983     };
 984     checkEditsIter(u"toLower(IstanBul)",
 985             edits.getFineIterator(), edits.getFineIterator(),
 986             lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges),
 987             TRUE, errorCode);
 988
 989     edits.reset();
 990     length = CaseMap::toUpper("el", UCASEMAP_OMIT_UNCHANGED_TEXT,
 991                               u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
 992     assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"), UnicodeString(TRUE, dest, length));
 993     static const EditChange upperExpectedChanges[] = {
 994             { FALSE, 1, 1 },
 995             { TRUE, 1, 1 },
 996             { TRUE, 1, 1 },
 997             { TRUE, 1, 1 },
 998             { TRUE, 1, 1 },
 999             { TRUE, 1, 1 }
1000     };
1001     checkEditsIter(u"toUpper(Πατάτα)",
1002             edits.getFineIterator(), edits.getFineIterator(),
1003             upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges),
1004             TRUE, errorCode);
1005
1006     edits.reset();
1007
1008 #if !UCONFIG_NO_BREAK_ITERATION
1009     length = CaseMap::toTitle("nl",
1010                               UCASEMAP_OMIT_UNCHANGED_TEXT |
1011                               U_TITLECASE_NO_BREAK_ADJUSTMENT |
1012                               U_TITLECASE_NO_LOWERCASE,
1013                               NULL, u"IjssEL IglOo", 12,
1014                               dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1015     assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"), UnicodeString(TRUE, dest, length));
1016     static const EditChange titleExpectedChanges[] = {
1017             { FALSE, 1, 1 },
1018             { TRUE, 1, 1 },
1019             { FALSE, 10, 10 }
1020     };
1021     checkEditsIter(u"toTitle(IjssEL IglOo)",
1022             edits.getFineIterator(), edits.getFineIterator(),
1023             titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges),
1024             TRUE, errorCode);
1025 #endif
1026
1027     edits.reset();
1028     length = CaseMap::fold(UCASEMAP_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1029                            u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1030     assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"), UnicodeString(TRUE, dest, length));
1031     static const EditChange foldExpectedChanges[] = {
1032             { TRUE, 1, 1 },
1033             { TRUE, 1, 2 },
1034             { FALSE, 3, 3 },
1035             { TRUE, 1, 1 },
1036             { FALSE, 2, 2 }
1037     };
1038     checkEditsIter(u"foldCase(IßtanBul)",
1039             edits.getFineIterator(), edits.getFineIterator(),
1040             foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges),
1041             TRUE, errorCode);
1042 }
1043
1044 void StringCaseTest::TestCaseMapUTF8WithEdits() {
1045     IcuTestErrorCode errorCode(*this, "TestEdits");
1046     char dest[50];
1047     Edits edits;
1048
1049     int32_t length = CaseMap::utf8ToLower("tr", UCASEMAP_OMIT_UNCHANGED_TEXT,
1050                                           u8"IstanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1051     assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"),
1052                  UnicodeString::fromUTF8(StringPiece(dest, length)));
1053     static const EditChange lowerExpectedChanges[] = {
1054             { TRUE, 1, 2 },
1055             { FALSE, 4, 4 },
1056             { TRUE, 1, 1 },
1057             { FALSE, 2, 2 }
1058     };
1059     checkEditsIter(u"toLower(IstanBul)",
1060             edits.getFineIterator(), edits.getFineIterator(),
1061             lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges),
1062             TRUE, errorCode);
1063
1064     edits.reset();
1065     length = CaseMap::utf8ToUpper("el", UCASEMAP_OMIT_UNCHANGED_TEXT,
1066                                   u8"Πατάτα", 6 * 2, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1067     assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"),
1068                  UnicodeString::fromUTF8(StringPiece(dest, length)));
1069     static const EditChange upperExpectedChanges[] = {
1070             { FALSE, 2, 2 },
1071             { TRUE, 2, 2 },
1072             { TRUE, 2, 2 },
1073             { TRUE, 2, 2 },
1074             { TRUE, 2, 2 },
1075             { TRUE, 2, 2 }
1076     };
1077     checkEditsIter(u"toUpper(Πατάτα)",
1078             edits.getFineIterator(), edits.getFineIterator(),
1079             upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges),
1080             TRUE, errorCode);
1081
1082     edits.reset();
1083 #if !UCONFIG_NO_BREAK_ITERATION
1084     length = CaseMap::utf8ToTitle("nl",
1085                                   UCASEMAP_OMIT_UNCHANGED_TEXT |
1086                                   U_TITLECASE_NO_BREAK_ADJUSTMENT |
1087                                   U_TITLECASE_NO_LOWERCASE,
1088                                   NULL, u8"IjssEL IglOo", 12,
1089                                   dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1090     assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"),
1091                  UnicodeString::fromUTF8(StringPiece(dest, length)));
1092     static const EditChange titleExpectedChanges[] = {
1093             { FALSE, 1, 1 },
1094             { TRUE, 1, 1 },
1095             { FALSE, 10, 10 }
1096     };
1097     checkEditsIter(u"toTitle(IjssEL IglOo)",
1098             edits.getFineIterator(), edits.getFineIterator(),
1099             titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges),
1100             TRUE, errorCode);
1101 #endif
1102
1103     edits.reset();
1104     length = CaseMap::utf8Fold(UCASEMAP_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1105                                u8"IßtanBul", 1 + 2 + 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1106     assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"),
1107                  UnicodeString::fromUTF8(StringPiece(dest, length)));
1108     static const EditChange foldExpectedChanges[] = {
1109             { TRUE, 1, 2 },
1110             { TRUE, 2, 2 },
1111             { FALSE, 3, 3 },
1112             { TRUE, 1, 1 },
1113             { FALSE, 2, 2 }
1114     };
1115     checkEditsIter(u"foldCase(IßtanBul)",
1116             edits.getFineIterator(), edits.getFineIterator(),
1117             foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges),
1118             TRUE, errorCode);
1119 }
1120
1121 void StringCaseTest::TestLongUnicodeString() {
1122     // Code coverage for UnicodeString case mapping code handling
1123     // long strings or many changes in a string.
1124     UnicodeString s(TRUE,
1125         (const UChar *)
1126         u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1127         u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1128         u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1129         u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1130         u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1131         u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF", 6 * 51);
1132     UnicodeString expected(TRUE,
1133         (const UChar *)
1134         u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1135         u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1136         u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1137         u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1138         u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1139         u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF", 6 * 51);
1140     s.toUpper(Locale::getRoot());
1141     assertEquals("string length 306", expected, s);
1142 }
1143
1144 void StringCaseTest::TestBug13127() {
1145     // Test case crashed when the bug was present.
1146     const char16_t *s16 = u"日本語";
1147     UnicodeString s(TRUE, s16, -1);
1148     s.toTitle(0, Locale::getEnglish());
1149 }