]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/intltest/strcase.cpp
ICU-64252.0.1.tar.gz
[apple/icu.git] / icuSources / test / intltest / strcase.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2002-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: strcase.cpp
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2002mar12
16 * created by: Markus W. Scherer
17 *
18 * Test file for string casing C++ API functions.
19 */
20
21 #include "unicode/std_string.h"
22 #include "unicode/brkiter.h"
23 #include "unicode/casemap.h"
24 #include "unicode/edits.h"
25 #include "unicode/uchar.h"
26 #include "unicode/ures.h"
27 #include "unicode/uloc.h"
28 #include "unicode/locid.h"
29 #include "unicode/ubrk.h"
30 #include "unicode/unistr.h"
31 #include "unicode/ucasemap.h"
32 #include "unicode/ustring.h"
33 #include "ucase.h"
34 #include "ustrtest.h"
35 #include "unicode/tstdtmod.h"
36 #include "cmemory.h"
37 #include "testutil.h"
38
39 class StringCaseTest: public IntlTest {
40 public:
41 StringCaseTest();
42 virtual ~StringCaseTest();
43
44 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0);
45
46 void TestCaseConversion();
47
48 void TestCasingImpl(const UnicodeString &input,
49 const UnicodeString &output,
50 int32_t whichCase,
51 void *iter, const char *localeID, uint32_t options);
52 void TestCasing();
53 void TestTitleOptions();
54 void TestFullCaseFoldingIterator();
55 void TestGreekUpper();
56 void TestLongUpper();
57 void TestMalformedUTF8();
58 void TestBufferOverflow();
59 void TestEdits();
60 void TestCopyMoveEdits();
61 void TestEditsFindFwdBwd();
62 void TestMergeEdits();
63 void TestCaseMapWithEdits();
64 void TestCaseMapUTF8WithEdits();
65 void TestCaseMapToString();
66 void TestCaseMapUTF8ToString();
67 void TestLongUnicodeString();
68 void TestBug13127();
69 void TestInPlaceTitle();
70 void TestCaseMapEditsIteratorDocs();
71 void TestCaseMapGreekExtended();
72
73 private:
74 void assertGreekUpper(const char16_t *s, const char16_t *expected);
75
76 Locale GREEK_LOCALE_;
77 };
78
79 StringCaseTest::StringCaseTest() : GREEK_LOCALE_("el") {}
80
81 StringCaseTest::~StringCaseTest() {}
82
83 extern IntlTest *createStringCaseTest() {
84 return new StringCaseTest();
85 }
86
87 void
88 StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
89 if(exec) {
90 logln("TestSuite StringCaseTest: ");
91 }
92 TESTCASE_AUTO_BEGIN;
93 TESTCASE_AUTO(TestCaseConversion);
94 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
95 TESTCASE_AUTO(TestCasing);
96 TESTCASE_AUTO(TestTitleOptions);
97 #endif
98 TESTCASE_AUTO(TestFullCaseFoldingIterator);
99 TESTCASE_AUTO(TestGreekUpper);
100 TESTCASE_AUTO(TestLongUpper);
101 TESTCASE_AUTO(TestMalformedUTF8);
102 TESTCASE_AUTO(TestBufferOverflow);
103 TESTCASE_AUTO(TestEdits);
104 TESTCASE_AUTO(TestCopyMoveEdits);
105 TESTCASE_AUTO(TestEditsFindFwdBwd);
106 TESTCASE_AUTO(TestMergeEdits);
107 TESTCASE_AUTO(TestCaseMapWithEdits);
108 TESTCASE_AUTO(TestCaseMapUTF8WithEdits);
109 TESTCASE_AUTO(TestCaseMapToString);
110 TESTCASE_AUTO(TestCaseMapUTF8ToString);
111 TESTCASE_AUTO(TestLongUnicodeString);
112 #if !UCONFIG_NO_BREAK_ITERATION
113 TESTCASE_AUTO(TestBug13127);
114 TESTCASE_AUTO(TestInPlaceTitle);
115 #endif
116 TESTCASE_AUTO(TestCaseMapEditsIteratorDocs);
117 TESTCASE_AUTO(TestCaseMapGreekExtended);
118 TESTCASE_AUTO_END;
119 }
120
121 void
122 StringCaseTest::TestCaseConversion()
123 {
124 static const UChar uppercaseGreek[] =
125 { 0x399, 0x395, 0x3a3, 0x3a5, 0x3a3, 0x20, 0x03a7, 0x3a1, 0x399, 0x3a3, 0x3a4,
126 0x39f, 0x3a3, 0 };
127 // "IESUS CHRISTOS"
128
129 static const UChar lowercaseGreek[] =
130 { 0x3b9, 0x3b5, 0x3c3, 0x3c5, 0x3c2, 0x20, 0x03c7, 0x3c1, 0x3b9, 0x3c3, 0x3c4,
131 0x3bf, 0x3c2, 0 };
132 // "iesus christos"
133
134 static const UChar lowercaseTurkish[] =
135 { 0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0x2c, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x63, 0x6f,
136 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x0131, 0x6e, 0x6f, 0x70, 0x6c, 0x65, 0x21, 0 };
137
138 static const UChar uppercaseTurkish[] =
139 { 0x54, 0x4f, 0x50, 0x4b, 0x41, 0x50, 0x49, 0x20, 0x50, 0x41, 0x4c, 0x41, 0x43, 0x45, 0x2c, 0x20,
140 0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4c, 0 };
141
142 UnicodeString expectedResult;
143 UnicodeString test3;
144
145 test3 += (UChar32)0x0130;
146 test3 += "STANBUL, NOT CONSTANTINOPLE!";
147
148 UnicodeString test4(test3);
149 test4.toLower(Locale(""));
150 expectedResult = UnicodeString("i\\u0307stanbul, not constantinople!", "").unescape();
151 if (test4 != expectedResult)
152 errln("1. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
153
154 test4 = test3;
155 test4.toLower(Locale("tr", "TR"));
156 expectedResult = lowercaseTurkish;
157 if (test4 != expectedResult)
158 errln("2. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
159
160 test3 = "topkap";
161 test3 += (UChar32)0x0131;
162 test3 += " palace, istanbul";
163 test4 = test3;
164
165 test4.toUpper(Locale(""));
166 expectedResult = "TOPKAPI PALACE, ISTANBUL";
167 if (test4 != expectedResult)
168 errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
169
170 test4 = test3;
171 test4.toUpper(Locale("tr", "TR"));
172 expectedResult = uppercaseTurkish;
173 if (test4 != expectedResult)
174 errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
175
176 test3 = CharsToUnicodeString("S\\u00FC\\u00DFmayrstra\\u00DFe");
177
178 test3.toUpper(Locale("de", "DE"));
179 expectedResult = CharsToUnicodeString("S\\u00DCSSMAYRSTRASSE");
180 if (test3 != expectedResult)
181 errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test3 + "\".");
182
183 test4.replace(0, test4.length(), uppercaseGreek);
184
185 test4.toLower(Locale("el", "GR"));
186 expectedResult = lowercaseGreek;
187 if (test4 != expectedResult)
188 errln("toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
189
190 test4.replace(0, test4.length(), lowercaseGreek);
191
192 test4.toUpper();
193 expectedResult = uppercaseGreek;
194 if (test4 != expectedResult)
195 errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
196
197 // more string case mapping tests with the new implementation
198 {
199 static const UChar
200
201 beforeLower[]= { 0x61, 0x42, 0x49, 0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff },
202 lowerRoot[]= { 0x61, 0x62, 0x69, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
203 lowerTurkish[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
204
205 beforeUpper[]= { 0x61, 0x42, 0x69, 0x3c2, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xfb03, 0xfb03, 0xd93f, 0xdfff },
206 upperRoot[]= { 0x41, 0x42, 0x49, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
207 upperTurkish[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
208
209 beforeMiniUpper[]= { 0xdf, 0x61 },
210 miniUpper[]= { 0x53, 0x53, 0x41 };
211
212 UnicodeString s;
213
214 /* lowercase with root locale */
215 s=UnicodeString(FALSE, beforeLower, UPRV_LENGTHOF(beforeLower));
216 s.toLower("");
217 if( s.length()!=UPRV_LENGTHOF(lowerRoot) ||
218 s!=UnicodeString(FALSE, lowerRoot, s.length())
219 ) {
220 errln("error in toLower(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerRoot, UPRV_LENGTHOF(lowerRoot)) + "\"");
221 }
222
223 /* lowercase with turkish locale */
224 s=UnicodeString(FALSE, beforeLower, UPRV_LENGTHOF(beforeLower));
225 s.setCharAt(0, beforeLower[0]).toLower(Locale("tr"));
226 if( s.length()!=UPRV_LENGTHOF(lowerTurkish) ||
227 s!=UnicodeString(FALSE, lowerTurkish, s.length())
228 ) {
229 errln("error in toLower(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerTurkish, UPRV_LENGTHOF(lowerTurkish)) + "\"");
230 }
231
232 /* uppercase with root locale */
233 s=UnicodeString(FALSE, beforeUpper, UPRV_LENGTHOF(beforeUpper));
234 s.setCharAt(0, beforeUpper[0]).toUpper(Locale(""));
235 if( s.length()!=UPRV_LENGTHOF(upperRoot) ||
236 s!=UnicodeString(FALSE, upperRoot, s.length())
237 ) {
238 errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperRoot, UPRV_LENGTHOF(upperRoot)) + "\"");
239 }
240
241 /* uppercase with turkish locale */
242 s=UnicodeString(FALSE, beforeUpper, UPRV_LENGTHOF(beforeUpper));
243 s.toUpper(Locale("tr"));
244 if( s.length()!=UPRV_LENGTHOF(upperTurkish) ||
245 s!=UnicodeString(FALSE, upperTurkish, s.length())
246 ) {
247 errln("error in toUpper(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperTurkish, UPRV_LENGTHOF(upperTurkish)) + "\"");
248 }
249
250 /* uppercase a short string with root locale */
251 s=UnicodeString(FALSE, beforeMiniUpper, UPRV_LENGTHOF(beforeMiniUpper));
252 s.setCharAt(0, beforeMiniUpper[0]).toUpper("");
253 if( s.length()!=UPRV_LENGTHOF(miniUpper) ||
254 s!=UnicodeString(FALSE, miniUpper, s.length())
255 ) {
256 errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, miniUpper, UPRV_LENGTHOF(miniUpper)) + "\"");
257 }
258 }
259
260 // test some supplementary characters (>= Unicode 3.1)
261 {
262 UnicodeString t;
263
264 UnicodeString
265 deseretInput=UnicodeString("\\U0001043C\\U00010414", "").unescape(),
266 deseretLower=UnicodeString("\\U0001043C\\U0001043C", "").unescape(),
267 deseretUpper=UnicodeString("\\U00010414\\U00010414", "").unescape();
268 (t=deseretInput).toLower();
269 if(t!=deseretLower) {
270 errln("error lowercasing Deseret (plane 1) characters");
271 }
272 (t=deseretInput).toUpper();
273 if(t!=deseretUpper) {
274 errln("error uppercasing Deseret (plane 1) characters");
275 }
276 }
277
278 // test some more cases that looked like problems
279 {
280 UnicodeString t;
281
282 UnicodeString
283 ljInput=UnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 \\U0001043C\\U00010414", "").unescape(),
284 ljLower=UnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 \\U0001043C\\U0001043C", "").unescape(),
285 ljUpper=UnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 \\U00010414\\U00010414", "").unescape();
286 (t=ljInput).toLower("en");
287 if(t!=ljLower) {
288 errln("error lowercasing LJ characters");
289 }
290 (t=ljInput).toUpper("en");
291 if(t!=ljUpper) {
292 errln("error uppercasing LJ characters");
293 }
294 }
295
296 #if !UCONFIG_NO_NORMALIZATION
297 // some context-sensitive casing depends on normalization data being present
298
299 // Unicode 3.1.1 SpecialCasing tests
300 {
301 UnicodeString t;
302
303 // sigmas preceded and/or followed by cased letters
304 UnicodeString
305 sigmas=UnicodeString("i\\u0307\\u03a3\\u0308j \\u0307\\u03a3\\u0308j i\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(),
306 sigmasLower=UnicodeString("i\\u0307\\u03c3\\u0308j \\u0307\\u03c3\\u0308j i\\u00ad\\u03c2\\u0308 \\u0307\\u03c3\\u0308 ", "").unescape(),
307 sigmasUpper=UnicodeString("I\\u0307\\u03a3\\u0308J \\u0307\\u03a3\\u0308J I\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape();
308
309 (t=sigmas).toLower();
310 if(t!=sigmasLower) {
311 errln("error in sigmas.toLower()=\"" + t + "\" expected \"" + sigmasLower + "\"");
312 }
313
314 (t=sigmas).toUpper(Locale(""));
315 if(t!=sigmasUpper) {
316 errln("error in sigmas.toUpper()=\"" + t + "\" expected \"" + sigmasUpper + "\"");
317 }
318
319 // turkish & azerbaijani dotless i & dotted I
320 // remove dot above if there was a capital I before and there are no more accents above
321 UnicodeString
322 dots=UnicodeString("I \\u0130 I\\u0307 I\\u0327\\u0307 I\\u0301\\u0307 I\\u0327\\u0307\\u0301", "").unescape(),
323 dotsTurkish=UnicodeString("\\u0131 i i i\\u0327 \\u0131\\u0301\\u0307 i\\u0327\\u0301", "").unescape(),
324 dotsDefault=UnicodeString("i i\\u0307 i\\u0307 i\\u0327\\u0307 i\\u0301\\u0307 i\\u0327\\u0307\\u0301", "").unescape();
325
326 (t=dots).toLower("tr");
327 if(t!=dotsTurkish) {
328 errln("error in dots.toLower(tr)=\"" + t + "\" expected \"" + dotsTurkish + "\"");
329 }
330
331 (t=dots).toLower("de");
332 if(t!=dotsDefault) {
333 errln("error in dots.toLower(de)=\"" + t + "\" expected \"" + dotsDefault + "\"");
334 }
335 }
336
337 // more Unicode 3.1.1 tests
338 {
339 UnicodeString t;
340
341 // lithuanian dot above in uppercasing
342 UnicodeString
343 dots=UnicodeString("a\\u0307 \\u0307 i\\u0307 j\\u0327\\u0307 j\\u0301\\u0307", "").unescape(),
344 dotsLithuanian=UnicodeString("A\\u0307 \\u0307 I J\\u0327 J\\u0301\\u0307", "").unescape(),
345 dotsDefault=UnicodeString("A\\u0307 \\u0307 I\\u0307 J\\u0327\\u0307 J\\u0301\\u0307", "").unescape();
346
347 (t=dots).toUpper("lt");
348 if(t!=dotsLithuanian) {
349 errln("error in dots.toUpper(lt)=\"" + t + "\" expected \"" + dotsLithuanian + "\"");
350 }
351
352 (t=dots).toUpper("de");
353 if(t!=dotsDefault) {
354 errln("error in dots.toUpper(de)=\"" + t + "\" expected \"" + dotsDefault + "\"");
355 }
356
357 // lithuanian adds dot above to i in lowercasing if there are more above accents
358 UnicodeString
359 i=UnicodeString("I I\\u0301 J J\\u0301 \\u012e \\u012e\\u0301 \\u00cc\\u00cd\\u0128", "").unescape(),
360 iLithuanian=UnicodeString("i i\\u0307\\u0301 j j\\u0307\\u0301 \\u012f \\u012f\\u0307\\u0301 i\\u0307\\u0300i\\u0307\\u0301i\\u0307\\u0303", "").unescape(),
361 iDefault=UnicodeString("i i\\u0301 j j\\u0301 \\u012f \\u012f\\u0301 \\u00ec\\u00ed\\u0129", "").unescape();
362
363 (t=i).toLower("lt");
364 if(t!=iLithuanian) {
365 errln("error in i.toLower(lt)=\"" + t + "\" expected \"" + iLithuanian + "\"");
366 }
367
368 (t=i).toLower("de");
369 if(t!=iDefault) {
370 errln("error in i.toLower(de)=\"" + t + "\" expected \"" + iDefault + "\"");
371 }
372 }
373
374 #endif
375
376 // test case folding
377 {
378 UnicodeString
379 s=UnicodeString("A\\u00df\\u00b5\\ufb03\\U0001040c\\u0130\\u0131", "").unescape(),
380 f=UnicodeString("ass\\u03bcffi\\U00010434i\\u0307\\u0131", "").unescape(),
381 g=UnicodeString("ass\\u03bcffi\\U00010434i\\u0131", "").unescape(),
382 t;
383
384 (t=s).foldCase();
385 if(f!=t) {
386 errln("error in foldCase(\"" + s + "\", default)=\"" + t + "\" but expected \"" + f + "\"");
387 }
388
389 // alternate handling for dotted I/dotless i (U+0130, U+0131)
390 (t=s).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I);
391 if(g!=t) {
392 errln("error in foldCase(\"" + s + "\", U_FOLD_CASE_EXCLUDE_SPECIAL_I)=\"" + t + "\" but expected \"" + g + "\"");
393 }
394 }
395 }
396
397 // data-driven case mapping tests ------------------------------------------ ***
398
// Selects the case mapping operation for the data-driven tests.
// The values also index dataNames[], so the order matters.
enum {
    TEST_LOWER = 0,
    TEST_UPPER,
    TEST_TITLE,
    TEST_FOLD,
    TEST_COUNT      // number of operations above
};
406
407 // names of TestData children in casing.txt
408 static const char *const dataNames[TEST_COUNT+1]={
409 "lowercasing",
410 "uppercasing",
411 "titlecasing",
412 "casefolding",
413 ""
414 };
415
416 void
417 StringCaseTest::TestCasingImpl(const UnicodeString &input,
418 const UnicodeString &output,
419 int32_t whichCase,
420 void *iter, const char *localeID, uint32_t options) {
421 // UnicodeString
422 UnicodeString result;
423 const char *name;
424 Locale locale(localeID);
425
426 result=input;
427 switch(whichCase) {
428 case TEST_LOWER:
429 name="toLower";
430 result.toLower(locale);
431 break;
432 case TEST_UPPER:
433 name="toUpper";
434 result.toUpper(locale);
435 break;
436 #if !UCONFIG_NO_BREAK_ITERATION
437 case TEST_TITLE:
438 name="toTitle";
439 result.toTitle((BreakIterator *)iter, locale, options);
440 break;
441 #endif
442 case TEST_FOLD:
443 name="foldCase";
444 result.foldCase(options);
445 break;
446 default:
447 name="";
448 break; // won't happen
449 }
450 if(result!=output) {
451 dataerrln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name);
452 }
453 #if !UCONFIG_NO_BREAK_ITERATION
454 if(whichCase==TEST_TITLE && options==0) {
455 result=input;
456 result.toTitle((BreakIterator *)iter, locale);
457 if(result!=output) {
458 dataerrln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res");
459 }
460 }
461 #endif
462
463 // UTF-8
464 char utf8In[100], utf8Out[100];
465 int32_t utf8InLength, utf8OutLength, resultLength;
466 UChar *buffer;
467
468 IcuTestErrorCode errorCode(*this, "TestCasingImpl");
469 LocalUCaseMapPointer csm(ucasemap_open(localeID, options, errorCode));
470 #if !UCONFIG_NO_BREAK_ITERATION
471 if(iter!=NULL) {
472 // Clone the break iterator so that the UCaseMap can safely adopt it.
473 UBreakIterator *clone=ubrk_safeClone((UBreakIterator *)iter, NULL, NULL, errorCode);
474 ucasemap_setBreakIterator(csm.getAlias(), clone, errorCode);
475 }
476 #endif
477
478 u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), errorCode);
479 switch(whichCase) {
480 case TEST_LOWER:
481 name="ucasemap_utf8ToLower";
482 utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(),
483 utf8Out, (int32_t)sizeof(utf8Out),
484 utf8In, utf8InLength, errorCode);
485 break;
486 case TEST_UPPER:
487 name="ucasemap_utf8ToUpper";
488 utf8OutLength=ucasemap_utf8ToUpper(csm.getAlias(),
489 utf8Out, (int32_t)sizeof(utf8Out),
490 utf8In, utf8InLength, errorCode);
491 break;
492 #if !UCONFIG_NO_BREAK_ITERATION
493 case TEST_TITLE:
494 name="ucasemap_utf8ToTitle";
495 utf8OutLength=ucasemap_utf8ToTitle(csm.getAlias(),
496 utf8Out, (int32_t)sizeof(utf8Out),
497 utf8In, utf8InLength, errorCode);
498 break;
499 #endif
500 case TEST_FOLD:
501 name="ucasemap_utf8FoldCase";
502 utf8OutLength=ucasemap_utf8FoldCase(csm.getAlias(),
503 utf8Out, (int32_t)sizeof(utf8Out),
504 utf8In, utf8InLength, errorCode);
505 break;
506 default:
507 name="";
508 utf8OutLength=0;
509 break; // won't happen
510 }
511 buffer=result.getBuffer(utf8OutLength);
512 u_strFromUTF8(buffer, result.getCapacity(), &resultLength, utf8Out, utf8OutLength, errorCode);
513 result.releaseBuffer(errorCode.isSuccess() ? resultLength : 0);
514
515 if(errorCode.isFailure()) {
516 errcheckln(errorCode, "error: %s() got an error for a test case from casing.res - %s", name, u_errorName(errorCode));
517 errorCode.reset();
518 } else if(result!=output) {
519 errln("error: %s() got a wrong result for a test case from casing.res", name);
520 errln("expected \"" + output + "\" got \"" + result + "\"" );
521 }
522 }
523
524 void
525 StringCaseTest::TestCasing() {
526 UErrorCode status = U_ZERO_ERROR;
527 #if !UCONFIG_NO_BREAK_ITERATION
528 LocalUBreakIteratorPointer iter;
529 #endif
530 char cLocaleID[100];
531 UnicodeString locale, input, output, optionsString, result;
532 uint32_t options;
533 int32_t whichCase, type;
534 LocalPointer<TestDataModule> driver(TestDataModule::getTestDataModule("casing", *this, status));
535 if(U_SUCCESS(status)) {
536 for(whichCase=0; whichCase<TEST_COUNT; ++whichCase) {
537 #if UCONFIG_NO_BREAK_ITERATION
538 if(whichCase==TEST_TITLE) {
539 continue;
540 }
541 #endif
542 LocalPointer<TestData> casingTest(driver->createTestData(dataNames[whichCase], status));
543 if(U_FAILURE(status)) {
544 errln("TestCasing failed to createTestData(%s) - %s", dataNames[whichCase], u_errorName(status));
545 break;
546 }
547 const DataMap *myCase = NULL;
548 while(casingTest->nextCase(myCase, status)) {
549 input = myCase->getString("Input", status);
550 output = myCase->getString("Output", status);
551
552 if(whichCase!=TEST_FOLD) {
553 locale = myCase->getString("Locale", status);
554 }
555 locale.extract(0, 0x7fffffff, cLocaleID, sizeof(cLocaleID), "");
556
557 #if !UCONFIG_NO_BREAK_ITERATION
558 if(whichCase==TEST_TITLE) {
559 type = myCase->getInt("Type", status);
560 if(type>=0) {
561 iter.adoptInstead(ubrk_open((UBreakIteratorType)type, cLocaleID, NULL, 0, &status));
562 } else if(type==-2) {
563 // Open a trivial break iterator that only delivers { 0, length }
564 // or even just { 0 } as boundaries.
565 static const UChar rules[] = { 0x2e, 0x2a, 0x3b }; // ".*;"
566 UParseError parseError;
567 iter.adoptInstead(ubrk_openRules(rules, UPRV_LENGTHOF(rules), NULL, 0, &parseError, &status));
568 }
569 }
570 #endif
571 options = 0;
572 if(whichCase==TEST_TITLE || whichCase==TEST_FOLD) {
573 optionsString = myCase->getString("Options", status);
574 if(optionsString.indexOf((UChar)0x54)>=0) { // T
575 options|=U_FOLD_CASE_EXCLUDE_SPECIAL_I;
576 }
577 if(optionsString.indexOf((UChar)0x4c)>=0) { // L
578 options|=U_TITLECASE_NO_LOWERCASE;
579 }
580 if(optionsString.indexOf((UChar)0x41)>=0) { // A
581 options|=U_TITLECASE_NO_BREAK_ADJUSTMENT;
582 }
583 }
584
585 if(U_FAILURE(status)) {
586 dataerrln("error: TestCasing() setup failed for %s test case from casing.res: %s", dataNames[whichCase], u_errorName(status));
587 status = U_ZERO_ERROR;
588 } else {
589 #if UCONFIG_NO_BREAK_ITERATION
590 LocalPointer<UMemory> iter;
591 #endif
592 TestCasingImpl(input, output, whichCase, iter.getAlias(), cLocaleID, options);
593 }
594
595 #if !UCONFIG_NO_BREAK_ITERATION
596 iter.adoptInstead(NULL);
597 #endif
598 }
599 }
600 }
601
602 #if !UCONFIG_NO_BREAK_ITERATION
603 // more tests for API coverage
604 status=U_ZERO_ERROR;
605 input=UNICODE_STRING_SIMPLE("sTrA\\u00dfE").unescape();
606 (result=input).toTitle(NULL);
607 if(result!=UNICODE_STRING_SIMPLE("Stra\\u00dfe").unescape()) {
608 dataerrln("UnicodeString::toTitle(NULL) failed.");
609 }
610 #endif
611 }
612
613 void
614 StringCaseTest::TestTitleOptions() {
615 // New options in ICU 60.
616 TestCasingImpl(u"ʻcAt! ʻeTc.", u"ʻCat! ʻetc.", TEST_TITLE,
617 nullptr, "", U_TITLECASE_WHOLE_STRING);
618 TestCasingImpl(u"a ʻCaT. A ʻdOg! ʻeTc.", u"A ʻCaT. A ʻdOg! ʻETc.", TEST_TITLE,
619 nullptr, "", U_TITLECASE_SENTENCES|U_TITLECASE_NO_LOWERCASE);
620 TestCasingImpl(u"49eRs", u"49ers", TEST_TITLE,
621 nullptr, "", U_TITLECASE_WHOLE_STRING);
622 TestCasingImpl(u"«丰(aBc)»", u"«丰(abc)»", TEST_TITLE,
623 nullptr, "", U_TITLECASE_WHOLE_STRING);
624 TestCasingImpl(u"49eRs", u"49Ers", TEST_TITLE,
625 nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_ADJUST_TO_CASED);
626 TestCasingImpl(u"«丰(aBc)»", u"«丰(Abc)»", TEST_TITLE,
627 nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_ADJUST_TO_CASED);
628 TestCasingImpl(u" john. Smith", u" John. Smith", TEST_TITLE,
629 nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_NO_LOWERCASE);
630 TestCasingImpl(u" john. Smith", u" john. smith", TEST_TITLE,
631 nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_NO_BREAK_ADJUSTMENT);
632 TestCasingImpl(u"«ijs»", u"«IJs»", TEST_TITLE,
633 nullptr, "nl-BE", U_TITLECASE_WHOLE_STRING);
634 TestCasingImpl(u"«ijs»", u"«İjs»", TEST_TITLE,
635 nullptr, "tr-DE", U_TITLECASE_WHOLE_STRING);
636
637 #if !UCONFIG_NO_BREAK_ITERATION
638 // Test conflicting settings.
639 // If & when we add more options, then the ORed combinations may become
640 // indistinguishable from valid values.
641 IcuTestErrorCode errorCode(*this, "TestTitleOptions");
642 CaseMap::toTitle("", U_TITLECASE_NO_BREAK_ADJUSTMENT|U_TITLECASE_ADJUST_TO_CASED, nullptr,
643 u"", 0, nullptr, 0, nullptr, errorCode);
644 if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) {
645 errln("CaseMap::toTitle(multiple adjustment options) -> %s not illegal argument",
646 errorCode.errorName());
647 }
648 errorCode.reset();
649 CaseMap::toTitle("", U_TITLECASE_WHOLE_STRING|U_TITLECASE_SENTENCES, nullptr,
650 u"", 0, nullptr, 0, nullptr, errorCode);
651 if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) {
652 errln("CaseMap::toTitle(multiple iterator options) -> %s not illegal argument",
653 errorCode.errorName());
654 }
655 errorCode.reset();
656 LocalPointer<BreakIterator> iter(
657 BreakIterator::createCharacterInstance(Locale::getRoot(), errorCode));
658 CaseMap::toTitle("", U_TITLECASE_WHOLE_STRING, iter.getAlias(),
659 u"", 0, nullptr, 0, nullptr, errorCode);
660 if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) {
661 errln("CaseMap::toTitle(iterator option + iterator) -> %s not illegal argument",
662 errorCode.errorName());
663 }
664 errorCode.reset();
665 #endif
666 }
667
668 void
669 StringCaseTest::TestFullCaseFoldingIterator() {
670 UnicodeString ffi=UNICODE_STRING_SIMPLE("ffi");
671 UnicodeString ss=UNICODE_STRING_SIMPLE("ss");
672 FullCaseFoldingIterator iter;
673 int32_t count=0;
674 int32_t countSpecific=0;
675 UChar32 c;
676 UnicodeString full;
677 while((c=iter.next(full))>=0) {
678 ++count;
679 // Check that the full Case_Folding has more than 1 code point.
680 if(!full.hasMoreChar32Than(0, 0x7fffffff, 1)) {
681 errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding has at most 1 code point", (long)c);
682 continue;
683 }
684 // Check that full == Case_Folding(c).
685 UnicodeString cf(c);
686 cf.foldCase();
687 if(full!=cf) {
688 errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding != cf(c)", (long)c);
689 continue;
690 }
691 // Spot-check a couple of specific cases.
692 if((full==ffi && c==0xfb03) || (full==ss && (c==0xdf || c==0x1e9e))) {
693 ++countSpecific;
694 }
695 }
696 if(countSpecific!=3) {
697 errln("error: FullCaseFoldingIterator did not yield exactly the expected specific cases");
698 }
699 if(count<70) {
700 errln("error: FullCaseFoldingIterator yielded only %d (cp, full) pairs", (int)count);
701 }
702 }
703
704 void
705 StringCaseTest::assertGreekUpper(const char16_t *s, const char16_t *expected) {
706 UnicodeString s16(s);
707 UnicodeString expected16(expected);
708 UnicodeString msg = UnicodeString("UnicodeString::toUpper/Greek(\"") + s16 + "\")";
709 UnicodeString result16(s16);
710 result16.toUpper(GREEK_LOCALE_);
711 assertEquals(msg, expected16, result16);
712
713 msg = UnicodeString("u_strToUpper/Greek(\"") + s16 + "\") cap=";
714 int32_t length = expected16.length();
715 int32_t capacities[] = {
716 // Keep in sync with the UTF-8 capacities near the bottom of this function.
717 0, length / 2, length - 1, length, length + 1
718 };
719 for (int32_t i = 0; i < UPRV_LENGTHOF(capacities); ++i) {
720 int32_t cap = capacities[i];
721 UChar *dest16 = result16.getBuffer(expected16.length() + 1);
722 u_memset(dest16, 0x55AA, result16.getCapacity());
723 UErrorCode errorCode = U_ZERO_ERROR;
724 length = u_strToUpper(dest16, cap, s16.getBuffer(), s16.length(), "el", &errorCode);
725 assertEquals(msg + cap, expected16.length(), length);
726 UErrorCode expectedErrorCode;
727 if (cap < expected16.length()) {
728 expectedErrorCode = U_BUFFER_OVERFLOW_ERROR;
729 } else if (cap == expected16.length()) {
730 expectedErrorCode = U_STRING_NOT_TERMINATED_WARNING;
731 } else {
732 expectedErrorCode = U_ZERO_ERROR;
733 assertEquals(msg + cap + " NUL", 0, dest16[length]);
734 }
735 assertEquals(msg + cap + " errorCode", expectedErrorCode, errorCode);
736 result16.releaseBuffer(length);
737 if (cap >= expected16.length()) {
738 assertEquals(msg + cap, expected16, result16);
739 }
740 }
741
742 UErrorCode errorCode = U_ZERO_ERROR;
743 LocalUCaseMapPointer csm(ucasemap_open("el", 0, &errorCode));
744 assertSuccess("ucasemap_open", errorCode);
745 std::string s8;
746 s16.toUTF8String(s8);
747 msg = UnicodeString("ucasemap_utf8ToUpper/Greek(\"") + s16 + "\")";
748 char dest8[1000];
749 length = ucasemap_utf8ToUpper(csm.getAlias(), dest8, UPRV_LENGTHOF(dest8),
750 s8.data(), static_cast<int32_t>(s8.length()), &errorCode);
751 assertSuccess("ucasemap_utf8ToUpper", errorCode);
752 StringPiece result8(dest8, length);
753 UnicodeString result16From8 = UnicodeString::fromUTF8(result8);
754 assertEquals(msg, expected16, result16From8);
755
756 msg += " cap=";
757 capacities[1] = length / 2;
758 capacities[2] = length - 1;
759 capacities[3] = length;
760 capacities[4] = length + 1;
761 char dest8b[1000];
762 int32_t expected8Length = length; // Assuming the previous call worked.
763 for (int32_t i = 0; i < UPRV_LENGTHOF(capacities); ++i) {
764 int32_t cap = capacities[i];
765 memset(dest8b, 0x5A, UPRV_LENGTHOF(dest8b));
766 UErrorCode errorCode = U_ZERO_ERROR;
767 length = ucasemap_utf8ToUpper(csm.getAlias(), dest8b, cap,
768 s8.data(), static_cast<int32_t>(s8.length()), &errorCode);
769 assertEquals(msg + cap, expected8Length, length);
770 UErrorCode expectedErrorCode;
771 if (cap < expected8Length) {
772 expectedErrorCode = U_BUFFER_OVERFLOW_ERROR;
773 } else if (cap == expected8Length) {
774 expectedErrorCode = U_STRING_NOT_TERMINATED_WARNING;
775 } else {
776 expectedErrorCode = U_ZERO_ERROR;
777 // Casts to int32_t to avoid matching UBool.
778 assertEquals(msg + cap + " NUL", (int32_t)0, (int32_t)dest8b[length]);
779 }
780 assertEquals(msg + cap + " errorCode", expectedErrorCode, errorCode);
781 if (cap >= expected8Length) {
782 assertEquals(msg + cap + " (memcmp)", 0, memcmp(dest8, dest8b, expected8Length));
783 }
784 }
785 }
786
787 void
788 StringCaseTest::TestGreekUpper() {
789 // http://bugs.icu-project.org/trac/ticket/5456
790 assertGreekUpper(u"άδικος, κείμενο, ίριδα", u"ΑΔΙΚΟΣ, ΚΕΙΜΕΝΟ, ΙΡΙΔΑ");
791 // https://bugzilla.mozilla.org/show_bug.cgi?id=307039
792 // https://bug307039.bmoattachments.org/attachment.cgi?id=194893
793 assertGreekUpper(u"Πατάτα", u"ΠΑΤΑΤΑ");
794 assertGreekUpper(u"Αέρας, Μυστήριο, Ωραίο", u"ΑΕΡΑΣ, ΜΥΣΤΗΡΙΟ, ΩΡΑΙΟ");
795 assertGreekUpper(u"Μαΐου, Πόρος, Ρύθμιση", u"ΜΑΪΟΥ, ΠΟΡΟΣ, ΡΥΘΜΙΣΗ");
796 assertGreekUpper(u"ΰ, Τηρώ, Μάιος", u"Ϋ, ΤΗΡΩ, ΜΑΪΟΣ");
797 assertGreekUpper(u"άυλος", u"ΑΫΛΟΣ");
798 assertGreekUpper(u"ΑΫΛΟΣ", u"ΑΫΛΟΣ");
799 assertGreekUpper(u"Άκλιτα ρήματα ή άκλιτες μετοχές", u"ΑΚΛΙΤΑ ΡΗΜΑΤΑ Ή ΑΚΛΙΤΕΣ ΜΕΤΟΧΕΣ");
800 // http://www.unicode.org/udhr/d/udhr_ell_monotonic.html
801 assertGreekUpper(u"Επειδή η αναγνώριση της αξιοπρέπειας", u"ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ ΤΗΣ ΑΞΙΟΠΡΕΠΕΙΑΣ");
802 assertGreekUpper(u"νομικού ή διεθνούς", u"ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ");
803 // http://unicode.org/udhr/d/udhr_ell_polytonic.html
804 assertGreekUpper(u"Ἐπειδὴ ἡ ἀναγνώριση", u"ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ");
805 assertGreekUpper(u"νομικοῦ ἢ διεθνοῦς", u"ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ");
806 // From Google bug report
807 assertGreekUpper(u"Νέο, Δημιουργία", u"ΝΕΟ, ΔΗΜΙΟΥΡΓΙΑ");
808 // http://crbug.com/234797
809 assertGreekUpper(u"Ελάτε να φάτε τα καλύτερα παϊδάκια!", u"ΕΛΑΤΕ ΝΑ ΦΑΤΕ ΤΑ ΚΑΛΥΤΕΡΑ ΠΑΪΔΑΚΙΑ!");
810 assertGreekUpper(u"Μαΐου, τρόλεϊ", u"ΜΑΪΟΥ, ΤΡΟΛΕΪ");
811 assertGreekUpper(u"Το ένα ή το άλλο.", u"ΤΟ ΕΝΑ Ή ΤΟ ΑΛΛΟ.");
812 // http://multilingualtypesetting.co.uk/blog/greek-typesetting-tips/
813 assertGreekUpper(u"ρωμέικα", u"ΡΩΜΕΪΚΑ");
814 assertGreekUpper(u"ή.", u"Ή.");
815 }
816
817 void
818 StringCaseTest::TestLongUpper() {
819 if (quick) {
820 logln("not exhaustive mode: skipping this test");
821 return;
822 }
823 // Ticket #12663, crash with an extremely long string where
824 // U+0390 maps to 0399 0308 0301 so that the result is three times as long
825 // and overflows an int32_t.
826 int32_t length = 0x40000004; // more than 1G UChars
827 UnicodeString s(length, (UChar32)0x390, length);
828 UnicodeString result;
829 UChar *dest = result.getBuffer(length + 1);
830 if (s.isBogus() || dest == NULL) {
831 logln("Out of memory, unable to run this test on this machine.");
832 return;
833 }
834 IcuTestErrorCode errorCode(*this, "TestLongUpper");
835 int32_t destLength = u_strToUpper(dest, result.getCapacity(),
836 s.getBuffer(), s.length(), "", errorCode);
837 result.releaseBuffer(destLength);
838 if (errorCode.reset() != U_INDEX_OUTOFBOUNDS_ERROR) {
839 errln("expected U_INDEX_OUTOFBOUNDS_ERROR, got %s (destLength is undefined, got %ld)",
840 errorCode.errorName(), (long)destLength);
841 }
842 }
843
844 void StringCaseTest::TestMalformedUTF8() {
845 // ticket #12639
846 IcuTestErrorCode errorCode(*this, "TestMalformedUTF8");
847 LocalUCaseMapPointer csm(ucasemap_open("en", U_TITLECASE_NO_BREAK_ADJUSTMENT, errorCode));
848 if (errorCode.isFailure()) {
849 errln("ucasemap_open(English) failed - %s", errorCode.errorName());
850 return;
851 }
852 char src[1] = { (char)0x85 }; // malformed UTF-8
853 char dest[3] = { 0, 0, 0 };
854 int32_t destLength;
855 #if !UCONFIG_NO_BREAK_ITERATION
856 destLength = ucasemap_utf8ToTitle(csm.getAlias(), dest, 3, src, 1, errorCode);
857 if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
858 errln("ucasemap_utf8ToTitle(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
859 errorCode.errorName(), (int)destLength, dest[0]);
860 }
861 #endif
862
863 errorCode.reset();
864 dest[0] = 0;
865 destLength = ucasemap_utf8ToLower(csm.getAlias(), dest, 3, src, 1, errorCode);
866 if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
867 errln("ucasemap_utf8ToLower(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
868 errorCode.errorName(), (int)destLength, dest[0]);
869 }
870
871 errorCode.reset();
872 dest[0] = 0;
873 destLength = ucasemap_utf8ToUpper(csm.getAlias(), dest, 3, src, 1, errorCode);
874 if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
875 errln("ucasemap_utf8ToUpper(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
876 errorCode.errorName(), (int)destLength, dest[0]);
877 }
878
879 errorCode.reset();
880 dest[0] = 0;
881 destLength = ucasemap_utf8FoldCase(csm.getAlias(), dest, 3, src, 1, errorCode);
882 if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) {
883 errln("ucasemap_utf8FoldCase(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
884 errorCode.errorName(), (int)destLength, dest[0]);
885 }
886 }
887
888 void StringCaseTest::TestBufferOverflow() {
889 // Ticket #12849, incorrect result from Title Case preflight operation,
890 // when buffer overflow error is expected.
891 IcuTestErrorCode errorCode(*this, "TestBufferOverflow");
892 LocalUCaseMapPointer csm(ucasemap_open("en", 0, errorCode));
893 if (errorCode.isFailure()) {
894 errln("ucasemap_open(English) failed - %s", errorCode.errorName());
895 return;
896 }
897
898 UnicodeString data("hello world");
899 int32_t result;
900 #if !UCONFIG_NO_BREAK_ITERATION
901 result = ucasemap_toTitle(csm.getAlias(), NULL, 0, data.getBuffer(), data.length(), errorCode);
902 if (errorCode.get() != U_BUFFER_OVERFLOW_ERROR || result != data.length()) {
903 errln("%s:%d ucasemap_toTitle(\"hello world\") failed: "
904 "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)",
905 __FILE__, __LINE__, data.length(), errorCode.errorName(), result);
906 }
907 #endif
908 errorCode.reset();
909
910 std::string data_utf8;
911 data.toUTF8String(data_utf8);
912 #if !UCONFIG_NO_BREAK_ITERATION
913 result = ucasemap_utf8ToTitle(csm.getAlias(), NULL, 0, data_utf8.c_str(), static_cast<int32_t>(data_utf8.length()), errorCode);
914 if (errorCode.get() != U_BUFFER_OVERFLOW_ERROR || result != (int32_t)data_utf8.length()) {
915 errln("%s:%d ucasemap_toTitle(\"hello world\") failed: "
916 "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)",
917 __FILE__, __LINE__, data_utf8.length(), errorCode.errorName(), result);
918 }
919 #endif
920 errorCode.reset();
921 }
922
923 void StringCaseTest::TestEdits() {
924 IcuTestErrorCode errorCode(*this, "TestEdits");
925 Edits edits;
926 assertFalse("new Edits hasChanges", edits.hasChanges());
927 assertEquals("new Edits numberOfChanges", 0, edits.numberOfChanges());
928 assertEquals("new Edits", 0, edits.lengthDelta());
929 edits.addUnchanged(1); // multiple unchanged ranges are combined
930 edits.addUnchanged(10000); // too long, and they are split
931 edits.addReplace(0, 0);
932 edits.addUnchanged(2);
933 assertFalse("unchanged 10003 hasChanges", edits.hasChanges());
934 assertEquals("unchanged 10003 numberOfChanges", 0, edits.numberOfChanges());
935 assertEquals("unchanged 10003", 0, edits.lengthDelta());
936 edits.addReplace(2, 1); // multiple short equal-lengths edits are compressed
937 edits.addUnchanged(0);
938 edits.addReplace(2, 1);
939 edits.addReplace(2, 1);
940 edits.addReplace(0, 10);
941 edits.addReplace(100, 0);
942 edits.addReplace(3000, 4000); // variable-length encoding
943 edits.addReplace(100000, 100000);
944 assertTrue("some edits hasChanges", edits.hasChanges());
945 assertEquals("some edits numberOfChanges", 7, edits.numberOfChanges());
946 assertEquals("some edits", -3 + 10 - 100 + 1000, edits.lengthDelta());
947 UErrorCode outErrorCode = U_ZERO_ERROR;
948 assertFalse("edits done: copyErrorTo", edits.copyErrorTo(outErrorCode));
949
950 static const EditChange coarseExpectedChanges[] = {
951 { FALSE, 10003, 10003 },
952 { TRUE, 103106, 104013 }
953 };
954 TestUtility::checkEditsIter(*this, u"coarse",
955 edits.getCoarseIterator(), edits.getCoarseIterator(),
956 coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), TRUE, errorCode);
957 TestUtility::checkEditsIter(*this, u"coarse changes",
958 edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(),
959 coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), FALSE, errorCode);
960
961 static const EditChange fineExpectedChanges[] = {
962 { FALSE, 10003, 10003 },
963 { TRUE, 2, 1 },
964 { TRUE, 2, 1 },
965 { TRUE, 2, 1 },
966 { TRUE, 0, 10 },
967 { TRUE, 100, 0 },
968 { TRUE, 3000, 4000 },
969 { TRUE, 100000, 100000 }
970 };
971 TestUtility::checkEditsIter(*this, u"fine",
972 edits.getFineIterator(), edits.getFineIterator(),
973 fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), TRUE, errorCode);
974 TestUtility::checkEditsIter(*this, u"fine changes",
975 edits.getFineChangesIterator(), edits.getFineChangesIterator(),
976 fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), FALSE, errorCode);
977
978 edits.reset();
979 assertFalse("reset hasChanges", edits.hasChanges());
980 assertEquals("reset numberOfChanges", 0, edits.numberOfChanges());
981 assertEquals("reset", 0, edits.lengthDelta());
982 Edits::Iterator ei = edits.getCoarseChangesIterator();
983 assertFalse("reset then iterator", ei.next(errorCode));
984 }
985
986 void StringCaseTest::TestCopyMoveEdits() {
987 IcuTestErrorCode errorCode(*this, "TestCopyMoveEdits");
988 // Exceed the stack array capacity.
989 Edits a;
990 for (int32_t i = 0; i < 250; ++i) {
991 a.addReplace(i % 10, (i % 10) + 1);
992 }
993 assertEquals("a: many edits, length delta", 250, a.lengthDelta());
994
995 // copy
996 Edits b(a);
997 assertEquals("b: copy of many edits, length delta", 250, b.lengthDelta());
998 assertEquals("a remains: many edits, length delta", 250, a.lengthDelta());
999 TestUtility::checkEqualEdits(*this, u"b copy of a", a, b, errorCode);
1000
1001 // assign
1002 Edits c;
1003 c.addUnchanged(99);
1004 c.addReplace(88, 77);
1005 c = b;
1006 assertEquals("c: assigned many edits, length delta", 250, c.lengthDelta());
1007 assertEquals("b remains: many edits, length delta", 250, b.lengthDelta());
1008 TestUtility::checkEqualEdits(*this, u"c = b", b, c, errorCode);
1009
1010 // std::move trouble on these platforms.
1011 // See https://ssl.icu-project.org/trac/ticket/13393
1012 #if !(U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390)
1013 // move constructor empties object with heap array
1014 Edits d(std::move(a));
1015 assertEquals("d: move-constructed many edits, length delta", 250, d.lengthDelta());
1016 assertFalse("a moved away: no more hasChanges", a.hasChanges());
1017 TestUtility::checkEqualEdits(*this, u"d() <- a", d, b, errorCode);
1018 Edits empty;
1019 TestUtility::checkEqualEdits(*this, u"a moved away", empty, a, errorCode);
1020
1021 // move assignment empties object with heap array
1022 Edits e;
1023 e.addReplace(0, 1000);
1024 e = std::move(b);
1025 assertEquals("e: move-assigned many edits, length delta", 250, e.lengthDelta());
1026 assertFalse("b moved away: no more hasChanges", b.hasChanges());
1027 TestUtility::checkEqualEdits(*this, u"e <- b", e, c, errorCode);
1028 TestUtility::checkEqualEdits(*this, u"b moved away", empty, b, errorCode);
1029
1030 // Edits::Iterator default constructor.
1031 Edits::Iterator iter;
1032 assertFalse("Edits::Iterator().next()", iter.next(errorCode));
1033 assertSuccess("Edits::Iterator().next()", errorCode);
1034 iter = e.getFineChangesIterator();
1035 assertTrue("iter.next()", iter.next(errorCode));
1036 assertSuccess("iter.next()", errorCode);
1037 assertTrue("iter.hasChange()", iter.hasChange());
1038 assertEquals("iter.newLength()", 1, iter.newLength());
1039 #endif
1040 }
1041
1042 void StringCaseTest::TestEditsFindFwdBwd() {
1043 IcuTestErrorCode errorCode(*this, "TestEditsFindFwdBwd");
1044 // Some users need index mappings to be efficient when they are out of order.
1045 // The most interesting failure case for this test is it taking a very long time.
1046 Edits e;
1047 constexpr int32_t N = 200000;
1048 for (int32_t i = 0; i < N; ++i) {
1049 e.addUnchanged(1);
1050 e.addReplace(3, 1);
1051 }
1052 Edits::Iterator iter = e.getFineIterator();
1053 for (int32_t i = 0; i <= N; i += 2) {
1054 assertEquals("ascending", i * 2, iter.sourceIndexFromDestinationIndex(i, errorCode));
1055 assertEquals("ascending", i * 2 + 1, iter.sourceIndexFromDestinationIndex(i + 1, errorCode));
1056 }
1057 for (int32_t i = N; i >= 0; i -= 2) {
1058 assertEquals("descending", i * 2 + 1, iter.sourceIndexFromDestinationIndex(i + 1, errorCode));
1059 assertEquals("descending", i * 2, iter.sourceIndexFromDestinationIndex(i, errorCode));
1060 }
1061 }
1062
1063 void StringCaseTest::TestMergeEdits() {
1064 // For debugging, set -v to see matching edits up to a failure.
1065 IcuTestErrorCode errorCode(*this, "TestMergeEdits");
1066 Edits ab, bc, ac, expected_ac;
1067
1068 // Simple: Two parallel non-changes.
1069 ab.addUnchanged(2);
1070 bc.addUnchanged(2);
1071 expected_ac.addUnchanged(2);
1072
1073 // Simple: Two aligned changes.
1074 ab.addReplace(3, 2);
1075 bc.addReplace(2, 1);
1076 expected_ac.addReplace(3, 1);
1077
1078 // Unequal non-changes.
1079 ab.addUnchanged(5);
1080 bc.addUnchanged(3);
1081 expected_ac.addUnchanged(3);
1082 // ab ahead by 2
1083
1084 // Overlapping changes accumulate until they share a boundary.
1085 ab.addReplace(4, 3);
1086 bc.addReplace(3, 2);
1087 ab.addReplace(4, 3);
1088 bc.addReplace(3, 2);
1089 ab.addReplace(4, 3);
1090 bc.addReplace(3, 2);
1091 bc.addUnchanged(4);
1092 expected_ac.addReplace(14, 8);
1093 // bc ahead by 2
1094
1095 // Balance out intermediate-string lengths.
1096 ab.addUnchanged(2);
1097 expected_ac.addUnchanged(2);
1098
1099 // Insert something and delete it: Should disappear.
1100 ab.addReplace(0, 5);
1101 ab.addReplace(0, 2);
1102 bc.addReplace(7, 0);
1103
1104 // Parallel change to make a new boundary.
1105 ab.addReplace(1, 2);
1106 bc.addReplace(2, 3);
1107 expected_ac.addReplace(1, 3);
1108
1109 // Multiple ab deletions should remain separate at the boundary.
1110 ab.addReplace(1, 0);
1111 ab.addReplace(2, 0);
1112 ab.addReplace(3, 0);
1113 expected_ac.addReplace(1, 0);
1114 expected_ac.addReplace(2, 0);
1115 expected_ac.addReplace(3, 0);
1116
1117 // Unequal non-changes can be split for another boundary.
1118 ab.addUnchanged(2);
1119 bc.addUnchanged(1);
1120 expected_ac.addUnchanged(1);
1121 // ab ahead by 1
1122
1123 // Multiple bc insertions should create a boundary and remain separate.
1124 bc.addReplace(0, 4);
1125 bc.addReplace(0, 5);
1126 bc.addReplace(0, 6);
1127 expected_ac.addReplace(0, 4);
1128 expected_ac.addReplace(0, 5);
1129 expected_ac.addReplace(0, 6);
1130 // ab ahead by 1
1131
1132 // Multiple ab deletions in the middle of a bc change are merged.
1133 bc.addReplace(2, 2);
1134 // bc ahead by 1
1135 ab.addReplace(1, 0);
1136 ab.addReplace(2, 0);
1137 ab.addReplace(3, 0);
1138 ab.addReplace(4, 1);
1139 expected_ac.addReplace(11, 2);
1140
1141 // Multiple bc insertions in the middle of an ab change are merged.
1142 ab.addReplace(5, 6);
1143 bc.addReplace(3, 3);
1144 // ab ahead by 3
1145 bc.addReplace(0, 4);
1146 bc.addReplace(0, 5);
1147 bc.addReplace(0, 6);
1148 bc.addReplace(3, 7);
1149 expected_ac.addReplace(5, 25);
1150
1151 // Delete around a deletion.
1152 ab.addReplace(4, 4);
1153 ab.addReplace(3, 0);
1154 ab.addUnchanged(2);
1155 bc.addReplace(2, 2);
1156 bc.addReplace(4, 0);
1157 expected_ac.addReplace(9, 2);
1158
1159 // Insert into an insertion.
1160 ab.addReplace(0, 2);
1161 bc.addReplace(1, 1);
1162 bc.addReplace(0, 8);
1163 bc.addUnchanged(4);
1164 expected_ac.addReplace(0, 10);
1165 // bc ahead by 3
1166
1167 // Balance out intermediate-string lengths.
1168 ab.addUnchanged(3);
1169 expected_ac.addUnchanged(3);
1170
1171 // Deletions meet insertions.
1172 // Output order is arbitrary in principle, but we expect insertions first
1173 // and want to keep it that way.
1174 ab.addReplace(2, 0);
1175 ab.addReplace(4, 0);
1176 ab.addReplace(6, 0);
1177 bc.addReplace(0, 1);
1178 bc.addReplace(0, 3);
1179 bc.addReplace(0, 5);
1180 expected_ac.addReplace(0, 1);
1181 expected_ac.addReplace(0, 3);
1182 expected_ac.addReplace(0, 5);
1183 expected_ac.addReplace(2, 0);
1184 expected_ac.addReplace(4, 0);
1185 expected_ac.addReplace(6, 0);
1186
1187 // End with a non-change, so that further edits are never reordered.
1188 ab.addUnchanged(1);
1189 bc.addUnchanged(1);
1190 expected_ac.addUnchanged(1);
1191
1192 ac.mergeAndAppend(ab, bc, errorCode);
1193 assertSuccess("ab+bc", errorCode);
1194 if (!TestUtility::checkEqualEdits(*this, u"ab+bc", expected_ac, ac, errorCode)) {
1195 return;
1196 }
1197
1198 // Append more Edits.
1199 Edits ab2, bc2;
1200 ab2.addUnchanged(5);
1201 bc2.addReplace(1, 2);
1202 bc2.addUnchanged(4);
1203 expected_ac.addReplace(1, 2);
1204 expected_ac.addUnchanged(4);
1205 ac.mergeAndAppend(ab2, bc2, errorCode);
1206 assertSuccess("ab2+bc2", errorCode);
1207 if (!TestUtility::checkEqualEdits(*this, u"ab2+bc2", expected_ac, ac, errorCode)) {
1208 return;
1209 }
1210
1211 // Append empty edits.
1212 Edits empty;
1213 ac.mergeAndAppend(empty, empty, errorCode);
1214 assertSuccess("empty+empty", errorCode);
1215 if (!TestUtility::checkEqualEdits(*this, u"empty+empty", expected_ac, ac, errorCode)) {
1216 return;
1217 }
1218
1219 // Error: Append more edits with mismatched intermediate-string lengths.
1220 Edits mismatch;
1221 mismatch.addReplace(1, 1);
1222 ac.mergeAndAppend(ab2, mismatch, errorCode);
1223 assertEquals("ab2+mismatch", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get());
1224 errorCode.reset();
1225 ac.mergeAndAppend(mismatch, bc2, errorCode);
1226 assertEquals("mismatch+bc2", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get());
1227 errorCode.reset();
1228 }
1229
1230 void StringCaseTest::TestCaseMapWithEdits() {
1231 IcuTestErrorCode errorCode(*this, "TestCaseMapWithEdits");
1232 UChar dest[20];
1233 Edits edits;
1234
1235 int32_t length = CaseMap::toLower("tr", U_OMIT_UNCHANGED_TEXT,
1236 u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1237 assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"), UnicodeString(TRUE, dest, length));
1238 static const EditChange lowerExpectedChanges[] = {
1239 { TRUE, 1, 1 },
1240 { FALSE, 4, 4 },
1241 { TRUE, 1, 1 },
1242 { FALSE, 2, 2 }
1243 };
1244 TestUtility::checkEditsIter(*this, u"toLower(IstanBul)",
1245 edits.getFineIterator(), edits.getFineIterator(),
1246 lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges),
1247 TRUE, errorCode);
1248
1249 edits.reset();
1250 length = CaseMap::toUpper("el", U_OMIT_UNCHANGED_TEXT,
1251 u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1252 assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"), UnicodeString(TRUE, dest, length));
1253 static const EditChange upperExpectedChanges[] = {
1254 { FALSE, 1, 1 },
1255 { TRUE, 1, 1 },
1256 { TRUE, 1, 1 },
1257 { TRUE, 1, 1 },
1258 { TRUE, 1, 1 },
1259 { TRUE, 1, 1 }
1260 };
1261 TestUtility::checkEditsIter(*this, u"toUpper(Πατάτα)",
1262 edits.getFineIterator(), edits.getFineIterator(),
1263 upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges),
1264 TRUE, errorCode);
1265
1266 edits.reset();
1267
1268 #if !UCONFIG_NO_BREAK_ITERATION
1269 length = CaseMap::toTitle("nl",
1270 U_OMIT_UNCHANGED_TEXT |
1271 U_TITLECASE_NO_BREAK_ADJUSTMENT |
1272 U_TITLECASE_NO_LOWERCASE,
1273 nullptr, u"IjssEL IglOo", 12,
1274 dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1275 assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"), UnicodeString(TRUE, dest, length));
1276 static const EditChange titleExpectedChanges[] = {
1277 { FALSE, 1, 1 },
1278 { TRUE, 1, 1 },
1279 { FALSE, 10, 10 }
1280 };
1281 TestUtility::checkEditsIter(*this, u"toTitle(IjssEL IglOo)",
1282 edits.getFineIterator(), edits.getFineIterator(),
1283 titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges),
1284 TRUE, errorCode);
1285 #endif
1286
1287 // No explicit nor automatic edits.reset(). Edits should be appended.
1288 length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_EDITS_NO_RESET | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1289 u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1290 assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"), UnicodeString(TRUE, dest, length));
1291 static const EditChange foldExpectedChanges[] = {
1292 #if !UCONFIG_NO_BREAK_ITERATION
1293 // From titlecasing.
1294 { FALSE, 1, 1 },
1295 { TRUE, 1, 1 },
1296 { FALSE, 10, 10 },
1297 #endif
1298 // From case folding.
1299 { TRUE, 1, 1 },
1300 { TRUE, 1, 2 },
1301 { FALSE, 3, 3 },
1302 { TRUE, 1, 1 },
1303 { FALSE, 2, 2 }
1304 };
1305 TestUtility::checkEditsIter(*this, u"foldCase(no Edits reset, IßtanBul)",
1306 edits.getFineIterator(), edits.getFineIterator(),
1307 foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges),
1308 TRUE, errorCode);
1309 }
1310
1311 void StringCaseTest::TestCaseMapUTF8WithEdits() {
1312 IcuTestErrorCode errorCode(*this, "TestCaseMapUTF8WithEdits");
1313 char dest[50];
1314 Edits edits;
1315
1316 int32_t length = CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT,
1317 u8"IstanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1318 assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"),
1319 UnicodeString::fromUTF8(StringPiece(dest, length)));
1320 static const EditChange lowerExpectedChanges[] = {
1321 { TRUE, 1, 2 },
1322 { FALSE, 4, 4 },
1323 { TRUE, 1, 1 },
1324 { FALSE, 2, 2 }
1325 };
1326 TestUtility::checkEditsIter(*this, u"toLower(IstanBul)",
1327 edits.getFineIterator(), edits.getFineIterator(),
1328 lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges),
1329 TRUE, errorCode);
1330
1331 edits.reset();
1332 length = CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT,
1333 u8"Πατάτα", 6 * 2, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1334 assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"),
1335 UnicodeString::fromUTF8(StringPiece(dest, length)));
1336 static const EditChange upperExpectedChanges[] = {
1337 { FALSE, 2, 2 },
1338 { TRUE, 2, 2 },
1339 { TRUE, 2, 2 },
1340 { TRUE, 2, 2 },
1341 { TRUE, 2, 2 },
1342 { TRUE, 2, 2 }
1343 };
1344 TestUtility::checkEditsIter(*this, u"toUpper(Πατάτα)",
1345 edits.getFineIterator(), edits.getFineIterator(),
1346 upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges),
1347 TRUE, errorCode);
1348
1349 edits.reset();
1350 #if !UCONFIG_NO_BREAK_ITERATION
1351 length = CaseMap::utf8ToTitle("nl",
1352 U_OMIT_UNCHANGED_TEXT |
1353 U_TITLECASE_NO_BREAK_ADJUSTMENT |
1354 U_TITLECASE_NO_LOWERCASE,
1355 nullptr, u8"IjssEL IglOo", 12,
1356 dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1357 assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"),
1358 UnicodeString::fromUTF8(StringPiece(dest, length)));
1359 static const EditChange titleExpectedChanges[] = {
1360 { FALSE, 1, 1 },
1361 { TRUE, 1, 1 },
1362 { FALSE, 10, 10 }
1363 };
1364 TestUtility::checkEditsIter(*this, u"toTitle(IjssEL IglOo)",
1365 edits.getFineIterator(), edits.getFineIterator(),
1366 titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges),
1367 TRUE, errorCode);
1368 #endif
1369
1370 // No explicit nor automatic edits.reset(). Edits should be appended.
1371 length = CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_EDITS_NO_RESET |
1372 U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1373 u8"IßtanBul", 1 + 2 + 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
1374 assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"),
1375 UnicodeString::fromUTF8(StringPiece(dest, length)));
1376 static const EditChange foldExpectedChanges[] = {
1377 #if !UCONFIG_NO_BREAK_ITERATION
1378 // From titlecasing.
1379 { FALSE, 1, 1 },
1380 { TRUE, 1, 1 },
1381 { FALSE, 10, 10 },
1382 #endif
1383 // From case folding.
1384 { TRUE, 1, 2 },
1385 { TRUE, 2, 2 },
1386 { FALSE, 3, 3 },
1387 { TRUE, 1, 1 },
1388 { FALSE, 2, 2 }
1389 };
1390 TestUtility::checkEditsIter(*this, u"foldCase(IßtanBul)",
1391 edits.getFineIterator(), edits.getFineIterator(),
1392 foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges),
1393 TRUE, errorCode);
1394 }
1395
1396 void StringCaseTest::TestCaseMapToString() {
1397 // This test function name is parallel with one in UCharacterCaseTest.java.
1398 // It is a bit of a misnomer until we have CaseMap API that writes to
1399 // a UnicodeString, at which point we should change this code here.
1400 IcuTestErrorCode errorCode(*this, "TestCaseMapToString");
1401 UChar dest[20];
1402
1403 // Omit unchanged text.
1404 int32_t length = CaseMap::toLower("tr", U_OMIT_UNCHANGED_TEXT,
1405 u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1406 assertEquals(u"toLower(IstanBul)",
1407 UnicodeString(u"ıb"), UnicodeString(TRUE, dest, length));
1408 length = CaseMap::toUpper("el", U_OMIT_UNCHANGED_TEXT,
1409 u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1410 assertEquals(u"toUpper(Πατάτα)",
1411 UnicodeString(u"ΑΤΑΤΑ"), UnicodeString(TRUE, dest, length));
1412 #if !UCONFIG_NO_BREAK_ITERATION
1413 length = CaseMap::toTitle("nl",
1414 U_OMIT_UNCHANGED_TEXT |
1415 U_TITLECASE_NO_BREAK_ADJUSTMENT |
1416 U_TITLECASE_NO_LOWERCASE,
1417 nullptr, u"IjssEL IglOo", 12,
1418 dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1419 assertEquals(u"toTitle(IjssEL IglOo)",
1420 UnicodeString(u"J"), UnicodeString(TRUE, dest, length));
1421 #endif
1422 length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1423 u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1424 assertEquals(u"foldCase(IßtanBul)",
1425 UnicodeString(u"ıssb"), UnicodeString(TRUE, dest, length));
1426
1427 // Return the whole result string.
1428 length = CaseMap::toLower("tr", 0,
1429 u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1430 assertEquals(u"toLower(IstanBul)",
1431 UnicodeString(u"ıstanbul"), UnicodeString(TRUE, dest, length));
1432 length = CaseMap::toUpper("el", 0,
1433 u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1434 assertEquals(u"toUpper(Πατάτα)",
1435 UnicodeString(u"ΠΑΤΑΤΑ"), UnicodeString(TRUE, dest, length));
1436 #if !UCONFIG_NO_BREAK_ITERATION
1437 length = CaseMap::toTitle("nl",
1438 U_TITLECASE_NO_BREAK_ADJUSTMENT |
1439 U_TITLECASE_NO_LOWERCASE,
1440 nullptr, u"IjssEL IglOo", 12,
1441 dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1442 assertEquals(u"toTitle(IjssEL IglOo)",
1443 UnicodeString(u"IJssEL IglOo"), UnicodeString(TRUE, dest, length));
1444 #endif
1445 length = CaseMap::fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1446 u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode);
1447 assertEquals(u"foldCase(IßtanBul)",
1448 UnicodeString(u"ısstanbul"), UnicodeString(TRUE, dest, length));
1449 }
1450
1451 void StringCaseTest::TestCaseMapUTF8ToString() {
1452 IcuTestErrorCode errorCode(*this, "TestCaseMapUTF8ToString");
1453 std::string dest;
1454 StringByteSink<std::string> sink(&dest);
1455
1456 // Omit unchanged text.
1457 CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT, u8"IstanBul", sink, nullptr, errorCode);
1458 assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"), UnicodeString::fromUTF8(dest));
1459 dest.clear();
1460 CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT, u8"Πατάτα", sink, nullptr, errorCode);
1461 assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"),
1462 UnicodeString::fromUTF8(dest));
1463 #if !UCONFIG_NO_BREAK_ITERATION
1464 dest.clear();
1465 CaseMap::utf8ToTitle(
1466 "nl", U_OMIT_UNCHANGED_TEXT | U_TITLECASE_NO_BREAK_ADJUSTMENT | U_TITLECASE_NO_LOWERCASE,
1467 nullptr, u8"IjssEL IglOo", sink, nullptr, errorCode);
1468 assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"),
1469 UnicodeString::fromUTF8(dest));
1470 #endif
1471 dest.clear();
1472 CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
1473 u8"IßtanBul", sink, nullptr, errorCode);
1474 assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"),
1475 UnicodeString::fromUTF8(dest));
1476
1477 // Return the whole result string.
1478 dest.clear();
1479 CaseMap::utf8ToLower("tr", 0, u8"IstanBul", sink, nullptr, errorCode);
1480 assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıstanbul"),
1481 UnicodeString::fromUTF8(dest));
1482 dest.clear();
1483 CaseMap::utf8ToUpper("el", 0, u8"Πατάτα", sink, nullptr, errorCode);
1484 assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΠΑΤΑΤΑ"),
1485 UnicodeString::fromUTF8(dest));
1486 #if !UCONFIG_NO_BREAK_ITERATION
1487 dest.clear();
1488 CaseMap::utf8ToTitle("nl", U_TITLECASE_NO_BREAK_ADJUSTMENT | U_TITLECASE_NO_LOWERCASE,
1489 nullptr, u8"IjssEL IglOo", sink, nullptr, errorCode);
1490 assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"IJssEL IglOo"),
1491 UnicodeString::fromUTF8(dest));
1492 #endif
1493 dest.clear();
1494 CaseMap::utf8Fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I, u8"IßtanBul", sink, nullptr, errorCode);
1495 assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ısstanbul"),
1496 UnicodeString::fromUTF8(dest));
1497 }
1498
1499 void StringCaseTest::TestLongUnicodeString() {
1500 // Code coverage for UnicodeString case mapping code handling
1501 // long strings or many changes in a string.
1502 UnicodeString s(TRUE,
1503 (const UChar *)
1504 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1505 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1506 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1507 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1508 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1509 u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF", 6 * 51);
1510 UnicodeString expected(TRUE,
1511 (const UChar *)
1512 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1513 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1514 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1515 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1516 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1517 u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF", 6 * 51);
1518 s.toUpper(Locale::getRoot());
1519 assertEquals("string length 306", expected, s);
1520 }
1521
1522 #if !UCONFIG_NO_BREAK_ITERATION
1523 void StringCaseTest::TestBug13127() {
1524 // Test case crashed when the bug was present.
1525 const char16_t *s16 = u"日本語";
1526 UnicodeString s(TRUE, s16, -1);
1527 s.toTitle(0, Locale::getEnglish());
1528 }
1529
1530 void StringCaseTest::TestInPlaceTitle() {
1531 // Similar to TestBug13127. u_strToTitle() can modify the buffer in-place.
1532 IcuTestErrorCode errorCode(*this, "TestInPlaceTitle");
1533 char16_t s[32] = u"ß ß ß日本語 abcdef";
1534 const char16_t *expected = u"Ss Ss Ss日本語 Abcdef";
1535 int32_t length = u_strToTitle(s, UPRV_LENGTHOF(s), s, -1, nullptr, "", errorCode);
1536 assertEquals("u_strToTitle(in-place) length", u_strlen(expected), length);
1537 assertEquals("u_strToTitle(in-place)", expected, s);
1538 }
1539 #endif
1540
1541 void StringCaseTest::TestCaseMapEditsIteratorDocs() {
1542 IcuTestErrorCode status(*this, "TestCaseMapEditsIteratorDocs");
1543 const char16_t* input = u"abcßDeF";
1544 int32_t inputLength = u_strlen(input);
1545 // output: "abcssdef"
1546
1547 char16_t output[10];
1548 Edits edits;
1549 CaseMap::fold(0, input, -1, output, 10, &edits, status);
1550
1551 static const char16_t* fineIteratorExpected[] = {
1552 u"{ src[0..3] ≡ dest[0..3] (no-change) }",
1553 u"{ src[3..4] ⇝ dest[3..5], repl[0..2] }",
1554 u"{ src[4..5] ⇝ dest[5..6], repl[2..3] }",
1555 u"{ src[5..6] ≡ dest[6..7] (no-change) }",
1556 u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
1557 };
1558 static const char16_t* fineChangesIteratorExpected[] = {
1559 u"{ src[3..4] ⇝ dest[3..5], repl[0..2] }",
1560 u"{ src[4..5] ⇝ dest[5..6], repl[2..3] }",
1561 u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
1562 };
1563 static const char16_t* coarseIteratorExpected[] = {
1564 u"{ src[0..3] ≡ dest[0..3] (no-change) }",
1565 u"{ src[3..5] ⇝ dest[3..6], repl[0..3] }",
1566 u"{ src[5..6] ≡ dest[6..7] (no-change) }",
1567 u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
1568 };
1569 static const char16_t* coarseChangesIteratorExpected[] = {
1570 u"{ src[3..5] ⇝ dest[3..6], repl[0..3] }",
1571 u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
1572 };
1573
1574 // Expected destination indices when source index is queried
1575 static int32_t expectedDestFineEditIndices[] = {0, 0, 0, 3, 5, 6, 7};
1576 static int32_t expectedDestCoarseEditIndices[] = {0, 0, 0, 3, 3, 6, 7};
1577 static int32_t expectedDestFineStringIndices[] = {0, 1, 2, 3, 5, 6, 7};
1578 static int32_t expectedDestCoarseStringIndices[] = {0, 1, 2, 3, 6, 6, 7};
1579
1580 // Expected source indices when destination index is queried
1581 static int32_t expectedSrcFineEditIndices[] = { 0, 0, 0, 3, 3, 4, 5, 6 };
1582 static int32_t expectedSrcCoarseEditIndices[] = { 0, 0, 0, 3, 3, 3, 5, 6 };
1583 static int32_t expectedSrcFineStringIndices[] = { 0, 1, 2, 3, 4, 4, 5, 6 };
1584 static int32_t expectedSrcCoarseStringIndices[] = { 0, 1, 2, 3, 5, 5, 5, 6 };
1585
1586 // Demonstrate the iterator next() method:
1587 Edits::Iterator fineIterator = edits.getFineIterator();
1588 int i = 0;
1589 UnicodeString toString;
1590 while (fineIterator.next(status)) {
1591 UnicodeString expected = fineIteratorExpected[i++];
1592 assertEquals(UnicodeString(u"Iteration #") + i,
1593 expected,
1594 fineIterator.toString(toString.remove()));
1595 }
1596 Edits::Iterator fineChangesIterator = edits.getFineChangesIterator();
1597 i = 0;
1598 while (fineChangesIterator.next(status)) {
1599 UnicodeString expected = fineChangesIteratorExpected[i++];
1600 assertEquals(UnicodeString(u"Iteration #") + i,
1601 expected,
1602 fineChangesIterator.toString(toString.remove()));
1603 }
1604 Edits::Iterator coarseIterator = edits.getCoarseIterator();
1605 i = 0;
1606 while (coarseIterator.next(status)) {
1607 UnicodeString expected = coarseIteratorExpected[i++];
1608 assertEquals(UnicodeString(u"Iteration #") + i,
1609 expected,
1610 coarseIterator.toString(toString.remove()));
1611 }
1612 Edits::Iterator coarseChangesIterator = edits.getCoarseChangesIterator();
1613 i = 0;
1614 while (coarseChangesIterator.next(status)) {
1615 UnicodeString expected = coarseChangesIteratorExpected[i++];
1616 assertEquals(UnicodeString(u"Iteration #") + i,
1617 expected,
1618 coarseChangesIterator.toString(toString.remove()));
1619 }
1620
1621 // Demonstrate the iterator indexing methods:
1622 // fineIterator should have the same behavior as fineChangesIterator, and
1623 // coarseIterator should have the same behavior as coarseChangesIterator.
1624 for (int32_t srcIndex=0; srcIndex<inputLength; srcIndex++) {
1625 fineIterator.findSourceIndex(srcIndex, status);
1626 fineChangesIterator.findSourceIndex(srcIndex, status);
1627 coarseIterator.findSourceIndex(srcIndex, status);
1628 coarseChangesIterator.findSourceIndex(srcIndex, status);
1629
1630 assertEquals(UnicodeString("Source index: ") + srcIndex,
1631 expectedDestFineEditIndices[srcIndex],
1632 fineIterator.destinationIndex());
1633 assertEquals(UnicodeString("Source index: ") + srcIndex,
1634 expectedDestFineEditIndices[srcIndex],
1635 fineChangesIterator.destinationIndex());
1636 assertEquals(UnicodeString("Source index: ") + srcIndex,
1637 expectedDestCoarseEditIndices[srcIndex],
1638 coarseIterator.destinationIndex());
1639 assertEquals(UnicodeString("Source index: ") + srcIndex,
1640 expectedDestCoarseEditIndices[srcIndex],
1641 coarseChangesIterator.destinationIndex());
1642
1643 assertEquals(UnicodeString("Source index: ") + srcIndex,
1644 expectedDestFineStringIndices[srcIndex],
1645 fineIterator.destinationIndexFromSourceIndex(srcIndex, status));
1646 assertEquals(UnicodeString("Source index: ") + srcIndex,
1647 expectedDestFineStringIndices[srcIndex],
1648 fineChangesIterator.destinationIndexFromSourceIndex(srcIndex, status));
1649 assertEquals(UnicodeString("Source index: ") + srcIndex,
1650 expectedDestCoarseStringIndices[srcIndex],
1651 coarseIterator.destinationIndexFromSourceIndex(srcIndex, status));
1652 assertEquals(UnicodeString("Source index: ") + srcIndex,
1653 expectedDestCoarseStringIndices[srcIndex],
1654 coarseChangesIterator.destinationIndexFromSourceIndex(srcIndex, status));
1655 }
1656 for (int32_t destIndex=0; destIndex<inputLength; destIndex++) {
1657 fineIterator.findDestinationIndex(destIndex, status);
1658 fineChangesIterator.findDestinationIndex(destIndex, status);
1659 coarseIterator.findDestinationIndex(destIndex, status);
1660 coarseChangesIterator.findDestinationIndex(destIndex, status);
1661
1662 assertEquals(UnicodeString("Destination index: ") + destIndex,
1663 expectedSrcFineEditIndices[destIndex],
1664 fineIterator.sourceIndex());
1665 assertEquals(UnicodeString("Destination index: ") + destIndex,
1666 expectedSrcFineEditIndices[destIndex],
1667 fineChangesIterator.sourceIndex());
1668 assertEquals(UnicodeString("Destination index: ") + destIndex,
1669 expectedSrcCoarseEditIndices[destIndex],
1670 coarseIterator.sourceIndex());
1671 assertEquals(UnicodeString("Destination index: ") + destIndex,
1672 expectedSrcCoarseEditIndices[destIndex],
1673 coarseChangesIterator.sourceIndex());
1674
1675 assertEquals(UnicodeString("Destination index: ") + destIndex,
1676 expectedSrcFineStringIndices[destIndex],
1677 fineIterator.sourceIndexFromDestinationIndex(destIndex, status));
1678 assertEquals(UnicodeString("Destination index: ") + destIndex,
1679 expectedSrcFineStringIndices[destIndex],
1680 fineChangesIterator.sourceIndexFromDestinationIndex(destIndex, status));
1681 assertEquals(UnicodeString("Destination index: ") + destIndex,
1682 expectedSrcCoarseStringIndices[destIndex],
1683 coarseIterator.sourceIndexFromDestinationIndex(destIndex, status));
1684 assertEquals(UnicodeString("Destination index: ") + destIndex,
1685 expectedSrcCoarseStringIndices[destIndex],
1686 coarseChangesIterator.sourceIndexFromDestinationIndex(destIndex, status));
1687 }
1688 }
1689
1690 void StringCaseTest::TestCaseMapGreekExtended() {
1691 // Ticket 13851
1692 UnicodeString s(u"\u1F80\u1F88\u1FFC");
1693 UnicodeString result(s);
1694 result.toLower(Locale::getRoot());
1695 assertEquals(u"lower", u"\u1F80\u1F80\u1FF3", result);
1696 #if !UCONFIG_NO_BREAK_ITERATION
1697 result = s;
1698 result.toTitle(nullptr, Locale::getRoot());
1699 assertEquals(u"title", u"\u1F88\u1F80\u1FF3", result);
1700 #endif
1701 }
1702
1703 //#endif