1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 2002-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 *******************************************************************************
10 * file name: strcase.cpp
12 * tab size: 8 (not used)
15 * created on: 2002mar12
16 * created by: Markus W. Scherer
18 * Test file for string casing C++ API functions.
21 #include "unicode/std_string.h"
22 #include "unicode/brkiter.h"
23 #include "unicode/casemap.h"
24 #include "unicode/edits.h"
25 #include "unicode/uchar.h"
26 #include "unicode/ures.h"
27 #include "unicode/uloc.h"
28 #include "unicode/locid.h"
29 #include "unicode/ubrk.h"
30 #include "unicode/unistr.h"
31 #include "unicode/ucasemap.h"
32 #include "unicode/ustring.h"
35 #include "unicode/tstdtmod.h"
// Test fixture for the string case-mapping APIs. It derives from IntlTest and
// declares the individual test member functions that are listed with
// TESTCASE_AUTO in runIndexedTest().
// NOTE(review): this listing shows gaps in the embedded line numbering, so
// some members (and the end of the class) are not visible here.
39 class StringCaseTest
: public IntlTest
{
42 virtual ~StringCaseTest();
// Test-framework entry point; receives the test index, an "exec" flag and an
// out-parameter for the test name.
44 void runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char *par
=0);
46 void TestCaseConversion();
// Shared checker: case-maps "input" and compares the result with "output".
// "iter" is an untyped break-iterator pointer that the implementation casts
// to BreakIterator* / UBreakIterator*.
48 void TestCasingImpl(const UnicodeString
&input
,
49 const UnicodeString
&output
,
51 void *iter
, const char *localeID
, uint32_t options
);
53 void TestTitleOptions();
54 void TestFullCaseFoldingIterator();
55 void TestGreekUpper();
57 void TestMalformedUTF8();
58 void TestBufferOverflow();
60 void TestCopyMoveEdits();
61 void TestEditsFindFwdBwd();
62 void TestMergeEdits();
63 void TestCaseMapWithEdits();
64 void TestCaseMapUTF8WithEdits();
65 void TestCaseMapToString();
66 void TestCaseMapUTF8ToString();
67 void TestLongUnicodeString();
69 void TestInPlaceTitle();
70 void TestCaseMapEditsIteratorDocs();
71 void TestCaseMapGreekExtended();
// Helper used by TestGreekUpper(): uppercases "s" for Greek and compares the
// result with "expected".
74 void assertGreekUpper(const char16_t *s
, const char16_t *expected
);
// Constructor: initializes the GREEK_LOCALE_ member from the locale ID "el".
79 StringCaseTest::StringCaseTest() : GREEK_LOCALE_("el") {}
// Destructor: empty body, no explicit cleanup.
81 StringCaseTest::~StringCaseTest() {}
// Factory function with external linkage: returns a newly allocated
// StringCaseTest as an IntlTest pointer.
// NOTE(review): the caller presumably takes ownership of the object — confirm.
83 extern IntlTest
*createStringCaseTest() {
84 return new StringCaseTest();
// Test-suite dispatcher: logs the suite name and lists every test through the
// TESTCASE_AUTO macro. The fourth parameter is unused (its name is commented
// out).
// NOTE(review): the #if directives below guard some entries on ICU
// configuration switches; their matching #endif lines are not visible in this
// listing, so the exact extent of each guard should be confirmed.
88 StringCaseTest::runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char * /*par*/) {
90 logln("TestSuite StringCaseTest: ");
93 TESTCASE_AUTO(TestCaseConversion
);
94 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
95 TESTCASE_AUTO(TestCasing
);
96 TESTCASE_AUTO(TestTitleOptions
);
98 TESTCASE_AUTO(TestFullCaseFoldingIterator
);
99 TESTCASE_AUTO(TestGreekUpper
);
100 TESTCASE_AUTO(TestLongUpper
);
101 TESTCASE_AUTO(TestMalformedUTF8
);
102 TESTCASE_AUTO(TestBufferOverflow
);
103 TESTCASE_AUTO(TestEdits
);
104 TESTCASE_AUTO(TestCopyMoveEdits
);
105 TESTCASE_AUTO(TestEditsFindFwdBwd
);
106 TESTCASE_AUTO(TestMergeEdits
);
107 TESTCASE_AUTO(TestCaseMapWithEdits
);
108 TESTCASE_AUTO(TestCaseMapUTF8WithEdits
);
109 TESTCASE_AUTO(TestCaseMapToString
);
110 TESTCASE_AUTO(TestCaseMapUTF8ToString
);
111 TESTCASE_AUTO(TestLongUnicodeString
);
112 #if !UCONFIG_NO_BREAK_ITERATION
113 TESTCASE_AUTO(TestBug13127
);
114 TESTCASE_AUTO(TestInPlaceTitle
);
116 TESTCASE_AUTO(TestCaseMapEditsIteratorDocs
);
117 TESTCASE_AUTO(TestCaseMapGreekExtended
);
// Exercises UnicodeString::toLower(), toUpper() and foldCase() with hard-coded
// inputs and expected outputs: Turkish dotted/dotless i, German sharp s, Greek
// sigma forms, Deseret (plane 1) letters, LJ digraphs, and Lithuanian
// dot-above handling. Every mismatch is reported through errln().
// NOTE(review): several declarations and "if" conditions that precede the
// errln() calls are not visible in this listing.
122 StringCaseTest::TestCaseConversion()
// Expected-value tables as UChar arrays (the Turkish ones are NUL-terminated).
124 static const UChar uppercaseGreek
[] =
125 { 0x399, 0x395, 0x3a3, 0x3a5, 0x3a3, 0x20, 0x03a7, 0x3a1, 0x399, 0x3a3, 0x3a4,
129 static const UChar lowercaseGreek
[] =
130 { 0x3b9, 0x3b5, 0x3c3, 0x3c5, 0x3c2, 0x20, 0x03c7, 0x3c1, 0x3b9, 0x3c3, 0x3c4,
134 static const UChar lowercaseTurkish
[] =
135 { 0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0x2c, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x63, 0x6f,
136 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x0131, 0x6e, 0x6f, 0x70, 0x6c, 0x65, 0x21, 0 };
138 static const UChar uppercaseTurkish
[] =
139 { 0x54, 0x4f, 0x50, 0x4b, 0x41, 0x50, 0x49, 0x20, 0x50, 0x41, 0x4c, 0x41, 0x43, 0x45, 0x2c, 0x20,
140 0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4c, 0 };
142 UnicodeString expectedResult
;
// Input starting with U+0130 (capital I with dot above), lowercased first with
// the empty locale ID and then with Turkish ("tr", "TR").
145 test3
+= (UChar32
)0x0130;
146 test3
+= "STANBUL, NOT CONSTANTINOPLE!";
148 UnicodeString
test4(test3
);
149 test4
.toLower(Locale(""));
150 expectedResult
= UnicodeString("i\\u0307stanbul, not constantinople!", "").unescape();
151 if (test4
!= expectedResult
)
152 errln("1. toLower failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
155 test4
.toLower(Locale("tr", "TR"));
156 expectedResult
= lowercaseTurkish
;
157 if (test4
!= expectedResult
)
158 errln("2. toLower failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// Input containing U+0131 (dotless i), uppercased with the empty locale ID and
// then with Turkish.
161 test3
+= (UChar32
)0x0131;
162 test3
+= " palace, istanbul";
165 test4
.toUpper(Locale(""));
166 expectedResult
= "TOPKAPI PALACE, ISTANBUL";
167 if (test4
!= expectedResult
)
168 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
171 test4
.toUpper(Locale("tr", "TR"));
172 expectedResult
= uppercaseTurkish
;
173 if (test4
!= expectedResult
)
174 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// German: each U+00DF (sharp s) is expected to uppercase to "SS".
176 test3
= CharsToUnicodeString("S\\u00FC\\u00DFmayrstra\\u00DFe");
178 test3
.toUpper(Locale("de", "DE"));
179 expectedResult
= CharsToUnicodeString("S\\u00DCSSMAYRSTRASSE");
180 if (test3
!= expectedResult
)
181 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test3
+ "\".");
// Greek round trip using the tables declared at the top of the function.
183 test4
.replace(0, test4
.length(), uppercaseGreek
);
185 test4
.toLower(Locale("el", "GR"));
186 expectedResult
= lowercaseGreek
;
187 if (test4
!= expectedResult
)
188 errln("toLower failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
190 test4
.replace(0, test4
.length(), lowercaseGreek
);
193 expectedResult
= uppercaseGreek
;
194 if (test4
!= expectedResult
)
195 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
197 // more string case mapping tests with the new implementation
// Input/expected arrays for the root and Turkish locales; each ends with the
// surrogate pair 0xd93f 0xdfff, which is the same in input and expectation.
201 beforeLower
[]= { 0x61, 0x42, 0x49, 0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff },
202 lowerRoot
[]= { 0x61, 0x62, 0x69, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
203 lowerTurkish
[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
205 beforeUpper
[]= { 0x61, 0x42, 0x69, 0x3c2, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xfb03, 0xfb03, 0xd93f, 0xdfff },
206 upperRoot
[]= { 0x41, 0x42, 0x49, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
207 upperTurkish
[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
209 beforeMiniUpper
[]= { 0xdf, 0x61 },
210 miniUpper
[]= { 0x53, 0x53, 0x41 };
214 /* lowercase with root locale */
215 s
=UnicodeString(FALSE
, beforeLower
, UPRV_LENGTHOF(beforeLower
));
217 if( s
.length()!=UPRV_LENGTHOF(lowerRoot
) ||
218 s
!=UnicodeString(FALSE
, lowerRoot
, s
.length())
220 errln("error in toLower(root locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, lowerRoot
, UPRV_LENGTHOF(lowerRoot
)) + "\"");
223 /* lowercase with turkish locale */
224 s
=UnicodeString(FALSE
, beforeLower
, UPRV_LENGTHOF(beforeLower
));
// setCharAt() rewrites index 0 with the value it already holds before the case
// mapping is chained onto its return value.
// NOTE(review): presumably this forces the string to own a writable buffer —
// confirm.
225 s
.setCharAt(0, beforeLower
[0]).toLower(Locale("tr"));
226 if( s
.length()!=UPRV_LENGTHOF(lowerTurkish
) ||
227 s
!=UnicodeString(FALSE
, lowerTurkish
, s
.length())
229 errln("error in toLower(turkish locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, lowerTurkish
, UPRV_LENGTHOF(lowerTurkish
)) + "\"");
232 /* uppercase with root locale */
233 s
=UnicodeString(FALSE
, beforeUpper
, UPRV_LENGTHOF(beforeUpper
));
234 s
.setCharAt(0, beforeUpper
[0]).toUpper(Locale(""));
235 if( s
.length()!=UPRV_LENGTHOF(upperRoot
) ||
236 s
!=UnicodeString(FALSE
, upperRoot
, s
.length())
238 errln("error in toUpper(root locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, upperRoot
, UPRV_LENGTHOF(upperRoot
)) + "\"");
241 /* uppercase with turkish locale */
242 s
=UnicodeString(FALSE
, beforeUpper
, UPRV_LENGTHOF(beforeUpper
));
243 s
.toUpper(Locale("tr"));
244 if( s
.length()!=UPRV_LENGTHOF(upperTurkish
) ||
245 s
!=UnicodeString(FALSE
, upperTurkish
, s
.length())
247 errln("error in toUpper(turkish locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, upperTurkish
, UPRV_LENGTHOF(upperTurkish
)) + "\"");
250 /* uppercase a short string with root locale */
251 s
=UnicodeString(FALSE
, beforeMiniUpper
, UPRV_LENGTHOF(beforeMiniUpper
));
252 s
.setCharAt(0, beforeMiniUpper
[0]).toUpper("");
253 if( s
.length()!=UPRV_LENGTHOF(miniUpper
) ||
254 s
!=UnicodeString(FALSE
, miniUpper
, s
.length())
256 errln("error in toUpper(root locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, miniUpper
, UPRV_LENGTHOF(miniUpper
)) + "\"");
260 // test some supplementary characters (>= Unicode 3.1)
265 deseretInput
=UnicodeString("\\U0001043C\\U00010414", "").unescape(),
266 deseretLower
=UnicodeString("\\U0001043C\\U0001043C", "").unescape(),
267 deseretUpper
=UnicodeString("\\U00010414\\U00010414", "").unescape();
268 (t
=deseretInput
).toLower();
269 if(t
!=deseretLower
) {
270 errln("error lowercasing Deseret (plane 1) characters");
272 (t
=deseretInput
).toUpper();
273 if(t
!=deseretUpper
) {
274 errln("error uppercasing Deseret (plane 1) characters");
278 // test some more cases that looked like problems
283 ljInput
=UnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 \\U0001043C\\U00010414", "").unescape(),
284 ljLower
=UnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 \\U0001043C\\U0001043C", "").unescape(),
285 ljUpper
=UnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 \\U00010414\\U00010414", "").unescape();
286 (t
=ljInput
).toLower("en");
288 errln("error lowercasing LJ characters");
290 (t
=ljInput
).toUpper("en");
292 errln("error uppercasing LJ characters");
296 #if !UCONFIG_NO_NORMALIZATION
297 // some context-sensitive casing depends on normalization data being present
299 // Unicode 3.1.1 SpecialCasing tests
303 // sigmas preceded and/or followed by cased letters
305 sigmas
=UnicodeString("i\\u0307\\u03a3\\u0308j \\u0307\\u03a3\\u0308j i\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(),
306 sigmasLower
=UnicodeString("i\\u0307\\u03c3\\u0308j \\u0307\\u03c3\\u0308j i\\u00ad\\u03c2\\u0308 \\u0307\\u03c3\\u0308 ", "").unescape(),
307 sigmasUpper
=UnicodeString("I\\u0307\\u03a3\\u0308J \\u0307\\u03a3\\u0308J I\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape();
309 (t
=sigmas
).toLower();
311 errln("error in sigmas.toLower()=\"" + t
+ "\" expected \"" + sigmasLower
+ "\"");
314 (t
=sigmas
).toUpper(Locale(""));
316 errln("error in sigmas.toUpper()=\"" + t
+ "\" expected \"" + sigmasUpper
+ "\"");
319 // turkish & azerbaijani dotless i & dotted I
320 // remove dot above if there was a capital I before and there are no more accents above
322 dots
=UnicodeString("I \\u0130 I\\u0307 I\\u0327\\u0307 I\\u0301\\u0307 I\\u0327\\u0307\\u0301", "").unescape(),
323 dotsTurkish
=UnicodeString("\\u0131 i i i\\u0327 \\u0131\\u0301\\u0307 i\\u0327\\u0301", "").unescape(),
324 dotsDefault
=UnicodeString("i i\\u0307 i\\u0307 i\\u0327\\u0307 i\\u0301\\u0307 i\\u0327\\u0307\\u0301", "").unescape();
326 (t
=dots
).toLower("tr");
328 errln("error in dots.toLower(tr)=\"" + t
+ "\" expected \"" + dotsTurkish
+ "\"");
331 (t
=dots
).toLower("de");
333 errln("error in dots.toLower(de)=\"" + t
+ "\" expected \"" + dotsDefault
+ "\"");
337 // more Unicode 3.1.1 tests
341 // lithuanian dot above in uppercasing
343 dots
=UnicodeString("a\\u0307 \\u0307 i\\u0307 j\\u0327\\u0307 j\\u0301\\u0307", "").unescape(),
344 dotsLithuanian
=UnicodeString("A\\u0307 \\u0307 I J\\u0327 J\\u0301\\u0307", "").unescape(),
345 dotsDefault
=UnicodeString("A\\u0307 \\u0307 I\\u0307 J\\u0327\\u0307 J\\u0301\\u0307", "").unescape();
347 (t
=dots
).toUpper("lt");
348 if(t
!=dotsLithuanian
) {
349 errln("error in dots.toUpper(lt)=\"" + t
+ "\" expected \"" + dotsLithuanian
+ "\"");
352 (t
=dots
).toUpper("de");
354 errln("error in dots.toUpper(de)=\"" + t
+ "\" expected \"" + dotsDefault
+ "\"");
357 // lithuanian adds dot above to i in lowercasing if there are more above accents
359 i
=UnicodeString("I I\\u0301 J J\\u0301 \\u012e \\u012e\\u0301 \\u00cc\\u00cd\\u0128", "").unescape(),
360 iLithuanian
=UnicodeString("i i\\u0307\\u0301 j j\\u0307\\u0301 \\u012f \\u012f\\u0307\\u0301 i\\u0307\\u0300i\\u0307\\u0301i\\u0307\\u0303", "").unescape(),
361 iDefault
=UnicodeString("i i\\u0301 j j\\u0301 \\u012f \\u012f\\u0301 \\u00ec\\u00ed\\u0129", "").unescape();
365 errln("error in i.toLower(lt)=\"" + t
+ "\" expected \"" + iLithuanian
+ "\"");
370 errln("error in i.toLower(de)=\"" + t
+ "\" expected \"" + iDefault
+ "\"");
// Case folding: "f" is the expected default folding of "s"; "g" is the
// expectation when U_FOLD_CASE_EXCLUDE_SPECIAL_I is passed to foldCase().
379 s
=UnicodeString("A\\u00df\\u00b5\\ufb03\\U0001040c\\u0130\\u0131", "").unescape(),
380 f
=UnicodeString("ass\\u03bcffi\\U00010434i\\u0307\\u0131", "").unescape(),
381 g
=UnicodeString("ass\\u03bcffi\\U00010434i\\u0131", "").unescape(),
386 errln("error in foldCase(\"" + s
+ "\", default)=\"" + t
+ "\" but expected \"" + f
+ "\"");
389 // alternate handling for dotted I/dotless i (U+0130, U+0131)
390 (t
=s
).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I
);
392 errln("error in foldCase(\"" + s
+ "\", U_FOLD_CASE_EXCLUDE_SPECIAL_I)=\"" + t
+ "\" but expected \"" + g
+ "\"");
397 // data-driven case mapping tests ------------------------------------------ ***
407 // names of TestData children in casing.txt
// Table of test-data names with TEST_COUNT+1 entries; TestCasing() indexes it
// with whichCase when calling createTestData().
// NOTE(review): the initializer list is not visible in this listing.
408 static const char *const dataNames
[TEST_COUNT
+1]={
// Applies one case-mapping operation to "input" through two API families and
// compares each result with "output":
//   1. the UnicodeString methods toLower/toUpper/toTitle/foldCase;
//   2. the UTF-8 UCaseMap functions ucasemap_utf8ToLower/ToUpper/ToTitle/FoldCase.
// A mismatch in the first family is reported with dataerrln(); an error or
// mismatch in the second with errcheckln()/errln().
//
// input     source string
// output    expected case-mapped string
// iter      untyped break iterator, cast to BreakIterator* for the C++ API and
//           to UBreakIterator* (then cloned) for the UCaseMap API
// localeID  locale ID for Locale and ucasemap_open()
// options   option bits passed to toTitle(), foldCase() and ucasemap_open()
// NOTE(review): the "whichCase" parameter and the switch statements that
// select the operation are not visible in this listing.
417 StringCaseTest::TestCasingImpl(const UnicodeString
&input
,
418 const UnicodeString
&output
,
420 void *iter
, const char *localeID
, uint32_t options
) {
422 UnicodeString result
;
424 Locale
locale(localeID
);
430 result
.toLower(locale
);
434 result
.toUpper(locale
);
436 #if !UCONFIG_NO_BREAK_ITERATION
439 result
.toTitle((BreakIterator
*)iter
, locale
, options
);
444 result
.foldCase(options
);
448 break; // won't happen
451 dataerrln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name
);
453 #if !UCONFIG_NO_BREAK_ITERATION
// With options==0 the toTitle() overload that takes no options argument is
// exercised as well.
454 if(whichCase
==TEST_TITLE
&& options
==0) {
456 result
.toTitle((BreakIterator
*)iter
, locale
);
458 dataerrln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res");
// UTF-8 pass: fixed 100-byte stack buffers hold the UTF-8 form of the input
// and of the case-mapped output.
464 char utf8In
[100], utf8Out
[100];
465 int32_t utf8InLength
, utf8OutLength
, resultLength
;
468 IcuTestErrorCode
errorCode(*this, "TestCasingImpl");
469 LocalUCaseMapPointer
csm(ucasemap_open(localeID
, options
, errorCode
));
470 #if !UCONFIG_NO_BREAK_ITERATION
472 // Clone the break iterator so that the UCaseMap can safely adopt it.
473 UBreakIterator
*clone
=ubrk_safeClone((UBreakIterator
*)iter
, NULL
, NULL
, errorCode
);
474 ucasemap_setBreakIterator(csm
.getAlias(), clone
, errorCode
);
478 u_strToUTF8(utf8In
, (int32_t)sizeof(utf8In
), &utf8InLength
, input
.getBuffer(), input
.length(), errorCode
);
481 name
="ucasemap_utf8ToLower";
482 utf8OutLength
=ucasemap_utf8ToLower(csm
.getAlias(),
483 utf8Out
, (int32_t)sizeof(utf8Out
),
484 utf8In
, utf8InLength
, errorCode
);
487 name
="ucasemap_utf8ToUpper";
488 utf8OutLength
=ucasemap_utf8ToUpper(csm
.getAlias(),
489 utf8Out
, (int32_t)sizeof(utf8Out
),
490 utf8In
, utf8InLength
, errorCode
);
492 #if !UCONFIG_NO_BREAK_ITERATION
494 name
="ucasemap_utf8ToTitle";
495 utf8OutLength
=ucasemap_utf8ToTitle(csm
.getAlias(),
496 utf8Out
, (int32_t)sizeof(utf8Out
),
497 utf8In
, utf8InLength
, errorCode
);
501 name
="ucasemap_utf8FoldCase";
502 utf8OutLength
=ucasemap_utf8FoldCase(csm
.getAlias(),
503 utf8Out
, (int32_t)sizeof(utf8Out
),
504 utf8In
, utf8InLength
, errorCode
);
509 break; // won't happen
// Convert the UTF-8 output back to UTF-16 directly into result's buffer so it
// can be compared with "output".
511 buffer
=result
.getBuffer(utf8OutLength
);
512 u_strFromUTF8(buffer
, result
.getCapacity(), &resultLength
, utf8Out
, utf8OutLength
, errorCode
);
// On failure the result is released with length 0 instead of resultLength.
513 result
.releaseBuffer(errorCode
.isSuccess() ? resultLength
: 0);
515 if(errorCode
.isFailure()) {
516 errcheckln(errorCode
, "error: %s() got an error for a test case from casing.res - %s", name
, u_errorName(errorCode
));
518 } else if(result
!=output
) {
519 errln("error: %s() got a wrong result for a test case from casing.res", name
);
520 errln("expected \"" + output
+ "\" got \"" + result
+ "\"" );
// Data-driven test: opens the "casing" TestDataModule, loops whichCase over
// 0..TEST_COUNT-1, creates the test data named dataNames[whichCase], and for
// each case reads "Input" and "Output" (plus "Locale", "Type" and "Options"
// where the case type uses them) before delegating to TestCasingImpl().
// Afterwards it checks UnicodeString::toTitle(NULL) on one literal string.
// NOTE(review): some declarations (e.g. cLocaleID, options) and closing
// braces are not visible in this listing.
525 StringCaseTest::TestCasing() {
526 UErrorCode status
= U_ZERO_ERROR
;
527 #if !UCONFIG_NO_BREAK_ITERATION
528 LocalUBreakIteratorPointer iter
;
531 UnicodeString locale
, input
, output
, optionsString
, result
;
533 int32_t whichCase
, type
;
534 LocalPointer
<TestDataModule
> driver(TestDataModule::getTestDataModule("casing", *this, status
));
535 if(U_SUCCESS(status
)) {
536 for(whichCase
=0; whichCase
<TEST_COUNT
; ++whichCase
) {
537 #if UCONFIG_NO_BREAK_ITERATION
538 if(whichCase
==TEST_TITLE
) {
542 LocalPointer
<TestData
> casingTest(driver
->createTestData(dataNames
[whichCase
], status
));
543 if(U_FAILURE(status
)) {
544 errln("TestCasing failed to createTestData(%s) - %s", dataNames
[whichCase
], u_errorName(status
));
547 const DataMap
*myCase
= NULL
;
548 while(casingTest
->nextCase(myCase
, status
)) {
549 input
= myCase
->getString("Input", status
);
550 output
= myCase
->getString("Output", status
);
// The "Locale" field is read for every case type except TEST_FOLD.
552 if(whichCase
!=TEST_FOLD
) {
553 locale
= myCase
->getString("Locale", status
);
555 locale
.extract(0, 0x7fffffff, cLocaleID
, sizeof(cLocaleID
), "");
557 #if !UCONFIG_NO_BREAK_ITERATION
// Title cases read "Type" to choose the break iterator: either a standard
// UBreakIteratorType or, for -2, a rule-based iterator built from ".*;".
558 if(whichCase
==TEST_TITLE
) {
559 type
= myCase
->getInt("Type", status
);
561 iter
.adoptInstead(ubrk_open((UBreakIteratorType
)type
, cLocaleID
, NULL
, 0, &status
));
562 } else if(type
==-2) {
563 // Open a trivial break iterator that only delivers { 0, length }
564 // or even just { 0 } as boundaries.
565 static const UChar rules
[] = { 0x2e, 0x2a, 0x3b }; // ".*;"
566 UParseError parseError
;
567 iter
.adoptInstead(ubrk_openRules(rules
, UPRV_LENGTHOF(rules
), NULL
, 0, &parseError
, &status
));
// The "Options" string is scanned for single letters, each of which sets one
// option bit: 'T', 'L' and 'A'.
572 if(whichCase
==TEST_TITLE
|| whichCase
==TEST_FOLD
) {
573 optionsString
= myCase
->getString("Options", status
);
574 if(optionsString
.indexOf((UChar
)0x54)>=0) { // T
575 options
|=U_FOLD_CASE_EXCLUDE_SPECIAL_I
;
577 if(optionsString
.indexOf((UChar
)0x4c)>=0) { // L
578 options
|=U_TITLECASE_NO_LOWERCASE
;
580 if(optionsString
.indexOf((UChar
)0x41)>=0) { // A
581 options
|=U_TITLECASE_NO_BREAK_ADJUSTMENT
;
// A setup failure is reported and status is reset to U_ZERO_ERROR.
585 if(U_FAILURE(status
)) {
586 dataerrln("error: TestCasing() setup failed for %s test case from casing.res: %s", dataNames
[whichCase
], u_errorName(status
));
587 status
= U_ZERO_ERROR
;
589 #if UCONFIG_NO_BREAK_ITERATION
590 LocalPointer
<UMemory
> iter
;
592 TestCasingImpl(input
, output
, whichCase
, iter
.getAlias(), cLocaleID
, options
);
595 #if !UCONFIG_NO_BREAK_ITERATION
596 iter
.adoptInstead(NULL
);
602 #if !UCONFIG_NO_BREAK_ITERATION
603 // more tests for API coverage
605 input
=UNICODE_STRING_SIMPLE("sTrA\\u00dfE").unescape();
606 (result
=input
).toTitle(NULL
);
607 if(result
!=UNICODE_STRING_SIMPLE("Stra\\u00dfe").unescape()) {
608 dataerrln("UnicodeString::toTitle(NULL) failed.");
// Checks titlecasing option flags. The first part passes (input, expected)
// pairs to TestCasingImpl() with TEST_TITLE, a null break iterator and
// combinations of U_TITLECASE_* options. The second part calls
// CaseMap::toTitle() with conflicting settings and reports an error unless
// the result code is U_ILLEGAL_ARGUMENT_ERROR.
614 StringCaseTest::TestTitleOptions() {
615 // New options in ICU 60.
616 TestCasingImpl(u
"ʻcAt! ʻeTc.", u
"ʻCat! ʻetc.", TEST_TITLE
,
617 nullptr, "", U_TITLECASE_WHOLE_STRING
);
618 TestCasingImpl(u
"a ʻCaT. A ʻdOg! ʻeTc.", u
"A ʻCaT. A ʻdOg! ʻETc.", TEST_TITLE
,
619 nullptr, "", U_TITLECASE_SENTENCES
|U_TITLECASE_NO_LOWERCASE
);
620 TestCasingImpl(u
"49eRs", u
"49ers", TEST_TITLE
,
621 nullptr, "", U_TITLECASE_WHOLE_STRING
);
622 TestCasingImpl(u
"«丰(aBc)»", u
"«丰(abc)»", TEST_TITLE
,
623 nullptr, "", U_TITLECASE_WHOLE_STRING
);
// Same two inputs as above, but with U_TITLECASE_ADJUST_TO_CASED added the
// expected output capitalizes the first letter ("49Ers", "(Abc)").
624 TestCasingImpl(u
"49eRs", u
"49Ers", TEST_TITLE
,
625 nullptr, "", U_TITLECASE_WHOLE_STRING
|U_TITLECASE_ADJUST_TO_CASED
);
626 TestCasingImpl(u
"«丰(aBc)»", u
"«丰(Abc)»", TEST_TITLE
,
627 nullptr, "", U_TITLECASE_WHOLE_STRING
|U_TITLECASE_ADJUST_TO_CASED
);
628 TestCasingImpl(u
" john. Smith", u
" John. Smith", TEST_TITLE
,
629 nullptr, "", U_TITLECASE_WHOLE_STRING
|U_TITLECASE_NO_LOWERCASE
);
630 TestCasingImpl(u
" john. Smith", u
" john. smith", TEST_TITLE
,
631 nullptr, "", U_TITLECASE_WHOLE_STRING
|U_TITLECASE_NO_BREAK_ADJUSTMENT
);
// Locale-specific expectations for the locale IDs "nl-BE" and "tr-DE".
632 TestCasingImpl(u
"«ijs»", u
"«IJs»", TEST_TITLE
,
633 nullptr, "nl-BE", U_TITLECASE_WHOLE_STRING
);
634 TestCasingImpl(u
"«ijs»", u
"«İjs»", TEST_TITLE
,
635 nullptr, "tr-DE", U_TITLECASE_WHOLE_STRING
);
637 #if !UCONFIG_NO_BREAK_ITERATION
638 // Test conflicting settings.
639 // If & when we add more options, then the ORed combinations may become
640 // indistinguishable from valid values.
641 IcuTestErrorCode
errorCode(*this, "TestTitleOptions");
// Conflict 1: two adjustment options at once.
642 CaseMap::toTitle("", U_TITLECASE_NO_BREAK_ADJUSTMENT
|U_TITLECASE_ADJUST_TO_CASED
, nullptr,
643 u
"", 0, nullptr, 0, nullptr, errorCode
);
644 if (errorCode
.get() != U_ILLEGAL_ARGUMENT_ERROR
) {
645 errln("CaseMap::toTitle(multiple adjustment options) -> %s not illegal argument",
646 errorCode
.errorName());
// Conflict 2: two iterator-selecting options at once.
649 CaseMap::toTitle("", U_TITLECASE_WHOLE_STRING
|U_TITLECASE_SENTENCES
, nullptr,
650 u
"", 0, nullptr, 0, nullptr, errorCode
);
651 if (errorCode
.get() != U_ILLEGAL_ARGUMENT_ERROR
) {
652 errln("CaseMap::toTitle(multiple iterator options) -> %s not illegal argument",
653 errorCode
.errorName());
// Conflict 3: an iterator-selecting option together with an explicit iterator.
656 LocalPointer
<BreakIterator
> iter(
657 BreakIterator::createCharacterInstance(Locale::getRoot(), errorCode
));
658 CaseMap::toTitle("", U_TITLECASE_WHOLE_STRING
, iter
.getAlias(),
659 u
"", 0, nullptr, 0, nullptr, errorCode
);
660 if (errorCode
.get() != U_ILLEGAL_ARGUMENT_ERROR
) {
661 errln("CaseMap::toTitle(iterator option + iterator) -> %s not illegal argument",
662 errorCode
.errorName());
// Walks a FullCaseFoldingIterator until next() returns a negative value and
// checks each (code point, full folding) pair. It also counts three
// spot-checked pairs — U+FB03 with "ffi", and U+00DF and U+1E9E with "ss" —
// and reports an error unless exactly three were seen.
// NOTE(review): the declarations of c, full and count, and the comparison
// against the code point's case folding, are not visible in this listing.
669 StringCaseTest::TestFullCaseFoldingIterator() {
670 UnicodeString ffi
=UNICODE_STRING_SIMPLE("ffi");
671 UnicodeString ss
=UNICODE_STRING_SIMPLE("ss");
672 FullCaseFoldingIterator iter
;
674 int32_t countSpecific
=0;
677 while((c
=iter
.next(full
))>=0) {
679 // Check that the full Case_Folding has more than 1 code point.
680 if(!full
.hasMoreChar32Than(0, 0x7fffffff, 1)) {
681 errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding has at most 1 code point", (long)c
);
684 // Check that full == Case_Folding(c).
688 errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding != cf(c)", (long)c
);
691 // Spot-check a couple of specific cases.
692 if((full
==ffi
&& c
==0xfb03) || (full
==ss
&& (c
==0xdf || c
==0x1e9e))) {
696 if(countSpecific
!=3) {
697 errln("error: FullCaseFoldingIterator did not yield exactly the expected specific cases");
700 errln("error: FullCaseFoldingIterator yielded only %d (cp, full) pairs", (int)count
);
// Uppercases "s" for Greek through three APIs and compares each result with
// "expected":
//   1. UnicodeString::toUpper(GREEK_LOCALE_);
//   2. u_strToUpper() with locale "el";
//   3. ucasemap_utf8ToUpper() on the UTF-8 form, with a UCaseMap opened for "el".
// The two C APIs are also called with destination capacities 0, length/2,
// length-1, length and length+1, checking the returned length, the error code
// (U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING or U_ZERO_ERROR)
// and, when there is room, the NUL terminator.
// NOTE(review): the declarations of s8, dest8 and dest8b are not visible in
// this listing.
705 StringCaseTest::assertGreekUpper(const char16_t *s
, const char16_t *expected
) {
706 UnicodeString
s16(s
);
707 UnicodeString
expected16(expected
);
708 UnicodeString msg
= UnicodeString("UnicodeString::toUpper/Greek(\"") + s16
+ "\")";
709 UnicodeString
result16(s16
);
710 result16
.toUpper(GREEK_LOCALE_
);
711 assertEquals(msg
, expected16
, result16
);
713 msg
= UnicodeString("u_strToUpper/Greek(\"") + s16
+ "\") cap=";
714 int32_t length
= expected16
.length();
715 int32_t capacities
[] = {
716 // Keep in sync with the UTF-8 capacities near the bottom of this function.
717 0, length
/ 2, length
- 1, length
, length
+ 1
719 for (int32_t i
= 0; i
< UPRV_LENGTHOF(capacities
); ++i
) {
720 int32_t cap
= capacities
[i
];
// The buffer is filled with 0x55AA before the call; u_strToUpper is given only
// "cap" as its destination capacity.
721 UChar
*dest16
= result16
.getBuffer(expected16
.length() + 1);
722 u_memset(dest16
, 0x55AA, result16
.getCapacity());
723 UErrorCode errorCode
= U_ZERO_ERROR
;
724 length
= u_strToUpper(dest16
, cap
, s16
.getBuffer(), s16
.length(), "el", &errorCode
);
// The full output length is expected back regardless of the capacity.
725 assertEquals(msg
+ cap
, expected16
.length(), length
);
726 UErrorCode expectedErrorCode
;
727 if (cap
< expected16
.length()) {
728 expectedErrorCode
= U_BUFFER_OVERFLOW_ERROR
;
729 } else if (cap
== expected16
.length()) {
730 expectedErrorCode
= U_STRING_NOT_TERMINATED_WARNING
;
732 expectedErrorCode
= U_ZERO_ERROR
;
733 assertEquals(msg
+ cap
+ " NUL", 0, dest16
[length
]);
735 assertEquals(msg
+ cap
+ " errorCode", expectedErrorCode
, errorCode
);
736 result16
.releaseBuffer(length
);
// The contents are compared only when the capacity could hold the whole result.
737 if (cap
>= expected16
.length()) {
738 assertEquals(msg
+ cap
, expected16
, result16
);
// UTF-8 variant: first one call with the full dest8 capacity as a reference
// result, then the same capacity sweep into dest8b.
742 UErrorCode errorCode
= U_ZERO_ERROR
;
743 LocalUCaseMapPointer
csm(ucasemap_open("el", 0, &errorCode
));
744 assertSuccess("ucasemap_open", errorCode
);
746 s16
.toUTF8String(s8
);
747 msg
= UnicodeString("ucasemap_utf8ToUpper/Greek(\"") + s16
+ "\")";
749 length
= ucasemap_utf8ToUpper(csm
.getAlias(), dest8
, UPRV_LENGTHOF(dest8
),
750 s8
.data(), static_cast<int32_t>(s8
.length()), &errorCode
);
751 assertSuccess("ucasemap_utf8ToUpper", errorCode
);
752 StringPiece
result8(dest8
, length
);
753 UnicodeString result16From8
= UnicodeString::fromUTF8(result8
);
754 assertEquals(msg
, expected16
, result16From8
);
// Recompute the capacities from the UTF-8 result length; capacities[0] keeps
// its initial value of 0.
757 capacities
[1] = length
/ 2;
758 capacities
[2] = length
- 1;
759 capacities
[3] = length
;
760 capacities
[4] = length
+ 1;
762 int32_t expected8Length
= length
; // Assuming the previous call worked.
763 for (int32_t i
= 0; i
< UPRV_LENGTHOF(capacities
); ++i
) {
764 int32_t cap
= capacities
[i
];
765 memset(dest8b
, 0x5A, UPRV_LENGTHOF(dest8b
));
766 UErrorCode errorCode
= U_ZERO_ERROR
;
767 length
= ucasemap_utf8ToUpper(csm
.getAlias(), dest8b
, cap
,
768 s8
.data(), static_cast<int32_t>(s8
.length()), &errorCode
);
769 assertEquals(msg
+ cap
, expected8Length
, length
);
770 UErrorCode expectedErrorCode
;
771 if (cap
< expected8Length
) {
772 expectedErrorCode
= U_BUFFER_OVERFLOW_ERROR
;
773 } else if (cap
== expected8Length
) {
774 expectedErrorCode
= U_STRING_NOT_TERMINATED_WARNING
;
776 expectedErrorCode
= U_ZERO_ERROR
;
777 // Casts to int32_t to avoid matching UBool.
778 assertEquals(msg
+ cap
+ " NUL", (int32_t)0, (int32_t)dest8b
[length
]);
780 assertEquals(msg
+ cap
+ " errorCode", expectedErrorCode
, errorCode
);
// Compare against the reference output from the full-capacity call above.
781 if (cap
>= expected8Length
) {
782 assertEquals(msg
+ cap
+ " (memcmp)", 0, memcmp(dest8
, dest8b
, expected8Length
));
// Runs assertGreekUpper() on (input, expected uppercase) pairs of Greek words
// and phrases. The URL comments name where each group of samples comes from.
788 StringCaseTest::TestGreekUpper() {
789 // http://bugs.icu-project.org/trac/ticket/5456
790 assertGreekUpper(u
"άδικος, κείμενο, ίριδα", u
"ΑΔΙΚΟΣ, ΚΕΙΜΕΝΟ, ΙΡΙΔΑ");
791 // https://bugzilla.mozilla.org/show_bug.cgi?id=307039
792 // https://bug307039.bmoattachments.org/attachment.cgi?id=194893
793 assertGreekUpper(u
"Πατάτα", u
"ΠΑΤΑΤΑ");
794 assertGreekUpper(u
"Αέρας, Μυστήριο, Ωραίο", u
"ΑΕΡΑΣ, ΜΥΣΤΗΡΙΟ, ΩΡΑΙΟ");
795 assertGreekUpper(u
"Μαΐου, Πόρος, Ρύθμιση", u
"ΜΑΪΟΥ, ΠΟΡΟΣ, ΡΥΘΜΙΣΗ");
796 assertGreekUpper(u
"ΰ, Τηρώ, Μάιος", u
"Ϋ, ΤΗΡΩ, ΜΑΪΟΣ");
797 assertGreekUpper(u
"άυλος", u
"ΑΫΛΟΣ");
// Input that is already uppercase is expected to come back unchanged.
798 assertGreekUpper(u
"ΑΫΛΟΣ", u
"ΑΫΛΟΣ");
799 assertGreekUpper(u
"Άκλιτα ρήματα ή άκλιτες μετοχές", u
"ΑΚΛΙΤΑ ΡΗΜΑΤΑ Ή ΑΚΛΙΤΕΣ ΜΕΤΟΧΕΣ");
800 // http://www.unicode.org/udhr/d/udhr_ell_monotonic.html
801 assertGreekUpper(u
"Επειδή η αναγνώριση της αξιοπρέπειας", u
"ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ ΤΗΣ ΑΞΙΟΠΡΕΠΕΙΑΣ");
802 assertGreekUpper(u
"νομικού ή διεθνούς", u
"ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ");
803 // http://unicode.org/udhr/d/udhr_ell_polytonic.html
804 assertGreekUpper(u
"Ἐπειδὴ ἡ ἀναγνώριση", u
"ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ");
805 assertGreekUpper(u
"νομικοῦ ἢ διεθνοῦς", u
"ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ");
806 // From Google bug report
807 assertGreekUpper(u
"Νέο, Δημιουργία", u
"ΝΕΟ, ΔΗΜΙΟΥΡΓΙΑ");
808 // http://crbug.com/234797
809 assertGreekUpper(u
"Ελάτε να φάτε τα καλύτερα παϊδάκια!", u
"ΕΛΑΤΕ ΝΑ ΦΑΤΕ ΤΑ ΚΑΛΥΤΕΡΑ ΠΑΪΔΑΚΙΑ!");
810 assertGreekUpper(u
"Μαΐου, τρόλεϊ", u
"ΜΑΪΟΥ, ΤΡΟΛΕΪ");
811 assertGreekUpper(u
"Το ένα ή το άλλο.", u
"ΤΟ ΕΝΑ Ή ΤΟ ΑΛΛΟ.");
812 // http://multilingualtypesetting.co.uk/blog/greek-typesetting-tips/
813 assertGreekUpper(u
"ρωμέικα", u
"ΡΩΜΕΪΚΑ");
814 assertGreekUpper(u
"ή.", u
"Ή.");
// Regression test for uppercasing an extremely long string (see the ticket
// comment below). It builds a string of 0x40000004 copies of U+0390, calls
// u_strToUpper() with the empty locale ID, and reports an error unless the
// resulting code is U_INDEX_OUTOFBOUNDS_ERROR. A log message is written
// instead when the test is skipped outside exhaustive mode or when the
// buffers cannot be allocated.
// NOTE(review): the guard conditions and early returns around the logln()
// calls are not visible in this listing.
818 StringCaseTest::TestLongUpper() {
820 logln("not exhaustive mode: skipping this test");
823 // Ticket #12663, crash with an extremely long string where
824 // U+0390 maps to 0399 0308 0301 so that the result is three times as long
825 // and overflows an int32_t.
826 int32_t length
= 0x40000004; // more than 1G UChars
827 UnicodeString
s(length
, (UChar32
)0x390, length
);
828 UnicodeString result
;
829 UChar
*dest
= result
.getBuffer(length
+ 1);
// Either allocation may fail on a machine without enough memory.
830 if (s
.isBogus() || dest
== NULL
) {
831 logln("Out of memory, unable to run this test on this machine.");
834 IcuTestErrorCode
errorCode(*this, "TestLongUpper");
835 int32_t destLength
= u_strToUpper(dest
, result
.getCapacity(),
836 s
.getBuffer(), s
.length(), "", errorCode
);
837 result
.releaseBuffer(destLength
);
// reset() is used in the comparison, so the error code is cleared as part of
// the check.
838 if (errorCode
.reset() != U_INDEX_OUTOFBOUNDS_ERROR
) {
839 errln("expected U_INDEX_OUTOFBOUNDS_ERROR, got %s (destLength is undefined, got %ld)",
840 errorCode
.errorName(), (long)destLength
);
// Feeds the single malformed UTF-8 byte 0x85 to ucasemap_utf8ToTitle(),
// ucasemap_utf8ToLower(), ucasemap_utf8ToUpper() and ucasemap_utf8FoldCase(),
// using a UCaseMap opened for "en". Each call must succeed, return a
// destination length of 1 and leave dest[0] equal to the source byte;
// otherwise an error is reported.
// NOTE(review): the declaration of destLength and any state reset between the
// four calls are not visible in this listing.
844 void StringCaseTest::TestMalformedUTF8() {
846 IcuTestErrorCode
errorCode(*this, "TestMalformedUTF8");
847 LocalUCaseMapPointer
csm(ucasemap_open("en", U_TITLECASE_NO_BREAK_ADJUSTMENT
, errorCode
));
848 if (errorCode
.isFailure()) {
849 errln("ucasemap_open(English) failed - %s", errorCode
.errorName());
852 char src
[1] = { (char)0x85 }; // malformed UTF-8
853 char dest
[3] = { 0, 0, 0 };
855 #if !UCONFIG_NO_BREAK_ITERATION
856 destLength
= ucasemap_utf8ToTitle(csm
.getAlias(), dest
, 3, src
, 1, errorCode
);
857 if (errorCode
.isFailure() || destLength
!= 1 || dest
[0] != src
[0]) {
858 errln("ucasemap_utf8ToTitle(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
859 errorCode
.errorName(), (int)destLength
, dest
[0]);
865 destLength
= ucasemap_utf8ToLower(csm
.getAlias(), dest
, 3, src
, 1, errorCode
);
866 if (errorCode
.isFailure() || destLength
!= 1 || dest
[0] != src
[0]) {
867 errln("ucasemap_utf8ToLower(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
868 errorCode
.errorName(), (int)destLength
, dest
[0]);
873 destLength
= ucasemap_utf8ToUpper(csm
.getAlias(), dest
, 3, src
, 1, errorCode
);
874 if (errorCode
.isFailure() || destLength
!= 1 || dest
[0] != src
[0]) {
875 errln("ucasemap_utf8ToUpper(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
876 errorCode
.errorName(), (int)destLength
, dest
[0]);
881 destLength
= ucasemap_utf8FoldCase(csm
.getAlias(), dest
, 3, src
, 1, errorCode
);
882 if (errorCode
.isFailure() || destLength
!= 1 || dest
[0] != src
[0]) {
883 errln("ucasemap_utf8FoldCase(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
884 errorCode
.errorName(), (int)destLength
, dest
[0]);
// Preflights titlecasing of "hello world" with a NULL destination and capacity
// 0, once through ucasemap_toTitle() (UTF-16) and once through
// ucasemap_utf8ToTitle() (UTF-8). Each call is expected to set
// U_BUFFER_OVERFLOW_ERROR and to return the input length.
// NOTE(review): the declaration of "result" and any error-code reset between
// the two calls are not visible in this listing.
888 void StringCaseTest::TestBufferOverflow() {
889 // Ticket #12849, incorrect result from Title Case preflight operation,
890 // when buffer overflow error is expected.
891 IcuTestErrorCode
errorCode(*this, "TestBufferOverflow");
892 LocalUCaseMapPointer
csm(ucasemap_open("en", 0, errorCode
));
893 if (errorCode
.isFailure()) {
894 errln("ucasemap_open(English) failed - %s", errorCode
.errorName());
898 UnicodeString
data("hello world");
900 #if !UCONFIG_NO_BREAK_ITERATION
901 result
= ucasemap_toTitle(csm
.getAlias(), NULL
, 0, data
.getBuffer(), data
.length(), errorCode
);
902 if (errorCode
.get() != U_BUFFER_OVERFLOW_ERROR
|| result
!= data
.length()) {
903 errln("%s:%d ucasemap_toTitle(\"hello world\") failed: "
904 "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)",
905 __FILE__
, __LINE__
, data
.length(), errorCode
.errorName(), result
);
910 std::string data_utf8
;
911 data
.toUTF8String(data_utf8
);
912 #if !UCONFIG_NO_BREAK_ITERATION
913 result
= ucasemap_utf8ToTitle(csm
.getAlias(), NULL
, 0, data_utf8
.c_str(), static_cast<int32_t>(data_utf8
.length()), errorCode
);
914 if (errorCode
.get() != U_BUFFER_OVERFLOW_ERROR
|| result
!= (int32_t)data_utf8
.length()) {
// NOTE(review): the message below names ucasemap_toTitle although the call
// being checked is ucasemap_utf8ToTitle, and data_utf8.length() (a std::string
// size) is passed for a %d conversion without the (int32_t) cast used in the
// condition above — both look like defects worth fixing.
915 errln("%s:%d ucasemap_toTitle(\"hello world\") failed: "
916 "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)",
917 __FILE__
, __LINE__
, data_utf8
.length(), errorCode
.errorName(), result
);
// Exercises the Edits builder: records unchanged and replaced spans, checks
// hasChanges(), numberOfChanges() and lengthDelta() after each stage, and
// compares the coarse and fine iterators against expected change tables.
// The final assertions are labelled as the state expected after a reset.
923 void StringCaseTest::TestEdits() {
924 IcuTestErrorCode
errorCode(*this, "TestEdits");
926 assertFalse("new Edits hasChanges", edits
.hasChanges());
927 assertEquals("new Edits numberOfChanges", 0, edits
.numberOfChanges());
928 assertEquals("new Edits", 0, edits
.lengthDelta());
// Record 1 + 10000 + 2 unchanged units. The empty (0, 0) replacement in
// between is expected not to count as a change, per the assertions below.
929 edits
.addUnchanged(1); // multiple unchanged ranges are combined
930 edits
.addUnchanged(10000); // too long, and they are split
931 edits
.addReplace(0, 0);
932 edits
.addUnchanged(2);
933 assertFalse("unchanged 10003 hasChanges", edits
.hasChanges());
934 assertEquals("unchanged 10003 numberOfChanges", 0, edits
.numberOfChanges());
935 assertEquals("unchanged 10003", 0, edits
.lengthDelta());
// Seven replacements follow. Their net length change is
// 3 * (1 - 2) + (10 - 0) + (0 - 100) + (4000 - 3000) + 0.
936 edits
.addReplace(2, 1); // multiple short equal-lengths edits are compressed
937 edits
.addUnchanged(0);
938 edits
.addReplace(2, 1);
939 edits
.addReplace(2, 1);
940 edits
.addReplace(0, 10);
941 edits
.addReplace(100, 0);
942 edits
.addReplace(3000, 4000); // variable-length encoding
943 edits
.addReplace(100000, 100000);
944 assertTrue("some edits hasChanges", edits
.hasChanges());
945 assertEquals("some edits numberOfChanges", 7, edits
.numberOfChanges());
946 assertEquals("some edits", -3 + 10 - 100 + 1000, edits
.lengthDelta());
// No error is expected to have been recorded while building the edits.
947 UErrorCode outErrorCode
= U_ZERO_ERROR
;
948 assertFalse("edits done: copyErrorTo", edits
.copyErrorTo(outErrorCode
));
// Coarse view: the unchanged run of 10003, then one change whose source
// length (103106) and destination length (104013) are the sums over all
// replacements recorded above.
950 static const EditChange coarseExpectedChanges
[] = {
951 { FALSE
, 10003, 10003 },
952 { TRUE
, 103106, 104013 }
954 TestUtility::checkEditsIter(*this, u
"coarse",
955 edits
.getCoarseIterator(), edits
.getCoarseIterator(),
956 coarseExpectedChanges
, UPRV_LENGTHOF(coarseExpectedChanges
), TRUE
, errorCode
);
957 TestUtility::checkEditsIter(*this, u
"coarse changes",
958 edits
.getCoarseChangesIterator(), edits
.getCoarseChangesIterator(),
959 coarseExpectedChanges
, UPRV_LENGTHOF(coarseExpectedChanges
), FALSE
, errorCode
);
// Fine view: the same edits listed change by change.
961 static const EditChange fineExpectedChanges
[] = {
962 { FALSE
, 10003, 10003 },
968 { TRUE
, 3000, 4000 },
969 { TRUE
, 100000, 100000 }
971 TestUtility::checkEditsIter(*this, u
"fine",
972 edits
.getFineIterator(), edits
.getFineIterator(),
973 fineExpectedChanges
, UPRV_LENGTHOF(fineExpectedChanges
), TRUE
, errorCode
);
974 TestUtility::checkEditsIter(*this, u
"fine changes",
975 edits
.getFineChangesIterator(), edits
.getFineChangesIterator(),
976 fineExpectedChanges
, UPRV_LENGTHOF(fineExpectedChanges
), FALSE
, errorCode
);
// Post-reset expectations: no changes recorded, and a changes iterator
// that has nothing to return.
979 assertFalse("reset hasChanges", edits
.hasChanges());
980 assertEquals("reset numberOfChanges", 0, edits
.numberOfChanges());
981 assertEquals("reset", 0, edits
.lengthDelta());
982 Edits::Iterator ei
= edits
.getCoarseChangesIterator();
983 assertFalse("reset then iterator", ei
.next(errorCode
));
// Checks that Edits objects holding a long edit list survive being copied,
// assigned, move-constructed and move-assigned (as the assertion labels
// describe), that moved-from objects compare equal to an empty Edits, and
// that a default-constructed Edits::Iterator can be used and reassigned.
986 void StringCaseTest::TestCopyMoveEdits() {
987 IcuTestErrorCode
errorCode(*this, "TestCopyMoveEdits");
988 // Exceed the stack array capacity.
// Each of the 250 replacements grows the text by one unit, so the total
// length delta is 250.
990 for (int32_t i
= 0; i
< 250; ++i
) {
991 a
.addReplace(i
% 10, (i
% 10) + 1);
993 assertEquals("a: many edits, length delta", 250, a
.lengthDelta());
// The copy must match the original, and the original must be unaffected.
997 assertEquals("b: copy of many edits, length delta", 250, b
.lengthDelta());
998 assertEquals("a remains: many edits, length delta", 250, a
.lengthDelta());
999 TestUtility::checkEqualEdits(*this, u
"b copy of a", a
, b
, errorCode
);
// c first receives an unrelated edit; the checks below expect it to end up
// equal to b regardless.
1004 c
.addReplace(88, 77);
1006 assertEquals("c: assigned many edits, length delta", 250, c
.lengthDelta());
1007 assertEquals("b remains: many edits, length delta", 250, b
.lengthDelta());
1008 TestUtility::checkEqualEdits(*this, u
"c = b", b
, c
, errorCode
);
1010 // std::move trouble on these platforms.
1011 // See https://ssl.icu-project.org/trac/ticket/13393
1012 #if !(U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390)
1013 // move constructor empties object with heap array
1014 Edits
d(std::move(a
));
1015 assertEquals("d: move-constructed many edits, length delta", 250, d
.lengthDelta());
1016 assertFalse("a moved away: no more hasChanges", a
.hasChanges());
1017 TestUtility::checkEqualEdits(*this, u
"d() <- a", d
, b
, errorCode
);
1019 TestUtility::checkEqualEdits(*this, u
"a moved away", empty
, a
, errorCode
);
1021 // move assignment empties object with heap array
// e first receives an unrelated edit; the checks below expect it to end up
// equal to c, and b to end up equal to an empty Edits.
1023 e
.addReplace(0, 1000);
1025 assertEquals("e: move-assigned many edits, length delta", 250, e
.lengthDelta());
1026 assertFalse("b moved away: no more hasChanges", b
.hasChanges());
1027 TestUtility::checkEqualEdits(*this, u
"e <- b", e
, c
, errorCode
);
1028 TestUtility::checkEqualEdits(*this, u
"b moved away", empty
, b
, errorCode
);
1030 // Edits::Iterator default constructor.
1031 Edits::Iterator iter
;
1032 assertFalse("Edits::Iterator().next()", iter
.next(errorCode
));
1033 assertSuccess("Edits::Iterator().next()", errorCode
);
// After reassignment the iterator must work normally. The first edit of
// the 250-edit list built above is addReplace(0, 1), hence newLength() 1.
1034 iter
= e
.getFineChangesIterator();
1035 assertTrue("iter.next()", iter
.next(errorCode
));
1036 assertSuccess("iter.next()", errorCode
);
1037 assertTrue("iter.hasChange()", iter
.hasChange());
1038 assertEquals("iter.newLength()", 1, iter
.newLength());
// Checks Edits::Iterator::sourceIndexFromDestinationIndex() on one fine
// iterator that is queried first with ascending and then with descending
// destination indexes, over an edit list built in a loop of N iterations.
1042 void StringCaseTest::TestEditsFindFwdBwd() {
1043 IcuTestErrorCode
errorCode(*this, "TestEditsFindFwdBwd");
1044 // Some users need index mappings to be efficient when they are out of order.
1045 // The most interesting failure case for this test is it taking a very long time.
1047 constexpr int32_t N
= 200000;
1048 for (int32_t i
= 0; i
< N
; ++i
) {
1052 Edits::Iterator iter
= e
.getFineIterator();
// Ascending queries: destination index i is expected to map to source
// index 2 * i, and i + 1 to 2 * i + 1.
1053 for (int32_t i
= 0; i
<= N
; i
+= 2) {
1054 assertEquals("ascending", i
* 2, iter
.sourceIndexFromDestinationIndex(i
, errorCode
));
1055 assertEquals("ascending", i
* 2 + 1, iter
.sourceIndexFromDestinationIndex(i
+ 1, errorCode
));
// Descending queries on the same iterator, with the same expected mapping.
1057 for (int32_t i
= N
; i
>= 0; i
-= 2) {
1058 assertEquals("descending", i
* 2 + 1, iter
.sourceIndexFromDestinationIndex(i
+ 1, errorCode
));
1059 assertEquals("descending", i
* 2, iter
.sourceIndexFromDestinationIndex(i
, errorCode
));
// Checks Edits::mergeAndAppend(): builds an edit list ab and an edit list bc
// scenario by scenario, builds the expected combined list expected_ac by
// hand, merges ab and bc into ac and compares ac with expected_ac. It then
// appends a second merge and an empty merge to the same ac, and finally
// checks that mismatched inputs are reported as U_ILLEGAL_ARGUMENT_ERROR.
1063 void StringCaseTest::TestMergeEdits() {
1064 // For debugging, set -v to see matching edits up to a failure.
1065 IcuTestErrorCode
errorCode(*this, "TestMergeEdits");
1066 Edits ab
, bc
, ac
, expected_ac
;
1068 // Simple: Two parallel non-changes.
1071 expected_ac
.addUnchanged(2);
1073 // Simple: Two aligned changes.
1074 ab
.addReplace(3, 2);
1075 bc
.addReplace(2, 1);
1076 expected_ac
.addReplace(3, 1);
1078 // Unequal non-changes.
1081 expected_ac
.addUnchanged(3);
1084 // Overlapping changes accumulate until they share a boundary.
1085 ab
.addReplace(4, 3);
1086 bc
.addReplace(3, 2);
1087 ab
.addReplace(4, 3);
1088 bc
.addReplace(3, 2);
1089 ab
.addReplace(4, 3);
1090 bc
.addReplace(3, 2);
1092 expected_ac
.addReplace(14, 8);
1095 // Balance out intermediate-string lengths.
1097 expected_ac
.addUnchanged(2);
1099 // Insert something and delete it: Should disappear.
1100 ab
.addReplace(0, 5);
1101 ab
.addReplace(0, 2);
1102 bc
.addReplace(7, 0);
1104 // Parallel change to make a new boundary.
1105 ab
.addReplace(1, 2);
1106 bc
.addReplace(2, 3);
1107 expected_ac
.addReplace(1, 3);
1109 // Multiple ab deletions should remain separate at the boundary.
1110 ab
.addReplace(1, 0);
1111 ab
.addReplace(2, 0);
1112 ab
.addReplace(3, 0);
1113 expected_ac
.addReplace(1, 0);
1114 expected_ac
.addReplace(2, 0);
1115 expected_ac
.addReplace(3, 0);
1117 // Unequal non-changes can be split for another boundary.
1120 expected_ac
.addUnchanged(1);
1123 // Multiple bc insertions should create a boundary and remain separate.
1124 bc
.addReplace(0, 4);
1125 bc
.addReplace(0, 5);
1126 bc
.addReplace(0, 6);
1127 expected_ac
.addReplace(0, 4);
1128 expected_ac
.addReplace(0, 5);
1129 expected_ac
.addReplace(0, 6);
1132 // Multiple ab deletions in the middle of a bc change are merged.
1133 bc
.addReplace(2, 2);
1135 ab
.addReplace(1, 0);
1136 ab
.addReplace(2, 0);
1137 ab
.addReplace(3, 0);
1138 ab
.addReplace(4, 1);
1139 expected_ac
.addReplace(11, 2);
1141 // Multiple bc insertions in the middle of an ab change are merged.
1142 ab
.addReplace(5, 6);
1143 bc
.addReplace(3, 3);
1145 bc
.addReplace(0, 4);
1146 bc
.addReplace(0, 5);
1147 bc
.addReplace(0, 6);
1148 bc
.addReplace(3, 7);
1149 expected_ac
.addReplace(5, 25);
1151 // Delete around a deletion.
1152 ab
.addReplace(4, 4);
1153 ab
.addReplace(3, 0);
1155 bc
.addReplace(2, 2);
1156 bc
.addReplace(4, 0);
1157 expected_ac
.addReplace(9, 2);
1159 // Insert into an insertion.
1160 ab
.addReplace(0, 2);
1161 bc
.addReplace(1, 1);
1162 bc
.addReplace(0, 8);
1164 expected_ac
.addReplace(0, 10);
1167 // Balance out intermediate-string lengths.
1169 expected_ac
.addUnchanged(3);
1171 // Deletions meet insertions.
1172 // Output order is arbitrary in principle, but we expect insertions first
1173 // and want to keep it that way.
1174 ab
.addReplace(2, 0);
1175 ab
.addReplace(4, 0);
1176 ab
.addReplace(6, 0);
1177 bc
.addReplace(0, 1);
1178 bc
.addReplace(0, 3);
1179 bc
.addReplace(0, 5);
1180 expected_ac
.addReplace(0, 1);
1181 expected_ac
.addReplace(0, 3);
1182 expected_ac
.addReplace(0, 5);
1183 expected_ac
.addReplace(2, 0);
1184 expected_ac
.addReplace(4, 0);
1185 expected_ac
.addReplace(6, 0);
1187 // End with a non-change, so that further edits are never reordered.
1190 expected_ac
.addUnchanged(1);
// Merge everything built so far and compare with the expectation.
1192 ac
.mergeAndAppend(ab
, bc
, errorCode
);
1193 assertSuccess("ab+bc", errorCode
);
1194 if (!TestUtility::checkEqualEdits(*this, u
"ab+bc", expected_ac
, ac
, errorCode
)) {
1198 // Append more Edits.
// The second merge is appended to the same ac, so expected_ac is extended
// rather than rebuilt.
1200 ab2
.addUnchanged(5);
1201 bc2
.addReplace(1, 2);
1202 bc2
.addUnchanged(4);
1203 expected_ac
.addReplace(1, 2);
1204 expected_ac
.addUnchanged(4);
1205 ac
.mergeAndAppend(ab2
, bc2
, errorCode
);
1206 assertSuccess("ab2+bc2", errorCode
);
1207 if (!TestUtility::checkEqualEdits(*this, u
"ab2+bc2", expected_ac
, ac
, errorCode
)) {
1211 // Append empty edits.
// Merging two empty edit lists must succeed and leave ac unchanged.
1213 ac
.mergeAndAppend(empty
, empty
, errorCode
);
1214 assertSuccess("empty+empty", errorCode
);
1215 if (!TestUtility::checkEqualEdits(*this, u
"empty+empty", expected_ac
, ac
, errorCode
)) {
1219 // Error: Append more edits with mismatched intermediate-string lengths.
// The mismatch is tried in both argument positions.
1221 mismatch
.addReplace(1, 1);
1222 ac
.mergeAndAppend(ab2
, mismatch
, errorCode
);
1223 assertEquals("ab2+mismatch", U_ILLEGAL_ARGUMENT_ERROR
, errorCode
.get());
1225 ac
.mergeAndAppend(mismatch
, bc2
, errorCode
);
1226 assertEquals("mismatch+bc2", U_ILLEGAL_ARGUMENT_ERROR
, errorCode
.get());
// Runs the UTF-16 CaseMap functions (toLower, toUpper, toTitle, fold) with
// U_OMIT_UNCHANGED_TEXT and an Edits object. For each call it checks that
// the destination holds only the changed text and that the fine iterator of
// the recorded edits matches an expected change table.
1230 void StringCaseTest::TestCaseMapWithEdits() {
1231 IcuTestErrorCode
errorCode(*this, "TestCaseMapWithEdits");
// Turkish lowercasing: only the two changed characters are expected in dest.
1235 int32_t length
= CaseMap::toLower("tr", U_OMIT_UNCHANGED_TEXT
,
1236 u
"IstanBul", 8, dest
, UPRV_LENGTHOF(dest
), &edits
, errorCode
);
1237 assertEquals(u
"toLower(IstanBul)", UnicodeString(u
"ıb"), UnicodeString(TRUE
, dest
, length
));
1238 static const EditChange lowerExpectedChanges
[] = {
1244 TestUtility::checkEditsIter(*this, u
"toLower(IstanBul)",
1245 edits
.getFineIterator(), edits
.getFineIterator(),
1246 lowerExpectedChanges
, UPRV_LENGTHOF(lowerExpectedChanges
),
// Greek uppercasing: five changed characters are expected in dest.
1250 length
= CaseMap::toUpper("el", U_OMIT_UNCHANGED_TEXT
,
1251 u
"Πατάτα", 6, dest
, UPRV_LENGTHOF(dest
), &edits
, errorCode
);
1252 assertEquals(u
"toUpper(Πατάτα)", UnicodeString(u
"ΑΤΑΤΑ"), UnicodeString(TRUE
, dest
, length
));
1253 static const EditChange upperExpectedChanges
[] = {
1261 TestUtility::checkEditsIter(*this, u
"toUpper(Πατάτα)",
1262 edits
.getFineIterator(), edits
.getFineIterator(),
1263 upperExpectedChanges
, UPRV_LENGTHOF(upperExpectedChanges
),
1268 #if !UCONFIG_NO_BREAK_ITERATION
// Dutch titlecasing with the no-lowercase and no-break-adjustment options:
// a single changed character is expected in dest.
1269 length
= CaseMap::toTitle("nl",
1270 U_OMIT_UNCHANGED_TEXT
|
1271 U_TITLECASE_NO_BREAK_ADJUSTMENT
|
1272 U_TITLECASE_NO_LOWERCASE
,
1273 nullptr, u
"IjssEL IglOo", 12,
1274 dest
, UPRV_LENGTHOF(dest
), &edits
, errorCode
);
1275 assertEquals(u
"toTitle(IjssEL IglOo)", UnicodeString(u
"J"), UnicodeString(TRUE
, dest
, length
));
1276 static const EditChange titleExpectedChanges
[] = {
1281 TestUtility::checkEditsIter(*this, u
"toTitle(IjssEL IglOo)",
1282 edits
.getFineIterator(), edits
.getFineIterator(),
1283 titleExpectedChanges
, UPRV_LENGTHOF(titleExpectedChanges
),
1287 // No explicit nor automatic edits.reset(). Edits should be appended.
1288 length
= CaseMap::fold(U_OMIT_UNCHANGED_TEXT
| U_EDITS_NO_RESET
| U_FOLD_CASE_EXCLUDE_SPECIAL_I
,
1289 u
"IßtanBul", 8, dest
, UPRV_LENGTHOF(dest
), &edits
, errorCode
);
1290 assertEquals(u
"foldCase(IßtanBul)", UnicodeString(u
"ıssb"), UnicodeString(TRUE
, dest
, length
));
// With U_EDITS_NO_RESET the expected table starts with the titlecasing
// edits (when break iteration is available), then the case folding edits.
1291 static const EditChange foldExpectedChanges
[] = {
1292 #if !UCONFIG_NO_BREAK_ITERATION
1293 // From titlecasing.
1298 // From case folding.
1305 TestUtility::checkEditsIter(*this, u
"foldCase(no Edits reset, IßtanBul)",
1306 edits
.getFineIterator(), edits
.getFineIterator(),
1307 foldExpectedChanges
, UPRV_LENGTHOF(foldExpectedChanges
),
// UTF-8 counterpart of the CaseMap-with-Edits checks: runs utf8ToLower,
// utf8ToUpper, utf8ToTitle and utf8Fold with U_OMIT_UNCHANGED_TEXT and an
// Edits object, converts the destination bytes back with fromUTF8 for
// comparison, and checks the fine iterator against expected change tables.
// Source lengths are passed in bytes.
1311 void StringCaseTest::TestCaseMapUTF8WithEdits() {
1312 IcuTestErrorCode
errorCode(*this, "TestCaseMapUTF8WithEdits");
1316 int32_t length
= CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT
,
1317 reinterpret_cast<const char*>(u8
"IstanBul"), 8, dest
, UPRV_LENGTHOF(dest
), &edits
, errorCode
);
1318 assertEquals(u
"toLower(IstanBul)", UnicodeString(u
"ıb"),
1319 UnicodeString::fromUTF8(StringPiece(dest
, length
)));
1320 static const EditChange lowerExpectedChanges
[] = {
1326 TestUtility::checkEditsIter(*this, u
"toLower(IstanBul)",
1327 edits
.getFineIterator(), edits
.getFineIterator(),
1328 lowerExpectedChanges
, UPRV_LENGTHOF(lowerExpectedChanges
),
// The source length 6 * 2 is the byte length of the six Greek letters.
1332 length
= CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT
,
1333 reinterpret_cast<const char*>(u8
"Πατάτα"), 6 * 2, dest
, UPRV_LENGTHOF(dest
), &edits
, errorCode
);
1334 assertEquals(u
"toUpper(Πατάτα)", UnicodeString(u
"ΑΤΑΤΑ"),
1335 UnicodeString::fromUTF8(StringPiece(dest
, length
)));
1336 static const EditChange upperExpectedChanges
[] = {
1344 TestUtility::checkEditsIter(*this, u
"toUpper(Πατάτα)",
1345 edits
.getFineIterator(), edits
.getFineIterator(),
1346 upperExpectedChanges
, UPRV_LENGTHOF(upperExpectedChanges
),
1350 #if !UCONFIG_NO_BREAK_ITERATION
1351 length
= CaseMap::utf8ToTitle("nl",
1352 U_OMIT_UNCHANGED_TEXT
|
1353 U_TITLECASE_NO_BREAK_ADJUSTMENT
|
1354 U_TITLECASE_NO_LOWERCASE
,
1355 nullptr, reinterpret_cast<const char*>(u8
"IjssEL IglOo"), 12,
1356 dest
, UPRV_LENGTHOF(dest
), &edits
, errorCode
);
1357 assertEquals(u
"toTitle(IjssEL IglOo)", UnicodeString(u
"J"),
1358 UnicodeString::fromUTF8(StringPiece(dest
, length
)));
1359 static const EditChange titleExpectedChanges
[] = {
1364 TestUtility::checkEditsIter(*this, u
"toTitle(IjssEL IglOo)",
1365 edits
.getFineIterator(), edits
.getFineIterator(),
1366 titleExpectedChanges
, UPRV_LENGTHOF(titleExpectedChanges
),
1370 // No explicit nor automatic edits.reset(). Edits should be appended.
// The source length 1 + 2 + 6 counts the sharp s as two bytes.
1371 length
= CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT
| U_EDITS_NO_RESET
|
1372 U_FOLD_CASE_EXCLUDE_SPECIAL_I
,
1373 reinterpret_cast<const char*>(u8
"IßtanBul"), 1 + 2 + 6, dest
, UPRV_LENGTHOF(dest
), &edits
, errorCode
);
1374 assertEquals(u
"foldCase(IßtanBul)", UnicodeString(u
"ıssb"),
1375 UnicodeString::fromUTF8(StringPiece(dest
, length
)));
1376 static const EditChange foldExpectedChanges
[] = {
1377 #if !UCONFIG_NO_BREAK_ITERATION
1378 // From titlecasing.
1383 // From case folding.
1390 TestUtility::checkEditsIter(*this, u
"foldCase(IßtanBul)",
1391 edits
.getFineIterator(), edits
.getFineIterator(),
1392 foldExpectedChanges
, UPRV_LENGTHOF(foldExpectedChanges
),
// Runs the UTF-16 CaseMap functions without an Edits object (nullptr) and
// checks the destination buffer contents: first with U_OMIT_UNCHANGED_TEXT,
// where only changed text is expected, then without it, where the whole
// mapped string is expected.
1396 void StringCaseTest::TestCaseMapToString() {
1397 // This test function name is parallel with one in UCharacterCaseTest.java.
1398 // It is a bit of a misnomer until we have CaseMap API that writes to
1399 // a UnicodeString, at which point we should change this code here.
1400 IcuTestErrorCode
errorCode(*this, "TestCaseMapToString");
1403 // Omit unchanged text.
1404 int32_t length
= CaseMap::toLower("tr", U_OMIT_UNCHANGED_TEXT
,
1405 u
"IstanBul", 8, dest
, UPRV_LENGTHOF(dest
), nullptr, errorCode
);
1406 assertEquals(u
"toLower(IstanBul)",
1407 UnicodeString(u
"ıb"), UnicodeString(TRUE
, dest
, length
));
1408 length
= CaseMap::toUpper("el", U_OMIT_UNCHANGED_TEXT
,
1409 u
"Πατάτα", 6, dest
, UPRV_LENGTHOF(dest
), nullptr, errorCode
);
1410 assertEquals(u
"toUpper(Πατάτα)",
1411 UnicodeString(u
"ΑΤΑΤΑ"), UnicodeString(TRUE
, dest
, length
));
1412 #if !UCONFIG_NO_BREAK_ITERATION
1413 length
= CaseMap::toTitle("nl",
1414 U_OMIT_UNCHANGED_TEXT
|
1415 U_TITLECASE_NO_BREAK_ADJUSTMENT
|
1416 U_TITLECASE_NO_LOWERCASE
,
1417 nullptr, u
"IjssEL IglOo", 12,
1418 dest
, UPRV_LENGTHOF(dest
), nullptr, errorCode
);
1419 assertEquals(u
"toTitle(IjssEL IglOo)",
1420 UnicodeString(u
"J"), UnicodeString(TRUE
, dest
, length
));
1422 length
= CaseMap::fold(U_OMIT_UNCHANGED_TEXT
| U_FOLD_CASE_EXCLUDE_SPECIAL_I
,
1423 u
"IßtanBul", 8, dest
, UPRV_LENGTHOF(dest
), nullptr, errorCode
);
1424 assertEquals(u
"foldCase(IßtanBul)",
1425 UnicodeString(u
"ıssb"), UnicodeString(TRUE
, dest
, length
));
1427 // Return the whole result string.
// Same inputs as above, now without U_OMIT_UNCHANGED_TEXT.
1428 length
= CaseMap::toLower("tr", 0,
1429 u
"IstanBul", 8, dest
, UPRV_LENGTHOF(dest
), nullptr, errorCode
);
1430 assertEquals(u
"toLower(IstanBul)",
1431 UnicodeString(u
"ıstanbul"), UnicodeString(TRUE
, dest
, length
));
1432 length
= CaseMap::toUpper("el", 0,
1433 u
"Πατάτα", 6, dest
, UPRV_LENGTHOF(dest
), nullptr, errorCode
);
1434 assertEquals(u
"toUpper(Πατάτα)",
1435 UnicodeString(u
"ΠΑΤΑΤΑ"), UnicodeString(TRUE
, dest
, length
));
1436 #if !UCONFIG_NO_BREAK_ITERATION
1437 length
= CaseMap::toTitle("nl",
1438 U_TITLECASE_NO_BREAK_ADJUSTMENT
|
1439 U_TITLECASE_NO_LOWERCASE
,
1440 nullptr, u
"IjssEL IglOo", 12,
1441 dest
, UPRV_LENGTHOF(dest
), nullptr, errorCode
);
1442 assertEquals(u
"toTitle(IjssEL IglOo)",
1443 UnicodeString(u
"IJssEL IglOo"), UnicodeString(TRUE
, dest
, length
));
1445 length
= CaseMap::fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I
,
1446 u
"IßtanBul", 8, dest
, UPRV_LENGTHOF(dest
), nullptr, errorCode
);
1447 assertEquals(u
"foldCase(IßtanBul)",
1448 UnicodeString(u
"ısstanbul"), UnicodeString(TRUE
, dest
, length
));
// Runs the UTF-8 CaseMap functions that write to a ByteSink, here a
// StringByteSink over the std::string dest, without an Edits object.
// Results are converted back with fromUTF8 for comparison: first with
// U_OMIT_UNCHANGED_TEXT (only changed text expected), then without it
// (whole mapped string expected).
1451 void StringCaseTest::TestCaseMapUTF8ToString() {
1452 IcuTestErrorCode
errorCode(*this, "TestCaseMapUTF8ToString");
1454 StringByteSink
<std::string
> sink(&dest
);
1456 // Omit unchanged text.
1457 CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT
, reinterpret_cast<const char*>(u8
"IstanBul"), sink
, nullptr, errorCode
);
1458 assertEquals(u
"toLower(IstanBul)", UnicodeString(u
"ıb"), UnicodeString::fromUTF8(dest
));
1460 CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT
, reinterpret_cast<const char*>(u8
"Πατάτα"), sink
, nullptr, errorCode
);
1461 assertEquals(u
"toUpper(Πατάτα)", UnicodeString(u
"ΑΤΑΤΑ"),
1462 UnicodeString::fromUTF8(dest
));
1463 #if !UCONFIG_NO_BREAK_ITERATION
1465 CaseMap::utf8ToTitle(
1466 "nl", U_OMIT_UNCHANGED_TEXT
| U_TITLECASE_NO_BREAK_ADJUSTMENT
| U_TITLECASE_NO_LOWERCASE
,
1467 nullptr, reinterpret_cast<const char*>(u8
"IjssEL IglOo"), sink
, nullptr, errorCode
);
1468 assertEquals(u
"toTitle(IjssEL IglOo)", UnicodeString(u
"J"),
1469 UnicodeString::fromUTF8(dest
));
1472 CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT
| U_FOLD_CASE_EXCLUDE_SPECIAL_I
,
1473 reinterpret_cast<const char*>(u8
"IßtanBul"), sink
, nullptr, errorCode
);
1474 assertEquals(u
"foldCase(IßtanBul)", UnicodeString(u
"ıssb"),
1475 UnicodeString::fromUTF8(dest
));
1477 // Return the whole result string.
// Same inputs as above, now without U_OMIT_UNCHANGED_TEXT.
1479 CaseMap::utf8ToLower("tr", 0, reinterpret_cast<const char*>(u8
"IstanBul"), sink
, nullptr, errorCode
);
1480 assertEquals(u
"toLower(IstanBul)", UnicodeString(u
"ıstanbul"),
1481 UnicodeString::fromUTF8(dest
));
1483 CaseMap::utf8ToUpper("el", 0, reinterpret_cast<const char*>(u8
"Πατάτα"), sink
, nullptr, errorCode
);
1484 assertEquals(u
"toUpper(Πατάτα)", UnicodeString(u
"ΠΑΤΑΤΑ"),
1485 UnicodeString::fromUTF8(dest
));
1486 #if !UCONFIG_NO_BREAK_ITERATION
1488 CaseMap::utf8ToTitle("nl", U_TITLECASE_NO_BREAK_ADJUSTMENT
| U_TITLECASE_NO_LOWERCASE
,
1489 nullptr, reinterpret_cast<const char*>(u8
"IjssEL IglOo"), sink
, nullptr, errorCode
);
1490 assertEquals(u
"toTitle(IjssEL IglOo)", UnicodeString(u
"IJssEL IglOo"),
1491 UnicodeString::fromUTF8(dest
));
1494 CaseMap::utf8Fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I
, reinterpret_cast<const char*>(u8
"IßtanBul"), sink
, nullptr, errorCode
);
1495 assertEquals(u
"foldCase(IßtanBul)", UnicodeString(u
"ısstanbul"),
1496 UnicodeString::fromUTF8(dest
));
// Uppercases a 306-character UnicodeString in the root locale and compares
// it with the expected uppercase text. Each of the six rows is 50 lowercase
// letters followed by one capital F, i.e. 51 characters per row.
1499 void StringCaseTest::TestLongUnicodeString() {
1500 // Code coverage for UnicodeString case mapping code handling
1501 // long strings or many changes in a string.
1502 UnicodeString
s(TRUE
,
1504 u
"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1505 u
"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1506 u
"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1507 u
"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1508 u
"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1509 u
"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF", 6 * 51);
1510 UnicodeString
expected(TRUE
,
1512 u
"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1513 u
"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1514 u
"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1515 u
"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1516 u
"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1517 u
"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF", 6 * 51);
// Uppercase in place, then compare with the expected text.
1518 s
.toUpper(Locale::getRoot());
1519 assertEquals("string length 306", expected
, s
);
1522 #if !UCONFIG_NO_BREAK_ITERATION
// Crash regression test: titlecases a UnicodeString built from a char16_t
// literal of three CJK characters, using the English locale. No assertion
// is visible here; the call returning normally is what is being checked.
1523 void StringCaseTest::TestBug13127() {
1524 // Test case crashed when the bug was present.
1525 const char16_t *s16
= u
"日本語";
1526 UnicodeString
s(TRUE
, s16
, -1);
1527 s
.toTitle(0, Locale::getEnglish());
// Calls u_strToTitle() with the same 32-unit array as both destination and
// source (source length -1, empty locale string) and checks the returned
// length and the resulting buffer contents against the expected text.
1530 void StringCaseTest::TestInPlaceTitle() {
1531 // Similar to TestBug13127. u_strToTitle() can modify the buffer in-place.
1532 IcuTestErrorCode
errorCode(*this, "TestInPlaceTitle");
1533 char16_t s
[32] = u
"ß ß ß日本語 abcdef";
1534 const char16_t *expected
= u
"Ss Ss Ss日本語 Abcdef";
// The expected text is longer than the input because each sharp s is
// expected to become two letters.
1535 int32_t length
= u_strToTitle(s
, UPRV_LENGTHOF(s
), s
, -1, nullptr, "", errorCode
);
1536 assertEquals("u_strToTitle(in-place) length", u_strlen(expected
), length
);
1537 assertEquals("u_strToTitle(in-place)", expected
, s
);
// Case-folds a short string with an Edits object and then checks, for the
// fine, fine-changes, coarse and coarse-changes iterators: (1) the toString()
// form of every step returned by next(), and (2) the index mappings between
// source and destination, using the expectation tables defined below.
1541 void StringCaseTest::TestCaseMapEditsIteratorDocs() {
1542 IcuTestErrorCode
status(*this, "TestCaseMapEditsIteratorDocs");
1543 const char16_t* input
= u
"abcßDeF";
1544 int32_t inputLength
= u_strlen(input
);
1545 // output: "abcssdef"
1547 char16_t output
[10];
1549 CaseMap::fold(0, input
, -1, output
, 10, &edits
, status
);
// Expected toString() output per next() step, one table per iterator kind.
1551 static const char16_t* fineIteratorExpected
[] = {
1552 u
"{ src[0..3] ≡ dest[0..3] (no-change) }",
1553 u
"{ src[3..4] ⇝ dest[3..5], repl[0..2] }",
1554 u
"{ src[4..5] ⇝ dest[5..6], repl[2..3] }",
1555 u
"{ src[5..6] ≡ dest[6..7] (no-change) }",
1556 u
"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
1558 static const char16_t* fineChangesIteratorExpected
[] = {
1559 u
"{ src[3..4] ⇝ dest[3..5], repl[0..2] }",
1560 u
"{ src[4..5] ⇝ dest[5..6], repl[2..3] }",
1561 u
"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
1563 static const char16_t* coarseIteratorExpected
[] = {
1564 u
"{ src[0..3] ≡ dest[0..3] (no-change) }",
1565 u
"{ src[3..5] ⇝ dest[3..6], repl[0..3] }",
1566 u
"{ src[5..6] ≡ dest[6..7] (no-change) }",
1567 u
"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
1569 static const char16_t* coarseChangesIteratorExpected
[] = {
1570 u
"{ src[3..5] ⇝ dest[3..6], repl[0..3] }",
1571 u
"{ src[6..7] ⇝ dest[7..8], repl[3..4] }",
// In the tables below, the EditIndices arrays are compared with the
// iterator position after a find call, and the StringIndices arrays are
// compared with the results of the index-conversion calls.
1574 // Expected destination indices when source index is queried
1575 static int32_t expectedDestFineEditIndices
[] = {0, 0, 0, 3, 5, 6, 7};
1576 static int32_t expectedDestCoarseEditIndices
[] = {0, 0, 0, 3, 3, 6, 7};
1577 static int32_t expectedDestFineStringIndices
[] = {0, 1, 2, 3, 5, 6, 7};
1578 static int32_t expectedDestCoarseStringIndices
[] = {0, 1, 2, 3, 6, 6, 7};
1580 // Expected source indices when destination index is queried
1581 static int32_t expectedSrcFineEditIndices
[] = { 0, 0, 0, 3, 3, 4, 5, 6 };
1582 static int32_t expectedSrcCoarseEditIndices
[] = { 0, 0, 0, 3, 3, 3, 5, 6 };
1583 static int32_t expectedSrcFineStringIndices
[] = { 0, 1, 2, 3, 4, 4, 5, 6 };
1584 static int32_t expectedSrcCoarseStringIndices
[] = { 0, 1, 2, 3, 5, 5, 5, 6 };
1586 // Demonstrate the iterator next() method:
// In each loop i is incremented while fetching the expected text, so the
// Iteration label that follows is 1-based.
1587 Edits::Iterator fineIterator
= edits
.getFineIterator();
1589 UnicodeString toString
;
1590 while (fineIterator
.next(status
)) {
1591 UnicodeString expected
= fineIteratorExpected
[i
++];
1592 assertEquals(UnicodeString(u
"Iteration #") + i
,
1594 fineIterator
.toString(toString
.remove()));
1596 Edits::Iterator fineChangesIterator
= edits
.getFineChangesIterator();
1598 while (fineChangesIterator
.next(status
)) {
1599 UnicodeString expected
= fineChangesIteratorExpected
[i
++];
1600 assertEquals(UnicodeString(u
"Iteration #") + i
,
1602 fineChangesIterator
.toString(toString
.remove()));
1604 Edits::Iterator coarseIterator
= edits
.getCoarseIterator();
1606 while (coarseIterator
.next(status
)) {
1607 UnicodeString expected
= coarseIteratorExpected
[i
++];
1608 assertEquals(UnicodeString(u
"Iteration #") + i
,
1610 coarseIterator
.toString(toString
.remove()));
1612 Edits::Iterator coarseChangesIterator
= edits
.getCoarseChangesIterator();
1614 while (coarseChangesIterator
.next(status
)) {
1615 UnicodeString expected
= coarseChangesIteratorExpected
[i
++];
1616 assertEquals(UnicodeString(u
"Iteration #") + i
,
1618 coarseChangesIterator
.toString(toString
.remove()));
1621 // Demonstrate the iterator indexing methods:
1622 // fineIterator should have the same behavior as fineChangesIterator, and
1623 // coarseIterator should have the same behavior as coarseChangesIterator.
1624 for (int32_t srcIndex
=0; srcIndex
<inputLength
; srcIndex
++) {
// Position all four iterators on the edit containing srcIndex.
1625 fineIterator
.findSourceIndex(srcIndex
, status
);
1626 fineChangesIterator
.findSourceIndex(srcIndex
, status
);
1627 coarseIterator
.findSourceIndex(srcIndex
, status
);
1628 coarseChangesIterator
.findSourceIndex(srcIndex
, status
);
// Check where each iterator now sits in the destination.
1630 assertEquals(UnicodeString("Source index: ") + srcIndex
,
1631 expectedDestFineEditIndices
[srcIndex
],
1632 fineIterator
.destinationIndex());
1633 assertEquals(UnicodeString("Source index: ") + srcIndex
,
1634 expectedDestFineEditIndices
[srcIndex
],
1635 fineChangesIterator
.destinationIndex());
1636 assertEquals(UnicodeString("Source index: ") + srcIndex
,
1637 expectedDestCoarseEditIndices
[srcIndex
],
1638 coarseIterator
.destinationIndex());
1639 assertEquals(UnicodeString("Source index: ") + srcIndex
,
1640 expectedDestCoarseEditIndices
[srcIndex
],
1641 coarseChangesIterator
.destinationIndex());
// Check the direct source-to-destination index conversion.
1643 assertEquals(UnicodeString("Source index: ") + srcIndex
,
1644 expectedDestFineStringIndices
[srcIndex
],
1645 fineIterator
.destinationIndexFromSourceIndex(srcIndex
, status
));
1646 assertEquals(UnicodeString("Source index: ") + srcIndex
,
1647 expectedDestFineStringIndices
[srcIndex
],
1648 fineChangesIterator
.destinationIndexFromSourceIndex(srcIndex
, status
));
1649 assertEquals(UnicodeString("Source index: ") + srcIndex
,
1650 expectedDestCoarseStringIndices
[srcIndex
],
1651 coarseIterator
.destinationIndexFromSourceIndex(srcIndex
, status
));
1652 assertEquals(UnicodeString("Source index: ") + srcIndex
,
1653 expectedDestCoarseStringIndices
[srcIndex
],
1654 coarseChangesIterator
.destinationIndexFromSourceIndex(srcIndex
, status
));
// NOTE(review): this loop is bounded by inputLength (7 for the input above)
// while the expectedSrc tables have 8 entries, so the last destination
// index is never queried -- confirm whether the output length was intended.
1656 for (int32_t destIndex
=0; destIndex
<inputLength
; destIndex
++) {
// Position all four iterators on the edit containing destIndex.
1657 fineIterator
.findDestinationIndex(destIndex
, status
);
1658 fineChangesIterator
.findDestinationIndex(destIndex
, status
);
1659 coarseIterator
.findDestinationIndex(destIndex
, status
);
1660 coarseChangesIterator
.findDestinationIndex(destIndex
, status
);
// Check where each iterator now sits in the source.
1662 assertEquals(UnicodeString("Destination index: ") + destIndex
,
1663 expectedSrcFineEditIndices
[destIndex
],
1664 fineIterator
.sourceIndex());
1665 assertEquals(UnicodeString("Destination index: ") + destIndex
,
1666 expectedSrcFineEditIndices
[destIndex
],
1667 fineChangesIterator
.sourceIndex());
1668 assertEquals(UnicodeString("Destination index: ") + destIndex
,
1669 expectedSrcCoarseEditIndices
[destIndex
],
1670 coarseIterator
.sourceIndex());
1671 assertEquals(UnicodeString("Destination index: ") + destIndex
,
1672 expectedSrcCoarseEditIndices
[destIndex
],
1673 coarseChangesIterator
.sourceIndex());
// Check the direct destination-to-source index conversion.
1675 assertEquals(UnicodeString("Destination index: ") + destIndex
,
1676 expectedSrcFineStringIndices
[destIndex
],
1677 fineIterator
.sourceIndexFromDestinationIndex(destIndex
, status
));
1678 assertEquals(UnicodeString("Destination index: ") + destIndex
,
1679 expectedSrcFineStringIndices
[destIndex
],
1680 fineChangesIterator
.sourceIndexFromDestinationIndex(destIndex
, status
));
1681 assertEquals(UnicodeString("Destination index: ") + destIndex
,
1682 expectedSrcCoarseStringIndices
[destIndex
],
1683 coarseIterator
.sourceIndexFromDestinationIndex(destIndex
, status
));
1684 assertEquals(UnicodeString("Destination index: ") + destIndex
,
1685 expectedSrcCoarseStringIndices
[destIndex
],
1686 coarseChangesIterator
.sourceIndexFromDestinationIndex(destIndex
, status
));
// Checks root-locale lowercasing and, when break iteration is available,
// titlecasing of a three-character string from the Greek Extended block,
// comparing each result with an expected literal.
1690 void StringCaseTest::TestCaseMapGreekExtended() {
1692 UnicodeString
s(u
"\u1F80\u1F88\u1FFC");
// Work on a copy of the input.
1693 UnicodeString
result(s
);
1694 result
.toLower(Locale::getRoot());
1695 assertEquals(u
"lower", u
"\u1F80\u1F80\u1FF3", result
);
1696 #if !UCONFIG_NO_BREAK_ITERATION
// A null break iterator is passed to toTitle here.
1698 result
.toTitle(nullptr, Locale::getRoot());
1699 assertEquals(u
"title", u
"\u1F88\u1F80\u1FF3", result
);