1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 2002-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 *******************************************************************************
10 * file name: strcase.cpp
12 * tab size: 8 (not used)
15 * created on: 2002mar12
16 * created by: Markus W. Scherer
18 * Test file for string casing C++ API functions.
21 #include "unicode/std_string.h"
22 #include "unicode/casemap.h"
23 #include "unicode/edits.h"
24 #include "unicode/uchar.h"
25 #include "unicode/ures.h"
26 #include "unicode/uloc.h"
27 #include "unicode/locid.h"
28 #include "unicode/ubrk.h"
29 #include "unicode/unistr.h"
30 #include "unicode/ucasemap.h"
33 #include "unicode/tstdtmod.h"
38 int32_t oldLength
, newLength
;
// Test suite for the string case-mapping APIs. Derives from IntlTest;
// runIndexedTest() dispatches to the individual Test*() methods declared below.
41 class StringCaseTest
: public IntlTest
{
44 virtual ~StringCaseTest();
// Test-framework entry point: selects a test by index and reports its name.
46 void runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char *par
=0);
48 void TestCaseConversion();
// Worker for the data-driven tests. iter is passed as an untyped pointer; the
// implementation casts it to BreakIterator * / UBreakIterator * for titlecasing.
50 void TestCasingImpl(const UnicodeString
&input
,
51 const UnicodeString
&output
,
53 void *iter
, const char *localeID
, uint32_t options
);
55 void TestFullCaseFoldingIterator();
56 void TestGreekUpper();
58 void TestMalformedUTF8();
59 void TestBufferOverflow();
61 void TestCaseMapWithEdits();
62 void TestCaseMapUTF8WithEdits();
63 void TestLongUnicodeString();
// Helper used by TestGreekUpper(): uppercases s for Greek and compares the
// result with expected.
67 void assertGreekUpper(const char16_t *s
, const char16_t *expected
);
// NOTE(review): the parameters below match the definition of checkEditsIter()
// later in this file; the line naming the function is not visible here.
69 const UnicodeString
&name
, Edits::Iterator ei1
, Edits::Iterator ei2
, // two equal iterators
70 const EditChange expected
[], int32_t expLength
, UBool withUnchanged
,
71 UErrorCode
&errorCode
);
// Constructor: initializes the shared Greek locale ("el") used by assertGreekUpper().
76 StringCaseTest::StringCaseTest() : GREEK_LOCALE_("el") {}
// Destructor: nothing to release.
78 StringCaseTest::~StringCaseTest() {}
// Factory with external linkage: returns a newly allocated StringCaseTest
// as an IntlTest pointer.
80 extern IntlTest
*createStringCaseTest() {
81 return new StringCaseTest();
// Test dispatcher: logs the suite name and registers every test method through
// TESTCASE_AUTO. The last parameter (par) is unused.
85 StringCaseTest::runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char * /*par*/) {
87 logln("TestSuite StringCaseTest: ");
90 TESTCASE_AUTO(TestCaseConversion
);
// The entries after this #if need break iteration, file I/O and legacy
// conversion. NOTE(review): the matching #endif is not visible here;
// presumably it follows TestBug13127 -- confirm.
91 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
92 TESTCASE_AUTO(TestCasing
);
93 TESTCASE_AUTO(TestBug13127
);
95 TESTCASE_AUTO(TestFullCaseFoldingIterator
);
96 TESTCASE_AUTO(TestGreekUpper
);
97 TESTCASE_AUTO(TestLongUpper
);
98 TESTCASE_AUTO(TestMalformedUTF8
);
99 TESTCASE_AUTO(TestBufferOverflow
);
100 TESTCASE_AUTO(TestEdits
);
101 TESTCASE_AUTO(TestCaseMapWithEdits
);
102 TESTCASE_AUTO(TestCaseMapUTF8WithEdits
);
103 TESTCASE_AUTO(TestLongUnicodeString
);
// Exercises UnicodeString::toLower(), toUpper() and foldCase() with hard-coded
// inputs: root, Turkish, German, Greek, English and Lithuanian locales, the
// dotted/dotless i, final sigma, sharp s, ligatures and supplementary-plane
// (Deseret) letters. Every mismatch is reported through errln().
108 StringCaseTest::TestCaseConversion()
// Expected results, spelled as UTF-16 code units.
110 static const UChar uppercaseGreek
[] =
111 { 0x399, 0x395, 0x3a3, 0x3a5, 0x3a3, 0x20, 0x03a7, 0x3a1, 0x399, 0x3a3, 0x3a4,
115 static const UChar lowercaseGreek
[] =
116 { 0x3b9, 0x3b5, 0x3c3, 0x3c5, 0x3c2, 0x20, 0x03c7, 0x3c1, 0x3b9, 0x3c3, 0x3c4,
120 static const UChar lowercaseTurkish
[] =
121 { 0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0x2c, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x63, 0x6f,
122 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x0131, 0x6e, 0x6f, 0x70, 0x6c, 0x65, 0x21, 0 };
124 static const UChar uppercaseTurkish
[] =
125 { 0x54, 0x4f, 0x50, 0x4b, 0x41, 0x50, 0x49, 0x20, 0x50, 0x41, 0x4c, 0x41, 0x43, 0x45, 0x2c, 0x20,
126 0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4c, 0 };
128 UnicodeString expectedResult
;
// Input starting with U+0130 (capital I with dot above).
131 test3
+= (UChar32
)0x0130;
132 test3
+= "STANBUL, NOT CONSTANTINOPLE!";
// Root-locale lowercasing: U+0130 becomes "i" followed by U+0307.
134 UnicodeString
test4(test3
);
135 test4
.toLower(Locale(""));
136 expectedResult
= UnicodeString("i\\u0307stanbul, not constantinople!", "").unescape();
137 if (test4
!= expectedResult
)
138 errln("1. toLower failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// Turkish lowercasing of the same kind of input.
141 test4
.toLower(Locale("tr", "TR"));
142 expectedResult
= lowercaseTurkish
;
143 if (test4
!= expectedResult
)
144 errln("2. toLower failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// Input containing U+0131 (dotless i).
147 test3
+= (UChar32
)0x0131;
148 test3
+= " palace, istanbul";
151 test4
.toUpper(Locale(""));
152 expectedResult
= "TOPKAPI PALACE, ISTANBUL";
153 if (test4
!= expectedResult
)
154 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
157 test4
.toUpper(Locale("tr", "TR"));
158 expectedResult
= uppercaseTurkish
;
159 if (test4
!= expectedResult
)
160 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// German: sharp s (U+00DF) uppercases to "SS".
162 test3
= CharsToUnicodeString("S\\u00FC\\u00DFmayrstra\\u00DFe");
164 test3
.toUpper(Locale("de", "DE"));
165 expectedResult
= CharsToUnicodeString("S\\u00DCSSMAYRSTRASSE");
166 if (test3
!= expectedResult
)
167 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test3
+ "\".");
// Greek round trip between the two arrays defined above.
169 test4
.replace(0, test4
.length(), uppercaseGreek
);
171 test4
.toLower(Locale("el", "GR"));
172 expectedResult
= lowercaseGreek
;
173 if (test4
!= expectedResult
)
174 errln("toLower failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
176 test4
.replace(0, test4
.length(), lowercaseGreek
);
179 expectedResult
= uppercaseGreek
;
180 if (test4
!= expectedResult
)
181 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
183 // more string case mapping tests with the new implementation
187 beforeLower
[]= { 0x61, 0x42, 0x49, 0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff },
188 lowerRoot
[]= { 0x61, 0x62, 0x69, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
189 lowerTurkish
[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
191 beforeUpper
[]= { 0x61, 0x42, 0x69, 0x3c2, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xfb03, 0xfb03, 0xd93f, 0xdfff },
192 upperRoot
[]= { 0x41, 0x42, 0x49, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
193 upperTurkish
[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
195 beforeMiniUpper
[]= { 0xdf, 0x61 },
196 miniUpper
[]= { 0x53, 0x53, 0x41 };
// In the checks below the string is built over the constant array and the
// result is compared both by length and by content.
200 /* lowercase with root locale */
201 s
=UnicodeString(FALSE
, beforeLower
, UPRV_LENGTHOF(beforeLower
));
203 if( s
.length()!=UPRV_LENGTHOF(lowerRoot
) ||
204 s
!=UnicodeString(FALSE
, lowerRoot
, s
.length())
206 errln("error in toLower(root locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, lowerRoot
, UPRV_LENGTHOF(lowerRoot
)) + "\"");
209 /* lowercase with turkish locale */
210 s
=UnicodeString(FALSE
, beforeLower
, UPRV_LENGTHOF(beforeLower
));
211 s
.setCharAt(0, beforeLower
[0]).toLower(Locale("tr"));
212 if( s
.length()!=UPRV_LENGTHOF(lowerTurkish
) ||
213 s
!=UnicodeString(FALSE
, lowerTurkish
, s
.length())
215 errln("error in toLower(turkish locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, lowerTurkish
, UPRV_LENGTHOF(lowerTurkish
)) + "\"");
218 /* uppercase with root locale */
219 s
=UnicodeString(FALSE
, beforeUpper
, UPRV_LENGTHOF(beforeUpper
));
220 s
.setCharAt(0, beforeUpper
[0]).toUpper(Locale(""));
221 if( s
.length()!=UPRV_LENGTHOF(upperRoot
) ||
222 s
!=UnicodeString(FALSE
, upperRoot
, s
.length())
224 errln("error in toUpper(root locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, upperRoot
, UPRV_LENGTHOF(upperRoot
)) + "\"");
227 /* uppercase with turkish locale */
228 s
=UnicodeString(FALSE
, beforeUpper
, UPRV_LENGTHOF(beforeUpper
));
229 s
.toUpper(Locale("tr"));
230 if( s
.length()!=UPRV_LENGTHOF(upperTurkish
) ||
231 s
!=UnicodeString(FALSE
, upperTurkish
, s
.length())
233 errln("error in toUpper(turkish locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, upperTurkish
, UPRV_LENGTHOF(upperTurkish
)) + "\"");
236 /* uppercase a short string with root locale */
237 s
=UnicodeString(FALSE
, beforeMiniUpper
, UPRV_LENGTHOF(beforeMiniUpper
));
238 s
.setCharAt(0, beforeMiniUpper
[0]).toUpper("");
239 if( s
.length()!=UPRV_LENGTHOF(miniUpper
) ||
240 s
!=UnicodeString(FALSE
, miniUpper
, s
.length())
242 errln("error in toUpper(root locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, miniUpper
, UPRV_LENGTHOF(miniUpper
)) + "\"");
246 // test some supplementary characters (>= Unicode 3.1)
251 deseretInput
=UnicodeString("\\U0001043C\\U00010414", "").unescape(),
252 deseretLower
=UnicodeString("\\U0001043C\\U0001043C", "").unescape(),
253 deseretUpper
=UnicodeString("\\U00010414\\U00010414", "").unescape();
254 (t
=deseretInput
).toLower();
255 if(t
!=deseretLower
) {
256 errln("error lowercasing Deseret (plane 1) characters");
258 (t
=deseretInput
).toUpper();
259 if(t
!=deseretUpper
) {
260 errln("error uppercasing Deseret (plane 1) characters");
264 // test some more cases that looked like problems
269 ljInput
=UnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 \\U0001043C\\U00010414", "").unescape(),
270 ljLower
=UnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 \\U0001043C\\U0001043C", "").unescape(),
271 ljUpper
=UnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 \\U00010414\\U00010414", "").unescape();
272 (t
=ljInput
).toLower("en");
274 errln("error lowercasing LJ characters");
276 (t
=ljInput
).toUpper("en");
278 errln("error uppercasing LJ characters");
282 #if !UCONFIG_NO_NORMALIZATION
283 // some context-sensitive casing depends on normalization data being present
285 // Unicode 3.1.1 SpecialCasing tests
289 // sigmas preceded and/or followed by cased letters
291 sigmas
=UnicodeString("i\\u0307\\u03a3\\u0308j \\u0307\\u03a3\\u0308j i\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(),
292 sigmasLower
=UnicodeString("i\\u0307\\u03c3\\u0308j \\u0307\\u03c3\\u0308j i\\u00ad\\u03c2\\u0308 \\u0307\\u03c3\\u0308 ", "").unescape(),
293 sigmasUpper
=UnicodeString("I\\u0307\\u03a3\\u0308J \\u0307\\u03a3\\u0308J I\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape();
295 (t
=sigmas
).toLower();
297 errln("error in sigmas.toLower()=\"" + t
+ "\" expected \"" + sigmasLower
+ "\"");
300 (t
=sigmas
).toUpper(Locale(""));
302 errln("error in sigmas.toUpper()=\"" + t
+ "\" expected \"" + sigmasUpper
+ "\"");
305 // turkish & azerbaijani dotless i & dotted I
306 // remove dot above if there was a capital I before and there are no more accents above
308 dots
=UnicodeString("I \\u0130 I\\u0307 I\\u0327\\u0307 I\\u0301\\u0307 I\\u0327\\u0307\\u0301", "").unescape(),
309 dotsTurkish
=UnicodeString("\\u0131 i i i\\u0327 \\u0131\\u0301\\u0307 i\\u0327\\u0301", "").unescape(),
310 dotsDefault
=UnicodeString("i i\\u0307 i\\u0307 i\\u0327\\u0307 i\\u0301\\u0307 i\\u0327\\u0307\\u0301", "").unescape();
312 (t
=dots
).toLower("tr");
314 errln("error in dots.toLower(tr)=\"" + t
+ "\" expected \"" + dotsTurkish
+ "\"");
317 (t
=dots
).toLower("de");
319 errln("error in dots.toLower(de)=\"" + t
+ "\" expected \"" + dotsDefault
+ "\"");
323 // more Unicode 3.1.1 tests
327 // lithuanian dot above in uppercasing
329 dots
=UnicodeString("a\\u0307 \\u0307 i\\u0307 j\\u0327\\u0307 j\\u0301\\u0307", "").unescape(),
330 dotsLithuanian
=UnicodeString("A\\u0307 \\u0307 I J\\u0327 J\\u0301\\u0307", "").unescape(),
331 dotsDefault
=UnicodeString("A\\u0307 \\u0307 I\\u0307 J\\u0327\\u0307 J\\u0301\\u0307", "").unescape();
333 (t
=dots
).toUpper("lt");
334 if(t
!=dotsLithuanian
) {
335 errln("error in dots.toUpper(lt)=\"" + t
+ "\" expected \"" + dotsLithuanian
+ "\"");
338 (t
=dots
).toUpper("de");
340 errln("error in dots.toUpper(de)=\"" + t
+ "\" expected \"" + dotsDefault
+ "\"");
343 // lithuanian adds dot above to i in lowercasing if there are more above accents
345 i
=UnicodeString("I I\\u0301 J J\\u0301 \\u012e \\u012e\\u0301 \\u00cc\\u00cd\\u0128", "").unescape(),
346 iLithuanian
=UnicodeString("i i\\u0307\\u0301 j j\\u0307\\u0301 \\u012f \\u012f\\u0307\\u0301 i\\u0307\\u0300i\\u0307\\u0301i\\u0307\\u0303", "").unescape(),
347 iDefault
=UnicodeString("i i\\u0301 j j\\u0301 \\u012f \\u012f\\u0301 \\u00ec\\u00ed\\u0129", "").unescape();
351 errln("error in i.toLower(lt)=\"" + t
+ "\" expected \"" + iLithuanian
+ "\"");
356 errln("error in i.toLower(de)=\"" + t
+ "\" expected \"" + iDefault
+ "\"");
// Case folding: f is the expected default folding of s, g the expected result
// with U_FOLD_CASE_EXCLUDE_SPECIAL_I (the two differ only for U+0130).
365 s
=UnicodeString("A\\u00df\\u00b5\\ufb03\\U0001040c\\u0130\\u0131", "").unescape(),
366 f
=UnicodeString("ass\\u03bcffi\\U00010434i\\u0307\\u0131", "").unescape(),
367 g
=UnicodeString("ass\\u03bcffi\\U00010434i\\u0131", "").unescape(),
372 errln("error in foldCase(\"" + s
+ "\", default)=\"" + t
+ "\" but expected \"" + f
+ "\"");
375 // alternate handling for dotted I/dotless i (U+0130, U+0131)
376 (t
=s
).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I
);
378 errln("error in foldCase(\"" + s
+ "\", U_FOLD_CASE_EXCLUDE_SPECIAL_I)=\"" + t
+ "\" but expected \"" + g
+ "\"");
383 // data-driven case mapping tests ------------------------------------------ ***
393 // names of TestData children in casing.txt
394 static const char *const dataNames
[TEST_COUNT
+1]={
// Runs one data-driven test case twice: first through the UnicodeString
// case-mapping methods, then through the UTF-8 UCaseMap C API (converting
// with u_strToUTF8()/u_strFromUTF8()). Each result is compared with output.
//   input/output - source text and expected mapping
//   iter         - break iterator for titlecasing, passed as an untyped pointer
//   localeID     - locale used for Locale and ucasemap_open()
//   options      - option bits handed to toTitle()/foldCase()/ucasemap_open()
403 StringCaseTest::TestCasingImpl(const UnicodeString
&input
,
404 const UnicodeString
&output
,
406 void *iter
, const char *localeID
, uint32_t options
) {
408 UnicodeString result
;
410 Locale
locale(localeID
);
// UnicodeString API: one call per kind of mapping.
416 result
.toLower(locale
);
420 result
.toUpper(locale
);
422 #if !UCONFIG_NO_BREAK_ITERATION
425 result
.toTitle((BreakIterator
*)iter
, locale
, options
);
430 result
.foldCase(options
);
434 break; // won't happen
437 dataerrln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name
);
// With options==0 the toTitle() overload without an options argument is
// exercised as well.
439 #if !UCONFIG_NO_BREAK_ITERATION
440 if(whichCase
==TEST_TITLE
&& options
==0) {
442 result
.toTitle((BreakIterator
*)iter
, locale
);
444 dataerrln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res");
// UTF-8 API: fixed 100-byte buffers hold the converted input and the output.
450 char utf8In
[100], utf8Out
[100];
451 int32_t utf8InLength
, utf8OutLength
, resultLength
;
454 IcuTestErrorCode
errorCode(*this, "TestCasingImpl");
455 LocalUCaseMapPointer
csm(ucasemap_open(localeID
, options
, errorCode
));
456 #if !UCONFIG_NO_BREAK_ITERATION
458 // Clone the break iterator so that the UCaseMap can safely adopt it.
459 UBreakIterator
*clone
=ubrk_safeClone((UBreakIterator
*)iter
, NULL
, NULL
, errorCode
);
460 ucasemap_setBreakIterator(csm
.getAlias(), clone
, errorCode
);
464 u_strToUTF8(utf8In
, (int32_t)sizeof(utf8In
), &utf8InLength
, input
.getBuffer(), input
.length(), errorCode
);
467 name
="ucasemap_utf8ToLower";
468 utf8OutLength
=ucasemap_utf8ToLower(csm
.getAlias(),
469 utf8Out
, (int32_t)sizeof(utf8Out
),
470 utf8In
, utf8InLength
, errorCode
);
473 name
="ucasemap_utf8ToUpper";
474 utf8OutLength
=ucasemap_utf8ToUpper(csm
.getAlias(),
475 utf8Out
, (int32_t)sizeof(utf8Out
),
476 utf8In
, utf8InLength
, errorCode
);
478 #if !UCONFIG_NO_BREAK_ITERATION
480 name
="ucasemap_utf8ToTitle";
481 utf8OutLength
=ucasemap_utf8ToTitle(csm
.getAlias(),
482 utf8Out
, (int32_t)sizeof(utf8Out
),
483 utf8In
, utf8InLength
, errorCode
);
487 name
="ucasemap_utf8FoldCase";
488 utf8OutLength
=ucasemap_utf8FoldCase(csm
.getAlias(),
489 utf8Out
, (int32_t)sizeof(utf8Out
),
490 utf8In
, utf8InLength
, errorCode
);
495 break; // won't happen
// Convert the UTF-8 output back to UTF-16 directly into result's buffer;
// on failure the buffer is released with length 0.
497 buffer
=result
.getBuffer(utf8OutLength
);
498 u_strFromUTF8(buffer
, result
.getCapacity(), &resultLength
, utf8Out
, utf8OutLength
, errorCode
);
499 result
.releaseBuffer(errorCode
.isSuccess() ? resultLength
: 0);
501 if(errorCode
.isFailure()) {
502 errcheckln(errorCode
, "error: %s() got an error for a test case from casing.res - %s", name
, u_errorName(errorCode
));
504 } else if(result
!=output
) {
505 errln("error: %s() got a wrong result for a test case from casing.res", name
);
506 errln("expected \"" + output
+ "\" got \"" + result
+ "\"" );
// Data-driven driver: loads the "casing" test data module, walks the sections
// named in dataNames, reads Input/Output (plus Locale, Type and Options where
// they apply) for every case and hands them to TestCasingImpl(). Ends with an
// API-coverage check of UnicodeString::toTitle(NULL).
511 StringCaseTest::TestCasing() {
512 UErrorCode status
= U_ZERO_ERROR
;
513 #if !UCONFIG_NO_BREAK_ITERATION
514 LocalUBreakIteratorPointer iter
;
517 UnicodeString locale
, input
, output
, optionsString
, result
;
519 int32_t whichCase
, type
;
520 LocalPointer
<TestDataModule
> driver(TestDataModule::getTestDataModule("casing", *this, status
));
521 if(U_SUCCESS(status
)) {
522 for(whichCase
=0; whichCase
<TEST_COUNT
; ++whichCase
) {
523 #if UCONFIG_NO_BREAK_ITERATION
524 if(whichCase
==TEST_TITLE
) {
528 LocalPointer
<TestData
> casingTest(driver
->createTestData(dataNames
[whichCase
], status
));
529 if(U_FAILURE(status
)) {
530 errln("TestCasing failed to createTestData(%s) - %s", dataNames
[whichCase
], u_errorName(status
));
533 const DataMap
*myCase
= NULL
;
534 while(casingTest
->nextCase(myCase
, status
)) {
535 input
= myCase
->getString("Input", status
);
536 output
= myCase
->getString("Output", status
);
// Case folding has no Locale field in the test data.
538 if(whichCase
!=TEST_FOLD
) {
539 locale
= myCase
->getString("Locale", status
);
541 locale
.extract(0, 0x7fffffff, cLocaleID
, sizeof(cLocaleID
), "");
// Titlecasing: the Type field selects the break iterator to open.
543 #if !UCONFIG_NO_BREAK_ITERATION
544 if(whichCase
==TEST_TITLE
) {
545 type
= myCase
->getInt("Type", status
);
547 iter
.adoptInstead(ubrk_open((UBreakIteratorType
)type
, cLocaleID
, NULL
, 0, &status
));
548 } else if(type
==-2) {
549 // Open a trivial break iterator that only delivers { 0, length }
550 // or even just { 0 } as boundaries.
551 static const UChar rules
[] = { 0x2e, 0x2a, 0x3b }; // ".*;"
552 UParseError parseError
;
553 iter
.adoptInstead(ubrk_openRules(rules
, UPRV_LENGTHOF(rules
), NULL
, 0, &parseError
, &status
));
// Options are letter flags: T, L and A each add one option bit.
558 if(whichCase
==TEST_TITLE
|| whichCase
==TEST_FOLD
) {
559 optionsString
= myCase
->getString("Options", status
);
560 if(optionsString
.indexOf((UChar
)0x54)>=0) { // T
561 options
|=U_FOLD_CASE_EXCLUDE_SPECIAL_I
;
563 if(optionsString
.indexOf((UChar
)0x4c)>=0) { // L
564 options
|=U_TITLECASE_NO_LOWERCASE
;
566 if(optionsString
.indexOf((UChar
)0x41)>=0) { // A
567 options
|=U_TITLECASE_NO_BREAK_ADJUSTMENT
;
// A setup failure is reported as a data error and the status is cleared.
571 if(U_FAILURE(status
)) {
572 dataerrln("error: TestCasing() setup failed for %s test case from casing.res: %s", dataNames
[whichCase
], u_errorName(status
));
573 status
= U_ZERO_ERROR
;
// Without break iteration a placeholder iter is declared so that the
// TestCasingImpl() call below compiles unchanged.
575 #if UCONFIG_NO_BREAK_ITERATION
576 LocalPointer
<UMemory
> iter
;
578 TestCasingImpl(input
, output
, whichCase
, iter
.getAlias(), cLocaleID
, options
);
581 #if !UCONFIG_NO_BREAK_ITERATION
582 iter
.adoptInstead(NULL
);
588 #if !UCONFIG_NO_BREAK_ITERATION
589 // more tests for API coverage
591 input
=UNICODE_STRING_SIMPLE("sTrA\\u00dfE").unescape();
592 (result
=input
).toTitle(NULL
);
593 if(result
!=UNICODE_STRING_SIMPLE("Stra\\u00dfe").unescape()) {
594 dataerrln("UnicodeString::toTitle(NULL) failed.");
// Walks FullCaseFoldingIterator and validates each (code point, full folding)
// pair it yields. Also spot-checks that "ffi" is reported for U+FB03 and "ss"
// for U+00DF and U+1E9E, i.e. exactly three specific hits.
600 StringCaseTest::TestFullCaseFoldingIterator() {
601 UnicodeString ffi
=UNICODE_STRING_SIMPLE("ffi");
602 UnicodeString ss
=UNICODE_STRING_SIMPLE("ss");
603 FullCaseFoldingIterator iter
;
605 int32_t countSpecific
=0;
// next() returns a negative value once the iteration is exhausted.
608 while((c
=iter
.next(full
))>=0) {
610 // Check that the full Case_Folding has more than 1 code point.
611 if(!full
.hasMoreChar32Than(0, 0x7fffffff, 1)) {
612 errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding has at most 1 code point", (long)c
);
615 // Check that full == Case_Folding(c).
619 errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding != cf(c)", (long)c
);
622 // Spot-check a couple of specific cases.
623 if((full
==ffi
&& c
==0xfb03) || (full
==ss
&& (c
==0xdf || c
==0x1e9e))) {
627 if(countSpecific
!=3) {
628 errln("error: FullCaseFoldingIterator did not yield exactly the expected specific cases");
631 errln("error: FullCaseFoldingIterator yielded only %d (cp, full) pairs", (int)count
);
// Uppercases s for Greek through three APIs and checks each against expected:
//   1. UnicodeString::toUpper() with GREEK_LOCALE_;
//   2. u_strToUpper() with locale "el", at several destination capacities;
//   3. ucasemap_utf8ToUpper() with an "el" UCaseMap, again at several capacities.
// For the capacity loops it also verifies the returned length, the error code
// (overflow / not-terminated warning / success) and NUL termination.
636 StringCaseTest::assertGreekUpper(const char16_t *s
, const char16_t *expected
) {
637 UnicodeString
s16(s
);
638 UnicodeString
expected16(expected
);
639 UnicodeString msg
= UnicodeString("UnicodeString::toUpper/Greek(\"") + s16
+ "\")";
640 UnicodeString
result16(s16
);
641 result16
.toUpper(GREEK_LOCALE_
);
642 assertEquals(msg
, expected16
, result16
);
644 msg
= UnicodeString("u_strToUpper/Greek(\"") + s16
+ "\") cap=";
645 int32_t length
= expected16
.length();
646 int32_t capacities
[] = {
647 // Keep in sync with the UTF-8 capacities near the bottom of this function.
648 0, length
/ 2, length
- 1, length
, length
+ 1
650 for (int32_t i
= 0; i
< UPRV_LENGTHOF(capacities
); ++i
) {
651 int32_t cap
= capacities
[i
];
// Pre-fill the destination with 0x55AA so that leftovers cannot be mistaken
// for output or for a terminating NUL.
652 UChar
*dest16
= result16
.getBuffer(expected16
.length() + 1);
653 u_memset(dest16
, 0x55AA, result16
.getCapacity());
654 UErrorCode errorCode
= U_ZERO_ERROR
;
655 length
= u_strToUpper(dest16
, cap
, s16
.getBuffer(), s16
.length(), "el", &errorCode
);
// The full length must be returned even when the output did not fit.
656 assertEquals(msg
+ cap
, expected16
.length(), length
);
657 UErrorCode expectedErrorCode
;
658 if (cap
< expected16
.length()) {
659 expectedErrorCode
= U_BUFFER_OVERFLOW_ERROR
;
660 } else if (cap
== expected16
.length()) {
661 expectedErrorCode
= U_STRING_NOT_TERMINATED_WARNING
;
663 expectedErrorCode
= U_ZERO_ERROR
;
664 assertEquals(msg
+ cap
+ " NUL", 0, dest16
[length
]);
666 assertEquals(msg
+ cap
+ " errorCode", expectedErrorCode
, errorCode
);
667 result16
.releaseBuffer(length
);
668 if (cap
>= expected16
.length()) {
669 assertEquals(msg
+ cap
, expected16
, result16
);
// UTF-8 variant: one call into dest8 produces the reference output.
673 UErrorCode errorCode
= U_ZERO_ERROR
;
674 LocalUCaseMapPointer
csm(ucasemap_open("el", 0, &errorCode
));
675 assertSuccess("ucasemap_open", errorCode
);
677 s16
.toUTF8String(s8
);
678 msg
= UnicodeString("ucasemap_utf8ToUpper/Greek(\"") + s16
+ "\")";
680 length
= ucasemap_utf8ToUpper(csm
.getAlias(), dest8
, UPRV_LENGTHOF(dest8
),
681 s8
.data(), s8
.length(), &errorCode
);
682 assertSuccess("ucasemap_utf8ToUpper", errorCode
);
683 StringPiece
result8(dest8
, length
);
684 UnicodeString result16From8
= UnicodeString::fromUTF8(result8
);
685 assertEquals(msg
, expected16
, result16From8
);
// Recompute the capacities from the UTF-8 result length.
688 capacities
[1] = length
/ 2;
689 capacities
[2] = length
- 1;
690 capacities
[3] = length
;
691 capacities
[4] = length
+ 1;
693 int32_t expected8Length
= length
; // Assuming the previous call worked.
694 for (int32_t i
= 0; i
< UPRV_LENGTHOF(capacities
); ++i
) {
695 int32_t cap
= capacities
[i
];
696 memset(dest8b
, 0x5A, UPRV_LENGTHOF(dest8b
));
697 UErrorCode errorCode
= U_ZERO_ERROR
;
698 length
= ucasemap_utf8ToUpper(csm
.getAlias(), dest8b
, cap
,
699 s8
.data(), s8
.length(), &errorCode
);
700 assertEquals(msg
+ cap
, expected8Length
, length
);
701 UErrorCode expectedErrorCode
;
702 if (cap
< expected8Length
) {
703 expectedErrorCode
= U_BUFFER_OVERFLOW_ERROR
;
704 } else if (cap
== expected8Length
) {
705 expectedErrorCode
= U_STRING_NOT_TERMINATED_WARNING
;
707 expectedErrorCode
= U_ZERO_ERROR
;
708 // Casts to int32_t to avoid matching UBool.
709 assertEquals(msg
+ cap
+ " NUL", (int32_t)0, (int32_t)dest8b
[length
]);
711 assertEquals(msg
+ cap
+ " errorCode", expectedErrorCode
, errorCode
);
// When the output fit, it must match the reference output byte for byte.
712 if (cap
>= expected8Length
) {
713 assertEquals(msg
+ cap
+ " (memcmp)", 0, memcmp(dest8
, dest8b
, expected8Length
));
719 StringCaseTest::TestGreekUpper() {
720 // http://bugs.icu-project.org/trac/ticket/5456
721 assertGreekUpper(u
"άδικος, κείμενο, ίριδα", u
"ΑΔΙΚΟΣ, ΚΕΙΜΕΝΟ, ΙΡΙΔΑ");
722 // https://bugzilla.mozilla.org/show_bug.cgi?id=307039
723 // https://bug307039.bmoattachments.org/attachment.cgi?id=194893
724 assertGreekUpper(u
"Πατάτα", u
"ΠΑΤΑΤΑ");
725 assertGreekUpper(u
"Αέρας, Μυστήριο, Ωραίο", u
"ΑΕΡΑΣ, ΜΥΣΤΗΡΙΟ, ΩΡΑΙΟ");
726 assertGreekUpper(u
"Μαΐου, Πόρος, Ρύθμιση", u
"ΜΑΪΟΥ, ΠΟΡΟΣ, ΡΥΘΜΙΣΗ");
727 assertGreekUpper(u
"ΰ, Τηρώ, Μάιος", u
"Ϋ, ΤΗΡΩ, ΜΑΪΟΣ");
728 assertGreekUpper(u
"άυλος", u
"ΑΫΛΟΣ");
729 assertGreekUpper(u
"ΑΫΛΟΣ", u
"ΑΫΛΟΣ");
730 assertGreekUpper(u
"Άκλιτα ρήματα ή άκλιτες μετοχές", u
"ΑΚΛΙΤΑ ΡΗΜΑΤΑ Ή ΑΚΛΙΤΕΣ ΜΕΤΟΧΕΣ");
731 // http://www.unicode.org/udhr/d/udhr_ell_monotonic.html
732 assertGreekUpper(u
"Επειδή η αναγνώριση της αξιοπρέπειας", u
"ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ ΤΗΣ ΑΞΙΟΠΡΕΠΕΙΑΣ");
733 assertGreekUpper(u
"νομικού ή διεθνούς", u
"ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ");
734 // http://unicode.org/udhr/d/udhr_ell_polytonic.html
735 assertGreekUpper(u
"Ἐπειδὴ ἡ ἀναγνώριση", u
"ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ");
736 assertGreekUpper(u
"νομικοῦ ἢ διεθνοῦς", u
"ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ");
737 // From Google bug report
738 assertGreekUpper(u
"Νέο, Δημιουργία", u
"ΝΕΟ, ΔΗΜΙΟΥΡΓΙΑ");
739 // http://crbug.com/234797
740 assertGreekUpper(u
"Ελάτε να φάτε τα καλύτερα παϊδάκια!", u
"ΕΛΑΤΕ ΝΑ ΦΑΤΕ ΤΑ ΚΑΛΥΤΕΡΑ ΠΑΪΔΑΚΙΑ!");
741 assertGreekUpper(u
"Μαΐου, τρόλεϊ", u
"ΜΑΪΟΥ, ΤΡΟΛΕΪ");
742 assertGreekUpper(u
"Το ένα ή το άλλο.", u
"ΤΟ ΕΝΑ Ή ΤΟ ΑΛΛΟ.");
743 // http://multilingualtypesetting.co.uk/blog/greek-typesetting-tips/
744 assertGreekUpper(u
"ρωμέικα", u
"ΡΩΜΕΪΚΑ");
// Regression test for an int32_t overflow in uppercasing: a string of more
// than 2^30 copies of U+0390 would triple in length. u_strToUpper() must
// report U_INDEX_OUTOFBOUNDS_ERROR instead of crashing. The test logs and
// gives up when it is not run in exhaustive mode or when the large buffers
// cannot be allocated.
748 StringCaseTest::TestLongUpper() {
750 logln("not exhaustive mode: skipping this test");
753 // Ticket #12663, crash with an extremely long string where
754 // U+0390 maps to 0399 0308 0301 so that the result is three times as long
755 // and overflows an int32_t.
756 int32_t length
= 0x40000004; // more than 1G UChars
757 UnicodeString
s(length
, (UChar32
)0x390, length
);
758 UnicodeString result
;
759 UChar
*dest
= result
.getBuffer(length
+ 1);
// Either allocation may fail on machines with little memory.
760 if (s
.isBogus() || dest
== NULL
) {
761 logln("Out of memory, unable to run this test on this machine.");
764 IcuTestErrorCode
errorCode(*this, "TestLongUpper");
765 int32_t destLength
= u_strToUpper(dest
, result
.getCapacity(),
766 s
.getBuffer(), s
.length(), "", errorCode
);
767 result
.releaseBuffer(destLength
);
// reset() returns the code that was set and clears it.
768 if (errorCode
.reset() != U_INDEX_OUTOFBOUNDS_ERROR
) {
769 errln("expected U_INDEX_OUTOFBOUNDS_ERROR, got %s (destLength is undefined, got %ld)",
770 errorCode
.errorName(), (long)destLength
);
774 void StringCaseTest::TestMalformedUTF8() {
776 IcuTestErrorCode
errorCode(*this, "TestMalformedUTF8");
777 LocalUCaseMapPointer
csm(ucasemap_open("en", U_TITLECASE_NO_BREAK_ADJUSTMENT
, errorCode
));
778 if (errorCode
.isFailure()) {
779 errln("ucasemap_open(English) failed - %s", errorCode
.errorName());
782 char src
[1] = { (char)0x85 }; // malformed UTF-8
783 char dest
[3] = { 0, 0, 0 };
785 #if !UCONFIG_NO_BREAK_ITERATION
786 destLength
= ucasemap_utf8ToTitle(csm
.getAlias(), dest
, 3, src
, 1, errorCode
);
787 if (errorCode
.isFailure() || destLength
!= 1 || dest
[0] != src
[0]) {
788 errln("ucasemap_utf8ToTitle(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
789 errorCode
.errorName(), (int)destLength
, dest
[0]);
795 destLength
= ucasemap_utf8ToLower(csm
.getAlias(), dest
, 3, src
, 1, errorCode
);
796 if (errorCode
.isFailure() || destLength
!= 1 || dest
[0] != src
[0]) {
797 errln("ucasemap_utf8ToLower(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
798 errorCode
.errorName(), (int)destLength
, dest
[0]);
803 destLength
= ucasemap_utf8ToUpper(csm
.getAlias(), dest
, 3, src
, 1, errorCode
);
804 if (errorCode
.isFailure() || destLength
!= 1 || dest
[0] != src
[0]) {
805 errln("ucasemap_utf8ToUpper(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
806 errorCode
.errorName(), (int)destLength
, dest
[0]);
811 destLength
= ucasemap_utf8FoldCase(csm
.getAlias(), dest
, 3, src
, 1, errorCode
);
812 if (errorCode
.isFailure() || destLength
!= 1 || dest
[0] != src
[0]) {
813 errln("ucasemap_utf8FoldCase(\\x85) failed: %s destLength=%d dest[0]=0x%02x",
814 errorCode
.errorName(), (int)destLength
, dest
[0]);
818 void StringCaseTest::TestBufferOverflow() {
819 // Ticket #12849, incorrect result from Title Case preflight operation,
820 // when buffer overflow error is expected.
821 IcuTestErrorCode
errorCode(*this, "TestBufferOverflow");
822 LocalUCaseMapPointer
csm(ucasemap_open("en", 0, errorCode
));
823 if (errorCode
.isFailure()) {
824 errln("ucasemap_open(English) failed - %s", errorCode
.errorName());
828 UnicodeString
data("hello world");
830 #if !UCONFIG_NO_BREAK_ITERATION
831 result
= ucasemap_toTitle(csm
.getAlias(), NULL
, 0, data
.getBuffer(), data
.length(), errorCode
);
832 if (errorCode
.get() != U_BUFFER_OVERFLOW_ERROR
|| result
!= data
.length()) {
833 errln("%s:%d ucasemap_toTitle(\"hello world\") failed: "
834 "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)",
835 __FILE__
, __LINE__
, data
.length(), errorCode
.errorName(), result
);
840 std::string data_utf8
;
841 data
.toUTF8String(data_utf8
);
842 #if !UCONFIG_NO_BREAK_ITERATION
843 result
= ucasemap_utf8ToTitle(csm
.getAlias(), NULL
, 0, data_utf8
.c_str(), data_utf8
.length(), errorCode
);
844 if (errorCode
.get() != U_BUFFER_OVERFLOW_ERROR
|| result
!= (int32_t)data_utf8
.length()) {
845 errln("%s:%d ucasemap_toTitle(\"hello world\") failed: "
846 "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)",
847 __FILE__
, __LINE__
, data_utf8
.length(), errorCode
.errorName(), result
);
// Verifies an Edits iterator against a table of expected changes.
//   name          - label used in assertion messages
//   ei1           - iterator advanced sequentially with next()
//   ei2           - equal iterator, positioned with findSourceIndex()
//   expected      - expected edits, expLength entries
//   withUnchanged - whether the iterators also report unchanged ranges
//   errorCode     - passed through to the iterator calls
// For every expected entry both iterators must report the same change flag,
// lengths and source/destination/replacement indexes.
853 void StringCaseTest::checkEditsIter(
854 const UnicodeString
&name
,
855 Edits::Iterator ei1
, Edits::Iterator ei2
, // two equal iterators
856 const EditChange expected
[], int32_t expLength
, UBool withUnchanged
,
857 UErrorCode
&errorCode
) {
// A negative source index must not be found.
858 assertFalse(name
, ei2
.findSourceIndex(-1, errorCode
));
// Running totals that the iterators are expected to report.
860 int32_t expSrcIndex
= 0;
861 int32_t expDestIndex
= 0;
862 int32_t expReplIndex
= 0;
863 for (int32_t expIndex
= 0; expIndex
< expLength
; ++expIndex
) {
864 const EditChange
&expect
= expected
[expIndex
];
865 UnicodeString msg
= UnicodeString(name
).append(u
' ') + expIndex
;
// A changes-only iterator skips the unchanged entries of the table.
866 if (withUnchanged
|| expect
.change
) {
867 assertTrue(msg
, ei1
.next(errorCode
));
868 assertEquals(msg
, expect
.change
, ei1
.hasChange());
869 assertEquals(msg
, expect
.oldLength
, ei1
.oldLength());
870 assertEquals(msg
, expect
.newLength
, ei1
.newLength());
871 assertEquals(msg
, expSrcIndex
, ei1
.sourceIndex());
872 assertEquals(msg
, expDestIndex
, ei1
.destinationIndex());
873 assertEquals(msg
, expReplIndex
, ei1
.replacementIndex());
// Entries with an empty source range cannot be located by source index.
876 if (expect
.oldLength
> 0) {
877 assertTrue(msg
, ei2
.findSourceIndex(expSrcIndex
, errorCode
));
878 assertEquals(msg
, expect
.change
, ei2
.hasChange());
879 assertEquals(msg
, expect
.oldLength
, ei2
.oldLength());
880 assertEquals(msg
, expect
.newLength
, ei2
.newLength());
881 assertEquals(msg
, expSrcIndex
, ei2
.sourceIndex());
882 assertEquals(msg
, expDestIndex
, ei2
.destinationIndex());
883 assertEquals(msg
, expReplIndex
, ei2
.replacementIndex());
884 if (!withUnchanged
) {
885 // For some iterators, move past the current range
886 // so that findSourceIndex() has to look before the current index.
// Advance the expected positions past this entry.
892 expSrcIndex
+= expect
.oldLength
;
893 expDestIndex
+= expect
.newLength
;
895 expReplIndex
+= expect
.newLength
;
// After the last entry: next() fails, the iterator reports an empty unchanged
// range at the final indexes, and the end source index is not found.
898 // TODO: remove casts from u"" when merging into trunk
899 UnicodeString msg
= UnicodeString(name
).append(u
" end");
900 assertFalse(msg
, ei1
.next(errorCode
));
901 assertFalse(msg
, ei1
.hasChange());
902 assertEquals(msg
, 0, ei1
.oldLength());
903 assertEquals(msg
, 0, ei1
.newLength());
904 assertEquals(msg
, expSrcIndex
, ei1
.sourceIndex());
905 assertEquals(msg
, expDestIndex
, ei1
.destinationIndex());
906 assertEquals(msg
, expReplIndex
, ei1
.replacementIndex());
908 assertFalse(name
, ei2
.findSourceIndex(expSrcIndex
, errorCode
));
// Builds an Edits record by hand (unchanged ranges and replacements of various
// sizes), checks hasChanges()/lengthDelta()/copyErrorTo(), and then verifies
// the coarse and fine iterators -- with and without unchanged ranges --
// against expected tables via checkEditsIter(). Ends by checking the state
// reported after a reset.
911 void StringCaseTest::TestEdits() {
912 IcuTestErrorCode
errorCode(*this, "TestEdits");
914 assertFalse("new Edits", edits
.hasChanges());
915 assertEquals("new Edits", 0, edits
.lengthDelta());
916 edits
.addUnchanged(1); // multiple unchanged ranges are combined
917 edits
.addUnchanged(10000); // too long, and they are split
// An empty replacement (0 -> 0) must not count as a change.
918 edits
.addReplace(0, 0);
919 edits
.addUnchanged(2);
920 assertFalse("unchanged 10003", edits
.hasChanges());
921 assertEquals("unchanged 10003", 0, edits
.lengthDelta());
922 edits
.addReplace(1, 1); // multiple short equal-length edits are compressed
923 edits
.addUnchanged(0);
924 edits
.addReplace(1, 1);
925 edits
.addReplace(1, 1);
926 edits
.addReplace(0, 10);
927 edits
.addReplace(100, 0);
928 edits
.addReplace(3000, 4000); // variable-length encoding
929 edits
.addReplace(100000, 100000);
930 assertTrue("some edits", edits
.hasChanges());
// Net length change of the replacements above: +10, -100, +1000.
931 assertEquals("some edits", 10 - 100 + 1000, edits
.lengthDelta());
932 UErrorCode outErrorCode
= U_ZERO_ERROR
;
933 assertFalse("edits done: copyErrorTo", edits
.copyErrorTo(outErrorCode
));
// Coarse view: one unchanged range followed by one merged change.
935 static const EditChange coarseExpectedChanges
[] = {
936 { FALSE
, 10003, 10003 },
937 { TRUE
, 103103, 104013 }
939 checkEditsIter(u
"coarse",
940 edits
.getCoarseIterator(), edits
.getCoarseIterator(),
941 coarseExpectedChanges
, UPRV_LENGTHOF(coarseExpectedChanges
), TRUE
, errorCode
);
942 checkEditsIter(u
"coarse changes",
943 edits
.getCoarseChangesIterator(), edits
.getCoarseChangesIterator(),
944 coarseExpectedChanges
, UPRV_LENGTHOF(coarseExpectedChanges
), FALSE
, errorCode
);
// Fine view: each replacement is reported on its own.
946 static const EditChange fineExpectedChanges
[] = {
947 { FALSE
, 10003, 10003 },
953 { TRUE
, 3000, 4000 },
954 { TRUE
, 100000, 100000 }
956 checkEditsIter(u
"fine",
957 edits
.getFineIterator(), edits
.getFineIterator(),
958 fineExpectedChanges
, UPRV_LENGTHOF(fineExpectedChanges
), TRUE
, errorCode
);
959 checkEditsIter(u
"fine changes",
960 edits
.getFineChangesIterator(), edits
.getFineChangesIterator(),
961 fineExpectedChanges
, UPRV_LENGTHOF(fineExpectedChanges
), FALSE
, errorCode
);
// NOTE(review): the "reset" assertions presumably follow a call that resets
// edits; that call is not visible here -- confirm.
964 assertFalse("reset", edits
.hasChanges());
965 assertEquals("reset", 0, edits
.lengthDelta());
966 Edits::Iterator ei
= edits
.getCoarseChangesIterator();
967 assertFalse("reset then iterator", ei
.next(errorCode
));
// Runs the UTF-16 CaseMap entry points (toLower, toUpper, toTitle, fold) with
// UCASEMAP_OMIT_UNCHANGED_TEXT and an Edits log, checking for each call both
// the text written to `dest` and the recorded edits via checkEditsIter().
// The error-code helper is labelled with this test's own name so a failure is
// not reported under "TestEdits".
// NOTE(review): `dest` and `edits` are declared outside the lines reviewed
// here, as are the rows of the expected-change tables -- confirm against the
// full file.
970 void StringCaseTest::TestCaseMapWithEdits() {
971 IcuTestErrorCode
errorCode(*this, "TestCaseMapWithEdits");
// Turkish lowercasing of 8 code units; the expected destination holds only the
// two characters that changed.
975 int32_t length
= CaseMap::toLower("tr", UCASEMAP_OMIT_UNCHANGED_TEXT
,
976 u
"IstanBul", 8, dest
, UPRV_LENGTHOF(dest
), &edits
, errorCode
);
977 assertEquals(u
"toLower(IstanBul)", UnicodeString(u
"ıb"), UnicodeString(TRUE
, dest
, length
));
978 static const EditChange lowerExpectedChanges
[] = {
984 checkEditsIter(u
"toLower(IstanBul)",
985 edits
.getFineIterator(), edits
.getFineIterator(),
986 lowerExpectedChanges
, UPRV_LENGTHOF(lowerExpectedChanges
),
// Greek uppercasing of 6 code units; the expected destination is the five
// characters after the already-uppercase first letter.
990 length
= CaseMap::toUpper("el", UCASEMAP_OMIT_UNCHANGED_TEXT
,
991 u
"Πατάτα", 6, dest
, UPRV_LENGTHOF(dest
), &edits
, errorCode
);
992 assertEquals(u
"toUpper(Πατάτα)", UnicodeString(u
"ΑΤΑΤΑ"), UnicodeString(TRUE
, dest
, length
));
993 static const EditChange upperExpectedChanges
[] = {
1001 checkEditsIter(u
"toUpper(Πατάτα)",
1002 edits
.getFineIterator(), edits
.getFineIterator(),
1003 upperExpectedChanges
, UPRV_LENGTHOF(upperExpectedChanges
),
// Title casing is compiled only when break iteration is available.
// Dutch title casing without break adjustment or lowercasing of the rest, with
// NULL passed for the iterator argument; the expected destination is just "J".
1008 #if !UCONFIG_NO_BREAK_ITERATION
1009 length
= CaseMap::toTitle("nl",
1010 UCASEMAP_OMIT_UNCHANGED_TEXT
|
1011 U_TITLECASE_NO_BREAK_ADJUSTMENT
|
1012 U_TITLECASE_NO_LOWERCASE
,
1013 NULL
, u
"IjssEL IglOo", 12,
1014 dest
, UPRV_LENGTHOF(dest
), &edits
, errorCode
);
1015 assertEquals(u
"toTitle(IjssEL IglOo)", UnicodeString(u
"J"), UnicodeString(TRUE
, dest
, length
));
1016 static const EditChange titleExpectedChanges
[] = {
1021 checkEditsIter(u
"toTitle(IjssEL IglOo)",
1022 edits
.getFineIterator(), edits
.getFineIterator(),
1023 titleExpectedChanges
, UPRV_LENGTHOF(titleExpectedChanges
),
// Case folding with U_FOLD_CASE_EXCLUDE_SPECIAL_I over 8 code units; the
// expected destination is "ıssb".
1028 length
= CaseMap::fold(UCASEMAP_OMIT_UNCHANGED_TEXT
| U_FOLD_CASE_EXCLUDE_SPECIAL_I
,
1029 u
"IßtanBul", 8, dest
, UPRV_LENGTHOF(dest
), &edits
, errorCode
);
1030 assertEquals(u
"foldCase(IßtanBul)", UnicodeString(u
"ıssb"), UnicodeString(TRUE
, dest
, length
));
1031 static const EditChange foldExpectedChanges
[] = {
1038 checkEditsIter(u
"foldCase(IßtanBul)",
1039 edits
.getFineIterator(), edits
.getFineIterator(),
1040 foldExpectedChanges
, UPRV_LENGTHOF(foldExpectedChanges
),
// UTF-8 counterpart of the CaseMap-with-Edits test: runs utf8ToLower,
// utf8ToUpper, utf8ToTitle and utf8Fold with UCASEMAP_OMIT_UNCHANGED_TEXT and
// an Edits log. Each result is decoded from `dest` with
// UnicodeString::fromUTF8() before comparison, and the recorded edits are
// verified via checkEditsIter(). Source lengths are passed in bytes.
// The error-code helper is labelled with this test's own name so a failure is
// not reported under "TestEdits".
// NOTE(review): `dest` and `edits` are declared outside the lines reviewed
// here, as are the rows of the expected-change tables -- confirm against the
// full file.
1044 void StringCaseTest::TestCaseMapUTF8WithEdits() {
1045 IcuTestErrorCode
errorCode(*this, "TestCaseMapUTF8WithEdits");
// Turkish lowercasing; the input is 8 bytes.
1049 int32_t length
= CaseMap::utf8ToLower("tr", UCASEMAP_OMIT_UNCHANGED_TEXT
,
1050 u8
"IstanBul", 8, dest
, UPRV_LENGTHOF(dest
), &edits
, errorCode
);
1051 assertEquals(u
"toLower(IstanBul)", UnicodeString(u
"ıb"),
1052 UnicodeString::fromUTF8(StringPiece(dest
, length
)));
1053 static const EditChange lowerExpectedChanges
[] = {
1059 checkEditsIter(u
"toLower(IstanBul)",
1060 edits
.getFineIterator(), edits
.getFineIterator(),
1061 lowerExpectedChanges
, UPRV_LENGTHOF(lowerExpectedChanges
),
// Greek uppercasing; the length is written as 6 * 2 bytes (six letters, two
// bytes each).
1065 length
= CaseMap::utf8ToUpper("el", UCASEMAP_OMIT_UNCHANGED_TEXT
,
1066 u8
"Πατάτα", 6 * 2, dest
, UPRV_LENGTHOF(dest
), &edits
, errorCode
);
1067 assertEquals(u
"toUpper(Πατάτα)", UnicodeString(u
"ΑΤΑΤΑ"),
1068 UnicodeString::fromUTF8(StringPiece(dest
, length
)));
1069 static const EditChange upperExpectedChanges
[] = {
1077 checkEditsIter(u
"toUpper(Πατάτα)",
1078 edits
.getFineIterator(), edits
.getFineIterator(),
1079 upperExpectedChanges
, UPRV_LENGTHOF(upperExpectedChanges
),
// Title casing is compiled only when break iteration is available.
// Dutch title casing with NULL passed for the iterator argument; the expected
// destination is just "J".
1083 #if !UCONFIG_NO_BREAK_ITERATION
1084 length
= CaseMap::utf8ToTitle("nl",
1085 UCASEMAP_OMIT_UNCHANGED_TEXT
|
1086 U_TITLECASE_NO_BREAK_ADJUSTMENT
|
1087 U_TITLECASE_NO_LOWERCASE
,
1088 NULL
, u8
"IjssEL IglOo", 12,
1089 dest
, UPRV_LENGTHOF(dest
), &edits
, errorCode
);
1090 assertEquals(u
"toTitle(IjssEL IglOo)", UnicodeString(u
"J"),
1091 UnicodeString::fromUTF8(StringPiece(dest
, length
)));
1092 static const EditChange titleExpectedChanges
[] = {
1097 checkEditsIter(u
"toTitle(IjssEL IglOo)",
1098 edits
.getFineIterator(), edits
.getFineIterator(),
1099 titleExpectedChanges
, UPRV_LENGTHOF(titleExpectedChanges
),
// Case folding with U_FOLD_CASE_EXCLUDE_SPECIAL_I; the length is written as
// 1 + 2 + 6 bytes (one for "I", two for the sharp s, six for "tanBul").
1104 length
= CaseMap::utf8Fold(UCASEMAP_OMIT_UNCHANGED_TEXT
| U_FOLD_CASE_EXCLUDE_SPECIAL_I
,
1105 u8
"IßtanBul", 1 + 2 + 6, dest
, UPRV_LENGTHOF(dest
), &edits
, errorCode
);
1106 assertEquals(u
"foldCase(IßtanBul)", UnicodeString(u
"ıssb"),
1107 UnicodeString::fromUTF8(StringPiece(dest
, length
)));
1108 static const EditChange foldExpectedChanges
[] = {
1115 checkEditsIter(u
"foldCase(IßtanBul)",
1116 edits
.getFineIterator(), edits
.getFineIterator(),
1117 foldExpectedChanges
, UPRV_LENGTHOF(foldExpectedChanges
),
// Uppercases a 306-code-unit string in place with the root locale and compares
// it with a prebuilt all-uppercase copy.
1121 void StringCaseTest::TestLongUnicodeString() {
1122 // Code coverage for UnicodeString case mapping code handling
1123 // long strings or many changes in a string.
// Input: six repetitions of a 51-character row (6 * 51 = 306). Each row is 50
// lowercase letters followed by an already-uppercase 'F'.
1124 UnicodeString
s(TRUE
,
1126 u
"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1127 u
"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1128 u
"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1129 u
"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1130 u
"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF"
1131 u
"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF", 6 * 51);
// Expected output: the same six rows with every letter uppercase.
1132 UnicodeString
expected(TRUE
,
1134 u
"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1135 u
"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1136 u
"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1137 u
"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1138 u
"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF"
1139 u
"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF", 6 * 51);
// The call modifies `s` itself, so `s` is what gets compared to `expected`.
1140 s
.toUpper(Locale::getRoot());
1141 assertEquals("string length 306", expected
, s
);
// Regression test for bug 13127: title-cases a three-character Japanese string
// with the English locale. The visible lines contain no assertions, so the
// test appears to pass simply by completing without a crash.
1144 void StringCaseTest::TestBug13127() {
1145 // Test case crashed when the bug was present.
1146 const char16_t *s16
= u
"日本語";
// NOTE(review): the -1 length presumably means "NUL-terminated" -- confirm
// against the UnicodeString constructor documentation.
1147 UnicodeString
s(TRUE
, s16
, -1);
// NOTE(review): the leading 0 is presumably a null break-iterator argument --
// confirm against the toTitle() overload being selected.
1148 s
.toTitle(0, Locale::getEnglish());