2 *******************************************************************************
4 * Copyright (C) 2002-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: strcase.cpp
10 * tab size: 8 (not used)
13 * created on: 2002mar12
14 * created by: Markus W. Scherer
16 * Test file for string casing C++ API functions.
19 #include "unicode/uchar.h"
20 #include "unicode/ures.h"
21 #include "unicode/uloc.h"
22 #include "unicode/locid.h"
23 #include "unicode/ubrk.h"
24 #include "unicode/unistr.h"
25 #include "unicode/ucasemap.h"
28 #include "unicode/tstdtmod.h"
// Destructor with an empty body: no explicit cleanup is performed here.
31 StringCaseTest::~StringCaseTest() {}
// Test dispatcher for this suite.
//   index - number of the test being asked for
//   exec  - flag supplied by the caller (its use is not visible in this listing)
//   name  - reference through which a test name can be handed back
//   par   - unused; the parameter name is commented out
// NOTE(review): TESTCASE_AUTO is defined elsewhere; presumably each use maps
// the next index to the named test method - confirm against the test framework.
34 StringCaseTest::runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char * /*par*/) {
// Log which suite is running.
36 logln("TestSuite StringCaseTest: ");
39 TESTCASE_AUTO(TestCaseConversion
);
// TestCasing is only listed when break iteration, file I/O and legacy
// conversion are all configured in.
40 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
41 TESTCASE_AUTO(TestCasing
);
43 TESTCASE_AUTO(TestFullCaseFoldingIterator
);
// Exercises UnicodeString::toLower(), toUpper() and foldCase() with hard-coded
// inputs and several locales, comparing each result with an expected string
// and reporting every mismatch through errln().
// NOTE(review): the embedded line numbers skip values, so some statements of
// this function appear to be missing from this listing - confirm against the
// complete file before changing any logic.
48 StringCaseTest::TestCaseConversion()
// Greek sample text as UTF-16 code units, in upper case ...
50 static const UChar uppercaseGreek
[] =
51 { 0x399, 0x395, 0x3a3, 0x3a5, 0x3a3, 0x20, 0x03a7, 0x3a1, 0x399, 0x3a3, 0x3a4,
// ... and in lower case. The sigma directly before the space (0x20) is
// 0x3c2 here, while the other sigmas are 0x3c3.
55 static const UChar lowercaseGreek
[] =
56 { 0x3b9, 0x3b5, 0x3c3, 0x3c5, 0x3c2, 0x20, 0x03c7, 0x3c1, 0x3b9, 0x3c3, 0x3c4,
// Expected Turkish-locale lowercase text; contains 0x0131, NUL-terminated.
60 static const UChar lowercaseTurkish
[] =
61 { 0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0x2c, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x63, 0x6f,
62 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x0131, 0x6e, 0x6f, 0x70, 0x6c, 0x65, 0x21, 0 };
// Expected Turkish-locale uppercase text; contains 0x0130, NUL-terminated.
64 static const UChar uppercaseTurkish
[] =
65 { 0x54, 0x4f, 0x50, 0x4b, 0x41, 0x50, 0x49, 0x20, 0x50, 0x41, 0x4c, 0x41, 0x43, 0x45, 0x2c, 0x20,
66 0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4c, 0 };
// Holds the expected value for each comparison below.
68 UnicodeString expectedResult
;
// Build the first input: U+0130 followed by ASCII text.
71 test3
+= (UChar32
)0x0130;
72 test3
+= "STANBUL, NOT CONSTANTINOPLE!";
// 1. Lowercase a copy with Locale(""): the expected text has U+0307 after
//    the leading 'i'.
74 UnicodeString
test4(test3
);
75 test4
.toLower(Locale(""));
76 expectedResult
= UnicodeString("i\\u0307stanbul, not constantinople!", "").unescape();
77 if (test4
!= expectedResult
)
78 errln("1. toLower failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// 2. Lowercase with the Turkish locale and compare with lowercaseTurkish.
81 test4
.toLower(Locale("tr", "TR"));
82 expectedResult
= lowercaseTurkish
;
83 if (test4
!= expectedResult
)
84 errln("2. toLower failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// Append U+0131 and more ASCII text to test3 for the uppercase checks.
87 test3
+= (UChar32
)0x0131;
88 test3
+= " palace, istanbul";
// Uppercase with Locale(""): the expected text is plain ASCII.
91 test4
.toUpper(Locale(""));
92 expectedResult
= "TOPKAPI PALACE, ISTANBUL";
93 if (test4
!= expectedResult
)
94 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// Uppercase with the Turkish locale and compare with uppercaseTurkish.
97 test4
.toUpper(Locale("tr", "TR"));
98 expectedResult
= uppercaseTurkish
;
99 if (test4
!= expectedResult
)
100 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// German input containing U+00DF: the expected uppercase text spells each
// U+00DF as "SS", so the result is longer than the input.
102 test3
= CharsToUnicodeString("S\\u00FC\\u00DFmayrstra\\u00DFe");
104 test3
.toUpper(Locale("de", "DE"));
105 expectedResult
= CharsToUnicodeString("S\\u00DCSSMAYRSTRASSE");
106 if (test3
!= expectedResult
)
107 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test3
+ "\".");
// Greek: replace the whole of test4 with uppercaseGreek, lowercase it with
// the el_GR locale and compare with lowercaseGreek.
109 test4
.replace(0, test4
.length(), uppercaseGreek
);
111 test4
.toLower(Locale("el", "GR"));
112 expectedResult
= lowercaseGreek
;
113 if (test4
!= expectedResult
)
114 errln("toLower failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// Reverse direction: start from lowercaseGreek and expect uppercaseGreek.
116 test4
.replace(0, test4
.length(), lowercaseGreek
);
119 expectedResult
= uppercaseGreek
;
120 if (test4
!= expectedResult
)
121 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
123 // more string case mapping tests with the new implementation
// UTF-16 input and expected tables. The larger tables end with the surrogate
// code units 0xd93f, 0xdfff, which are the same in input and expected output.
127 beforeLower
[]= { 0x61, 0x42, 0x49, 0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff },
128 lowerRoot
[]= { 0x61, 0x62, 0x69, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
129 lowerTurkish
[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
131 beforeUpper
[]= { 0x61, 0x42, 0x69, 0x3c2, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xfb03, 0xfb03, 0xd93f, 0xdfff },
132 upperRoot
[]= { 0x41, 0x42, 0x49, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
133 upperTurkish
[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
135 beforeMiniUpper
[]= { 0xdf, 0x61 },
136 miniUpper
[]= { 0x53, 0x53, 0x41 };
140 /* lowercase with root locale */
141 s
=UnicodeString(FALSE
, beforeLower
, UPRV_LENGTHOF(beforeLower
));
// The length is compared first; the expected string is then built with
// s.length() units of the expected table.
143 if( s
.length()!=UPRV_LENGTHOF(lowerRoot
) ||
144 s
!=UnicodeString(FALSE
, lowerRoot
, s
.length())
146 errln("error in toLower(root locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, lowerRoot
, UPRV_LENGTHOF(lowerRoot
)) + "\"");
149 /* lowercase with turkish locale */
150 s
=UnicodeString(FALSE
, beforeLower
, UPRV_LENGTHOF(beforeLower
));
// setCharAt() writes back the value that is already at index 0.
// NOTE(review): presumably this forces s to take a writable copy of the
// aliased array before case mapping - confirm.
151 s
.setCharAt(0, beforeLower
[0]).toLower(Locale("tr"));
152 if( s
.length()!=UPRV_LENGTHOF(lowerTurkish
) ||
153 s
!=UnicodeString(FALSE
, lowerTurkish
, s
.length())
155 errln("error in toLower(turkish locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, lowerTurkish
, UPRV_LENGTHOF(lowerTurkish
)) + "\"");
158 /* uppercase with root locale */
159 s
=UnicodeString(FALSE
, beforeUpper
, UPRV_LENGTHOF(beforeUpper
));
160 s
.setCharAt(0, beforeUpper
[0]).toUpper(Locale(""));
161 if( s
.length()!=UPRV_LENGTHOF(upperRoot
) ||
162 s
!=UnicodeString(FALSE
, upperRoot
, s
.length())
164 errln("error in toUpper(root locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, upperRoot
, UPRV_LENGTHOF(upperRoot
)) + "\"");
167 /* uppercase with turkish locale */
168 s
=UnicodeString(FALSE
, beforeUpper
, UPRV_LENGTHOF(beforeUpper
));
169 s
.toUpper(Locale("tr"));
170 if( s
.length()!=UPRV_LENGTHOF(upperTurkish
) ||
171 s
!=UnicodeString(FALSE
, upperTurkish
, s
.length())
173 errln("error in toUpper(turkish locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, upperTurkish
, UPRV_LENGTHOF(upperTurkish
)) + "\"");
176 /* uppercase a short string with root locale */
// Two input units (0xdf, 0x61) are expected to become three (0x53, 0x53, 0x41).
177 s
=UnicodeString(FALSE
, beforeMiniUpper
, UPRV_LENGTHOF(beforeMiniUpper
));
178 s
.setCharAt(0, beforeMiniUpper
[0]).toUpper("");
179 if( s
.length()!=UPRV_LENGTHOF(miniUpper
) ||
180 s
!=UnicodeString(FALSE
, miniUpper
, s
.length())
182 errln("error in toUpper(root locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, miniUpper
, UPRV_LENGTHOF(miniUpper
)) + "\"");
186 // test some supplementary characters (>= Unicode 3.1)
// Input is U+1043C followed by U+10414; lowercasing is expected to give
// U+1043C twice and uppercasing U+10414 twice.
191 deseretInput
=UnicodeString("\\U0001043C\\U00010414", "").unescape(),
192 deseretLower
=UnicodeString("\\U0001043C\\U0001043C", "").unescape(),
193 deseretUpper
=UnicodeString("\\U00010414\\U00010414", "").unescape();
194 (t
=deseretInput
).toLower();
195 if(t
!=deseretLower
) {
196 errln("error lowercasing Deseret (plane 1) characters");
198 (t
=deseretInput
).toUpper();
199 if(t
!=deseretUpper
) {
200 errln("error uppercasing Deseret (plane 1) characters");
204 // test some more cases that looked like problems
// Mixed input (ASCII, U+FB00, dotted/dotless i forms, U+01C7..U+01C9 and
// supplementary characters) case-mapped with the "en" locale.
209 ljInput
=UnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 \\U0001043C\\U00010414", "").unescape(),
210 ljLower
=UnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 \\U0001043C\\U0001043C", "").unescape(),
211 ljUpper
=UnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 \\U00010414\\U00010414", "").unescape();
212 (t
=ljInput
).toLower("en");
214 errln("error lowercasing LJ characters");
216 (t
=ljInput
).toUpper("en");
218 errln("error uppercasing LJ characters");
222 #if !UCONFIG_NO_NORMALIZATION
223 // some context-sensitive casing depends on normalization data being present
225 // Unicode 3.1.1 SpecialCasing tests
229 // sigmas preceded and/or followed by cased letters
// In sigmasLower only the third U+03A3 becomes U+03C2; the others become U+03C3.
231 sigmas
=UnicodeString("i\\u0307\\u03a3\\u0308j \\u0307\\u03a3\\u0308j i\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(),
232 sigmasLower
=UnicodeString("i\\u0307\\u03c3\\u0308j \\u0307\\u03c3\\u0308j i\\u00ad\\u03c2\\u0308 \\u0307\\u03c3\\u0308 ", "").unescape(),
233 sigmasUpper
=UnicodeString("I\\u0307\\u03a3\\u0308J \\u0307\\u03a3\\u0308J I\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape();
235 (t
=sigmas
).toLower();
237 errln("error in sigmas.toLower()=\"" + t
+ "\" expected \"" + sigmasLower
+ "\"");
240 (t
=sigmas
).toUpper(Locale(""));
242 errln("error in sigmas.toUpper()=\"" + t
+ "\" expected \"" + sigmasUpper
+ "\"");
245 // turkish & azerbaijani dotless i & dotted I
246 // remove dot above if there was a capital I before and there are no more accents above
// The same input is lowercased with "tr" (expects dotsTurkish) and with "de"
// (expects dotsDefault).
248 dots
=UnicodeString("I \\u0130 I\\u0307 I\\u0327\\u0307 I\\u0301\\u0307 I\\u0327\\u0307\\u0301", "").unescape(),
249 dotsTurkish
=UnicodeString("\\u0131 i i i\\u0327 \\u0131\\u0301\\u0307 i\\u0327\\u0301", "").unescape(),
250 dotsDefault
=UnicodeString("i i\\u0307 i\\u0307 i\\u0327\\u0307 i\\u0301\\u0307 i\\u0327\\u0307\\u0301", "").unescape();
252 (t
=dots
).toLower("tr");
254 errln("error in dots.toLower(tr)=\"" + t
+ "\" expected \"" + dotsTurkish
+ "\"");
257 (t
=dots
).toLower("de");
259 errln("error in dots.toLower(de)=\"" + t
+ "\" expected \"" + dotsDefault
+ "\"");
263 // more Unicode 3.1.1 tests
267 // lithuanian dot above in uppercasing
// The same input is uppercased with "lt" (expects dotsLithuanian) and with
// "de" (expects dotsDefault).
269 dots
=UnicodeString("a\\u0307 \\u0307 i\\u0307 j\\u0327\\u0307 j\\u0301\\u0307", "").unescape(),
270 dotsLithuanian
=UnicodeString("A\\u0307 \\u0307 I J\\u0327 J\\u0301\\u0307", "").unescape(),
271 dotsDefault
=UnicodeString("A\\u0307 \\u0307 I\\u0307 J\\u0327\\u0307 J\\u0301\\u0307", "").unescape();
273 (t
=dots
).toUpper("lt");
274 if(t
!=dotsLithuanian
) {
275 errln("error in dots.toUpper(lt)=\"" + t
+ "\" expected \"" + dotsLithuanian
+ "\"");
278 (t
=dots
).toUpper("de");
280 errln("error in dots.toUpper(de)=\"" + t
+ "\" expected \"" + dotsDefault
+ "\"");
283 // lithuanian adds dot above to i in lowercasing if there are more above accents
285 i
=UnicodeString("I I\\u0301 J J\\u0301 \\u012e \\u012e\\u0301 \\u00cc\\u00cd\\u0128", "").unescape(),
286 iLithuanian
=UnicodeString("i i\\u0307\\u0301 j j\\u0307\\u0301 \\u012f \\u012f\\u0307\\u0301 i\\u0307\\u0300i\\u0307\\u0301i\\u0307\\u0303", "").unescape(),
287 iDefault
=UnicodeString("i i\\u0301 j j\\u0301 \\u012f \\u012f\\u0301 \\u00ec\\u00ed\\u0129", "").unescape();
291 errln("error in i.toLower(lt)=\"" + t
+ "\" expected \"" + iLithuanian
+ "\"");
296 errln("error in i.toLower(de)=\"" + t
+ "\" expected \"" + iDefault
+ "\"");
// Case folding: s is the input, f the expected result of default folding and
// g the expected result with U_FOLD_CASE_EXCLUDE_SPECIAL_I. f and g differ
// only in the text that follows U+10434.
305 s
=UnicodeString("A\\u00df\\u00b5\\ufb03\\U0001040c\\u0130\\u0131", "").unescape(),
306 f
=UnicodeString("ass\\u03bcffi\\U00010434i\\u0307\\u0131", "").unescape(),
307 g
=UnicodeString("ass\\u03bcffi\\U00010434i\\u0131", "").unescape(),
312 errln("error in foldCase(\"" + s
+ "\", default)=\"" + t
+ "\" but expected \"" + f
+ "\"");
315 // alternate handling for dotted I/dotless i (U+0130, U+0131)
316 (t
=s
).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I
);
318 errln("error in foldCase(\"" + s
+ "\", U_FOLD_CASE_EXCLUDE_SPECIAL_I)=\"" + t
+ "\" but expected \"" + g
+ "\"");
323 // data-driven case mapping tests ------------------------------------------ ***
333 // names of TestData children in casing.txt
// Looked up as dataNames[whichCase] by TestCasing(); the array is declared
// with TEST_COUNT+1 entries.
// NOTE(review): the initializer list is not part of this listing.
334 static const char *const dataNames
[TEST_COUNT
+1]={
// Runs one case-mapping operation on `input` and checks it against `output`,
// first through the UnicodeString member functions and then through the
// UTF-8 ucasemap_utf8*() functions.
//   input    - text to be case-mapped
//   output   - expected result
//   iter     - break iterator for title casing, passed as void* and cast to
//              BreakIterator* / UBreakIterator* where it is used
//   localeID - locale name used for Locale and ucasemap_open()
//   options  - option bits passed to toTitle(), foldCase() and ucasemap_open()
// NOTE(review): the embedded line numbers skip values; the parameter that
// selects the operation (compared with TEST_TITLE below) and the statements
// that choose between the operations are not visible in this listing.
343 StringCaseTest::TestCasingImpl(const UnicodeString
&input
,
344 const UnicodeString
&output
,
346 void *iter
, const char *localeID
, uint32_t options
) {
// Part 1: UnicodeString API.
348 UnicodeString result
;
350 Locale
locale(localeID
);
356 result
.toLower(locale
);
360 result
.toUpper(locale
);
362 #if !UCONFIG_NO_BREAK_ITERATION
365 result
.toTitle((BreakIterator
*)iter
, locale
, options
);
370 result
.foldCase(options
);
374 break; // won't happen
377 dataerrln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name
);
379 #if !UCONFIG_NO_BREAK_ITERATION
// For title casing with options==0, also exercise the toTitle() overload
// that takes no options argument.
380 if(whichCase
==TEST_TITLE
&& options
==0) {
382 result
.toTitle((BreakIterator
*)iter
, locale
);
384 dataerrln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res");
// Part 2: UTF-8 API. Input and output go through fixed 100-byte buffers.
390 char utf8In
[100], utf8Out
[100];
391 int32_t utf8InLength
, utf8OutLength
, resultLength
;
394 IcuTestErrorCode
errorCode(*this, "TestCasingImpl");
395 LocalUCaseMapPointer
csm(ucasemap_open(localeID
, options
, errorCode
));
396 #if !UCONFIG_NO_BREAK_ITERATION
398 // Clone the break iterator so that the UCaseMap can safely adopt it.
399 UBreakIterator
*clone
=ubrk_safeClone((UBreakIterator
*)iter
, NULL
, NULL
, errorCode
);
400 ucasemap_setBreakIterator(csm
.getAlias(), clone
, errorCode
);
// Convert the UTF-16 input to UTF-8 for the ucasemap_utf8*() calls.
404 u_strToUTF8(utf8In
, (int32_t)sizeof(utf8In
), &utf8InLength
, input
.getBuffer(), input
.length(), errorCode
);
// Each operation below records its function name in `name` for the error
// messages and stores the returned length in utf8OutLength.
407 name
="ucasemap_utf8ToLower";
408 utf8OutLength
=ucasemap_utf8ToLower(csm
.getAlias(),
409 utf8Out
, (int32_t)sizeof(utf8Out
),
410 utf8In
, utf8InLength
, errorCode
);
413 name
="ucasemap_utf8ToUpper";
414 utf8OutLength
=ucasemap_utf8ToUpper(csm
.getAlias(),
415 utf8Out
, (int32_t)sizeof(utf8Out
),
416 utf8In
, utf8InLength
, errorCode
);
418 #if !UCONFIG_NO_BREAK_ITERATION
420 name
="ucasemap_utf8ToTitle";
421 utf8OutLength
=ucasemap_utf8ToTitle(csm
.getAlias(),
422 utf8Out
, (int32_t)sizeof(utf8Out
),
423 utf8In
, utf8InLength
, errorCode
);
427 name
="ucasemap_utf8FoldCase";
428 utf8OutLength
=ucasemap_utf8FoldCase(csm
.getAlias(),
429 utf8Out
, (int32_t)sizeof(utf8Out
),
430 utf8In
, utf8InLength
, errorCode
);
435 break; // won't happen
// Convert the UTF-8 output back to UTF-16 straight into result's buffer.
// If any call failed, the buffer is released with length 0.
437 buffer
=result
.getBuffer(utf8OutLength
);
438 u_strFromUTF8(buffer
, result
.getCapacity(), &resultLength
, utf8Out
, utf8OutLength
, errorCode
);
439 result
.releaseBuffer(errorCode
.isSuccess() ? resultLength
: 0);
// Report an API error first; otherwise compare with the expected output.
441 if(errorCode
.isFailure()) {
442 errcheckln(errorCode
, "error: %s() got an error for a test case from casing.res - %s", name
, u_errorName(errorCode
));
444 } else if(result
!=output
) {
445 errln("error: %s() got a wrong result for a test case from casing.res", name
);
446 errln("expected \"" + output
+ "\" got \"" + result
+ "\"" );
// Data-driven test: opens the "casing" test data module and, for every
// whichCase value below TEST_COUNT, reads each data case and passes it to
// TestCasingImpl(). Finishes with one extra toTitle(NULL) check.
// NOTE(review): the embedded line numbers skip values, so declarations such
// as cLocaleID and options, and several braces, are not visible here.
451 StringCaseTest::TestCasing() {
452 UErrorCode status
= U_ZERO_ERROR
;
453 #if !UCONFIG_NO_BREAK_ITERATION
454 LocalUBreakIteratorPointer iter
;
457 UnicodeString locale
, input
, output
, optionsString
, result
;
459 int32_t whichCase
, type
;
460 LocalPointer
<TestDataModule
> driver(TestDataModule::getTestDataModule("casing", *this, status
));
461 if(U_SUCCESS(status
)) {
462 for(whichCase
=0; whichCase
<TEST_COUNT
; ++whichCase
) {
463 #if UCONFIG_NO_BREAK_ITERATION
464 if(whichCase
==TEST_TITLE
) {
// Load the data set named dataNames[whichCase].
468 LocalPointer
<TestData
> casingTest(driver
->createTestData(dataNames
[whichCase
], status
));
469 if(U_FAILURE(status
)) {
470 errln("TestCasing failed to createTestData(%s) - %s", dataNames
[whichCase
], u_errorName(status
));
473 const DataMap
*myCase
= NULL
;
// Iterate over the cases of this data set.
474 while(casingTest
->nextCase(myCase
, status
)) {
475 input
= myCase
->getString("Input", status
);
476 output
= myCase
->getString("Output", status
);
// The "Locale" field is read for every kind except TEST_FOLD.
478 if(whichCase
!=TEST_FOLD
) {
479 locale
= myCase
->getString("Locale", status
);
// Copy the locale name into the char buffer cLocaleID.
481 locale
.extract(0, 0x7fffffff, cLocaleID
, sizeof(cLocaleID
), "");
483 #if !UCONFIG_NO_BREAK_ITERATION
// Title casing: the "Type" field selects which break iterator to open.
484 if(whichCase
==TEST_TITLE
) {
485 type
= myCase
->getInt("Type", status
);
487 iter
.adoptInstead(ubrk_open((UBreakIteratorType
)type
, cLocaleID
, NULL
, 0, &status
));
488 } else if(type
==-2) {
489 // Open a trivial break iterator that only delivers { 0, length }
490 // or even just { 0 } as boundaries.
491 static const UChar rules
[] = { 0x2e, 0x2a, 0x3b }; // ".*;"
492 UParseError parseError
;
493 iter
.adoptInstead(ubrk_openRules(rules
, UPRV_LENGTHOF(rules
), NULL
, 0, &parseError
, &status
));
// Translate the letters of the "Options" field into option bits:
// 'T' (0x54), 'L' (0x4c) and 'A' (0x41).
498 if(whichCase
==TEST_TITLE
|| whichCase
==TEST_FOLD
) {
499 optionsString
= myCase
->getString("Options", status
);
500 if(optionsString
.indexOf((UChar
)0x54)>=0) { // T
501 options
|=U_FOLD_CASE_EXCLUDE_SPECIAL_I
;
503 if(optionsString
.indexOf((UChar
)0x4c)>=0) { // L
504 options
|=U_TITLECASE_NO_LOWERCASE
;
506 if(optionsString
.indexOf((UChar
)0x41)>=0) { // A
507 options
|=U_TITLECASE_NO_BREAK_ADJUSTMENT
;
// Report a setup failure as a data error, then reset status so that it does
// not carry over.
511 if(U_FAILURE(status
)) {
512 dataerrln("error: TestCasing() setup failed for %s test case from casing.res: %s", dataNames
[whichCase
], u_errorName(status
));
513 status
= U_ZERO_ERROR
;
// Without break iteration a placeholder `iter` is declared here so that the
// iter.getAlias() argument below is still available.
515 #if UCONFIG_NO_BREAK_ITERATION
516 LocalPointer
<UMemory
> iter
;
518 TestCasingImpl(input
, output
, whichCase
, iter
.getAlias(), cLocaleID
, options
);
521 #if !UCONFIG_NO_BREAK_ITERATION
// Replace the held break iterator with NULL.
522 iter
.adoptInstead(NULL
);
528 #if !UCONFIG_NO_BREAK_ITERATION
529 // more tests for API coverage
531 input
=UNICODE_STRING_SIMPLE("sTrA\\u00dfE").unescape();
532 (result
=input
).toTitle(NULL
);
533 if(result
!=UNICODE_STRING_SIMPLE("Stra\\u00dfe").unescape()) {
534 dataerrln("UnicodeString::toTitle(NULL) failed.");
// Walks a FullCaseFoldingIterator and checks each (code point, full folding)
// pair it yields, plus three specific pairs.
// NOTE(review): the embedded line numbers skip values; the declarations of
// c, full and count and some of the conditions are not visible here.
540 StringCaseTest::TestFullCaseFoldingIterator() {
// Expected full foldings for the spot checks below.
541 UnicodeString ffi
=UNICODE_STRING_SIMPLE("ffi");
542 UnicodeString ss
=UNICODE_STRING_SIMPLE("ss");
543 FullCaseFoldingIterator iter
;
// Counts how many of the specific pairs are seen.
545 int32_t countSpecific
=0;
// The loop runs while next() returns a non-negative code point.
548 while((c
=iter
.next(full
))>=0) {
550 // Check that the full Case_Folding has more than 1 code point.
551 if(!full
.hasMoreChar32Than(0, 0x7fffffff, 1)) {
552 errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding has at most 1 code point", (long)c
);
555 // Check that full == Case_Folding(c).
559 errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding != cf(c)", (long)c
);
562 // Spot-check a couple of specific cases.
// Matches "ffi" for U+FB03 and "ss" for U+00DF or U+1E9E.
563 if((full
==ffi
&& c
==0xfb03) || (full
==ss
&& (c
==0xdf || c
==0x1e9e))) {
// All three specific pairs must have been seen.
567 if(countSpecific
!=3) {
568 errln("error: FullCaseFoldingIterator did not yield exactly the expected specific cases");
571 errln("error: FullCaseFoldingIterator yielded only %d (cp, full) pairs", (int)count
);