2 *******************************************************************************
4 * Copyright (C) 2002-2009, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: strcase.cpp
10 * tab size: 8 (not used)
13 * created on: 2002mar12
14 * created by: Markus W. Scherer
16 * Test file for string casing C++ API functions.
19 #include "unicode/uchar.h"
20 #include "unicode/ures.h"
21 #include "unicode/uloc.h"
22 #include "unicode/locid.h"
23 #include "unicode/ubrk.h"
24 #include "unicode/unistr.h"
25 #include "unicode/ucasemap.h"
27 #include "unicode/tstdtmod.h"
// Element count of a C array, cast to int32_t: the array's total size divided
// by the size of its first element. Only meaningful for true arrays; applied
// to a pointer, sizeof(array) would be the pointer size instead.
29 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
// Destructor with an empty body; nothing is released here.
31 StringCaseTest::~StringCaseTest() {}
// Test dispatcher for StringCaseTest.
//
// Parameters:
//   index - number of the test being requested.
//   exec  - when true the selected test is executed; when false only `name`
//           is filled in.
//   name  - output: set to the test's name, or to "" for an index that has
//           no test.
//   (par) - unused; the parameter name is commented out.
//
// NOTE(review): this listing is missing lines (the switch header and at least
// one case label), so the index-to-test mapping below is only partly visible.
34 StringCaseTest::runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char * /*par*/) {
// Log the suite banner only when tests are actually being run.
35 if (exec
) logln("TestSuite StringCaseTest: ");
// Index 0: the hard-coded case conversion checks.
37 case 0: name
= "TestCaseConversion"; if (exec
) TestCaseConversion(); break;
// The data-driven TestCasing() is only compiled in when break iteration,
// file I/O and legacy conversion are all available in this build.
39 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
41 if(exec
) TestCasing();
// Any other index reports an empty name.
47 default: name
= ""; break; //needed to end loop
// Hard-coded checks of UnicodeString::toLower(), toUpper() and foldCase()
// against expected strings, for the root locale and for several specific
// locales (tr, de, el, en, lt). Each visible check compares the mapped string
// with an expected value and reports a mismatch through errln().
//
// NOTE(review): this listing is missing many lines (some declarations, some
// case-mapping calls, several comparison `if`s and all closing braces), so a
// few checks below show only their setup or only their error message.
52 StringCaseTest::TestCaseConversion()
// Expected-value tables as UChar arrays; the Greek initializers are cut off
// in this listing.
54 static const UChar uppercaseGreek
[] =
55 { 0x399, 0x395, 0x3a3, 0x3a5, 0x3a3, 0x20, 0x03a7, 0x3a1, 0x399, 0x3a3, 0x3a4,
59 static const UChar lowercaseGreek
[] =
60 { 0x3b9, 0x3b5, 0x3c3, 0x3c5, 0x3c2, 0x20, 0x03c7, 0x3c1, 0x3b9, 0x3c3, 0x3c4,
// Zero-terminated text containing U+0131 in place of a plain 'i'.
64 static const UChar lowercaseTurkish
[] =
65 { 0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0x2c, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x63, 0x6f,
66 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x0131, 0x6e, 0x6f, 0x70, 0x6c, 0x65, 0x21, 0 };
// Zero-terminated text containing U+0130 in place of a plain 'I'.
68 static const UChar uppercaseTurkish
[] =
69 { 0x54, 0x4f, 0x50, 0x4b, 0x41, 0x50, 0x49, 0x20, 0x50, 0x41, 0x4c, 0x41, 0x43, 0x45, 0x2c, 0x20,
70 0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4c, 0 };
72 UnicodeString expectedResult
;
// Build the input: U+0130 followed by "STANBUL, NOT CONSTANTINOPLE!".
75 test3
+= (UChar32
)0x0130;
76 test3
+= "STANBUL, NOT CONSTANTINOPLE!";
// Check 1: lowercasing with Locale("") is expected to turn U+0130 into
// 'i' followed by U+0307.
78 UnicodeString
test4(test3
);
79 test4
.toLower(Locale(""));
80 expectedResult
= UnicodeString("i\\u0307stanbul, not constantinople!", "").unescape();
81 if (test4
!= expectedResult
)
82 errln("1. toLower failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// Check 2: lowercasing with the Turkish locale is expected to give
// lowercaseTurkish.
85 test4
.toLower(Locale("tr", "TR"));
86 expectedResult
= lowercaseTurkish
;
87 if (test4
!= expectedResult
)
88 errln("2. toLower failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// Append U+0131 and " palace, istanbul" to test3 as uppercasing input.
// NOTE(review): the lines that reset test3 and copy it into test4 are not
// visible in this listing.
91 test3
+= (UChar32
)0x0131;
92 test3
+= " palace, istanbul";
// Uppercasing with Locale("") is expected to give plain ASCII capitals.
95 test4
.toUpper(Locale(""));
96 expectedResult
= "TOPKAPI PALACE, ISTANBUL";
97 if (test4
!= expectedResult
)
98 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// Uppercasing with the Turkish locale is expected to give uppercaseTurkish.
101 test4
.toUpper(Locale("tr", "TR"));
102 expectedResult
= uppercaseTurkish
;
103 if (test4
!= expectedResult
)
104 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// German: each U+00DF in the input is expected to become "SS" and U+00FC to
// become U+00DC.
106 test3
= CharsToUnicodeString("S\\u00FC\\u00DFmayrstra\\u00DFe");
108 test3
.toUpper(Locale("de", "DE"));
109 expectedResult
= CharsToUnicodeString("S\\u00DCSSMAYRSTRASSE");
110 if (test3
!= expectedResult
)
111 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test3
+ "\".");
// Greek: replace the whole of test4 with uppercaseGreek, lowercase it with
// the el_GR locale and compare against lowercaseGreek.
113 test4
.replace(0, test4
.length(), uppercaseGreek
);
115 test4
.toLower(Locale("el", "GR"));
116 expectedResult
= lowercaseGreek
;
117 if (test4
!= expectedResult
)
118 errln("toLower failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// Reverse direction: start from lowercaseGreek and expect uppercaseGreek.
// NOTE(review): the toUpper() call itself is not visible in this listing.
120 test4
.replace(0, test4
.length(), lowercaseGreek
);
123 expectedResult
= uppercaseGreek
;
124 if (test4
!= expectedResult
)
125 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
127 // more string case mapping tests with the new implementation
// Input and expected arrays for the checks below. None is zero-terminated;
// lengths are always taken from sizeof(array)/U_SIZEOF_UCHAR. Each array ends
// with the pair 0xd93f, 0xdfff, which is expected to pass through unchanged.
131 beforeLower
[]= { 0x61, 0x42, 0x49, 0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff },
132 lowerRoot
[]= { 0x61, 0x62, 0x69, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
133 lowerTurkish
[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
135 beforeUpper
[]= { 0x61, 0x42, 0x69, 0x3c2, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xfb03, 0xfb03, 0xd93f, 0xdfff },
136 upperRoot
[]= { 0x41, 0x42, 0x49, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
137 upperTurkish
[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
139 beforeMiniUpper
[]= { 0xdf, 0x61 },
140 miniUpper
[]= { 0x53, 0x53, 0x41 };
// Every check in this group first compares the result length with the
// expected array's element count, then compares the contents.
// NOTE(review): UnicodeString(FALSE, array, length) is presumably ICU's
// read-only aliasing constructor, and the setCharAt(0, array[0]) calls below
// presumably force a writable copy before mapping - confirm against the
// UnicodeString documentation.
144 /* lowercase with root locale */
145 s
=UnicodeString(FALSE
, beforeLower
, (int32_t)(sizeof(beforeLower
)/U_SIZEOF_UCHAR
));
147 if( s
.length()!=(sizeof(lowerRoot
)/U_SIZEOF_UCHAR
) ||
148 s
!=UnicodeString(FALSE
, lowerRoot
, s
.length())
150 errln("error in toLower(root locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, lowerRoot
, (int32_t)(sizeof(lowerRoot
)/U_SIZEOF_UCHAR
)) + "\"");
153 /* lowercase with turkish locale */
154 s
=UnicodeString(FALSE
, beforeLower
, (int32_t)(sizeof(beforeLower
)/U_SIZEOF_UCHAR
));
155 s
.setCharAt(0, beforeLower
[0]).toLower(Locale("tr"));
156 if( s
.length()!=(sizeof(lowerTurkish
)/U_SIZEOF_UCHAR
) ||
157 s
!=UnicodeString(FALSE
, lowerTurkish
, s
.length())
159 errln("error in toLower(turkish locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, lowerTurkish
, (int32_t)(sizeof(lowerTurkish
)/U_SIZEOF_UCHAR
)) + "\"");
162 /* uppercase with root locale */
163 s
=UnicodeString(FALSE
, beforeUpper
, (int32_t)(sizeof(beforeUpper
)/U_SIZEOF_UCHAR
));
164 s
.setCharAt(0, beforeUpper
[0]).toUpper(Locale(""));
165 if( s
.length()!=(sizeof(upperRoot
)/U_SIZEOF_UCHAR
) ||
166 s
!=UnicodeString(FALSE
, upperRoot
, s
.length())
168 errln("error in toUpper(root locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, upperRoot
, (int32_t)(sizeof(upperRoot
)/U_SIZEOF_UCHAR
)) + "\"");
171 /* uppercase with turkish locale */
172 s
=UnicodeString(FALSE
, beforeUpper
, (int32_t)(sizeof(beforeUpper
)/U_SIZEOF_UCHAR
));
173 s
.toUpper(Locale("tr"));
174 if( s
.length()!=(sizeof(upperTurkish
)/U_SIZEOF_UCHAR
) ||
175 s
!=UnicodeString(FALSE
, upperTurkish
, s
.length())
177 errln("error in toUpper(turkish locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, upperTurkish
, (int32_t)(sizeof(upperTurkish
)/U_SIZEOF_UCHAR
)) + "\"");
180 /* uppercase a short string with root locale */
// Two code units in, three expected out (0xdf -> 0x53 0x53), so the result
// is longer than the input.
181 s
=UnicodeString(FALSE
, beforeMiniUpper
, (int32_t)(sizeof(beforeMiniUpper
)/U_SIZEOF_UCHAR
));
182 s
.setCharAt(0, beforeMiniUpper
[0]).toUpper("");
183 if( s
.length()!=(sizeof(miniUpper
)/U_SIZEOF_UCHAR
) ||
184 s
!=UnicodeString(FALSE
, miniUpper
, s
.length())
186 errln("error in toUpper(root locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, miniUpper
, (int32_t)(sizeof(miniUpper
)/U_SIZEOF_UCHAR
)) + "\"");
190 // test some supplementary characters (>= Unicode 3.1)
// U+0001043C and U+00010414 are expected to be a lowercase/uppercase pair.
// toLower() and toUpper() are called without a locale argument here.
195 deseretInput
=UnicodeString("\\U0001043C\\U00010414", "").unescape(),
196 deseretLower
=UnicodeString("\\U0001043C\\U0001043C", "").unescape(),
197 deseretUpper
=UnicodeString("\\U00010414\\U00010414", "").unescape();
198 (t
=deseretInput
).toLower();
199 if(t
!=deseretLower
) {
200 errln("error lowercasing Deseret (plane 1) characters");
202 (t
=deseretInput
).toUpper();
203 if(t
!=deseretUpper
) {
204 errln("error uppercasing Deseret (plane 1) characters");
208 // test some more cases that looked like problems
// Mixed input mapped with the "en" locale.
// NOTE(review): the comparisons against ljLower / ljUpper are not visible in
// this listing; only the mapping calls and error messages remain.
213 ljInput
=UnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 \\U0001043C\\U00010414", "").unescape(),
214 ljLower
=UnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 \\U0001043C\\U0001043C", "").unescape(),
215 ljUpper
=UnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 \\U00010414\\U00010414", "").unescape();
216 (t
=ljInput
).toLower("en");
218 errln("error lowercasing LJ characters");
220 (t
=ljInput
).toUpper("en");
222 errln("error uppercasing LJ characters");
226 #if !UCONFIG_NO_NORMALIZATION
227 // some context-sensitive casing depends on normalization data being present
229 // Unicode 3.1.1 SpecialCasing tests
233 // sigmas preceded and/or followed by cased letters
// Only the third U+03A3 in the input is expected to lowercase to U+03C2; the
// others are expected to become U+03C3.
// NOTE(review): the comparison `if`s for this group are not visible in this
// listing.
235 sigmas
=UnicodeString("i\\u0307\\u03a3\\u0308j \\u0307\\u03a3\\u0308j i\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(),
236 sigmasLower
=UnicodeString("i\\u0307\\u03c3\\u0308j \\u0307\\u03c3\\u0308j i\\u00ad\\u03c2\\u0308 \\u0307\\u03c3\\u0308 ", "").unescape(),
237 sigmasUpper
=UnicodeString("I\\u0307\\u03a3\\u0308J \\u0307\\u03a3\\u0308J I\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape();
239 (t
=sigmas
).toLower();
241 errln("error in sigmas.toLower()=\"" + t
+ "\" expected \"" + sigmasLower
+ "\"");
244 (t
=sigmas
).toUpper(Locale(""));
246 errln("error in sigmas.toUpper()=\"" + t
+ "\" expected \"" + sigmasUpper
+ "\"");
249 // turkish & azerbaijani dotless i & dotted I
250 // remove dot above if there was a capital I before and there are no more accents above
// The same input is lowercased with "tr" (expected dotsTurkish) and with
// "de" (expected dotsDefault).
252 dots
=UnicodeString("I \\u0130 I\\u0307 I\\u0327\\u0307 I\\u0301\\u0307 I\\u0327\\u0307\\u0301", "").unescape(),
253 dotsTurkish
=UnicodeString("\\u0131 i i i\\u0327 \\u0131\\u0301\\u0307 i\\u0327\\u0301", "").unescape(),
254 dotsDefault
=UnicodeString("i i\\u0307 i\\u0307 i\\u0327\\u0307 i\\u0301\\u0307 i\\u0327\\u0307\\u0301", "").unescape();
256 (t
=dots
).toLower("tr");
258 errln("error in dots.toLower(tr)=\"" + t
+ "\" expected \"" + dotsTurkish
+ "\"");
261 (t
=dots
).toLower("de");
263 errln("error in dots.toLower(de)=\"" + t
+ "\" expected \"" + dotsDefault
+ "\"");
267 // more Unicode 3.1.1 tests
271 // lithuanian dot above in uppercasing
// The same input is uppercased with "lt" (expected dotsLithuanian) and with
// "de" (expected dotsDefault).
273 dots
=UnicodeString("a\\u0307 \\u0307 i\\u0307 j\\u0327\\u0307 j\\u0301\\u0307", "").unescape(),
274 dotsLithuanian
=UnicodeString("A\\u0307 \\u0307 I J\\u0327 J\\u0301\\u0307", "").unescape(),
275 dotsDefault
=UnicodeString("A\\u0307 \\u0307 I\\u0307 J\\u0327\\u0307 J\\u0301\\u0307", "").unescape();
277 (t
=dots
).toUpper("lt");
278 if(t
!=dotsLithuanian
) {
279 errln("error in dots.toUpper(lt)=\"" + t
+ "\" expected \"" + dotsLithuanian
+ "\"");
282 (t
=dots
).toUpper("de");
284 errln("error in dots.toUpper(de)=\"" + t
+ "\" expected \"" + dotsDefault
+ "\"");
287 // lithuanian adds dot above to i in lowercasing if there are more above accents
// NOTE(review): the toLower("lt") / toLower("de") calls and their
// comparisons are not visible in this listing; only the error messages remain.
289 i
=UnicodeString("I I\\u0301 J J\\u0301 \\u012e \\u012e\\u0301 \\u00cc\\u00cd\\u0128", "").unescape(),
290 iLithuanian
=UnicodeString("i i\\u0307\\u0301 j j\\u0307\\u0301 \\u012f \\u012f\\u0307\\u0301 i\\u0307\\u0300i\\u0307\\u0301i\\u0307\\u0303", "").unescape(),
291 iDefault
=UnicodeString("i i\\u0301 j j\\u0301 \\u012f \\u012f\\u0301 \\u00ec\\u00ed\\u0129", "").unescape();
295 errln("error in i.toLower(lt)=\"" + t
+ "\" expected \"" + iLithuanian
+ "\"");
300 errln("error in i.toLower(de)=\"" + t
+ "\" expected \"" + iDefault
+ "\"");
// Case folding: f is the expected result of default folding, g the expected
// result with U_FOLD_CASE_EXCLUDE_SPECIAL_I. The two differ only in how
// U+0130 folds: "i\u0307" in f, plain "i" in g.
// NOTE(review): the default foldCase() call and both comparisons are not
// visible in this listing.
309 s
=UnicodeString("A\\u00df\\u00b5\\ufb03\\U0001040c\\u0130\\u0131", "").unescape(),
310 f
=UnicodeString("ass\\u03bcffi\\U00010434i\\u0307\\u0131", "").unescape(),
311 g
=UnicodeString("ass\\u03bcffi\\U00010434i\\u0131", "").unescape(),
316 errln("error in foldCase(\"" + s
+ "\", default)=\"" + t
+ "\" but expected \"" + f
+ "\"");
319 // alternate handling for dotted I/dotless i (U+0130, U+0131)
320 (t
=s
).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I
);
322 errln("error in foldCase(\"" + s
+ "\", U_FOLD_CASE_EXCLUDE_SPECIAL_I)=\"" + t
+ "\" but expected \"" + g
+ "\"");
327 // data-driven case mapping tests ------------------------------------------ ***
337 // names of TestData children in casing.txt
338 static const char *const dataNames
[TEST_COUNT
+1]={
// Runs one data-driven case-mapping test twice: first through the
// UnicodeString member functions, then through the UTF-8 ucasemap_utf8*()
// C functions, reporting an error when a result is wrong.
//
// Parameters visible in this listing:
//   input    - string to be case-mapped.
//   output   - expected result; the UTF-8 pass compares its result with it.
//   iter     - untyped pointer that the code casts to BreakIterator* /
//              UBreakIterator* for title casing.
//   localeID - locale ID used to build the Locale and to open the UCaseMap.
//   options  - option bits passed to toTitle(), foldCase() and
//              ucasemap_open().
//
// NOTE(review): this listing is missing lines, including the parameter line
// that presumably declares `whichCase` (used further down), the switch/case
// labels selecting the operation, and the comparison of the first pass.
347 StringCaseTest::TestCasingImpl(const UnicodeString
&input
,
348 const UnicodeString
&output
,
350 void *iter
, const char *localeID
, uint32_t options
) {
352 UnicodeString result
;
354 Locale
locale(localeID
);
// ---- Pass 1: UnicodeString API. One of the following calls is applied to
// `result`; the selecting switch is not visible in this listing.
360 result
.toLower(locale
);
364 result
.toUpper(locale
);
366 #if !UCONFIG_NO_BREAK_ITERATION
369 result
.toTitle((BreakIterator
*)iter
, locale
, options
);
374 result
.foldCase(options
);
378 break; // won't happen
381 dataerrln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name
);
// For title casing with no options, also exercise the toTitle() overload
// that takes no options argument.
383 #if !UCONFIG_NO_BREAK_ITERATION
384 if(whichCase
==TEST_TITLE
&& options
==0) {
386 result
.toTitle((BreakIterator
*)iter
, locale
);
388 dataerrln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res");
// ---- Pass 2: UTF-8 API. Input and output live in fixed 100-byte buffers.
// NOTE(review): test data longer than that would presumably surface as an
// error in errorCode rather than be truncated silently - confirm.
394 char utf8In
[100], utf8Out
[100];
395 int32_t utf8InLength
, utf8OutLength
, resultLength
;
// errorCode is passed to every ICU call below and checked once at the end.
// csm holds the UCaseMap opened for this locale and option set.
398 IcuTestErrorCode
errorCode(*this, "TestCasingImpl");
399 LocalUCaseMapPointer
csm(ucasemap_open(localeID
, options
, errorCode
));
400 #if !UCONFIG_NO_BREAK_ITERATION
402 // Clone the break iterator so that the UCaseMap can safely adopt it.
403 int32_t size
=1; // Not 0 because that only gives preflighting.
404 UBreakIterator
*clone
=ubrk_safeClone((UBreakIterator
*)iter
, NULL
, &size
, errorCode
);
405 ucasemap_setBreakIterator(csm
.getAlias(), clone
, errorCode
);
// Convert the UTF-16 input to UTF-8 for the ucasemap_utf8*() functions.
409 u_strToUTF8(utf8In
, (int32_t)sizeof(utf8In
), &utf8InLength
, input
.getBuffer(), input
.length(), errorCode
);
// One of the four mappings below runs; `name` records which function was
// used so the error messages at the end can identify it.
412 name
="ucasemap_utf8ToLower";
413 utf8OutLength
=ucasemap_utf8ToLower(csm
.getAlias(),
414 utf8Out
, (int32_t)sizeof(utf8Out
),
415 utf8In
, utf8InLength
, errorCode
);
418 name
="ucasemap_utf8ToUpper";
419 utf8OutLength
=ucasemap_utf8ToUpper(csm
.getAlias(),
420 utf8Out
, (int32_t)sizeof(utf8Out
),
421 utf8In
, utf8InLength
, errorCode
);
423 #if !UCONFIG_NO_BREAK_ITERATION
425 name
="ucasemap_utf8ToTitle";
426 utf8OutLength
=ucasemap_utf8ToTitle(csm
.getAlias(),
427 utf8Out
, (int32_t)sizeof(utf8Out
),
428 utf8In
, utf8InLength
, errorCode
);
432 name
="ucasemap_utf8FoldCase";
433 utf8OutLength
=ucasemap_utf8FoldCase(csm
.getAlias(),
434 utf8Out
, (int32_t)sizeof(utf8Out
),
435 utf8In
, utf8InLength
, errorCode
);
440 break; // won't happen
// Convert the UTF-8 output back into `result` through its writable buffer.
// The string length is set to 0 if any step failed.
442 buffer
=result
.getBuffer(utf8OutLength
);
443 u_strFromUTF8(buffer
, result
.getCapacity(), &resultLength
, utf8Out
, utf8OutLength
, errorCode
);
444 result
.releaseBuffer(errorCode
.isSuccess() ? resultLength
: 0);
// Report an ICU error first; otherwise compare with the expected output and
// print both strings on a mismatch.
446 if(errorCode
.isFailure()) {
447 errcheckln(errorCode
, "error: %s() got an error for a test case from casing.res - %s", name
, u_errorName(errorCode
));
449 } else if(result
!=output
) {
450 errln("error: %s() got a wrong result for a test case from casing.res", name
);
451 errln("expected \"" + output
+ "\" got \"" + result
+ "\"" );
456 StringCaseTest::TestCasing() {
457 UErrorCode status
= U_ZERO_ERROR
;
458 #if !UCONFIG_NO_BREAK_ITERATION
459 LocalUBreakIteratorPointer iter
;
462 UnicodeString locale
, input
, output
, optionsString
, result
;
464 int32_t whichCase
, type
;
465 LocalPointer
<TestDataModule
> driver(TestDataModule::getTestDataModule("casing", *this, status
));
466 if(U_SUCCESS(status
)) {
467 for(whichCase
=0; whichCase
<TEST_COUNT
; ++whichCase
) {
468 #if UCONFIG_NO_BREAK_ITERATION
469 if(whichCase
==TEST_TITLE
) {
473 LocalPointer
<TestData
> casingTest(driver
->createTestData(dataNames
[whichCase
], status
));
474 if(U_FAILURE(status
)) {
475 errln("TestCasing failed to createTestData(%s) - %s", dataNames
[whichCase
], u_errorName(status
));
478 const DataMap
*myCase
= NULL
;
479 while(casingTest
->nextCase(myCase
, status
)) {
480 input
= myCase
->getString("Input", status
);
481 output
= myCase
->getString("Output", status
);
483 if(whichCase
!=TEST_FOLD
) {
484 locale
= myCase
->getString("Locale", status
);
486 locale
.extract(0, 0x7fffffff, cLocaleID
, sizeof(cLocaleID
), "");
488 #if !UCONFIG_NO_BREAK_ITERATION
489 if(whichCase
==TEST_TITLE
) {
490 type
= myCase
->getInt("Type", status
);
492 iter
.adoptInstead(ubrk_open((UBreakIteratorType
)type
, cLocaleID
, NULL
, 0, &status
));
493 } else if(type
==-2) {
494 // Open a trivial break iterator that only delivers { 0, length }
495 // or even just { 0 } as boundaries.
496 static const UChar rules
[] = { 0x2e, 0x2a, 0x3b }; // ".*;"
497 UParseError parseError
;
498 iter
.adoptInstead(ubrk_openRules(rules
, LENGTHOF(rules
), NULL
, 0, &parseError
, &status
));
503 if(whichCase
==TEST_TITLE
|| whichCase
==TEST_FOLD
) {
504 optionsString
= myCase
->getString("Options", status
);
505 if(optionsString
.indexOf((UChar
)0x54)>=0) { // T
506 options
|=U_FOLD_CASE_EXCLUDE_SPECIAL_I
;
508 if(optionsString
.indexOf((UChar
)0x4c)>=0) { // L
509 options
|=U_TITLECASE_NO_LOWERCASE
;
511 if(optionsString
.indexOf((UChar
)0x41)>=0) { // A
512 options
|=U_TITLECASE_NO_BREAK_ADJUSTMENT
;
516 if(U_FAILURE(status
)) {
517 dataerrln("error: TestCasing() setup failed for %s test case from casing.res: %s", dataNames
[whichCase
], u_errorName(status
));
518 status
= U_ZERO_ERROR
;
520 #if UCONFIG_NO_BREAK_ITERATION
521 LocalPointer
<UMemory
> iter
;
523 TestCasingImpl(input
, output
, whichCase
, iter
.getAlias(), cLocaleID
, options
);
526 #if !UCONFIG_NO_BREAK_ITERATION
527 iter
.adoptInstead(NULL
);
533 #if !UCONFIG_NO_BREAK_ITERATION
534 // more tests for API coverage
536 input
=UNICODE_STRING_SIMPLE("sTrA\\u00dfE").unescape();
537 (result
=input
).toTitle(NULL
);
538 if(result
!=UNICODE_STRING_SIMPLE("Stra\\u00dfe").unescape()) {
539 dataerrln("UnicodeString::toTitle(NULL) failed.");