2 *******************************************************************************
4 * Copyright (C) 2002-2005, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: strcase.cpp
10 * tab size: 8 (not used)
13 * created on: 2002mar12
14 * created by: Markus W. Scherer
16 * Test file for string casing C++ API functions.
19 #include "unicode/uchar.h"
20 #include "unicode/ures.h"
21 #include "unicode/uloc.h"
22 #include "unicode/locid.h"
23 #include "unicode/ubrk.h"
24 #include "unicode/unistr.h"
25 #include "unicode/ucasemap.h"
27 #include "unicode/tstdtmod.h"
// Destructor: the body is empty, so no cleanup is performed here.
29 StringCaseTest::~StringCaseTest() {}
// Test dispatcher for this suite.
//   index - selects which test is addressed
//   exec  - when true, the selected test is actually run (and a suite banner
//           is logged); when false only `name` is filled in
//   name  - output: set to the selected test's name, or "" for an unknown
//           index (the existing comment notes this is what ends the loop)
//   par   - unused (parameter name is commented out)
// NOTE(review): the listing skips inner line numbers here (the switch header
// and the label for the TestCasing case are not visible) - confirm against
// the complete file before editing.
32 StringCaseTest::runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char * /*par*/) {
33 if (exec
) logln("TestSuite StringCaseTest: ");
// index 0: the hard-coded case conversion tests
35 case 0: name
= "TestCaseConversion"; if (exec
) TestCaseConversion(); break;
// TestCasing() is only called when break iteration is compiled in
38 #if !UCONFIG_NO_BREAK_ITERATION
39 if(exec
) TestCasing();
// any other index: report an empty name
43 default: name
= ""; break; //needed to end loop
// Hard-coded tests for UnicodeString::toLower(), toUpper() and foldCase().
// Each step case-maps a string (with the root, "tr", "de", "el", "en" or "lt"
// locale as shown at the call), compares the result with a fixed expected
// string, and reports a mismatch through errln().
// NOTE(review): the listing skips inner line numbers throughout this function
// (several declarations, conditions, calls and closing braces are not
// visible) - confirm against the complete file before editing.
48 StringCaseTest::TestCaseConversion()
// Fixed UTF-16 strings used as inputs and expected results by the Greek and
// Turkish checks further down.
50 static const UChar uppercaseGreek
[] =
51 { 0x399, 0x395, 0x3a3, 0x3a5, 0x3a3, 0x20, 0x03a7, 0x3a1, 0x399, 0x3a3, 0x3a4,
55 static const UChar lowercaseGreek
[] =
56 { 0x3b9, 0x3b5, 0x3c3, 0x3c5, 0x3c2, 0x20, 0x03c7, 0x3c1, 0x3b9, 0x3c3, 0x3c4,
60 static const UChar lowercaseTurkish
[] =
61 { 0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0x2c, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x63, 0x6f,
62 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x0131, 0x6e, 0x6f, 0x70, 0x6c, 0x65, 0x21, 0 };
64 static const UChar uppercaseTurkish
[] =
65 { 0x54, 0x4f, 0x50, 0x4b, 0x41, 0x50, 0x49, 0x20, 0x50, 0x41, 0x4c, 0x41, 0x43, 0x45, 0x2c, 0x20,
66 0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4c, 0 };
68 UnicodeString expectedResult
;
// Input for the lowercasing checks: U+0130 followed by ASCII text.
71 test3
+= (UChar32
)0x0130;
72 test3
+= "STANBUL, NOT CONSTANTINOPLE!";
74 UnicodeString
test4(test3
);
// 1. lowercase with the root locale; the expected text starts with "i" + U+0307
75 test4
.toLower(Locale(""));
76 expectedResult
= UnicodeString("i\\u0307stanbul, not constantinople!", "").unescape();
77 if (test4
!= expectedResult
)
78 errln("1. toLower failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// 2. lowercase with the Turkish locale; expected text is lowercaseTurkish
81 test4
.toLower(Locale("tr", "TR"));
82 expectedResult
= lowercaseTurkish
;
83 if (test4
!= expectedResult
)
84 errln("2. toLower failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// Input for the uppercasing checks: ends in U+0131 followed by " palace, istanbul".
87 test3
+= (UChar32
)0x0131;
88 test3
+= " palace, istanbul";
// uppercase with the root locale
91 test4
.toUpper(Locale(""));
92 expectedResult
= "TOPKAPI PALACE, ISTANBUL";
93 if (test4
!= expectedResult
)
94 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// uppercase with the Turkish locale; expected text is uppercaseTurkish
97 test4
.toUpper(Locale("tr", "TR"));
98 expectedResult
= uppercaseTurkish
;
99 if (test4
!= expectedResult
)
100 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// German: each U+00DF in the input is expected to come out as "SS"
102 test3
= CharsToUnicodeString("S\\u00FC\\u00DFmayrstra\\u00DFe");
104 test3
.toUpper(Locale("de", "DE"));
105 expectedResult
= CharsToUnicodeString("S\\u00DCSSMAYRSTRASSE");
106 if (test3
!= expectedResult
)
107 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test3
+ "\".");
// Greek: uppercaseGreek lowercased with the "el"/"GR" locale must equal lowercaseGreek
109 test4
.replace(0, test4
.length(), uppercaseGreek
);
111 test4
.toLower(Locale("el", "GR"));
112 expectedResult
= lowercaseGreek
;
113 if (test4
!= expectedResult
)
114 errln("toLower failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// Greek, reverse direction: start from lowercaseGreek, expect uppercaseGreek
// (the toUpper call itself is not visible in this listing)
116 test4
.replace(0, test4
.length(), lowercaseGreek
);
119 expectedResult
= uppercaseGreek
;
120 if (test4
!= expectedResult
)
121 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
123 // more string case mapping tests with the new implementation
// Input ("before...") and expected arrays. Apart from the "mini" pair, each
// array ends with the same two code units 0xd93f 0xdfff, i.e. that pair is
// expected to be left unchanged by the mapping.
127 beforeLower
[]= { 0x61, 0x42, 0x49, 0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff },
128 lowerRoot
[]= { 0x61, 0x62, 0x69, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
129 lowerTurkish
[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
131 beforeUpper
[]= { 0x61, 0x42, 0x69, 0x3c2, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xfb03, 0xfb03, 0xd93f, 0xdfff },
132 upperRoot
[]= { 0x41, 0x42, 0x49, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
133 upperTurkish
[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
135 beforeMiniUpper
[]= { 0xdf, 0x61 },
136 miniUpper
[]= { 0x53, 0x53, 0x41 };
// In the checks below sizeof(array)/U_SIZEOF_UCHAR is used as the array's
// length, and each check compares both the length and the contents.
// NOTE(review): UnicodeString(FALSE, array, length) is presumably the
// aliasing (non-copying) constructor - confirm against the UnicodeString docs.
140 /* lowercase with root locale */
141 s
=UnicodeString(FALSE
, beforeLower
, (int32_t)(sizeof(beforeLower
)/U_SIZEOF_UCHAR
));
143 if( s
.length()!=(sizeof(lowerRoot
)/U_SIZEOF_UCHAR
) ||
144 s
!=UnicodeString(FALSE
, lowerRoot
, s
.length())
146 errln("error in toLower(root locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, lowerRoot
, (int32_t)(sizeof(lowerRoot
)/U_SIZEOF_UCHAR
)) + "\"");
149 /* lowercase with turkish locale */
150 s
=UnicodeString(FALSE
, beforeLower
, (int32_t)(sizeof(beforeLower
)/U_SIZEOF_UCHAR
));
// setCharAt() rewrites position 0 with the value it already has before the
// case mapping is applied.
// NOTE(review): presumably this forces the string to own a writable copy - confirm.
151 s
.setCharAt(0, beforeLower
[0]).toLower(Locale("tr"));
152 if( s
.length()!=(sizeof(lowerTurkish
)/U_SIZEOF_UCHAR
) ||
153 s
!=UnicodeString(FALSE
, lowerTurkish
, s
.length())
155 errln("error in toLower(turkish locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, lowerTurkish
, (int32_t)(sizeof(lowerTurkish
)/U_SIZEOF_UCHAR
)) + "\"");
158 /* uppercase with root locale */
159 s
=UnicodeString(FALSE
, beforeUpper
, (int32_t)(sizeof(beforeUpper
)/U_SIZEOF_UCHAR
));
160 s
.setCharAt(0, beforeUpper
[0]).toUpper(Locale(""));
161 if( s
.length()!=(sizeof(upperRoot
)/U_SIZEOF_UCHAR
) ||
162 s
!=UnicodeString(FALSE
, upperRoot
, s
.length())
164 errln("error in toUpper(root locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, upperRoot
, (int32_t)(sizeof(upperRoot
)/U_SIZEOF_UCHAR
)) + "\"");
167 /* uppercase with turkish locale */
168 s
=UnicodeString(FALSE
, beforeUpper
, (int32_t)(sizeof(beforeUpper
)/U_SIZEOF_UCHAR
));
169 s
.toUpper(Locale("tr"));
170 if( s
.length()!=(sizeof(upperTurkish
)/U_SIZEOF_UCHAR
) ||
171 s
!=UnicodeString(FALSE
, upperTurkish
, s
.length())
173 errln("error in toUpper(turkish locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, upperTurkish
, (int32_t)(sizeof(upperTurkish
)/U_SIZEOF_UCHAR
)) + "\"");
176 /* uppercase a short string with root locale */
177 s
=UnicodeString(FALSE
, beforeMiniUpper
, (int32_t)(sizeof(beforeMiniUpper
)/U_SIZEOF_UCHAR
));
178 s
.setCharAt(0, beforeMiniUpper
[0]).toUpper("");
179 if( s
.length()!=(sizeof(miniUpper
)/U_SIZEOF_UCHAR
) ||
180 s
!=UnicodeString(FALSE
, miniUpper
, s
.length())
182 errln("error in toUpper(root locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, miniUpper
, (int32_t)(sizeof(miniUpper
)/U_SIZEOF_UCHAR
)) + "\"");
186 // test some supplementary characters (>= Unicode 3.1)
// The input holds U+1043C followed by U+10414; lowercasing is expected to
// yield U+1043C twice and uppercasing U+10414 twice.
191 deseretInput
=UnicodeString("\\U0001043C\\U00010414", "").unescape(),
192 deseretLower
=UnicodeString("\\U0001043C\\U0001043C", "").unescape(),
193 deseretUpper
=UnicodeString("\\U00010414\\U00010414", "").unescape();
194 (t
=deseretInput
).toLower();
195 if(t
!=deseretLower
) {
196 errln("error lowercasing Deseret (plane 1) characters");
198 (t
=deseretInput
).toUpper();
199 if(t
!=deseretUpper
) {
200 errln("error uppercasing Deseret (plane 1) characters");
204 // test some more cases that looked like problems
209 ljInput
=UnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 \\U0001043C\\U00010414", "").unescape(),
210 ljLower
=UnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 \\U0001043C\\U0001043C", "").unescape(),
211 ljUpper
=UnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 \\U00010414\\U00010414", "").unescape();
// Both mappings of ljInput use the "en" locale; the comparisons against
// ljLower/ljUpper are not visible in this listing.
212 (t
=ljInput
).toLower("en");
214 errln("error lowercasing LJ characters");
216 (t
=ljInput
).toUpper("en");
218 errln("error uppercasing LJ characters");
222 #if !UCONFIG_NO_NORMALIZATION
223 // some context-sensitive casing depends on normalization data being present
225 // Unicode 3.1.1 SpecialCasing tests
229 // sigmas preceded and/or followed by cased letters
// In sigmasLower the third capital sigma (U+03A3) maps to U+03C2 while the
// other three map to U+03C3; sigmasUpper keeps every U+03A3.
231 sigmas
=UnicodeString("i\\u0307\\u03a3\\u0308j \\u0307\\u03a3\\u0308j i\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(),
232 sigmasLower
=UnicodeString("i\\u0307\\u03c3\\u0308j \\u0307\\u03c3\\u0308j i\\u00ad\\u03c2\\u0308 \\u0307\\u03c3\\u0308 ", "").unescape(),
233 sigmasUpper
=UnicodeString("I\\u0307\\u03a3\\u0308J \\u0307\\u03a3\\u0308J I\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape();
235 (t
=sigmas
).toLower();
237 errln("error in sigmas.toLower()=\"" + t
+ "\" expected \"" + sigmasLower
+ "\"");
240 (t
=sigmas
).toUpper(Locale(""));
242 errln("error in sigmas.toUpper()=\"" + t
+ "\" expected \"" + sigmasUpper
+ "\"");
245 // turkish & azerbaijani dotless i & dotted I
246 // remove dot above if there was a capital I before and there are no more accents above
248 dots
=UnicodeString("I \\u0130 I\\u0307 I\\u0327\\u0307 I\\u0301\\u0307 I\\u0327\\u0307\\u0301", "").unescape(),
249 dotsTurkish
=UnicodeString("\\u0131 i i i\\u0327 \\u0131\\u0301\\u0307 i\\u0327\\u0301", "").unescape(),
250 dotsDefault
=UnicodeString("i i\\u0307 i\\u0307 i\\u0327\\u0307 i\\u0301\\u0307 i\\u0327\\u0307\\u0301", "").unescape();
// "tr" is reported against dotsTurkish, "de" against dotsDefault
252 (t
=dots
).toLower("tr");
254 errln("error in dots.toLower(tr)=\"" + t
+ "\" expected \"" + dotsTurkish
+ "\"");
257 (t
=dots
).toLower("de");
259 errln("error in dots.toLower(de)=\"" + t
+ "\" expected \"" + dotsDefault
+ "\"");
263 // more Unicode 3.1.1 tests
267 // lithuanian dot above in uppercasing
// dotsLithuanian drops U+0307 after I and after J+U+0327, but keeps it after
// A, after the space, and after J+U+0301; dotsDefault keeps every U+0307.
269 dots
=UnicodeString("a\\u0307 \\u0307 i\\u0307 j\\u0327\\u0307 j\\u0301\\u0307", "").unescape(),
270 dotsLithuanian
=UnicodeString("A\\u0307 \\u0307 I J\\u0327 J\\u0301\\u0307", "").unescape(),
271 dotsDefault
=UnicodeString("A\\u0307 \\u0307 I\\u0307 J\\u0327\\u0307 J\\u0301\\u0307", "").unescape();
273 (t
=dots
).toUpper("lt");
274 if(t
!=dotsLithuanian
) {
275 errln("error in dots.toUpper(lt)=\"" + t
+ "\" expected \"" + dotsLithuanian
+ "\"");
278 (t
=dots
).toUpper("de");
280 errln("error in dots.toUpper(de)=\"" + t
+ "\" expected \"" + dotsDefault
+ "\"");
283 // lithuanian adds dot above to i in lowercasing if there are more above accents
285 i
=UnicodeString("I I\\u0301 J J\\u0301 \\u012e \\u012e\\u0301 \\u00cc\\u00cd\\u0128", "").unescape(),
286 iLithuanian
=UnicodeString("i i\\u0307\\u0301 j j\\u0307\\u0301 \\u012f \\u012f\\u0307\\u0301 i\\u0307\\u0300i\\u0307\\u0301i\\u0307\\u0303", "").unescape(),
287 iDefault
=UnicodeString("i i\\u0301 j j\\u0301 \\u012f \\u012f\\u0301 \\u00ec\\u00ed\\u0129", "").unescape();
// The toLower("lt")/toLower("de") calls and their comparisons are not visible
// in this listing; only the failure reports remain.
291 errln("error in i.toLower(lt)=\"" + t
+ "\" expected \"" + iLithuanian
+ "\"");
296 errln("error in i.toLower(de)=\"" + t
+ "\" expected \"" + iDefault
+ "\"");
// Case folding of s: f is the expected result reported for the default
// folding, g for U_FOLD_CASE_EXCLUDE_SPECIAL_I. They differ only near the
// end: f has "i" + U+0307 where g has a plain "i".
305 s
=UnicodeString("A\\u00df\\u00b5\\ufb03\\U0001040c\\u0130\\u0131", "").unescape(),
306 f
=UnicodeString("ass\\u03bcffi\\U00010434i\\u0307\\u0131", "").unescape(),
307 g
=UnicodeString("ass\\u03bcffi\\U00010434i\\u0131", "").unescape(),
312 errln("error in foldCase(\"" + s
+ "\", default)=\"" + t
+ "\" but expected \"" + f
+ "\"");
315 // alternate handling for dotted I/dotless i (U+0130, U+0131)
316 (t
=s
).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I
);
318 errln("error in foldCase(\"" + s
+ "\", U_FOLD_CASE_EXCLUDE_SPECIAL_I)=\"" + t
+ "\" but expected \"" + g
+ "\"");
323 // data-driven case mapping tests ------------------------------------------ ***
328 #if !UCONFIG_NO_BREAK_ITERATION
334 // names of TestData children in casing.txt
// Declared with TEST_COUNT+1 entries; TestCasing() indexes this table with
// its whichCase loop counter to pick the TestData child to load.
// NOTE(review): the initializer and the constants it is indexed by are not
// visible in this listing.
335 static const char *const dataNames
[TEST_COUNT
+1]={
338 #if !UCONFIG_NO_BREAK_ITERATION
// Runs one lower/upper-casing test case through two APIs and reports a
// failure through errln() when a result differs from `output`:
//   1. UnicodeString::toLower()/toUpper() with Locale(localeID)
//   2. the UTF-8 functions ucasemap_utf8ToLower()/ucasemap_utf8ToUpper()
// Parameters visible here:
//   input    - text to case-map
//   output   - expected result
//   localeID - locale for both paths
//   options  - passed to ucasemap_open()
// NOTE(review): the listing skips inner line numbers (one parameter line, the
// switch headers and several declarations are not visible) - confirm against
// the complete file before editing.
345 StringCaseTest::TestCasingImpl(const UnicodeString
&input
,
346 const UnicodeString
&output
,
348 const char *localeID
, uint32_t options
) {
350 UnicodeString result
;
// Path 1: UnicodeString API
357 result
.toLower(Locale(localeID
));
361 result
.toUpper(Locale(localeID
));
365 break; // won't happen
368 errln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name
);
// Path 2: UTF-8 API. Input and output use fixed 100-byte buffers.
372 char utf8In
[100], utf8Out
[100];
373 int32_t utf8InLength
, utf8OutLength
, resultLength
;
377 UErrorCode errorCode
;
379 errorCode
=U_ZERO_ERROR
;
380 csm
=ucasemap_open(localeID
, options
, &errorCode
);
// Convert the UTF-16 input to UTF-8 for the ucasemap_utf8* calls.
382 u_strToUTF8(utf8In
, (int32_t)sizeof(utf8In
), &utf8InLength
, input
.getBuffer(), input
.length(), &errorCode
);
// `name` records which function ran so the error messages below can cite it.
385 name
="ucasemap_utf8ToLower";
386 utf8OutLength
=ucasemap_utf8ToLower(csm
,
387 utf8Out
, (int32_t)sizeof(utf8Out
),
388 utf8In
, utf8InLength
, &errorCode
);
391 name
="ucasemap_utf8ToUpper";
392 utf8OutLength
=ucasemap_utf8ToUpper(csm
,
393 utf8Out
, (int32_t)sizeof(utf8Out
),
394 utf8In
, utf8InLength
, &errorCode
);
399 break; // won't happen
// Convert the UTF-8 output back to UTF-16 directly into result's buffer so it
// can be compared with `output`; on failure the buffer is released with
// length 0.
401 buffer
=result
.getBuffer(utf8OutLength
);
402 u_strFromUTF8(buffer
, result
.getCapacity(), &resultLength
, utf8Out
, utf8OutLength
, &errorCode
);
403 result
.releaseBuffer(U_SUCCESS(errorCode
) ? resultLength
: 0);
// An API error is reported separately from a wrong result.
405 if(U_FAILURE(errorCode
)) {
406 errln("error: %s() got an error for a test case from casing.res - %s", name
, u_errorName(errorCode
));
407 } else if(result
!=output
) {
408 errln("error: %s() got a wrong result for a test case from casing.res", name
);
// Title-casing counterpart of TestCasingImpl(); this definition sits behind
// the !UCONFIG_NO_BREAK_ITERATION guard on the next line.
//   input    - text to title-case
//   output   - expected result named in the failure message
//   localeID - locale passed to toTitle() via Locale(localeID)
//   iter     - break iterator handed to toTitle() after a cast to BreakIterator*
// NOTE(review): the listing skips inner line numbers (the line that prepares
// `result` and the comparison before errln() are not visible).
413 #if !UCONFIG_NO_BREAK_ITERATION
416 StringCaseTest::TestTitleCasing(const UnicodeString
&input
,
417 const UnicodeString
&output
,
418 const char *localeID
,
419 UBreakIterator
*iter
) {
420 UnicodeString result
;
// The C-level UBreakIterator* is cast to the C++ BreakIterator* that toTitle() takes.
423 result
.toTitle((BreakIterator
*)iter
, Locale(localeID
));
425 errln("error: UnicodeString.toTitle() got a wrong result for a test case from casing.res");
// Data-driven casing test. Loads the "casing" test data module, then for each
// test kind (whichCase from 0 up to TEST_COUNT-1) loads the TestData child
// named dataNames[whichCase] and walks its cases. Every case supplies
// "Locale", "Input" and "Output" strings; title-casing cases also supply an
// integer "Type" used to open a break iterator. The case is then handed to
// TestCasingImpl() or TestTitleCasing(). A final section adds API coverage
// for toTitle(NULL).
// NOTE(review): the listing skips inner line numbers (the cLocaleID
// declaration, the switch headers and the cleanup code are not visible) -
// confirm against the complete file before editing.
432 StringCaseTest::TestCasing() {
433 UErrorCode status
= U_ZERO_ERROR
;
434 #if !UCONFIG_NO_BREAK_ITERATION
435 UBreakIterator
*iter
;
438 UnicodeString locale
, input
, output
, result
;
439 int32_t whichCase
, type
;
440 TestDataModule
*driver
= TestDataModule::getTestDataModule("casing", *this, status
);
441 if(U_SUCCESS(status
)) {
442 for(whichCase
=0; whichCase
<TEST_COUNT
; ++whichCase
) {
443 TestData
*casingTest
= driver
->createTestData(dataNames
[whichCase
], status
);
444 if(U_FAILURE(status
)) {
445 errln("TestCasing failed to createTestData(%s) - %s", dataNames
[whichCase
], u_errorName(status
));
448 const DataMap
*myCase
= NULL
;
449 while(casingTest
->nextCase(myCase
, status
)) {
// Extract the locale into the char buffer cLocaleID, since the C-style calls
// below take a const char * locale ID.
450 locale
= myCase
->getString("Locale", status
);
451 locale
.extract(0, 0x7fffffff, cLocaleID
, sizeof(cLocaleID
), "");
453 input
= myCase
->getString("Input", status
);
454 output
= myCase
->getString("Output", status
);
456 #if !UCONFIG_NO_BREAK_ITERATION
// Title casing additionally needs a break iterator of the type named by the
// test data.
458 if(whichCase
==TEST_TITLE
) {
459 type
= myCase
->getInt("Type", status
);
461 iter
=ubrk_open((UBreakIteratorType
)type
, cLocaleID
, NULL
, 0, &status
);
// Report a setup failure for this case, then reset status.
466 if(U_FAILURE(status
)) {
467 errln("error: TestCasing() setup failed for %s test case from casing.res: %s", dataNames
[whichCase
], u_errorName(status
));
468 status
= U_ZERO_ERROR
;
// Lower/upper cases are run with options 0.
473 TestCasingImpl(input
, output
, whichCase
, cLocaleID
, 0);
475 #if !UCONFIG_NO_BREAK_ITERATION
477 TestTitleCasing(input
, output
, cLocaleID
, iter
);
481 break; // won't happen
485 #if !UCONFIG_NO_BREAK_ITERATION
496 #if !UCONFIG_NO_BREAK_ITERATION
497 // more tests for API coverage
// toTitle(NULL) on "sTrA\u00dfE" must produce "Stra\u00dfe".
499 input
=UNICODE_STRING_SIMPLE("sTrA\\u00dfE").unescape();
500 (result
=input
).toTitle(NULL
);
501 if(result
!=UNICODE_STRING_SIMPLE("Stra\\u00dfe").unescape()) {
502 errln("UnicodeString::toTitle(NULL) failed");