/*
*******************************************************************************
*
*   Copyright (C) 2002-2004, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  strcase.cpp
*   tab size:   8 (not used)
*
*   created on: 2002mar12
*   created by: Markus W. Scherer
*
*   Test file for string casing C++ API functions.
*/
19 #include "unicode/uchar.h"
20 #include "unicode/ures.h"
21 #include "unicode/uloc.h"
22 #include "unicode/locid.h"
23 #include "unicode/ubrk.h"
25 #include "unicode/tstdtmod.h"
// Destructor. The body is empty: nothing is released here.
27 StringCaseTest::~StringCaseTest() {}
30 StringCaseTest::runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char * /*par*/) {
31 if (exec
) logln("TestSuite StringCaseTest: ");
33 case 0: name
= "TestCaseConversion"; if (exec
) TestCaseConversion(); break;
35 name
= "TestTitleCasing";
36 #if !UCONFIG_NO_BREAK_ITERATION
37 if(exec
) TestTitleCasing();
41 default: name
= ""; break; //needed to end loop
// Exercises UnicodeString::toLower(), toUpper() and foldCase() against fixed
// expected strings and reports every mismatch through errln().
// Sections visible below, in order:
//   - Turkish, German and Greek samples (test3/test4 vs. expectedResult)
//   - UChar-array fixtures converted with default arguments, Locale("tr"), ""
//   - Deseret (plane 1) and LJ-digraph strings
//   - starting at the #if !UCONFIG_NO_NORMALIZATION directive: sigma,
//     Turkish/Azerbaijani dotted and dotless i, Lithuanian dot-above handling
//   - case folding, with and without U_FOLD_CASE_EXCLUDE_SPECIAL_I
// NOTE(review): the embedded line numbers skip values, so some statements
// (declarations, several conversion calls, closing braces, #endif) are not
// visible in this listing. The comments describe only what is shown.
46 StringCaseTest::TestCaseConversion()
// Fixture arrays. The two Greek initializers end on a trailing comma here and
// continue on lines that are not shown.
48 UChar uppercaseGreek
[] =
49 { 0x399, 0x395, 0x3a3, 0x3a5, 0x3a3, 0x20, 0x03a7, 0x3a1, 0x399, 0x3a3, 0x3a4,
53 UChar lowercaseGreek
[] =
54 { 0x3b9, 0x3b5, 0x3c3, 0x3c5, 0x3c2, 0x20, 0x03c7, 0x3c1, 0x3b9, 0x3c3, 0x3c4,
58 UChar lowercaseTurkish
[] =
59 { 0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0x2c, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x63, 0x6f,
60 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x0131, 0x6e, 0x6f, 0x70, 0x6c, 0x65, 0x21, 0 };
62 UChar uppercaseTurkish
[] =
63 { 0x54, 0x4f, 0x50, 0x4b, 0x41, 0x50, 0x49, 0x20, 0x50, 0x41, 0x4c, 0x41, 0x43, 0x45, 0x2c, 0x20,
64 0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4c, 0 };
66 UnicodeString expectedResult
;
// test3 receives U+0130 followed by ASCII text; test4 starts as a copy of it.
69 test3
+= (UChar32
)0x0130;
70 test3
+= "STANBUL, NOT CONSTANTINOPLE!";
72 UnicodeString
test4(test3
);
// Check 1 (the conversion call is not visible): the expected value is "i",
// U+0307, then the remaining text in lowercase.
74 expectedResult
= UnicodeString("i\\u0307stanbul, not constantinople!", "").unescape();
75 if (test4
!= expectedResult
)
76 errln("1. toLower failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// Check 2: lowercase with the tr_TR locale and compare with lowercaseTurkish.
79 test4
.toLower(Locale("tr", "TR"));
80 expectedResult
= lowercaseTurkish
;
81 if (test4
!= expectedResult
)
82 errln("2. toLower failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// Input for the uppercase checks: U+0131 and " palace, istanbul" are appended
// to test3.
85 test3
+= (UChar32
)0x0131;
86 test3
+= " palace, istanbul";
90 expectedResult
= "TOPKAPI PALACE, ISTANBUL";
91 if (test4
!= expectedResult
)
92 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// Uppercase with the tr_TR locale; the expected value is uppercaseTurkish.
95 test4
.toUpper(Locale("tr", "TR"));
96 expectedResult
= uppercaseTurkish
;
97 if (test4
!= expectedResult
)
98 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// German: each U+00DF in the input is expected to come out as "SS" under de_DE.
100 test3
= CharsToUnicodeString("S\\u00FC\\u00DFmayrstra\\u00DFe");
102 test3
.toUpper(Locale("de", "DE"));
103 expectedResult
= CharsToUnicodeString("S\\u00DCSSMAYRSTRASSE");
104 if (test3
!= expectedResult
)
105 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test3
+ "\".");
// Greek: load uppercaseGreek into test4, lowercase with el_GR and compare with
// lowercaseGreek.
107 test4
.replace(0, test4
.length(), uppercaseGreek
);
109 test4
.toLower(Locale("el", "GR"));
110 expectedResult
= lowercaseGreek
;
111 if (test4
!= expectedResult
)
112 errln("toLower failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// Greek in the other direction: start from lowercaseGreek and expect
// uppercaseGreek (the conversion call is not visible).
114 test4
.replace(0, test4
.length(), lowercaseGreek
);
117 expectedResult
= uppercaseGreek
;
118 if (test4
!= expectedResult
)
119 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
121 // more string case mapping tests with the new implementation
// These fixture arrays carry no terminating 0; the checks below take their
// lengths from sizeof(array)/U_SIZEOF_UCHAR.
125 beforeLower
[]= { 0x61, 0x42, 0x49, 0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff },
126 lowerRoot
[]= { 0x61, 0x62, 0x69, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
127 lowerTurkish
[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
129 beforeUpper
[]= { 0x61, 0x42, 0x69, 0x3c2, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xfb03, 0xfb03, 0xd93f, 0xdfff },
130 upperRoot
[]= { 0x41, 0x42, 0x49, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
131 upperTurkish
[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
133 beforeMiniUpper
[]= { 0xdf, 0x61 },
134 miniUpper
[]= { 0x53, 0x53, 0x41 };
// Pattern shared by the five checks below: build s from a fixture array,
// convert it, then test the length first. Because of ||, the content
// comparison only runs when the lengths already match.
138 /* lowercase with root locale */
139 s
=UnicodeString(FALSE
, beforeLower
, (int32_t)(sizeof(beforeLower
)/U_SIZEOF_UCHAR
));
141 if( s
.length()!=(sizeof(lowerRoot
)/U_SIZEOF_UCHAR
) ||
142 s
!=UnicodeString(FALSE
, lowerRoot
, s
.length())
144 errln("error in toLower(root locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, lowerRoot
, (int32_t)(sizeof(lowerRoot
)/U_SIZEOF_UCHAR
)) + "\"");
147 /* lowercase with turkish locale */
148 s
=UnicodeString(FALSE
, beforeLower
, (int32_t)(sizeof(beforeLower
)/U_SIZEOF_UCHAR
));
// NOTE(review): setCharAt(0, ...) rewrites the first unit with its own value;
// presumably this forces s to own a writable buffer before converting. Confirm.
149 s
.setCharAt(0, beforeLower
[0]).toLower(Locale("tr"));
150 if( s
.length()!=(sizeof(lowerTurkish
)/U_SIZEOF_UCHAR
) ||
151 s
!=UnicodeString(FALSE
, lowerTurkish
, s
.length())
153 errln("error in toLower(turkish locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, lowerTurkish
, (int32_t)(sizeof(lowerTurkish
)/U_SIZEOF_UCHAR
)) + "\"");
156 /* uppercase with root locale */
157 s
=UnicodeString(FALSE
, beforeUpper
, (int32_t)(sizeof(beforeUpper
)/U_SIZEOF_UCHAR
));
158 s
.setCharAt(0, beforeUpper
[0]).toUpper();
159 if( s
.length()!=(sizeof(upperRoot
)/U_SIZEOF_UCHAR
) ||
160 s
!=UnicodeString(FALSE
, upperRoot
, s
.length())
162 errln("error in toUpper(root locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, upperRoot
, (int32_t)(sizeof(upperRoot
)/U_SIZEOF_UCHAR
)) + "\"");
165 /* uppercase with turkish locale */
166 s
=UnicodeString(FALSE
, beforeUpper
, (int32_t)(sizeof(beforeUpper
)/U_SIZEOF_UCHAR
));
167 s
.toUpper(Locale("tr"));
168 if( s
.length()!=(sizeof(upperTurkish
)/U_SIZEOF_UCHAR
) ||
169 s
!=UnicodeString(FALSE
, upperTurkish
, s
.length())
171 errln("error in toUpper(turkish locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, upperTurkish
, (int32_t)(sizeof(upperTurkish
)/U_SIZEOF_UCHAR
)) + "\"");
174 /* uppercase a short string with root locale */
175 s
=UnicodeString(FALSE
, beforeMiniUpper
, (int32_t)(sizeof(beforeMiniUpper
)/U_SIZEOF_UCHAR
));
176 s
.setCharAt(0, beforeMiniUpper
[0]).toUpper("");
177 if( s
.length()!=(sizeof(miniUpper
)/U_SIZEOF_UCHAR
) ||
178 s
!=UnicodeString(FALSE
, miniUpper
, s
.length())
180 errln("error in toUpper(root locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, miniUpper
, (int32_t)(sizeof(miniUpper
)/U_SIZEOF_UCHAR
)) + "\"");
184 // test some supplementary characters (>= Unicode 3.1)
// The input mixes U+1043C and U+10414; lowercasing is expected to give two
// U+1043C, uppercasing two U+10414.
189 deseretInput
=UnicodeString("\\U0001043C\\U00010414", "").unescape(),
190 deseretLower
=UnicodeString("\\U0001043C\\U0001043C", "").unescape(),
191 deseretUpper
=UnicodeString("\\U00010414\\U00010414", "").unescape();
192 (t
=deseretInput
).toLower();
193 if(t
!=deseretLower
) {
194 errln("error lowercasing Deseret (plane 1) characters");
196 (t
=deseretInput
).toUpper();
197 if(t
!=deseretUpper
) {
198 errln("error uppercasing Deseret (plane 1) characters");
202 // test some more cases that looked like problems
207 ljInput
=UnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 \\U0001043C\\U00010414", "").unescape(),
208 ljLower
=UnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 \\U0001043C\\U0001043C", "").unescape(),
209 ljUpper
=UnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 \\U00010414\\U00010414", "").unescape();
// Both conversions use the "en" locale; the comparisons that guard the two
// errln() calls are not visible here.
210 (t
=ljInput
).toLower("en");
212 errln("error lowercasing LJ characters");
214 (t
=ljInput
).toUpper("en");
216 errln("error uppercasing LJ characters");
220 #if !UCONFIG_NO_NORMALIZATION
221 // some context-sensitive casing depends on normalization data being present
223 // Unicode 3.1.1 SpecialCasing tests
227 // sigmas preceded and/or followed by cased letters
229 sigmas
=UnicodeString("i\\u0307\\u03a3\\u0308j \\u0307\\u03a3\\u0308j i\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(),
230 sigmasLower
=UnicodeString("i\\u0307\\u03c3\\u0308j \\u0307\\u03c3\\u0308j i\\u00ad\\u03c2\\u0308 \\u0307\\u03c3\\u0308 ", "").unescape(),
231 sigmasUpper
=UnicodeString("I\\u0307\\u03a3\\u0308J \\u0307\\u03a3\\u0308J I\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape();
233 (t
=sigmas
).toLower();
235 errln("error in sigmas.toLower()=\"" + t
+ "\" expected \"" + sigmasLower
+ "\"");
238 (t
=sigmas
).toUpper();
240 errln("error in sigmas.toUpper()=\"" + t
+ "\" expected \"" + sigmasUpper
+ "\"");
243 // turkish & azerbaijani dotless i & dotted I
244 // remove dot above if there was a capital I before and there are no more accents above
246 dots
=UnicodeString("I \\u0130 I\\u0307 I\\u0327\\u0307 I\\u0301\\u0307 I\\u0327\\u0307\\u0301", "").unescape(),
247 dotsTurkish
=UnicodeString("\\u0131 i i i\\u0327 \\u0131\\u0301\\u0307 i\\u0327\\u0301", "").unescape(),
248 dotsDefault
=UnicodeString("i i\\u0307 i\\u0307 i\\u0327\\u0307 i\\u0301\\u0307 i\\u0327\\u0307\\u0301", "").unescape();
// The same input is lowercased under "tr" (expects dotsTurkish) and under "de"
// (expects dotsDefault).
250 (t
=dots
).toLower("tr");
252 errln("error in dots.toLower(tr)=\"" + t
+ "\" expected \"" + dotsTurkish
+ "\"");
255 (t
=dots
).toLower("de");
257 errln("error in dots.toLower(de)=\"" + t
+ "\" expected \"" + dotsDefault
+ "\"");
261 // more Unicode 3.1.1 tests
265 // lithuanian dot above in uppercasing
267 dots
=UnicodeString("a\\u0307 \\u0307 i\\u0307 j\\u0327\\u0307 j\\u0301\\u0307", "").unescape(),
268 dotsLithuanian
=UnicodeString("A\\u0307 \\u0307 I J\\u0327 J\\u0301\\u0307", "").unescape(),
269 dotsDefault
=UnicodeString("A\\u0307 \\u0307 I\\u0307 J\\u0327\\u0307 J\\u0301\\u0307", "").unescape();
271 (t
=dots
).toUpper("lt");
272 if(t
!=dotsLithuanian
) {
273 errln("error in dots.toUpper(lt)=\"" + t
+ "\" expected \"" + dotsLithuanian
+ "\"");
276 (t
=dots
).toUpper("de");
278 errln("error in dots.toUpper(de)=\"" + t
+ "\" expected \"" + dotsDefault
+ "\"");
281 // lithuanian adds dot above to i in lowercasing if there are more above accents
283 i
=UnicodeString("I I\\u0301 J J\\u0301 \\u012e \\u012e\\u0301 \\u00cc\\u00cd\\u0128", "").unescape(),
284 iLithuanian
=UnicodeString("i i\\u0307\\u0301 j j\\u0307\\u0301 \\u012f \\u012f\\u0307\\u0301 i\\u0307\\u0300i\\u0307\\u0301i\\u0307\\u0303", "").unescape(),
285 iDefault
=UnicodeString("i i\\u0301 j j\\u0301 \\u012f \\u012f\\u0301 \\u00ec\\u00ed\\u0129", "").unescape();
// Only the two failure messages of this section are visible; the toLower calls
// and comparisons are not shown.
289 errln("error in i.toLower(lt)=\"" + t
+ "\" expected \"" + iLithuanian
+ "\"");
294 errln("error in i.toLower(de)=\"" + t
+ "\" expected \"" + iDefault
+ "\"");
// Case folding fixtures: s is the input, f the expected result reported by the
// "default" message, g the expected result with U_FOLD_CASE_EXCLUDE_SPECIAL_I.
// f and g differ only in how U+0130 is represented ("i\u0307" versus "i").
303 s
=UnicodeString("A\\u00df\\u00b5\\ufb03\\U0001040c\\u0130\\u0131", "").unescape(),
304 f
=UnicodeString("ass\\u03bcffi\\U00010434i\\u0307\\u0131", "").unescape(),
305 g
=UnicodeString("ass\\u03bcffi\\U00010434i\\u0131", "").unescape(),
310 errln("error in foldCase(\"" + s
+ "\", default)=\"" + t
+ "\" but expected \"" + f
+ "\"");
313 // alternate handling for dotted I/dotless i (U+0130, U+0131)
314 (t
=s
).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I
);
316 errln("error in foldCase(\"" + s
+ "\", U_FOLD_CASE_EXCLUDE_SPECIAL_I)=\"" + t
+ "\" but expected \"" + g
+ "\"");
321 #if !UCONFIG_NO_BREAK_ITERATION
// Data-driven test for UnicodeString::toTitle().
// Visible flow of the first half: obtain the "casing" TestDataModule, create
// the "titlecasing" TestData, and for every case read "Locale", "Type" and
// "Input", open a UBreakIterator of that type for the locale, call toTitle()
// and compare against the "Output" string. A direct toTitle(NULL) call follows
// for API coverage.
// The second half walks the same "titlecasing" data through the ures_*
// resource-bundle API (ures_openDirect / ures_getByKey / ures_getByIndex).
// NOTE(review): the embedded line numbers skip values, so declarations such as
// cLocaleID and the first `type`, the cleanup code and any preprocessor guards
// are not visible here. The second half redeclares `result` and `iter`, so it
// is presumably compiled out or placed in a nested scope. Confirm against the
// full file.
324 StringCaseTest::TestTitleCasing() {
325 UErrorCode status
= U_ZERO_ERROR
;
326 UBreakIterator
*iter
;
328 UnicodeString locale
, input
, result
;
330 TestDataModule
*driver
= TestDataModule::getTestDataModule("casing", *this, status
);
331 if(U_SUCCESS(status
)) {
332 TestData
*casingTest
= driver
->createTestData("titlecasing", status
);
333 const DataMap
*myCase
= NULL
;
334 while(casingTest
->nextCase(myCase
, status
)) {
// Copy the locale ID out of the UnicodeString into the char buffer cLocaleID,
// which ubrk_open() and Locale() take below.
335 locale
= myCase
->getString("Locale", status
);
336 locale
.extract(0, 0x7fffffff, cLocaleID
, sizeof(cLocaleID
), "");
337 type
= myCase
->getInt("Type", status
);
340 input
= myCase
->getString("Input", status
);
// The iterator is opened without text (NULL, 0).
344 iter
=ubrk_open((UBreakIteratorType
)type
, cLocaleID
, NULL
, 0, &status
);
347 if(U_FAILURE(status
)) {
348 errln("error: TestTitleCasing() ubrk_open(%d) failed for test case from casing.res: %s", type
, u_errorName(status
));
// Clear the failure before the loop continues.
349 status
= U_ZERO_ERROR
;
352 result
.toTitle((BreakIterator
*)iter
, Locale(cLocaleID
));
353 if(result
!=myCase
->getString("Output", status
)) {
354 errln("error: TestTitleCasing() got a wrong result for test case from casing.res");
363 // more tests for API coverage
365 input
=UNICODE_STRING_SIMPLE("sTrA\\u00dfE").unescape();
366 (result
=input
).toTitle(NULL
);
367 if(result
!=UNICODE_STRING_SIMPLE("Stra\\u00dfe").unescape()) {
368 errln("UnicodeString::toTitle(NULL) failed");
// Second implementation: reads the test cases directly from the casing
// resource bundle.
373 UnicodeString in
, expect
, result
, localeID
;
374 UResourceBundle
*casing
, *titlecasing
, *test
, *res
;
375 UErrorCode errorCode
;
376 int32_t testIndex
, type
;
378 errorCode
=U_ZERO_ERROR
;
379 loadTestData(errorCode
);
380 casing
=ures_openDirect("testdata", "casing", &errorCode
);
381 if(U_FAILURE(errorCode
)) {
382 errln("error: TestTitleCasing() is unable to open casing.res: %s", u_errorName(errorCode
));
387 titlecasing
=ures_getByKey(casing
, "titlecasing", 0, &errorCode
);
388 if(U_FAILURE(errorCode
)) {
389 logln("TestTitleCasing() is unable to open get casing.res/titlecasing: %s", u_errorName(errorCode
));
391 UBreakIterator
*iter
;
// The for statement has no loop condition; the exit is presumably taken in the
// failure branch after ures_getByIndex(), whose body is not visible.
393 for(testIndex
=0;; ++testIndex
) {
395 test
=ures_getByIndex(titlecasing
, testIndex
, 0, &errorCode
);
396 if(U_FAILURE(errorCode
)) {
400 // get test case data
// Items 0, 1 and 2 of a test entry are read as strings into in, expect and
// localeID; item 3 is read as an integer into type.
401 in
=ures_getUnicodeStringByIndex(test
, 0, &errorCode
);
402 expect
=ures_getUnicodeStringByIndex(test
, 1, &errorCode
);
403 localeID
=ures_getUnicodeStringByIndex(test
, 2, &errorCode
);
405 res
=ures_getByIndex(test
, 3, 0, &errorCode
);
406 type
=ures_getInt(res
, &errorCode
);
409 if(U_FAILURE(errorCode
)) {
// NOTE(review): testIndex is declared int32_t but is formatted with %ld here
// and with %d in the ubrk_open() failure message below.
410 errln("error: TestTitleCasing() is unable to get data for test case %ld from casing.res: %s", testIndex
, u_errorName(errorCode
));
411 continue; // skip this test case
414 // run this test case
415 localeID
.extract(0, 0x7fffffff, cLocaleID
, sizeof(cLocaleID
), "");
// Unlike the first half, the iterator is opened on the input text itself.
419 iter
=ubrk_open((UBreakIteratorType
)type
, cLocaleID
, in
.getBuffer(), in
.length(), &errorCode
);
422 if(U_FAILURE(errorCode
)) {
423 errln("error: TestTitleCasing() ubrk_open(%d) failed for test case %d from casing.res: %s", type
, testIndex
, u_errorName(errorCode
));
426 result
.toTitle((BreakIterator
*)iter
, Locale(cLocaleID
));
428 errln("error: TestTitleCasing() got a wrong result for test case %ld from casing.res", testIndex
);
436 ures_close(titlecasing
);
437 logln("TestTitleCasing() processed %ld test cases", testIndex
);