2 *******************************************************************************
4 * Copyright (C) 2002-2008, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: strcase.cpp
10 * tab size: 8 (not used)
13 * created on: 2002mar12
14 * created by: Markus W. Scherer
16 * Test file for string casing C++ API functions.
19 #include "unicode/uchar.h"
20 #include "unicode/ures.h"
21 #include "unicode/uloc.h"
22 #include "unicode/locid.h"
23 #include "unicode/ubrk.h"
24 #include "unicode/unistr.h"
25 #include "unicode/ucasemap.h"
27 #include "unicode/tstdtmod.h"
// Number of elements in a true array (not a pointer), as an int32_t.
// The whole expansion is wrapped in parentheses so that the cast cannot bind
// to operators or operands that surround the macro at the use site
// (for example `sizeof LENGTHOF(a)`).
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
// Out-of-line destructor; the body is empty.
31 StringCaseTest::~StringCaseTest() {}
// Test dispatcher: maps a test index to a test name and, when exec is set,
// runs the matching test method.
//   index - selects the test case
//   exec  - when true, logs the suite banner and runs the selected test
//   name  - receives the selected test's name; set to "" for an unknown index
//   (the fourth parameter is unnamed and unused here)
// NOTE(review): the embedded listing numbers skip 36, 38-39 and 42-44, so the
// switch header, the second case label and the closing #endif/braces are
// presumably missing from this extract - confirm against the full file.
34 StringCaseTest::runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char * /*par*/) {
35 if (exec
) logln("TestSuite StringCaseTest: ");
// Index 0 selects TestCaseConversion.
37 case 0: name
= "TestCaseConversion"; if (exec
) TestCaseConversion(); break;
// TestCasing() is only called when break iteration is compiled in.
40 #if !UCONFIG_NO_BREAK_ITERATION
41 if(exec
) TestCasing();
// An empty name is returned for any other index.
45 default: name
= ""; break; //needed to end loop
// Exercises UnicodeString::toLower(), toUpper() and foldCase() on fixed
// inputs and reports every mismatch against the expected string via errln().
// The visible checks cover Turkish, German and Greek locale mappings,
// array-based root/Turkish mappings, Deseret (plane 1) characters, LJ
// digraphs, context-sensitive cases (sigma, Turkish and Lithuanian dots)
// and case folding with and without U_FOLD_CASE_EXCLUDE_SPECIAL_I.
// NOTE(review): the embedded listing numbers have gaps throughout (e.g. 51,
// 54-56, 71-72, 126-128), so braces, several declarations and some calls and
// comparisons are not visible in this extract.
50 StringCaseTest::TestCaseConversion()
// Reference strings as UTF-16 code units. The two Turkish arrays end with an
// explicit 0; the Greek initializers continue on lines not shown here.
52 static const UChar uppercaseGreek
[] =
53 { 0x399, 0x395, 0x3a3, 0x3a5, 0x3a3, 0x20, 0x03a7, 0x3a1, 0x399, 0x3a3, 0x3a4,
57 static const UChar lowercaseGreek
[] =
58 { 0x3b9, 0x3b5, 0x3c3, 0x3c5, 0x3c2, 0x20, 0x03c7, 0x3c1, 0x3b9, 0x3c3, 0x3c4,
62 static const UChar lowercaseTurkish
[] =
63 { 0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0x2c, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x63, 0x6f,
64 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x0131, 0x6e, 0x6f, 0x70, 0x6c, 0x65, 0x21, 0 };
66 static const UChar uppercaseTurkish
[] =
67 { 0x54, 0x4f, 0x50, 0x4b, 0x41, 0x50, 0x49, 0x20, 0x50, 0x41, 0x4c, 0x41, 0x43, 0x45, 0x2c, 0x20,
68 0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4c, 0 };
70 UnicodeString expectedResult
;
// Input for the toLower checks: U+0130 followed by ASCII text.
73 test3
+= (UChar32
)0x0130;
74 test3
+= "STANBUL, NOT CONSTANTINOPLE!";
// Root locale: the expected result starts with "i" followed by U+0307.
76 UnicodeString
test4(test3
);
77 test4
.toLower(Locale(""));
78 expectedResult
= UnicodeString("i\\u0307stanbul, not constantinople!", "").unescape();
79 if (test4
!= expectedResult
)
80 errln("1. toLower failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// Turkish locale: the result is compared against lowercaseTurkish.
83 test4
.toLower(Locale("tr", "TR"));
84 expectedResult
= lowercaseTurkish
;
85 if (test4
!= expectedResult
)
86 errln("2. toLower failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// Appends U+0131 and ASCII text to test3 for the toUpper checks.
89 test3
+= (UChar32
)0x0131;
90 test3
+= " palace, istanbul";
93 test4
.toUpper(Locale(""));
94 expectedResult
= "TOPKAPI PALACE, ISTANBUL";
95 if (test4
!= expectedResult
)
96 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
99 test4
.toUpper(Locale("tr", "TR"));
100 expectedResult
= uppercaseTurkish
;
101 if (test4
!= expectedResult
)
102 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
// German locale: each U+00DF in the input is expected to become "SS".
104 test3
= CharsToUnicodeString("S\\u00FC\\u00DFmayrstra\\u00DFe");
106 test3
.toUpper(Locale("de", "DE"));
107 expectedResult
= CharsToUnicodeString("S\\u00DCSSMAYRSTRASSE");
108 if (test3
!= expectedResult
)
109 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test3
+ "\".");
// Greek locale: uppercaseGreek lowercased is compared against lowercaseGreek,
// then lowercaseGreek is loaded and compared against uppercaseGreek.
111 test4
.replace(0, test4
.length(), uppercaseGreek
);
113 test4
.toLower(Locale("el", "GR"));
114 expectedResult
= lowercaseGreek
;
115 if (test4
!= expectedResult
)
116 errln("toLower failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
118 test4
.replace(0, test4
.length(), lowercaseGreek
);
121 expectedResult
= uppercaseGreek
;
122 if (test4
!= expectedResult
)
123 errln("toUpper failed: expected \"" + expectedResult
+ "\", got \"" + test4
+ "\".");
125 // more string case mapping tests with the new implementation
// Input and expected arrays. They carry no terminating 0, so the checks below
// derive their lengths from sizeof(...)/U_SIZEOF_UCHAR. The lower/upper
// arrays all end with the surrogate pair 0xd93f 0xdfff.
129 beforeLower
[]= { 0x61, 0x42, 0x49, 0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff },
130 lowerRoot
[]= { 0x61, 0x62, 0x69, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
131 lowerTurkish
[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
133 beforeUpper
[]= { 0x61, 0x42, 0x69, 0x3c2, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xfb03, 0xfb03, 0xd93f, 0xdfff },
134 upperRoot
[]= { 0x41, 0x42, 0x49, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
135 upperTurkish
[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
137 beforeMiniUpper
[]= { 0xdf, 0x61 },
138 miniUpper
[]= { 0x53, 0x53, 0x41 };
142 /* lowercase with root locale */
143 s
=UnicodeString(FALSE
, beforeLower
, (int32_t)(sizeof(beforeLower
)/U_SIZEOF_UCHAR
));
// Both the length and the contents must match the expected array.
145 if( s
.length()!=(sizeof(lowerRoot
)/U_SIZEOF_UCHAR
) ||
146 s
!=UnicodeString(FALSE
, lowerRoot
, s
.length())
148 errln("error in toLower(root locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, lowerRoot
, (int32_t)(sizeof(lowerRoot
)/U_SIZEOF_UCHAR
)) + "\"");
151 /* lowercase with turkish locale */
152 s
=UnicodeString(FALSE
, beforeLower
, (int32_t)(sizeof(beforeLower
)/U_SIZEOF_UCHAR
));
// setCharAt() stores element 0 back with its own value before the mapping.
// NOTE(review): presumably this forces s to hold a writable copy of the
// array before toLower() runs - confirm against the UnicodeString docs.
153 s
.setCharAt(0, beforeLower
[0]).toLower(Locale("tr"));
154 if( s
.length()!=(sizeof(lowerTurkish
)/U_SIZEOF_UCHAR
) ||
155 s
!=UnicodeString(FALSE
, lowerTurkish
, s
.length())
157 errln("error in toLower(turkish locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, lowerTurkish
, (int32_t)(sizeof(lowerTurkish
)/U_SIZEOF_UCHAR
)) + "\"");
160 /* uppercase with root locale */
161 s
=UnicodeString(FALSE
, beforeUpper
, (int32_t)(sizeof(beforeUpper
)/U_SIZEOF_UCHAR
));
162 s
.setCharAt(0, beforeUpper
[0]).toUpper(Locale(""));
163 if( s
.length()!=(sizeof(upperRoot
)/U_SIZEOF_UCHAR
) ||
164 s
!=UnicodeString(FALSE
, upperRoot
, s
.length())
166 errln("error in toUpper(root locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, upperRoot
, (int32_t)(sizeof(upperRoot
)/U_SIZEOF_UCHAR
)) + "\"");
169 /* uppercase with turkish locale */
170 s
=UnicodeString(FALSE
, beforeUpper
, (int32_t)(sizeof(beforeUpper
)/U_SIZEOF_UCHAR
));
171 s
.toUpper(Locale("tr"));
172 if( s
.length()!=(sizeof(upperTurkish
)/U_SIZEOF_UCHAR
) ||
173 s
!=UnicodeString(FALSE
, upperTurkish
, s
.length())
175 errln("error in toUpper(turkish locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, upperTurkish
, (int32_t)(sizeof(upperTurkish
)/U_SIZEOF_UCHAR
)) + "\"");
178 /* uppercase a short string with root locale */
179 s
=UnicodeString(FALSE
, beforeMiniUpper
, (int32_t)(sizeof(beforeMiniUpper
)/U_SIZEOF_UCHAR
));
180 s
.setCharAt(0, beforeMiniUpper
[0]).toUpper("");
181 if( s
.length()!=(sizeof(miniUpper
)/U_SIZEOF_UCHAR
) ||
182 s
!=UnicodeString(FALSE
, miniUpper
, s
.length())
184 errln("error in toUpper(root locale)=\"" + s
+ "\" expected \"" + UnicodeString(FALSE
, miniUpper
, (int32_t)(sizeof(miniUpper
)/U_SIZEOF_UCHAR
)) + "\"");
188 // test some supplementary characters (>= Unicode 3.1)
193 deseretInput
=UnicodeString("\\U0001043C\\U00010414", "").unescape(),
194 deseretLower
=UnicodeString("\\U0001043C\\U0001043C", "").unescape(),
195 deseretUpper
=UnicodeString("\\U00010414\\U00010414", "").unescape();
// t receives a copy of the input, which is then mapped in place.
196 (t
=deseretInput
).toLower();
197 if(t
!=deseretLower
) {
198 errln("error lowercasing Deseret (plane 1) characters");
200 (t
=deseretInput
).toUpper();
201 if(t
!=deseretUpper
) {
202 errln("error uppercasing Deseret (plane 1) characters");
206 // test some more cases that looked like problems
211 ljInput
=UnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 \\U0001043C\\U00010414", "").unescape(),
212 ljLower
=UnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 \\U0001043C\\U0001043C", "").unescape(),
213 ljUpper
=UnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 \\U00010414\\U00010414", "").unescape();
214 (t
=ljInput
).toLower("en");
216 errln("error lowercasing LJ characters");
218 (t
=ljInput
).toUpper("en");
220 errln("error uppercasing LJ characters");
224 #if !UCONFIG_NO_NORMALIZATION
225 // some context-sensitive casing depends on normalization data being present
227 // Unicode 3.1.1 SpecialCasing tests
231 // sigmas preceded and/or followed by cased letters
233 sigmas
=UnicodeString("i\\u0307\\u03a3\\u0308j \\u0307\\u03a3\\u0308j i\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(),
234 sigmasLower
=UnicodeString("i\\u0307\\u03c3\\u0308j \\u0307\\u03c3\\u0308j i\\u00ad\\u03c2\\u0308 \\u0307\\u03c3\\u0308 ", "").unescape(),
235 sigmasUpper
=UnicodeString("I\\u0307\\u03a3\\u0308J \\u0307\\u03a3\\u0308J I\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape();
237 (t
=sigmas
).toLower();
239 errln("error in sigmas.toLower()=\"" + t
+ "\" expected \"" + sigmasLower
+ "\"");
242 (t
=sigmas
).toUpper(Locale(""));
244 errln("error in sigmas.toUpper()=\"" + t
+ "\" expected \"" + sigmasUpper
+ "\"");
247 // turkish & azerbaijani dotless i & dotted I
248 // remove dot above if there was a capital I before and there are no more accents above
250 dots
=UnicodeString("I \\u0130 I\\u0307 I\\u0327\\u0307 I\\u0301\\u0307 I\\u0327\\u0307\\u0301", "").unescape(),
251 dotsTurkish
=UnicodeString("\\u0131 i i i\\u0327 \\u0131\\u0301\\u0307 i\\u0327\\u0301", "").unescape(),
252 dotsDefault
=UnicodeString("i i\\u0307 i\\u0307 i\\u0327\\u0307 i\\u0301\\u0307 i\\u0327\\u0307\\u0301", "").unescape();
254 (t
=dots
).toLower("tr");
256 errln("error in dots.toLower(tr)=\"" + t
+ "\" expected \"" + dotsTurkish
+ "\"");
259 (t
=dots
).toLower("de");
261 errln("error in dots.toLower(de)=\"" + t
+ "\" expected \"" + dotsDefault
+ "\"");
265 // more Unicode 3.1.1 tests
269 // lithuanian dot above in uppercasing
271 dots
=UnicodeString("a\\u0307 \\u0307 i\\u0307 j\\u0327\\u0307 j\\u0301\\u0307", "").unescape(),
272 dotsLithuanian
=UnicodeString("A\\u0307 \\u0307 I J\\u0327 J\\u0301\\u0307", "").unescape(),
273 dotsDefault
=UnicodeString("A\\u0307 \\u0307 I\\u0307 J\\u0327\\u0307 J\\u0301\\u0307", "").unescape();
275 (t
=dots
).toUpper("lt");
276 if(t
!=dotsLithuanian
) {
277 errln("error in dots.toUpper(lt)=\"" + t
+ "\" expected \"" + dotsLithuanian
+ "\"");
280 (t
=dots
).toUpper("de");
282 errln("error in dots.toUpper(de)=\"" + t
+ "\" expected \"" + dotsDefault
+ "\"");
285 // lithuanian adds dot above to i in lowercasing if there are more above accents
287 i
=UnicodeString("I I\\u0301 J J\\u0301 \\u012e \\u012e\\u0301 \\u00cc\\u00cd\\u0128", "").unescape(),
288 iLithuanian
=UnicodeString("i i\\u0307\\u0301 j j\\u0307\\u0301 \\u012f \\u012f\\u0307\\u0301 i\\u0307\\u0300i\\u0307\\u0301i\\u0307\\u0303", "").unescape(),
289 iDefault
=UnicodeString("i i\\u0301 j j\\u0301 \\u012f \\u012f\\u0301 \\u00ec\\u00ed\\u0129", "").unescape();
293 errln("error in i.toLower(lt)=\"" + t
+ "\" expected \"" + iLithuanian
+ "\"");
298 errln("error in i.toLower(de)=\"" + t
+ "\" expected \"" + iDefault
+ "\"");
// Case folding: per the messages below, f is the expected default folding of
// s, and g is the expected result with U_FOLD_CASE_EXCLUDE_SPECIAL_I.
307 s
=UnicodeString("A\\u00df\\u00b5\\ufb03\\U0001040c\\u0130\\u0131", "").unescape(),
308 f
=UnicodeString("ass\\u03bcffi\\U00010434i\\u0307\\u0131", "").unescape(),
309 g
=UnicodeString("ass\\u03bcffi\\U00010434i\\u0131", "").unescape(),
314 errln("error in foldCase(\"" + s
+ "\", default)=\"" + t
+ "\" but expected \"" + f
+ "\"");
317 // alternate handling for dotted I/dotless i (U+0130, U+0131)
318 (t
=s
).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I
);
320 errln("error in foldCase(\"" + s
+ "\", U_FOLD_CASE_EXCLUDE_SPECIAL_I)=\"" + t
+ "\" but expected \"" + g
+ "\"");
325 // data-driven case mapping tests ------------------------------------------ ***
335 // names of TestData children in casing.txt
336 static const char *const dataNames
[TEST_COUNT
+1]={
// Runs one data-driven casing test case two ways and reports problems via
// errln(): first through the UnicodeString case-mapping methods (toLower,
// toUpper, toTitle, foldCase), then through the UTF-8 ucasemap_utf8*()
// functions.
//   input    - string to be case-mapped
//   output   - expected result
//   iter     - break iterator for title casing; passed as void* and cast to
//              BreakIterator* or UBreakIterator* where it is used
//   localeID - locale name used to build the Locale and to open the UCaseMap
//   options  - option bits passed to toTitle(), foldCase() and ucasemap_open()
// NOTE(review): listing line 347 is missing between `output` and `iter`; it
// presumably declares the whichCase parameter tested further down. The
// switch/case scaffolding that picks one mapping per call is also not visible
// in this extract (the embedded numbers have gaps), so the calls below appear
// back to back.
345 StringCaseTest::TestCasingImpl(const UnicodeString
&input
,
346 const UnicodeString
&output
,
348 void *iter
, const char *localeID
, uint32_t options
) {
350 UnicodeString result
;
352 Locale
locale(localeID
);
// First pass: the UnicodeString member functions.
358 result
.toLower(locale
);
362 result
.toUpper(locale
);
364 #if !UCONFIG_NO_BREAK_ITERATION
367 result
.toTitle((BreakIterator
*)iter
, locale
, options
);
372 result
.foldCase(options
);
376 break; // won't happen
379 errln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name
);
// For title casing with options==0, the toTitle() overload without an
// options argument is exercised as well.
381 #if !UCONFIG_NO_BREAK_ITERATION
382 if(whichCase
==TEST_TITLE
&& options
==0) {
384 result
.toTitle((BreakIterator
*)iter
, locale
);
386 errln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res");
// Second pass: the UTF-8 case-mapping functions, using fixed 100-byte buffers
// for the UTF-8 input and output.
392 char utf8In
[100], utf8Out
[100];
393 int32_t utf8InLength
, utf8OutLength
, resultLength
;
397 UErrorCode errorCode
;
399 errorCode
=U_ZERO_ERROR
;
400 csm
=ucasemap_open(localeID
, options
, &errorCode
);
401 #if !UCONFIG_NO_BREAK_ITERATION
403 // Clone the break iterator so that the UCaseMap can safely adopt it.
404 int32_t size
=1; // Not 0 because that only gives preflighting.
405 UBreakIterator
*clone
=ubrk_safeClone((UBreakIterator
*)iter
, NULL
, &size
, &errorCode
);
406 ucasemap_setBreakIterator(csm
, clone
, &errorCode
);
// Convert the UTF-16 input to UTF-8 for the ucasemap_utf8*() calls.
410 u_strToUTF8(utf8In
, (int32_t)sizeof(utf8In
), &utf8InLength
, input
.getBuffer(), input
.length(), &errorCode
);
// name records which function ran, for use in the error messages below.
413 name
="ucasemap_utf8ToLower";
414 utf8OutLength
=ucasemap_utf8ToLower(csm
,
415 utf8Out
, (int32_t)sizeof(utf8Out
),
416 utf8In
, utf8InLength
, &errorCode
);
419 name
="ucasemap_utf8ToUpper";
420 utf8OutLength
=ucasemap_utf8ToUpper(csm
,
421 utf8Out
, (int32_t)sizeof(utf8Out
),
422 utf8In
, utf8InLength
, &errorCode
);
424 #if !UCONFIG_NO_BREAK_ITERATION
426 name
="ucasemap_utf8ToTitle";
427 utf8OutLength
=ucasemap_utf8ToTitle(csm
,
428 utf8Out
, (int32_t)sizeof(utf8Out
),
429 utf8In
, utf8InLength
, &errorCode
);
433 name
="ucasemap_utf8FoldCase";
434 utf8OutLength
=ucasemap_utf8FoldCase(csm
,
435 utf8Out
, (int32_t)sizeof(utf8Out
),
436 utf8In
, utf8InLength
, &errorCode
);
441 break; // won't happen
// Convert the UTF-8 result back to UTF-16 directly into result's buffer;
// on failure the buffer is released with length 0.
443 buffer
=result
.getBuffer(utf8OutLength
);
444 u_strFromUTF8(buffer
, result
.getCapacity(), &resultLength
, utf8Out
, utf8OutLength
, &errorCode
);
445 result
.releaseBuffer(U_SUCCESS(errorCode
) ? resultLength
: 0);
// Report either the ICU error code or the mismatch against the expected output.
447 if(U_FAILURE(errorCode
)) {
448 errln("error: %s() got an error for a test case from casing.res - %s", name
, u_errorName(errorCode
));
449 } else if(result
!=output
) {
450 errln("error: %s() got a wrong result for a test case from casing.res", name
);
451 errln("expected \"" + output
+ "\" got \"" + result
+ "\"" );
457 StringCaseTest::TestCasing() {
458 UErrorCode status
= U_ZERO_ERROR
;
461 UnicodeString locale
, input
, output
, optionsString
, result
;
463 int32_t whichCase
, type
;
464 TestDataModule
*driver
= TestDataModule::getTestDataModule("casing", *this, status
);
465 if(U_SUCCESS(status
)) {
466 for(whichCase
=0; whichCase
<TEST_COUNT
; ++whichCase
) {
467 #if UCONFIG_NO_BREAK_ITERATION
468 if(whichCase
==TEST_TITLE
) {
472 TestData
*casingTest
= driver
->createTestData(dataNames
[whichCase
], status
);
473 if(U_FAILURE(status
)) {
474 errln("TestCasing failed to createTestData(%s) - %s", dataNames
[whichCase
], u_errorName(status
));
477 const DataMap
*myCase
= NULL
;
478 while(casingTest
->nextCase(myCase
, status
)) {
479 input
= myCase
->getString("Input", status
);
480 output
= myCase
->getString("Output", status
);
482 if(whichCase
!=TEST_FOLD
) {
483 locale
= myCase
->getString("Locale", status
);
485 locale
.extract(0, 0x7fffffff, cLocaleID
, sizeof(cLocaleID
), "");
488 #if !UCONFIG_NO_BREAK_ITERATION
489 if(whichCase
==TEST_TITLE
) {
490 type
= myCase
->getInt("Type", status
);
492 iter
=ubrk_open((UBreakIteratorType
)type
, cLocaleID
, NULL
, 0, &status
);
493 } else if(type
==-2) {
494 // Open a trivial break iterator that only delivers { 0, length }
495 // or even just { 0 } as boundaries.
496 static const UChar rules
[] = { 0x2e, 0x2a, 0x3b }; // ".*;"
497 UParseError parseError
;
498 iter
=ubrk_openRules(rules
, LENGTHOF(rules
), NULL
, 0, &parseError
, &status
);
503 if(whichCase
==TEST_TITLE
|| whichCase
==TEST_FOLD
) {
504 optionsString
= myCase
->getString("Options", status
);
505 if(optionsString
.indexOf((UChar
)0x54)>=0) { // T
506 options
|=U_FOLD_CASE_EXCLUDE_SPECIAL_I
;
508 if(optionsString
.indexOf((UChar
)0x4c)>=0) { // L
509 options
|=U_TITLECASE_NO_LOWERCASE
;
511 if(optionsString
.indexOf((UChar
)0x41)>=0) { // A
512 options
|=U_TITLECASE_NO_BREAK_ADJUSTMENT
;
516 if(U_FAILURE(status
)) {
517 errln("error: TestCasing() setup failed for %s test case from casing.res: %s", dataNames
[whichCase
], u_errorName(status
));
518 status
= U_ZERO_ERROR
;
520 TestCasingImpl(input
, output
, whichCase
, iter
, cLocaleID
, options
);
523 #if !UCONFIG_NO_BREAK_ITERATION
534 #if !UCONFIG_NO_BREAK_ITERATION
535 // more tests for API coverage
537 input
=UNICODE_STRING_SIMPLE("sTrA\\u00dfE").unescape();
538 (result
=input
).toTitle(NULL
);
539 if(result
!=UNICODE_STRING_SIMPLE("Stra\\u00dfe").unescape()) {
540 errln("UnicodeString::toTitle(NULL) failed");