]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/intltest/strcase.cpp
ICU-6.2.22.tar.gz
[apple/icu.git] / icuSources / test / intltest / strcase.cpp
CommitLineData
b75a7d8f
A
/*
*******************************************************************************
*
*   Copyright (C) 2002-2004, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  strcase.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2002mar12
*   created by: Markus W. Scherer
*
*   Test file for string casing C++ API functions.
*/
18
b75a7d8f 19#include "unicode/uchar.h"
b75a7d8f
A
20#include "unicode/ures.h"
21#include "unicode/uloc.h"
22#include "unicode/locid.h"
23#include "unicode/ubrk.h"
24#include "ustrtest.h"
374ca955
A
25#include "unicode/tstdtmod.h"
26
27StringCaseTest::~StringCaseTest() {}
b75a7d8f
A
28
29void
30StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
31 if (exec) logln("TestSuite StringCaseTest: ");
32 switch (index) {
33 case 0: name = "TestCaseConversion"; if (exec) TestCaseConversion(); break;
34 case 1:
35 name = "TestTitleCasing";
36#if !UCONFIG_NO_BREAK_ITERATION
37 if(exec) TestTitleCasing();
38#endif
39 break;
40
41 default: name = ""; break; //needed to end loop
42 }
43}
44
45void
46StringCaseTest::TestCaseConversion()
47{
48 UChar uppercaseGreek[] =
49 { 0x399, 0x395, 0x3a3, 0x3a5, 0x3a3, 0x20, 0x03a7, 0x3a1, 0x399, 0x3a3, 0x3a4,
50 0x39f, 0x3a3, 0 };
51 // "IESUS CHRISTOS"
52
53 UChar lowercaseGreek[] =
54 { 0x3b9, 0x3b5, 0x3c3, 0x3c5, 0x3c2, 0x20, 0x03c7, 0x3c1, 0x3b9, 0x3c3, 0x3c4,
55 0x3bf, 0x3c2, 0 };
56 // "iesus christos"
57
58 UChar lowercaseTurkish[] =
59 { 0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0x2c, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x63, 0x6f,
60 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x0131, 0x6e, 0x6f, 0x70, 0x6c, 0x65, 0x21, 0 };
61
62 UChar uppercaseTurkish[] =
63 { 0x54, 0x4f, 0x50, 0x4b, 0x41, 0x50, 0x49, 0x20, 0x50, 0x41, 0x4c, 0x41, 0x43, 0x45, 0x2c, 0x20,
64 0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4c, 0 };
65
66 UnicodeString expectedResult;
67 UnicodeString test3;
68
69 test3 += (UChar32)0x0130;
70 test3 += "STANBUL, NOT CONSTANTINOPLE!";
71
72 UnicodeString test4(test3);
73 test4.toLower();
74 expectedResult = UnicodeString("i\\u0307stanbul, not constantinople!", "").unescape();
75 if (test4 != expectedResult)
76 errln("1. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
77
78 test4 = test3;
79 test4.toLower(Locale("tr", "TR"));
80 expectedResult = lowercaseTurkish;
81 if (test4 != expectedResult)
82 errln("2. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
83
84 test3 = "topkap";
85 test3 += (UChar32)0x0131;
86 test3 += " palace, istanbul";
87 test4 = test3;
88
89 test4.toUpper();
90 expectedResult = "TOPKAPI PALACE, ISTANBUL";
91 if (test4 != expectedResult)
92 errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
93
94 test4 = test3;
95 test4.toUpper(Locale("tr", "TR"));
96 expectedResult = uppercaseTurkish;
97 if (test4 != expectedResult)
98 errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
99
100 test3 = CharsToUnicodeString("S\\u00FC\\u00DFmayrstra\\u00DFe");
101
102 test3.toUpper(Locale("de", "DE"));
103 expectedResult = CharsToUnicodeString("S\\u00DCSSMAYRSTRASSE");
104 if (test3 != expectedResult)
105 errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test3 + "\".");
106
107 test4.replace(0, test4.length(), uppercaseGreek);
108
109 test4.toLower(Locale("el", "GR"));
110 expectedResult = lowercaseGreek;
111 if (test4 != expectedResult)
112 errln("toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
113
114 test4.replace(0, test4.length(), lowercaseGreek);
115
116 test4.toUpper();
117 expectedResult = uppercaseGreek;
118 if (test4 != expectedResult)
119 errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\".");
120
121 // more string case mapping tests with the new implementation
122 {
123 static const UChar
124
125 beforeLower[]= { 0x61, 0x42, 0x49, 0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff },
126 lowerRoot[]= { 0x61, 0x62, 0x69, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
127 lowerTurkish[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff },
128
129 beforeUpper[]= { 0x61, 0x42, 0x69, 0x3c2, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xfb03, 0xfb03, 0xd93f, 0xdfff },
130 upperRoot[]= { 0x41, 0x42, 0x49, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
131 upperTurkish[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff },
132
133 beforeMiniUpper[]= { 0xdf, 0x61 },
134 miniUpper[]= { 0x53, 0x53, 0x41 };
135
136 UnicodeString s;
137
138 /* lowercase with root locale */
139 s=UnicodeString(FALSE, beforeLower, (int32_t)(sizeof(beforeLower)/U_SIZEOF_UCHAR));
140 s.toLower("");
141 if( s.length()!=(sizeof(lowerRoot)/U_SIZEOF_UCHAR) ||
142 s!=UnicodeString(FALSE, lowerRoot, s.length())
143 ) {
144 errln("error in toLower(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerRoot, (int32_t)(sizeof(lowerRoot)/U_SIZEOF_UCHAR)) + "\"");
145 }
146
147 /* lowercase with turkish locale */
148 s=UnicodeString(FALSE, beforeLower, (int32_t)(sizeof(beforeLower)/U_SIZEOF_UCHAR));
149 s.setCharAt(0, beforeLower[0]).toLower(Locale("tr"));
150 if( s.length()!=(sizeof(lowerTurkish)/U_SIZEOF_UCHAR) ||
151 s!=UnicodeString(FALSE, lowerTurkish, s.length())
152 ) {
153 errln("error in toLower(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerTurkish, (int32_t)(sizeof(lowerTurkish)/U_SIZEOF_UCHAR)) + "\"");
154 }
155
156 /* uppercase with root locale */
157 s=UnicodeString(FALSE, beforeUpper, (int32_t)(sizeof(beforeUpper)/U_SIZEOF_UCHAR));
158 s.setCharAt(0, beforeUpper[0]).toUpper();
159 if( s.length()!=(sizeof(upperRoot)/U_SIZEOF_UCHAR) ||
160 s!=UnicodeString(FALSE, upperRoot, s.length())
161 ) {
162 errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperRoot, (int32_t)(sizeof(upperRoot)/U_SIZEOF_UCHAR)) + "\"");
163 }
164
165 /* uppercase with turkish locale */
166 s=UnicodeString(FALSE, beforeUpper, (int32_t)(sizeof(beforeUpper)/U_SIZEOF_UCHAR));
167 s.toUpper(Locale("tr"));
168 if( s.length()!=(sizeof(upperTurkish)/U_SIZEOF_UCHAR) ||
169 s!=UnicodeString(FALSE, upperTurkish, s.length())
170 ) {
171 errln("error in toUpper(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperTurkish, (int32_t)(sizeof(upperTurkish)/U_SIZEOF_UCHAR)) + "\"");
172 }
173
174 /* uppercase a short string with root locale */
175 s=UnicodeString(FALSE, beforeMiniUpper, (int32_t)(sizeof(beforeMiniUpper)/U_SIZEOF_UCHAR));
176 s.setCharAt(0, beforeMiniUpper[0]).toUpper("");
177 if( s.length()!=(sizeof(miniUpper)/U_SIZEOF_UCHAR) ||
178 s!=UnicodeString(FALSE, miniUpper, s.length())
179 ) {
180 errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, miniUpper, (int32_t)(sizeof(miniUpper)/U_SIZEOF_UCHAR)) + "\"");
181 }
182 }
183
184 // test some supplementary characters (>= Unicode 3.1)
185 {
186 UnicodeString t;
187
188 UnicodeString
189 deseretInput=UnicodeString("\\U0001043C\\U00010414", "").unescape(),
190 deseretLower=UnicodeString("\\U0001043C\\U0001043C", "").unescape(),
191 deseretUpper=UnicodeString("\\U00010414\\U00010414", "").unescape();
192 (t=deseretInput).toLower();
193 if(t!=deseretLower) {
194 errln("error lowercasing Deseret (plane 1) characters");
195 }
196 (t=deseretInput).toUpper();
197 if(t!=deseretUpper) {
198 errln("error uppercasing Deseret (plane 1) characters");
199 }
200 }
201
202 // test some more cases that looked like problems
203 {
204 UnicodeString t;
205
206 UnicodeString
207 ljInput=UnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 \\U0001043C\\U00010414", "").unescape(),
208 ljLower=UnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 \\U0001043C\\U0001043C", "").unescape(),
209 ljUpper=UnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 \\U00010414\\U00010414", "").unescape();
210 (t=ljInput).toLower("en");
211 if(t!=ljLower) {
212 errln("error lowercasing LJ characters");
213 }
214 (t=ljInput).toUpper("en");
215 if(t!=ljUpper) {
216 errln("error uppercasing LJ characters");
217 }
218 }
219
220#if !UCONFIG_NO_NORMALIZATION
221 // some context-sensitive casing depends on normalization data being present
222
223 // Unicode 3.1.1 SpecialCasing tests
224 {
225 UnicodeString t;
226
227 // sigmas preceded and/or followed by cased letters
228 UnicodeString
229 sigmas=UnicodeString("i\\u0307\\u03a3\\u0308j \\u0307\\u03a3\\u0308j i\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(),
230 sigmasLower=UnicodeString("i\\u0307\\u03c3\\u0308j \\u0307\\u03c3\\u0308j i\\u00ad\\u03c2\\u0308 \\u0307\\u03c3\\u0308 ", "").unescape(),
231 sigmasUpper=UnicodeString("I\\u0307\\u03a3\\u0308J \\u0307\\u03a3\\u0308J I\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape();
232
233 (t=sigmas).toLower();
234 if(t!=sigmasLower) {
235 errln("error in sigmas.toLower()=\"" + t + "\" expected \"" + sigmasLower + "\"");
236 }
237
238 (t=sigmas).toUpper();
239 if(t!=sigmasUpper) {
240 errln("error in sigmas.toUpper()=\"" + t + "\" expected \"" + sigmasUpper + "\"");
241 }
242
243 // turkish & azerbaijani dotless i & dotted I
244 // remove dot above if there was a capital I before and there are no more accents above
245 UnicodeString
246 dots=UnicodeString("I \\u0130 I\\u0307 I\\u0327\\u0307 I\\u0301\\u0307 I\\u0327\\u0307\\u0301", "").unescape(),
247 dotsTurkish=UnicodeString("\\u0131 i i i\\u0327 \\u0131\\u0301\\u0307 i\\u0327\\u0301", "").unescape(),
248 dotsDefault=UnicodeString("i i\\u0307 i\\u0307 i\\u0327\\u0307 i\\u0301\\u0307 i\\u0327\\u0307\\u0301", "").unescape();
249
250 (t=dots).toLower("tr");
251 if(t!=dotsTurkish) {
252 errln("error in dots.toLower(tr)=\"" + t + "\" expected \"" + dotsTurkish + "\"");
253 }
254
255 (t=dots).toLower("de");
256 if(t!=dotsDefault) {
257 errln("error in dots.toLower(de)=\"" + t + "\" expected \"" + dotsDefault + "\"");
258 }
259 }
260
261 // more Unicode 3.1.1 tests
262 {
263 UnicodeString t;
264
265 // lithuanian dot above in uppercasing
266 UnicodeString
267 dots=UnicodeString("a\\u0307 \\u0307 i\\u0307 j\\u0327\\u0307 j\\u0301\\u0307", "").unescape(),
268 dotsLithuanian=UnicodeString("A\\u0307 \\u0307 I J\\u0327 J\\u0301\\u0307", "").unescape(),
269 dotsDefault=UnicodeString("A\\u0307 \\u0307 I\\u0307 J\\u0327\\u0307 J\\u0301\\u0307", "").unescape();
270
271 (t=dots).toUpper("lt");
272 if(t!=dotsLithuanian) {
273 errln("error in dots.toUpper(lt)=\"" + t + "\" expected \"" + dotsLithuanian + "\"");
274 }
275
276 (t=dots).toUpper("de");
277 if(t!=dotsDefault) {
278 errln("error in dots.toUpper(de)=\"" + t + "\" expected \"" + dotsDefault + "\"");
279 }
280
281 // lithuanian adds dot above to i in lowercasing if there are more above accents
282 UnicodeString
283 i=UnicodeString("I I\\u0301 J J\\u0301 \\u012e \\u012e\\u0301 \\u00cc\\u00cd\\u0128", "").unescape(),
284 iLithuanian=UnicodeString("i i\\u0307\\u0301 j j\\u0307\\u0301 \\u012f \\u012f\\u0307\\u0301 i\\u0307\\u0300i\\u0307\\u0301i\\u0307\\u0303", "").unescape(),
285 iDefault=UnicodeString("i i\\u0301 j j\\u0301 \\u012f \\u012f\\u0301 \\u00ec\\u00ed\\u0129", "").unescape();
286
287 (t=i).toLower("lt");
288 if(t!=iLithuanian) {
289 errln("error in i.toLower(lt)=\"" + t + "\" expected \"" + iLithuanian + "\"");
290 }
291
292 (t=i).toLower("de");
293 if(t!=iDefault) {
294 errln("error in i.toLower(de)=\"" + t + "\" expected \"" + iDefault + "\"");
295 }
296 }
297
298#endif
299
300 // test case folding
301 {
302 UnicodeString
303 s=UnicodeString("A\\u00df\\u00b5\\ufb03\\U0001040c\\u0130\\u0131", "").unescape(),
304 f=UnicodeString("ass\\u03bcffi\\U00010434i\\u0307\\u0131", "").unescape(),
305 g=UnicodeString("ass\\u03bcffi\\U00010434i\\u0131", "").unescape(),
306 t;
307
308 (t=s).foldCase();
309 if(f!=t) {
310 errln("error in foldCase(\"" + s + "\", default)=\"" + t + "\" but expected \"" + f + "\"");
311 }
312
313 // alternate handling for dotted I/dotless i (U+0130, U+0131)
314 (t=s).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I);
315 if(g!=t) {
316 errln("error in foldCase(\"" + s + "\", U_FOLD_CASE_EXCLUDE_SPECIAL_I)=\"" + t + "\" but expected \"" + g + "\"");
317 }
318 }
319}
320
321#if !UCONFIG_NO_BREAK_ITERATION
322
323void
324StringCaseTest::TestTitleCasing() {
374ca955
A
325 UErrorCode status = U_ZERO_ERROR;
326 UBreakIterator *iter;
327 char cLocaleID[100];
328 UnicodeString locale, input, result;
329 int32_t type;
330 TestDataModule *driver = TestDataModule::getTestDataModule("casing", *this, status);
331 if(U_SUCCESS(status)) {
332 TestData *casingTest = driver->createTestData("titlecasing", status);
333 const DataMap *myCase = NULL;
334 while(casingTest->nextCase(myCase, status)) {
335 locale = myCase->getString("Locale", status);
336 locale.extract(0, 0x7fffffff, cLocaleID, sizeof(cLocaleID), "");
337 type = myCase->getInt("Type", status);
b75a7d8f
A
338
339
374ca955
A
340 input = myCase->getString("Input", status);
341 if(type<0) {
342 iter=0;
343 } else {
344 iter=ubrk_open((UBreakIteratorType)type, cLocaleID, NULL, 0, &status);
345 }
346
347 if(U_FAILURE(status)) {
348 errln("error: TestTitleCasing() ubrk_open(%d) failed for test case from casing.res: %s", type, u_errorName(status));
349 status = U_ZERO_ERROR;
350 } else {
351 result=input;
352 result.toTitle((BreakIterator *)iter, Locale(cLocaleID));
353 if(result!=myCase->getString("Output", status)) {
354 errln("error: TestTitleCasing() got a wrong result for test case from casing.res");
355 }
356 ubrk_close(iter);
357 }
358 }
359 delete casingTest;
b75a7d8f 360 }
374ca955 361 delete driver;
b75a7d8f
A
362
363 // more tests for API coverage
364 status=U_ZERO_ERROR;
365 input=UNICODE_STRING_SIMPLE("sTrA\\u00dfE").unescape();
366 (result=input).toTitle(NULL);
367 if(result!=UNICODE_STRING_SIMPLE("Stra\\u00dfe").unescape()) {
374ca955 368 errln("UnicodeString::toTitle(NULL) failed");
b75a7d8f
A
369 }
370
371#if 0
372 char cLocaleID[100];
373 UnicodeString in, expect, result, localeID;
374 UResourceBundle *casing, *titlecasing, *test, *res;
375 UErrorCode errorCode;
376 int32_t testIndex, type;
377
378 errorCode=U_ZERO_ERROR;
379 loadTestData(errorCode);
380 casing=ures_openDirect("testdata", "casing", &errorCode);
381 if(U_FAILURE(errorCode)) {
382 errln("error: TestTitleCasing() is unable to open casing.res: %s", u_errorName(errorCode));
383 return;
384 }
385
386 // titlecasing tests
387 titlecasing=ures_getByKey(casing, "titlecasing", 0, &errorCode);
388 if(U_FAILURE(errorCode)) {
389 logln("TestTitleCasing() is unable to open get casing.res/titlecasing: %s", u_errorName(errorCode));
390 } else {
391 UBreakIterator *iter;
392
393 for(testIndex=0;; ++testIndex) {
394 // get test case
395 test=ures_getByIndex(titlecasing, testIndex, 0, &errorCode);
396 if(U_FAILURE(errorCode)) {
397 break; // done
398 }
399
400 // get test case data
401 in=ures_getUnicodeStringByIndex(test, 0, &errorCode);
402 expect=ures_getUnicodeStringByIndex(test, 1, &errorCode);
403 localeID=ures_getUnicodeStringByIndex(test, 2, &errorCode);
404
405 res=ures_getByIndex(test, 3, 0, &errorCode);
406 type=ures_getInt(res, &errorCode);
407 ures_close(res);
408
409 if(U_FAILURE(errorCode)) {
410 errln("error: TestTitleCasing() is unable to get data for test case %ld from casing.res: %s", testIndex, u_errorName(errorCode));
411 continue; // skip this test case
412 }
413
414 // run this test case
415 localeID.extract(0, 0x7fffffff, cLocaleID, sizeof(cLocaleID), "");
416 if(type<0) {
417 iter=0;
418 } else {
419 iter=ubrk_open((UBreakIteratorType)type, cLocaleID, in.getBuffer(), in.length(), &errorCode);
420 }
421
422 if(U_FAILURE(errorCode)) {
423 errln("error: TestTitleCasing() ubrk_open(%d) failed for test case %d from casing.res: %s", type, testIndex, u_errorName(errorCode));
424 } else {
425 result=in;
426 result.toTitle((BreakIterator *)iter, Locale(cLocaleID));
427 if(result!=expect) {
428 errln("error: TestTitleCasing() got a wrong result for test case %ld from casing.res", testIndex);
429 }
430 }
431
432 // clean up
433 ubrk_close(iter);
434 ures_close(test);
435 }
436 ures_close(titlecasing);
437 logln("TestTitleCasing() processed %ld test cases", testIndex);
438 }
439
440 ures_close(casing);
441#endif
442}
443#endif