]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ******************************************************************************* | |
3 | * | |
4388f060 | 4 | * Copyright (C) 2002-2012, International Business Machines |
b75a7d8f A |
5 | * Corporation and others. All Rights Reserved. |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: strcase.cpp | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2002mar12 | |
14 | * created by: Markus W. Scherer | |
15 | * | |
16 | * Test file for string casing C++ API functions. | |
17 | */ | |
18 | ||
b75a7d8f | 19 | #include "unicode/uchar.h" |
b75a7d8f A |
20 | #include "unicode/ures.h" |
21 | #include "unicode/uloc.h" | |
22 | #include "unicode/locid.h" | |
23 | #include "unicode/ubrk.h" | |
73c04bcf A |
24 | #include "unicode/unistr.h" |
25 | #include "unicode/ucasemap.h" | |
4388f060 | 26 | #include "ucase.h" |
b75a7d8f | 27 | #include "ustrtest.h" |
374ca955 A |
28 | #include "unicode/tstdtmod.h" |
29 | ||
46f4442e A |
// Number of elements in a true array (not a pointer), as an int32_t.
// The whole expansion is parenthesized so the macro behaves as a single
// primary expression in any context (e.g. as the operand of sizeof or next to
// other postfix/unary operators).
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
31 | ||
374ca955 | 32 | StringCaseTest::~StringCaseTest() {} |
b75a7d8f A |
33 | |
34 | void | |
35 | StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { | |
4388f060 A |
36 | if(exec) { |
37 | logln("TestSuite StringCaseTest: "); | |
38 | } | |
39 | TESTCASE_AUTO_BEGIN; | |
40 | TESTCASE_AUTO(TestCaseConversion); | |
729e4ab9 | 41 | #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION |
4388f060 | 42 | TESTCASE_AUTO(TestCasing); |
b75a7d8f | 43 | #endif |
4388f060 A |
44 | TESTCASE_AUTO(TestFullCaseFoldingIterator); |
45 | TESTCASE_AUTO_END; | |
b75a7d8f A |
46 | } |
47 | ||
48 | void | |
49 | StringCaseTest::TestCaseConversion() | |
50 | { | |
73c04bcf | 51 | static const UChar uppercaseGreek[] = |
b75a7d8f A |
52 | { 0x399, 0x395, 0x3a3, 0x3a5, 0x3a3, 0x20, 0x03a7, 0x3a1, 0x399, 0x3a3, 0x3a4, |
53 | 0x39f, 0x3a3, 0 }; | |
54 | // "IESUS CHRISTOS" | |
55 | ||
73c04bcf | 56 | static const UChar lowercaseGreek[] = |
b75a7d8f A |
57 | { 0x3b9, 0x3b5, 0x3c3, 0x3c5, 0x3c2, 0x20, 0x03c7, 0x3c1, 0x3b9, 0x3c3, 0x3c4, |
58 | 0x3bf, 0x3c2, 0 }; | |
59 | // "iesus christos" | |
60 | ||
73c04bcf | 61 | static const UChar lowercaseTurkish[] = |
b75a7d8f A |
62 | { 0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0x2c, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x63, 0x6f, |
63 | 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x0131, 0x6e, 0x6f, 0x70, 0x6c, 0x65, 0x21, 0 }; | |
64 | ||
73c04bcf | 65 | static const UChar uppercaseTurkish[] = |
b75a7d8f A |
66 | { 0x54, 0x4f, 0x50, 0x4b, 0x41, 0x50, 0x49, 0x20, 0x50, 0x41, 0x4c, 0x41, 0x43, 0x45, 0x2c, 0x20, |
67 | 0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4c, 0 }; | |
68 | ||
69 | UnicodeString expectedResult; | |
70 | UnicodeString test3; | |
71 | ||
72 | test3 += (UChar32)0x0130; | |
73 | test3 += "STANBUL, NOT CONSTANTINOPLE!"; | |
74 | ||
75 | UnicodeString test4(test3); | |
73c04bcf | 76 | test4.toLower(Locale("")); |
b75a7d8f A |
77 | expectedResult = UnicodeString("i\\u0307stanbul, not constantinople!", "").unescape(); |
78 | if (test4 != expectedResult) | |
79 | errln("1. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); | |
80 | ||
81 | test4 = test3; | |
82 | test4.toLower(Locale("tr", "TR")); | |
83 | expectedResult = lowercaseTurkish; | |
84 | if (test4 != expectedResult) | |
85 | errln("2. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); | |
86 | ||
87 | test3 = "topkap"; | |
88 | test3 += (UChar32)0x0131; | |
89 | test3 += " palace, istanbul"; | |
90 | test4 = test3; | |
91 | ||
73c04bcf | 92 | test4.toUpper(Locale("")); |
b75a7d8f A |
93 | expectedResult = "TOPKAPI PALACE, ISTANBUL"; |
94 | if (test4 != expectedResult) | |
95 | errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); | |
96 | ||
97 | test4 = test3; | |
98 | test4.toUpper(Locale("tr", "TR")); | |
99 | expectedResult = uppercaseTurkish; | |
100 | if (test4 != expectedResult) | |
101 | errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); | |
102 | ||
103 | test3 = CharsToUnicodeString("S\\u00FC\\u00DFmayrstra\\u00DFe"); | |
104 | ||
105 | test3.toUpper(Locale("de", "DE")); | |
106 | expectedResult = CharsToUnicodeString("S\\u00DCSSMAYRSTRASSE"); | |
107 | if (test3 != expectedResult) | |
108 | errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test3 + "\"."); | |
109 | ||
110 | test4.replace(0, test4.length(), uppercaseGreek); | |
111 | ||
112 | test4.toLower(Locale("el", "GR")); | |
113 | expectedResult = lowercaseGreek; | |
114 | if (test4 != expectedResult) | |
115 | errln("toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); | |
116 | ||
117 | test4.replace(0, test4.length(), lowercaseGreek); | |
118 | ||
119 | test4.toUpper(); | |
120 | expectedResult = uppercaseGreek; | |
121 | if (test4 != expectedResult) | |
122 | errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); | |
123 | ||
124 | // more string case mapping tests with the new implementation | |
125 | { | |
126 | static const UChar | |
127 | ||
128 | beforeLower[]= { 0x61, 0x42, 0x49, 0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff }, | |
129 | lowerRoot[]= { 0x61, 0x62, 0x69, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff }, | |
130 | lowerTurkish[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff }, | |
131 | ||
132 | beforeUpper[]= { 0x61, 0x42, 0x69, 0x3c2, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xfb03, 0xfb03, 0xd93f, 0xdfff }, | |
133 | upperRoot[]= { 0x41, 0x42, 0x49, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff }, | |
134 | upperTurkish[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff }, | |
135 | ||
136 | beforeMiniUpper[]= { 0xdf, 0x61 }, | |
137 | miniUpper[]= { 0x53, 0x53, 0x41 }; | |
138 | ||
139 | UnicodeString s; | |
140 | ||
141 | /* lowercase with root locale */ | |
142 | s=UnicodeString(FALSE, beforeLower, (int32_t)(sizeof(beforeLower)/U_SIZEOF_UCHAR)); | |
143 | s.toLower(""); | |
144 | if( s.length()!=(sizeof(lowerRoot)/U_SIZEOF_UCHAR) || | |
145 | s!=UnicodeString(FALSE, lowerRoot, s.length()) | |
146 | ) { | |
147 | errln("error in toLower(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerRoot, (int32_t)(sizeof(lowerRoot)/U_SIZEOF_UCHAR)) + "\""); | |
148 | } | |
149 | ||
150 | /* lowercase with turkish locale */ | |
151 | s=UnicodeString(FALSE, beforeLower, (int32_t)(sizeof(beforeLower)/U_SIZEOF_UCHAR)); | |
152 | s.setCharAt(0, beforeLower[0]).toLower(Locale("tr")); | |
153 | if( s.length()!=(sizeof(lowerTurkish)/U_SIZEOF_UCHAR) || | |
154 | s!=UnicodeString(FALSE, lowerTurkish, s.length()) | |
155 | ) { | |
156 | errln("error in toLower(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerTurkish, (int32_t)(sizeof(lowerTurkish)/U_SIZEOF_UCHAR)) + "\""); | |
157 | } | |
158 | ||
159 | /* uppercase with root locale */ | |
160 | s=UnicodeString(FALSE, beforeUpper, (int32_t)(sizeof(beforeUpper)/U_SIZEOF_UCHAR)); | |
73c04bcf | 161 | s.setCharAt(0, beforeUpper[0]).toUpper(Locale("")); |
b75a7d8f A |
162 | if( s.length()!=(sizeof(upperRoot)/U_SIZEOF_UCHAR) || |
163 | s!=UnicodeString(FALSE, upperRoot, s.length()) | |
164 | ) { | |
165 | errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperRoot, (int32_t)(sizeof(upperRoot)/U_SIZEOF_UCHAR)) + "\""); | |
166 | } | |
167 | ||
168 | /* uppercase with turkish locale */ | |
169 | s=UnicodeString(FALSE, beforeUpper, (int32_t)(sizeof(beforeUpper)/U_SIZEOF_UCHAR)); | |
170 | s.toUpper(Locale("tr")); | |
171 | if( s.length()!=(sizeof(upperTurkish)/U_SIZEOF_UCHAR) || | |
172 | s!=UnicodeString(FALSE, upperTurkish, s.length()) | |
173 | ) { | |
174 | errln("error in toUpper(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperTurkish, (int32_t)(sizeof(upperTurkish)/U_SIZEOF_UCHAR)) + "\""); | |
175 | } | |
176 | ||
177 | /* uppercase a short string with root locale */ | |
178 | s=UnicodeString(FALSE, beforeMiniUpper, (int32_t)(sizeof(beforeMiniUpper)/U_SIZEOF_UCHAR)); | |
179 | s.setCharAt(0, beforeMiniUpper[0]).toUpper(""); | |
180 | if( s.length()!=(sizeof(miniUpper)/U_SIZEOF_UCHAR) || | |
181 | s!=UnicodeString(FALSE, miniUpper, s.length()) | |
182 | ) { | |
183 | errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, miniUpper, (int32_t)(sizeof(miniUpper)/U_SIZEOF_UCHAR)) + "\""); | |
184 | } | |
185 | } | |
186 | ||
187 | // test some supplementary characters (>= Unicode 3.1) | |
188 | { | |
189 | UnicodeString t; | |
190 | ||
191 | UnicodeString | |
192 | deseretInput=UnicodeString("\\U0001043C\\U00010414", "").unescape(), | |
193 | deseretLower=UnicodeString("\\U0001043C\\U0001043C", "").unescape(), | |
194 | deseretUpper=UnicodeString("\\U00010414\\U00010414", "").unescape(); | |
195 | (t=deseretInput).toLower(); | |
196 | if(t!=deseretLower) { | |
197 | errln("error lowercasing Deseret (plane 1) characters"); | |
198 | } | |
199 | (t=deseretInput).toUpper(); | |
200 | if(t!=deseretUpper) { | |
201 | errln("error uppercasing Deseret (plane 1) characters"); | |
202 | } | |
203 | } | |
204 | ||
205 | // test some more cases that looked like problems | |
206 | { | |
207 | UnicodeString t; | |
208 | ||
209 | UnicodeString | |
210 | ljInput=UnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 \\U0001043C\\U00010414", "").unescape(), | |
211 | ljLower=UnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 \\U0001043C\\U0001043C", "").unescape(), | |
212 | ljUpper=UnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 \\U00010414\\U00010414", "").unescape(); | |
213 | (t=ljInput).toLower("en"); | |
214 | if(t!=ljLower) { | |
215 | errln("error lowercasing LJ characters"); | |
216 | } | |
217 | (t=ljInput).toUpper("en"); | |
218 | if(t!=ljUpper) { | |
219 | errln("error uppercasing LJ characters"); | |
220 | } | |
221 | } | |
222 | ||
223 | #if !UCONFIG_NO_NORMALIZATION | |
224 | // some context-sensitive casing depends on normalization data being present | |
225 | ||
226 | // Unicode 3.1.1 SpecialCasing tests | |
227 | { | |
228 | UnicodeString t; | |
229 | ||
230 | // sigmas preceded and/or followed by cased letters | |
231 | UnicodeString | |
232 | sigmas=UnicodeString("i\\u0307\\u03a3\\u0308j \\u0307\\u03a3\\u0308j i\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(), | |
233 | sigmasLower=UnicodeString("i\\u0307\\u03c3\\u0308j \\u0307\\u03c3\\u0308j i\\u00ad\\u03c2\\u0308 \\u0307\\u03c3\\u0308 ", "").unescape(), | |
234 | sigmasUpper=UnicodeString("I\\u0307\\u03a3\\u0308J \\u0307\\u03a3\\u0308J I\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(); | |
235 | ||
236 | (t=sigmas).toLower(); | |
237 | if(t!=sigmasLower) { | |
238 | errln("error in sigmas.toLower()=\"" + t + "\" expected \"" + sigmasLower + "\""); | |
239 | } | |
240 | ||
73c04bcf | 241 | (t=sigmas).toUpper(Locale("")); |
b75a7d8f A |
242 | if(t!=sigmasUpper) { |
243 | errln("error in sigmas.toUpper()=\"" + t + "\" expected \"" + sigmasUpper + "\""); | |
244 | } | |
245 | ||
246 | // turkish & azerbaijani dotless i & dotted I | |
247 | // remove dot above if there was a capital I before and there are no more accents above | |
248 | UnicodeString | |
249 | dots=UnicodeString("I \\u0130 I\\u0307 I\\u0327\\u0307 I\\u0301\\u0307 I\\u0327\\u0307\\u0301", "").unescape(), | |
250 | dotsTurkish=UnicodeString("\\u0131 i i i\\u0327 \\u0131\\u0301\\u0307 i\\u0327\\u0301", "").unescape(), | |
251 | dotsDefault=UnicodeString("i i\\u0307 i\\u0307 i\\u0327\\u0307 i\\u0301\\u0307 i\\u0327\\u0307\\u0301", "").unescape(); | |
252 | ||
253 | (t=dots).toLower("tr"); | |
254 | if(t!=dotsTurkish) { | |
255 | errln("error in dots.toLower(tr)=\"" + t + "\" expected \"" + dotsTurkish + "\""); | |
256 | } | |
257 | ||
258 | (t=dots).toLower("de"); | |
259 | if(t!=dotsDefault) { | |
260 | errln("error in dots.toLower(de)=\"" + t + "\" expected \"" + dotsDefault + "\""); | |
261 | } | |
262 | } | |
263 | ||
264 | // more Unicode 3.1.1 tests | |
265 | { | |
266 | UnicodeString t; | |
267 | ||
268 | // lithuanian dot above in uppercasing | |
269 | UnicodeString | |
270 | dots=UnicodeString("a\\u0307 \\u0307 i\\u0307 j\\u0327\\u0307 j\\u0301\\u0307", "").unescape(), | |
271 | dotsLithuanian=UnicodeString("A\\u0307 \\u0307 I J\\u0327 J\\u0301\\u0307", "").unescape(), | |
272 | dotsDefault=UnicodeString("A\\u0307 \\u0307 I\\u0307 J\\u0327\\u0307 J\\u0301\\u0307", "").unescape(); | |
273 | ||
274 | (t=dots).toUpper("lt"); | |
275 | if(t!=dotsLithuanian) { | |
276 | errln("error in dots.toUpper(lt)=\"" + t + "\" expected \"" + dotsLithuanian + "\""); | |
277 | } | |
278 | ||
279 | (t=dots).toUpper("de"); | |
280 | if(t!=dotsDefault) { | |
281 | errln("error in dots.toUpper(de)=\"" + t + "\" expected \"" + dotsDefault + "\""); | |
282 | } | |
283 | ||
284 | // lithuanian adds dot above to i in lowercasing if there are more above accents | |
285 | UnicodeString | |
286 | i=UnicodeString("I I\\u0301 J J\\u0301 \\u012e \\u012e\\u0301 \\u00cc\\u00cd\\u0128", "").unescape(), | |
287 | iLithuanian=UnicodeString("i i\\u0307\\u0301 j j\\u0307\\u0301 \\u012f \\u012f\\u0307\\u0301 i\\u0307\\u0300i\\u0307\\u0301i\\u0307\\u0303", "").unescape(), | |
288 | iDefault=UnicodeString("i i\\u0301 j j\\u0301 \\u012f \\u012f\\u0301 \\u00ec\\u00ed\\u0129", "").unescape(); | |
289 | ||
290 | (t=i).toLower("lt"); | |
291 | if(t!=iLithuanian) { | |
292 | errln("error in i.toLower(lt)=\"" + t + "\" expected \"" + iLithuanian + "\""); | |
293 | } | |
294 | ||
295 | (t=i).toLower("de"); | |
296 | if(t!=iDefault) { | |
297 | errln("error in i.toLower(de)=\"" + t + "\" expected \"" + iDefault + "\""); | |
298 | } | |
299 | } | |
300 | ||
301 | #endif | |
302 | ||
303 | // test case folding | |
304 | { | |
305 | UnicodeString | |
306 | s=UnicodeString("A\\u00df\\u00b5\\ufb03\\U0001040c\\u0130\\u0131", "").unescape(), | |
307 | f=UnicodeString("ass\\u03bcffi\\U00010434i\\u0307\\u0131", "").unescape(), | |
308 | g=UnicodeString("ass\\u03bcffi\\U00010434i\\u0131", "").unescape(), | |
309 | t; | |
310 | ||
311 | (t=s).foldCase(); | |
312 | if(f!=t) { | |
313 | errln("error in foldCase(\"" + s + "\", default)=\"" + t + "\" but expected \"" + f + "\""); | |
314 | } | |
315 | ||
316 | // alternate handling for dotted I/dotless i (U+0130, U+0131) | |
317 | (t=s).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I); | |
318 | if(g!=t) { | |
319 | errln("error in foldCase(\"" + s + "\", U_FOLD_CASE_EXCLUDE_SPECIAL_I)=\"" + t + "\" but expected \"" + g + "\""); | |
320 | } | |
321 | } | |
322 | } | |
323 | ||
73c04bcf A |
324 | // data-driven case mapping tests ------------------------------------------ *** |
325 | ||
// Kinds of case mapping exercised by the data-driven tests.
// The values double as indexes into dataNames[]; TEST_COUNT is the number of
// kinds and must stay last.
enum {
    TEST_LOWER=0,
    TEST_UPPER=1,
    TEST_TITLE=2,
    TEST_FOLD=3,
    TEST_COUNT=4
};
b75a7d8f | 333 | |
73c04bcf A |
334 | // names of TestData children in casing.txt |
335 | static const char *const dataNames[TEST_COUNT+1]={ | |
336 | "lowercasing", | |
337 | "uppercasing", | |
73c04bcf | 338 | "titlecasing", |
46f4442e | 339 | "casefolding", |
73c04bcf A |
340 | "" |
341 | }; | |
374ca955 | 342 | |
73c04bcf A |
343 | void |
344 | StringCaseTest::TestCasingImpl(const UnicodeString &input, | |
345 | const UnicodeString &output, | |
346 | int32_t whichCase, | |
46f4442e | 347 | void *iter, const char *localeID, uint32_t options) { |
73c04bcf A |
348 | // UnicodeString |
349 | UnicodeString result; | |
350 | const char *name; | |
46f4442e | 351 | Locale locale(localeID); |
73c04bcf A |
352 | |
353 | result=input; | |
354 | switch(whichCase) { | |
355 | case TEST_LOWER: | |
356 | name="toLower"; | |
46f4442e | 357 | result.toLower(locale); |
73c04bcf A |
358 | break; |
359 | case TEST_UPPER: | |
360 | name="toUpper"; | |
46f4442e A |
361 | result.toUpper(locale); |
362 | break; | |
363 | #if !UCONFIG_NO_BREAK_ITERATION | |
364 | case TEST_TITLE: | |
365 | name="toTitle"; | |
366 | result.toTitle((BreakIterator *)iter, locale, options); | |
367 | break; | |
368 | #endif | |
369 | case TEST_FOLD: | |
370 | name="foldCase"; | |
371 | result.foldCase(options); | |
73c04bcf A |
372 | break; |
373 | default: | |
374 | name=""; | |
375 | break; // won't happen | |
b75a7d8f | 376 | } |
73c04bcf | 377 | if(result!=output) { |
729e4ab9 | 378 | dataerrln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name); |
b75a7d8f | 379 | } |
46f4442e A |
380 | #if !UCONFIG_NO_BREAK_ITERATION |
381 | if(whichCase==TEST_TITLE && options==0) { | |
382 | result=input; | |
383 | result.toTitle((BreakIterator *)iter, locale); | |
384 | if(result!=output) { | |
729e4ab9 | 385 | dataerrln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res"); |
46f4442e A |
386 | } |
387 | } | |
388 | #endif | |
b75a7d8f | 389 | |
73c04bcf A |
390 | // UTF-8 |
391 | char utf8In[100], utf8Out[100]; | |
392 | int32_t utf8InLength, utf8OutLength, resultLength; | |
393 | UChar *buffer; | |
394 | ||
729e4ab9 A |
395 | IcuTestErrorCode errorCode(*this, "TestCasingImpl"); |
396 | LocalUCaseMapPointer csm(ucasemap_open(localeID, options, errorCode)); | |
46f4442e A |
397 | #if !UCONFIG_NO_BREAK_ITERATION |
398 | if(iter!=NULL) { | |
399 | // Clone the break iterator so that the UCaseMap can safely adopt it. | |
400 | int32_t size=1; // Not 0 because that only gives preflighting. | |
729e4ab9 A |
401 | UBreakIterator *clone=ubrk_safeClone((UBreakIterator *)iter, NULL, &size, errorCode); |
402 | ucasemap_setBreakIterator(csm.getAlias(), clone, errorCode); | |
46f4442e A |
403 | } |
404 | #endif | |
73c04bcf | 405 | |
729e4ab9 | 406 | u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), errorCode); |
73c04bcf A |
407 | switch(whichCase) { |
408 | case TEST_LOWER: | |
409 | name="ucasemap_utf8ToLower"; | |
729e4ab9 | 410 | utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(), |
73c04bcf | 411 | utf8Out, (int32_t)sizeof(utf8Out), |
729e4ab9 | 412 | utf8In, utf8InLength, errorCode); |
73c04bcf A |
413 | break; |
414 | case TEST_UPPER: | |
415 | name="ucasemap_utf8ToUpper"; | |
729e4ab9 | 416 | utf8OutLength=ucasemap_utf8ToUpper(csm.getAlias(), |
73c04bcf | 417 | utf8Out, (int32_t)sizeof(utf8Out), |
729e4ab9 | 418 | utf8In, utf8InLength, errorCode); |
73c04bcf | 419 | break; |
46f4442e A |
420 | #if !UCONFIG_NO_BREAK_ITERATION |
421 | case TEST_TITLE: | |
422 | name="ucasemap_utf8ToTitle"; | |
729e4ab9 | 423 | utf8OutLength=ucasemap_utf8ToTitle(csm.getAlias(), |
46f4442e | 424 | utf8Out, (int32_t)sizeof(utf8Out), |
729e4ab9 | 425 | utf8In, utf8InLength, errorCode); |
46f4442e A |
426 | break; |
427 | #endif | |
428 | case TEST_FOLD: | |
429 | name="ucasemap_utf8FoldCase"; | |
729e4ab9 | 430 | utf8OutLength=ucasemap_utf8FoldCase(csm.getAlias(), |
46f4442e | 431 | utf8Out, (int32_t)sizeof(utf8Out), |
729e4ab9 | 432 | utf8In, utf8InLength, errorCode); |
46f4442e | 433 | break; |
73c04bcf A |
434 | default: |
435 | name=""; | |
436 | utf8OutLength=0; | |
437 | break; // won't happen | |
b75a7d8f | 438 | } |
73c04bcf | 439 | buffer=result.getBuffer(utf8OutLength); |
729e4ab9 A |
440 | u_strFromUTF8(buffer, result.getCapacity(), &resultLength, utf8Out, utf8OutLength, errorCode); |
441 | result.releaseBuffer(errorCode.isSuccess() ? resultLength : 0); | |
b75a7d8f | 442 | |
729e4ab9 A |
443 | if(errorCode.isFailure()) { |
444 | errcheckln(errorCode, "error: %s() got an error for a test case from casing.res - %s", name, u_errorName(errorCode)); | |
445 | errorCode.reset(); | |
73c04bcf A |
446 | } else if(result!=output) { |
447 | errln("error: %s() got a wrong result for a test case from casing.res", name); | |
46f4442e | 448 | errln("expected \"" + output + "\" got \"" + result + "\"" ); |
73c04bcf | 449 | } |
73c04bcf A |
450 | } |
451 | ||
73c04bcf A |
452 | void |
453 | StringCaseTest::TestCasing() { | |
454 | UErrorCode status = U_ZERO_ERROR; | |
729e4ab9 A |
455 | #if !UCONFIG_NO_BREAK_ITERATION |
456 | LocalUBreakIteratorPointer iter; | |
457 | #endif | |
73c04bcf | 458 | char cLocaleID[100]; |
46f4442e A |
459 | UnicodeString locale, input, output, optionsString, result; |
460 | uint32_t options; | |
73c04bcf | 461 | int32_t whichCase, type; |
729e4ab9 | 462 | LocalPointer<TestDataModule> driver(TestDataModule::getTestDataModule("casing", *this, status)); |
73c04bcf A |
463 | if(U_SUCCESS(status)) { |
464 | for(whichCase=0; whichCase<TEST_COUNT; ++whichCase) { | |
46f4442e A |
465 | #if UCONFIG_NO_BREAK_ITERATION |
466 | if(whichCase==TEST_TITLE) { | |
467 | continue; | |
468 | } | |
469 | #endif | |
729e4ab9 | 470 | LocalPointer<TestData> casingTest(driver->createTestData(dataNames[whichCase], status)); |
73c04bcf A |
471 | if(U_FAILURE(status)) { |
472 | errln("TestCasing failed to createTestData(%s) - %s", dataNames[whichCase], u_errorName(status)); | |
473 | break; | |
b75a7d8f | 474 | } |
73c04bcf A |
475 | const DataMap *myCase = NULL; |
476 | while(casingTest->nextCase(myCase, status)) { | |
73c04bcf A |
477 | input = myCase->getString("Input", status); |
478 | output = myCase->getString("Output", status); | |
b75a7d8f | 479 | |
46f4442e A |
480 | if(whichCase!=TEST_FOLD) { |
481 | locale = myCase->getString("Locale", status); | |
482 | } | |
483 | locale.extract(0, 0x7fffffff, cLocaleID, sizeof(cLocaleID), ""); | |
484 | ||
46f4442e | 485 | #if !UCONFIG_NO_BREAK_ITERATION |
73c04bcf A |
486 | if(whichCase==TEST_TITLE) { |
487 | type = myCase->getInt("Type", status); | |
488 | if(type>=0) { | |
729e4ab9 | 489 | iter.adoptInstead(ubrk_open((UBreakIteratorType)type, cLocaleID, NULL, 0, &status)); |
46f4442e A |
490 | } else if(type==-2) { |
491 | // Open a trivial break iterator that only delivers { 0, length } | |
492 | // or even just { 0 } as boundaries. | |
493 | static const UChar rules[] = { 0x2e, 0x2a, 0x3b }; // ".*;" | |
494 | UParseError parseError; | |
729e4ab9 | 495 | iter.adoptInstead(ubrk_openRules(rules, LENGTHOF(rules), NULL, 0, &parseError, &status)); |
73c04bcf A |
496 | } |
497 | } | |
498 | #endif | |
46f4442e A |
499 | options = 0; |
500 | if(whichCase==TEST_TITLE || whichCase==TEST_FOLD) { | |
501 | optionsString = myCase->getString("Options", status); | |
502 | if(optionsString.indexOf((UChar)0x54)>=0) { // T | |
503 | options|=U_FOLD_CASE_EXCLUDE_SPECIAL_I; | |
504 | } | |
505 | if(optionsString.indexOf((UChar)0x4c)>=0) { // L | |
506 | options|=U_TITLECASE_NO_LOWERCASE; | |
507 | } | |
508 | if(optionsString.indexOf((UChar)0x41)>=0) { // A | |
509 | options|=U_TITLECASE_NO_BREAK_ADJUSTMENT; | |
510 | } | |
511 | } | |
73c04bcf A |
512 | |
513 | if(U_FAILURE(status)) { | |
729e4ab9 | 514 | dataerrln("error: TestCasing() setup failed for %s test case from casing.res: %s", dataNames[whichCase], u_errorName(status)); |
73c04bcf A |
515 | status = U_ZERO_ERROR; |
516 | } else { | |
729e4ab9 A |
517 | #if UCONFIG_NO_BREAK_ITERATION |
518 | LocalPointer<UMemory> iter; | |
519 | #endif | |
520 | TestCasingImpl(input, output, whichCase, iter.getAlias(), cLocaleID, options); | |
b75a7d8f | 521 | } |
b75a7d8f | 522 | |
73c04bcf | 523 | #if !UCONFIG_NO_BREAK_ITERATION |
729e4ab9 | 524 | iter.adoptInstead(NULL); |
73c04bcf A |
525 | #endif |
526 | } | |
b75a7d8f | 527 | } |
b75a7d8f A |
528 | } |
529 | ||
73c04bcf A |
530 | #if !UCONFIG_NO_BREAK_ITERATION |
531 | // more tests for API coverage | |
532 | status=U_ZERO_ERROR; | |
533 | input=UNICODE_STRING_SIMPLE("sTrA\\u00dfE").unescape(); | |
534 | (result=input).toTitle(NULL); | |
535 | if(result!=UNICODE_STRING_SIMPLE("Stra\\u00dfe").unescape()) { | |
729e4ab9 | 536 | dataerrln("UnicodeString::toTitle(NULL) failed."); |
73c04bcf | 537 | } |
b75a7d8f A |
538 | #endif |
539 | } | |
4388f060 A |
540 | |
541 | void | |
542 | StringCaseTest::TestFullCaseFoldingIterator() { | |
543 | UnicodeString ffi=UNICODE_STRING_SIMPLE("ffi"); | |
544 | UnicodeString ss=UNICODE_STRING_SIMPLE("ss"); | |
545 | FullCaseFoldingIterator iter; | |
546 | int32_t count=0; | |
547 | int32_t countSpecific=0; | |
548 | UChar32 c; | |
549 | UnicodeString full; | |
550 | while((c=iter.next(full))>=0) { | |
551 | ++count; | |
552 | // Check that the full Case_Folding has more than 1 code point. | |
553 | if(!full.hasMoreChar32Than(0, 0x7fffffff, 1)) { | |
554 | errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding has at most 1 code point", (long)c); | |
555 | continue; | |
556 | } | |
557 | // Check that full == Case_Folding(c). | |
558 | UnicodeString cf(c); | |
559 | cf.foldCase(); | |
560 | if(full!=cf) { | |
561 | errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding != cf(c)", (long)c); | |
562 | continue; | |
563 | } | |
564 | // Spot-check a couple of specific cases. | |
565 | if((full==ffi && c==0xfb03) || (full==ss && (c==0xdf || c==0x1e9e))) { | |
566 | ++countSpecific; | |
567 | } | |
568 | } | |
569 | if(countSpecific!=3) { | |
570 | errln("error: FullCaseFoldingIterator did not yield exactly the expected specific cases"); | |
571 | } | |
572 | if(count<70) { | |
573 | errln("error: FullCaseFoldingIterator yielded only %d (cp, full) pairs", (int)count); | |
574 | } | |
575 | } |