]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ******************************************************************************* | |
3 | * | |
2ca993e8 | 4 | * Copyright (C) 2002-2016, International Business Machines |
b75a7d8f A |
5 | * Corporation and others. All Rights Reserved. |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: strcase.cpp | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2002mar12 | |
14 | * created by: Markus W. Scherer | |
15 | * | |
16 | * Test file for string casing C++ API functions. | |
17 | */ | |
18 | ||
b75a7d8f | 19 | #include "unicode/uchar.h" |
b75a7d8f A |
20 | #include "unicode/ures.h" |
21 | #include "unicode/uloc.h" | |
22 | #include "unicode/locid.h" | |
23 | #include "unicode/ubrk.h" | |
73c04bcf A |
24 | #include "unicode/unistr.h" |
25 | #include "unicode/ucasemap.h" | |
4388f060 | 26 | #include "ucase.h" |
b75a7d8f | 27 | #include "ustrtest.h" |
374ca955 | 28 | #include "unicode/tstdtmod.h" |
b331163b | 29 | #include "cmemory.h" |
46f4442e | 30 | |
374ca955 | 31 | StringCaseTest::~StringCaseTest() {} |
b75a7d8f A |
32 | |
33 | void | |
34 | StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { | |
4388f060 A |
35 | if(exec) { |
36 | logln("TestSuite StringCaseTest: "); | |
37 | } | |
38 | TESTCASE_AUTO_BEGIN; | |
39 | TESTCASE_AUTO(TestCaseConversion); | |
729e4ab9 | 40 | #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION |
4388f060 | 41 | TESTCASE_AUTO(TestCasing); |
b75a7d8f | 42 | #endif |
4388f060 | 43 | TESTCASE_AUTO(TestFullCaseFoldingIterator); |
a62d09fc | 44 | TESTCASE_AUTO(TestLongUpper); |
4388f060 | 45 | TESTCASE_AUTO_END; |
b75a7d8f A |
46 | } |
47 | ||
48 | void | |
49 | StringCaseTest::TestCaseConversion() | |
50 | { | |
73c04bcf | 51 | static const UChar uppercaseGreek[] = |
b75a7d8f A |
52 | { 0x399, 0x395, 0x3a3, 0x3a5, 0x3a3, 0x20, 0x03a7, 0x3a1, 0x399, 0x3a3, 0x3a4, |
53 | 0x39f, 0x3a3, 0 }; | |
54 | // "IESUS CHRISTOS" | |
55 | ||
73c04bcf | 56 | static const UChar lowercaseGreek[] = |
b75a7d8f A |
57 | { 0x3b9, 0x3b5, 0x3c3, 0x3c5, 0x3c2, 0x20, 0x03c7, 0x3c1, 0x3b9, 0x3c3, 0x3c4, |
58 | 0x3bf, 0x3c2, 0 }; | |
59 | // "iesus christos" | |
60 | ||
73c04bcf | 61 | static const UChar lowercaseTurkish[] = |
b75a7d8f A |
62 | { 0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0x2c, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x63, 0x6f, |
63 | 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x0131, 0x6e, 0x6f, 0x70, 0x6c, 0x65, 0x21, 0 }; | |
64 | ||
73c04bcf | 65 | static const UChar uppercaseTurkish[] = |
b75a7d8f A |
66 | { 0x54, 0x4f, 0x50, 0x4b, 0x41, 0x50, 0x49, 0x20, 0x50, 0x41, 0x4c, 0x41, 0x43, 0x45, 0x2c, 0x20, |
67 | 0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4c, 0 }; | |
68 | ||
69 | UnicodeString expectedResult; | |
70 | UnicodeString test3; | |
71 | ||
72 | test3 += (UChar32)0x0130; | |
73 | test3 += "STANBUL, NOT CONSTANTINOPLE!"; | |
74 | ||
75 | UnicodeString test4(test3); | |
73c04bcf | 76 | test4.toLower(Locale("")); |
b75a7d8f A |
77 | expectedResult = UnicodeString("i\\u0307stanbul, not constantinople!", "").unescape(); |
78 | if (test4 != expectedResult) | |
79 | errln("1. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); | |
80 | ||
81 | test4 = test3; | |
82 | test4.toLower(Locale("tr", "TR")); | |
83 | expectedResult = lowercaseTurkish; | |
84 | if (test4 != expectedResult) | |
85 | errln("2. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); | |
86 | ||
87 | test3 = "topkap"; | |
88 | test3 += (UChar32)0x0131; | |
89 | test3 += " palace, istanbul"; | |
90 | test4 = test3; | |
91 | ||
73c04bcf | 92 | test4.toUpper(Locale("")); |
b75a7d8f A |
93 | expectedResult = "TOPKAPI PALACE, ISTANBUL"; |
94 | if (test4 != expectedResult) | |
95 | errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); | |
96 | ||
97 | test4 = test3; | |
98 | test4.toUpper(Locale("tr", "TR")); | |
99 | expectedResult = uppercaseTurkish; | |
100 | if (test4 != expectedResult) | |
101 | errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); | |
102 | ||
103 | test3 = CharsToUnicodeString("S\\u00FC\\u00DFmayrstra\\u00DFe"); | |
104 | ||
105 | test3.toUpper(Locale("de", "DE")); | |
106 | expectedResult = CharsToUnicodeString("S\\u00DCSSMAYRSTRASSE"); | |
107 | if (test3 != expectedResult) | |
108 | errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test3 + "\"."); | |
109 | ||
110 | test4.replace(0, test4.length(), uppercaseGreek); | |
111 | ||
112 | test4.toLower(Locale("el", "GR")); | |
113 | expectedResult = lowercaseGreek; | |
114 | if (test4 != expectedResult) | |
115 | errln("toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); | |
116 | ||
117 | test4.replace(0, test4.length(), lowercaseGreek); | |
118 | ||
119 | test4.toUpper(); | |
120 | expectedResult = uppercaseGreek; | |
121 | if (test4 != expectedResult) | |
122 | errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); | |
123 | ||
124 | // more string case mapping tests with the new implementation | |
125 | { | |
126 | static const UChar | |
127 | ||
128 | beforeLower[]= { 0x61, 0x42, 0x49, 0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff }, | |
129 | lowerRoot[]= { 0x61, 0x62, 0x69, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff }, | |
130 | lowerTurkish[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff }, | |
131 | ||
132 | beforeUpper[]= { 0x61, 0x42, 0x69, 0x3c2, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xfb03, 0xfb03, 0xd93f, 0xdfff }, | |
133 | upperRoot[]= { 0x41, 0x42, 0x49, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff }, | |
134 | upperTurkish[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff }, | |
135 | ||
136 | beforeMiniUpper[]= { 0xdf, 0x61 }, | |
137 | miniUpper[]= { 0x53, 0x53, 0x41 }; | |
138 | ||
139 | UnicodeString s; | |
140 | ||
141 | /* lowercase with root locale */ | |
2ca993e8 | 142 | s=UnicodeString(FALSE, beforeLower, UPRV_LENGTHOF(beforeLower)); |
b75a7d8f | 143 | s.toLower(""); |
2ca993e8 | 144 | if( s.length()!=UPRV_LENGTHOF(lowerRoot) || |
b75a7d8f A |
145 | s!=UnicodeString(FALSE, lowerRoot, s.length()) |
146 | ) { | |
2ca993e8 | 147 | errln("error in toLower(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerRoot, UPRV_LENGTHOF(lowerRoot)) + "\""); |
b75a7d8f A |
148 | } |
149 | ||
150 | /* lowercase with turkish locale */ | |
2ca993e8 | 151 | s=UnicodeString(FALSE, beforeLower, UPRV_LENGTHOF(beforeLower)); |
b75a7d8f | 152 | s.setCharAt(0, beforeLower[0]).toLower(Locale("tr")); |
2ca993e8 | 153 | if( s.length()!=UPRV_LENGTHOF(lowerTurkish) || |
b75a7d8f A |
154 | s!=UnicodeString(FALSE, lowerTurkish, s.length()) |
155 | ) { | |
2ca993e8 | 156 | errln("error in toLower(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerTurkish, UPRV_LENGTHOF(lowerTurkish)) + "\""); |
b75a7d8f A |
157 | } |
158 | ||
159 | /* uppercase with root locale */ | |
2ca993e8 | 160 | s=UnicodeString(FALSE, beforeUpper, UPRV_LENGTHOF(beforeUpper)); |
73c04bcf | 161 | s.setCharAt(0, beforeUpper[0]).toUpper(Locale("")); |
2ca993e8 | 162 | if( s.length()!=UPRV_LENGTHOF(upperRoot) || |
b75a7d8f A |
163 | s!=UnicodeString(FALSE, upperRoot, s.length()) |
164 | ) { | |
2ca993e8 | 165 | errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperRoot, UPRV_LENGTHOF(upperRoot)) + "\""); |
b75a7d8f A |
166 | } |
167 | ||
168 | /* uppercase with turkish locale */ | |
2ca993e8 | 169 | s=UnicodeString(FALSE, beforeUpper, UPRV_LENGTHOF(beforeUpper)); |
b75a7d8f | 170 | s.toUpper(Locale("tr")); |
2ca993e8 | 171 | if( s.length()!=UPRV_LENGTHOF(upperTurkish) || |
b75a7d8f A |
172 | s!=UnicodeString(FALSE, upperTurkish, s.length()) |
173 | ) { | |
2ca993e8 | 174 | errln("error in toUpper(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperTurkish, UPRV_LENGTHOF(upperTurkish)) + "\""); |
b75a7d8f A |
175 | } |
176 | ||
177 | /* uppercase a short string with root locale */ | |
2ca993e8 | 178 | s=UnicodeString(FALSE, beforeMiniUpper, UPRV_LENGTHOF(beforeMiniUpper)); |
b75a7d8f | 179 | s.setCharAt(0, beforeMiniUpper[0]).toUpper(""); |
2ca993e8 | 180 | if( s.length()!=UPRV_LENGTHOF(miniUpper) || |
b75a7d8f A |
181 | s!=UnicodeString(FALSE, miniUpper, s.length()) |
182 | ) { | |
2ca993e8 | 183 | errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, miniUpper, UPRV_LENGTHOF(miniUpper)) + "\""); |
b75a7d8f A |
184 | } |
185 | } | |
186 | ||
187 | // test some supplementary characters (>= Unicode 3.1) | |
188 | { | |
189 | UnicodeString t; | |
190 | ||
191 | UnicodeString | |
192 | deseretInput=UnicodeString("\\U0001043C\\U00010414", "").unescape(), | |
193 | deseretLower=UnicodeString("\\U0001043C\\U0001043C", "").unescape(), | |
194 | deseretUpper=UnicodeString("\\U00010414\\U00010414", "").unescape(); | |
195 | (t=deseretInput).toLower(); | |
196 | if(t!=deseretLower) { | |
197 | errln("error lowercasing Deseret (plane 1) characters"); | |
198 | } | |
199 | (t=deseretInput).toUpper(); | |
200 | if(t!=deseretUpper) { | |
201 | errln("error uppercasing Deseret (plane 1) characters"); | |
202 | } | |
203 | } | |
204 | ||
205 | // test some more cases that looked like problems | |
206 | { | |
207 | UnicodeString t; | |
208 | ||
209 | UnicodeString | |
210 | ljInput=UnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 \\U0001043C\\U00010414", "").unescape(), | |
211 | ljLower=UnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 \\U0001043C\\U0001043C", "").unescape(), | |
212 | ljUpper=UnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 \\U00010414\\U00010414", "").unescape(); | |
213 | (t=ljInput).toLower("en"); | |
214 | if(t!=ljLower) { | |
215 | errln("error lowercasing LJ characters"); | |
216 | } | |
217 | (t=ljInput).toUpper("en"); | |
218 | if(t!=ljUpper) { | |
219 | errln("error uppercasing LJ characters"); | |
220 | } | |
221 | } | |
222 | ||
223 | #if !UCONFIG_NO_NORMALIZATION | |
224 | // some context-sensitive casing depends on normalization data being present | |
225 | ||
226 | // Unicode 3.1.1 SpecialCasing tests | |
227 | { | |
228 | UnicodeString t; | |
229 | ||
230 | // sigmas preceded and/or followed by cased letters | |
231 | UnicodeString | |
232 | sigmas=UnicodeString("i\\u0307\\u03a3\\u0308j \\u0307\\u03a3\\u0308j i\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(), | |
233 | sigmasLower=UnicodeString("i\\u0307\\u03c3\\u0308j \\u0307\\u03c3\\u0308j i\\u00ad\\u03c2\\u0308 \\u0307\\u03c3\\u0308 ", "").unescape(), | |
234 | sigmasUpper=UnicodeString("I\\u0307\\u03a3\\u0308J \\u0307\\u03a3\\u0308J I\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(); | |
235 | ||
236 | (t=sigmas).toLower(); | |
237 | if(t!=sigmasLower) { | |
238 | errln("error in sigmas.toLower()=\"" + t + "\" expected \"" + sigmasLower + "\""); | |
239 | } | |
240 | ||
73c04bcf | 241 | (t=sigmas).toUpper(Locale("")); |
b75a7d8f A |
242 | if(t!=sigmasUpper) { |
243 | errln("error in sigmas.toUpper()=\"" + t + "\" expected \"" + sigmasUpper + "\""); | |
244 | } | |
245 | ||
246 | // turkish & azerbaijani dotless i & dotted I | |
247 | // remove dot above if there was a capital I before and there are no more accents above | |
248 | UnicodeString | |
249 | dots=UnicodeString("I \\u0130 I\\u0307 I\\u0327\\u0307 I\\u0301\\u0307 I\\u0327\\u0307\\u0301", "").unescape(), | |
250 | dotsTurkish=UnicodeString("\\u0131 i i i\\u0327 \\u0131\\u0301\\u0307 i\\u0327\\u0301", "").unescape(), | |
251 | dotsDefault=UnicodeString("i i\\u0307 i\\u0307 i\\u0327\\u0307 i\\u0301\\u0307 i\\u0327\\u0307\\u0301", "").unescape(); | |
252 | ||
253 | (t=dots).toLower("tr"); | |
254 | if(t!=dotsTurkish) { | |
255 | errln("error in dots.toLower(tr)=\"" + t + "\" expected \"" + dotsTurkish + "\""); | |
256 | } | |
257 | ||
258 | (t=dots).toLower("de"); | |
259 | if(t!=dotsDefault) { | |
260 | errln("error in dots.toLower(de)=\"" + t + "\" expected \"" + dotsDefault + "\""); | |
261 | } | |
262 | } | |
263 | ||
264 | // more Unicode 3.1.1 tests | |
265 | { | |
266 | UnicodeString t; | |
267 | ||
268 | // lithuanian dot above in uppercasing | |
269 | UnicodeString | |
270 | dots=UnicodeString("a\\u0307 \\u0307 i\\u0307 j\\u0327\\u0307 j\\u0301\\u0307", "").unescape(), | |
271 | dotsLithuanian=UnicodeString("A\\u0307 \\u0307 I J\\u0327 J\\u0301\\u0307", "").unescape(), | |
272 | dotsDefault=UnicodeString("A\\u0307 \\u0307 I\\u0307 J\\u0327\\u0307 J\\u0301\\u0307", "").unescape(); | |
273 | ||
274 | (t=dots).toUpper("lt"); | |
275 | if(t!=dotsLithuanian) { | |
276 | errln("error in dots.toUpper(lt)=\"" + t + "\" expected \"" + dotsLithuanian + "\""); | |
277 | } | |
278 | ||
279 | (t=dots).toUpper("de"); | |
280 | if(t!=dotsDefault) { | |
281 | errln("error in dots.toUpper(de)=\"" + t + "\" expected \"" + dotsDefault + "\""); | |
282 | } | |
283 | ||
284 | // lithuanian adds dot above to i in lowercasing if there are more above accents | |
285 | UnicodeString | |
286 | i=UnicodeString("I I\\u0301 J J\\u0301 \\u012e \\u012e\\u0301 \\u00cc\\u00cd\\u0128", "").unescape(), | |
287 | iLithuanian=UnicodeString("i i\\u0307\\u0301 j j\\u0307\\u0301 \\u012f \\u012f\\u0307\\u0301 i\\u0307\\u0300i\\u0307\\u0301i\\u0307\\u0303", "").unescape(), | |
288 | iDefault=UnicodeString("i i\\u0301 j j\\u0301 \\u012f \\u012f\\u0301 \\u00ec\\u00ed\\u0129", "").unescape(); | |
289 | ||
290 | (t=i).toLower("lt"); | |
291 | if(t!=iLithuanian) { | |
292 | errln("error in i.toLower(lt)=\"" + t + "\" expected \"" + iLithuanian + "\""); | |
293 | } | |
294 | ||
295 | (t=i).toLower("de"); | |
296 | if(t!=iDefault) { | |
297 | errln("error in i.toLower(de)=\"" + t + "\" expected \"" + iDefault + "\""); | |
298 | } | |
299 | } | |
300 | ||
301 | #endif | |
302 | ||
303 | // test case folding | |
304 | { | |
305 | UnicodeString | |
306 | s=UnicodeString("A\\u00df\\u00b5\\ufb03\\U0001040c\\u0130\\u0131", "").unescape(), | |
307 | f=UnicodeString("ass\\u03bcffi\\U00010434i\\u0307\\u0131", "").unescape(), | |
308 | g=UnicodeString("ass\\u03bcffi\\U00010434i\\u0131", "").unescape(), | |
309 | t; | |
310 | ||
311 | (t=s).foldCase(); | |
312 | if(f!=t) { | |
313 | errln("error in foldCase(\"" + s + "\", default)=\"" + t + "\" but expected \"" + f + "\""); | |
314 | } | |
315 | ||
316 | // alternate handling for dotted I/dotless i (U+0130, U+0131) | |
317 | (t=s).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I); | |
318 | if(g!=t) { | |
319 | errln("error in foldCase(\"" + s + "\", U_FOLD_CASE_EXCLUDE_SPECIAL_I)=\"" + t + "\" but expected \"" + g + "\""); | |
320 | } | |
321 | } | |
322 | } | |
323 | ||
73c04bcf A |
324 | // data-driven case mapping tests ------------------------------------------ *** |
325 | ||
// Selector for the kind of case mapping under test. The values are
// consecutive from zero so they can index dataNames[]; TEST_COUNT is the
// number of real selectors and serves as the loop bound.
enum {
    TEST_LOWER = 0,  // lowercasing
    TEST_UPPER,      // uppercasing
    TEST_TITLE,      // titlecasing
    TEST_FOLD,       // case folding
    TEST_COUNT       // number of selectors above
};
b75a7d8f | 333 | |
73c04bcf A |
334 | // names of TestData children in casing.txt |
335 | static const char *const dataNames[TEST_COUNT+1]={ | |
336 | "lowercasing", | |
337 | "uppercasing", | |
73c04bcf | 338 | "titlecasing", |
46f4442e | 339 | "casefolding", |
73c04bcf A |
340 | "" |
341 | }; | |
374ca955 | 342 | |
73c04bcf A |
343 | void |
344 | StringCaseTest::TestCasingImpl(const UnicodeString &input, | |
345 | const UnicodeString &output, | |
346 | int32_t whichCase, | |
46f4442e | 347 | void *iter, const char *localeID, uint32_t options) { |
73c04bcf A |
348 | // UnicodeString |
349 | UnicodeString result; | |
350 | const char *name; | |
46f4442e | 351 | Locale locale(localeID); |
73c04bcf A |
352 | |
353 | result=input; | |
354 | switch(whichCase) { | |
355 | case TEST_LOWER: | |
356 | name="toLower"; | |
46f4442e | 357 | result.toLower(locale); |
73c04bcf A |
358 | break; |
359 | case TEST_UPPER: | |
360 | name="toUpper"; | |
46f4442e A |
361 | result.toUpper(locale); |
362 | break; | |
363 | #if !UCONFIG_NO_BREAK_ITERATION | |
364 | case TEST_TITLE: | |
365 | name="toTitle"; | |
366 | result.toTitle((BreakIterator *)iter, locale, options); | |
367 | break; | |
368 | #endif | |
369 | case TEST_FOLD: | |
370 | name="foldCase"; | |
371 | result.foldCase(options); | |
73c04bcf A |
372 | break; |
373 | default: | |
374 | name=""; | |
375 | break; // won't happen | |
b75a7d8f | 376 | } |
73c04bcf | 377 | if(result!=output) { |
729e4ab9 | 378 | dataerrln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name); |
b75a7d8f | 379 | } |
46f4442e A |
380 | #if !UCONFIG_NO_BREAK_ITERATION |
381 | if(whichCase==TEST_TITLE && options==0) { | |
382 | result=input; | |
383 | result.toTitle((BreakIterator *)iter, locale); | |
384 | if(result!=output) { | |
729e4ab9 | 385 | dataerrln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res"); |
46f4442e A |
386 | } |
387 | } | |
388 | #endif | |
b75a7d8f | 389 | |
73c04bcf A |
390 | // UTF-8 |
391 | char utf8In[100], utf8Out[100]; | |
392 | int32_t utf8InLength, utf8OutLength, resultLength; | |
393 | UChar *buffer; | |
394 | ||
729e4ab9 A |
395 | IcuTestErrorCode errorCode(*this, "TestCasingImpl"); |
396 | LocalUCaseMapPointer csm(ucasemap_open(localeID, options, errorCode)); | |
46f4442e A |
397 | #if !UCONFIG_NO_BREAK_ITERATION |
398 | if(iter!=NULL) { | |
399 | // Clone the break iterator so that the UCaseMap can safely adopt it. | |
57a6839d | 400 | UBreakIterator *clone=ubrk_safeClone((UBreakIterator *)iter, NULL, NULL, errorCode); |
729e4ab9 | 401 | ucasemap_setBreakIterator(csm.getAlias(), clone, errorCode); |
46f4442e A |
402 | } |
403 | #endif | |
73c04bcf | 404 | |
729e4ab9 | 405 | u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), errorCode); |
73c04bcf A |
406 | switch(whichCase) { |
407 | case TEST_LOWER: | |
408 | name="ucasemap_utf8ToLower"; | |
729e4ab9 | 409 | utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(), |
73c04bcf | 410 | utf8Out, (int32_t)sizeof(utf8Out), |
729e4ab9 | 411 | utf8In, utf8InLength, errorCode); |
73c04bcf A |
412 | break; |
413 | case TEST_UPPER: | |
414 | name="ucasemap_utf8ToUpper"; | |
729e4ab9 | 415 | utf8OutLength=ucasemap_utf8ToUpper(csm.getAlias(), |
73c04bcf | 416 | utf8Out, (int32_t)sizeof(utf8Out), |
729e4ab9 | 417 | utf8In, utf8InLength, errorCode); |
73c04bcf | 418 | break; |
46f4442e A |
419 | #if !UCONFIG_NO_BREAK_ITERATION |
420 | case TEST_TITLE: | |
421 | name="ucasemap_utf8ToTitle"; | |
729e4ab9 | 422 | utf8OutLength=ucasemap_utf8ToTitle(csm.getAlias(), |
46f4442e | 423 | utf8Out, (int32_t)sizeof(utf8Out), |
729e4ab9 | 424 | utf8In, utf8InLength, errorCode); |
46f4442e A |
425 | break; |
426 | #endif | |
427 | case TEST_FOLD: | |
428 | name="ucasemap_utf8FoldCase"; | |
729e4ab9 | 429 | utf8OutLength=ucasemap_utf8FoldCase(csm.getAlias(), |
46f4442e | 430 | utf8Out, (int32_t)sizeof(utf8Out), |
729e4ab9 | 431 | utf8In, utf8InLength, errorCode); |
46f4442e | 432 | break; |
73c04bcf A |
433 | default: |
434 | name=""; | |
435 | utf8OutLength=0; | |
436 | break; // won't happen | |
b75a7d8f | 437 | } |
73c04bcf | 438 | buffer=result.getBuffer(utf8OutLength); |
729e4ab9 A |
439 | u_strFromUTF8(buffer, result.getCapacity(), &resultLength, utf8Out, utf8OutLength, errorCode); |
440 | result.releaseBuffer(errorCode.isSuccess() ? resultLength : 0); | |
b75a7d8f | 441 | |
729e4ab9 A |
442 | if(errorCode.isFailure()) { |
443 | errcheckln(errorCode, "error: %s() got an error for a test case from casing.res - %s", name, u_errorName(errorCode)); | |
444 | errorCode.reset(); | |
73c04bcf A |
445 | } else if(result!=output) { |
446 | errln("error: %s() got a wrong result for a test case from casing.res", name); | |
46f4442e | 447 | errln("expected \"" + output + "\" got \"" + result + "\"" ); |
73c04bcf | 448 | } |
73c04bcf A |
449 | } |
450 | ||
73c04bcf A |
451 | void |
452 | StringCaseTest::TestCasing() { | |
453 | UErrorCode status = U_ZERO_ERROR; | |
729e4ab9 A |
454 | #if !UCONFIG_NO_BREAK_ITERATION |
455 | LocalUBreakIteratorPointer iter; | |
456 | #endif | |
73c04bcf | 457 | char cLocaleID[100]; |
46f4442e A |
458 | UnicodeString locale, input, output, optionsString, result; |
459 | uint32_t options; | |
73c04bcf | 460 | int32_t whichCase, type; |
729e4ab9 | 461 | LocalPointer<TestDataModule> driver(TestDataModule::getTestDataModule("casing", *this, status)); |
73c04bcf A |
462 | if(U_SUCCESS(status)) { |
463 | for(whichCase=0; whichCase<TEST_COUNT; ++whichCase) { | |
46f4442e A |
464 | #if UCONFIG_NO_BREAK_ITERATION |
465 | if(whichCase==TEST_TITLE) { | |
466 | continue; | |
467 | } | |
468 | #endif | |
729e4ab9 | 469 | LocalPointer<TestData> casingTest(driver->createTestData(dataNames[whichCase], status)); |
73c04bcf A |
470 | if(U_FAILURE(status)) { |
471 | errln("TestCasing failed to createTestData(%s) - %s", dataNames[whichCase], u_errorName(status)); | |
472 | break; | |
b75a7d8f | 473 | } |
73c04bcf A |
474 | const DataMap *myCase = NULL; |
475 | while(casingTest->nextCase(myCase, status)) { | |
73c04bcf A |
476 | input = myCase->getString("Input", status); |
477 | output = myCase->getString("Output", status); | |
b75a7d8f | 478 | |
46f4442e A |
479 | if(whichCase!=TEST_FOLD) { |
480 | locale = myCase->getString("Locale", status); | |
481 | } | |
482 | locale.extract(0, 0x7fffffff, cLocaleID, sizeof(cLocaleID), ""); | |
483 | ||
46f4442e | 484 | #if !UCONFIG_NO_BREAK_ITERATION |
73c04bcf A |
485 | if(whichCase==TEST_TITLE) { |
486 | type = myCase->getInt("Type", status); | |
487 | if(type>=0) { | |
729e4ab9 | 488 | iter.adoptInstead(ubrk_open((UBreakIteratorType)type, cLocaleID, NULL, 0, &status)); |
46f4442e A |
489 | } else if(type==-2) { |
490 | // Open a trivial break iterator that only delivers { 0, length } | |
491 | // or even just { 0 } as boundaries. | |
492 | static const UChar rules[] = { 0x2e, 0x2a, 0x3b }; // ".*;" | |
493 | UParseError parseError; | |
b331163b | 494 | iter.adoptInstead(ubrk_openRules(rules, UPRV_LENGTHOF(rules), NULL, 0, &parseError, &status)); |
73c04bcf A |
495 | } |
496 | } | |
497 | #endif | |
46f4442e A |
498 | options = 0; |
499 | if(whichCase==TEST_TITLE || whichCase==TEST_FOLD) { | |
500 | optionsString = myCase->getString("Options", status); | |
501 | if(optionsString.indexOf((UChar)0x54)>=0) { // T | |
502 | options|=U_FOLD_CASE_EXCLUDE_SPECIAL_I; | |
503 | } | |
504 | if(optionsString.indexOf((UChar)0x4c)>=0) { // L | |
505 | options|=U_TITLECASE_NO_LOWERCASE; | |
506 | } | |
507 | if(optionsString.indexOf((UChar)0x41)>=0) { // A | |
508 | options|=U_TITLECASE_NO_BREAK_ADJUSTMENT; | |
509 | } | |
510 | } | |
73c04bcf A |
511 | |
512 | if(U_FAILURE(status)) { | |
729e4ab9 | 513 | dataerrln("error: TestCasing() setup failed for %s test case from casing.res: %s", dataNames[whichCase], u_errorName(status)); |
73c04bcf A |
514 | status = U_ZERO_ERROR; |
515 | } else { | |
729e4ab9 A |
516 | #if UCONFIG_NO_BREAK_ITERATION |
517 | LocalPointer<UMemory> iter; | |
518 | #endif | |
519 | TestCasingImpl(input, output, whichCase, iter.getAlias(), cLocaleID, options); | |
b75a7d8f | 520 | } |
b75a7d8f | 521 | |
73c04bcf | 522 | #if !UCONFIG_NO_BREAK_ITERATION |
729e4ab9 | 523 | iter.adoptInstead(NULL); |
73c04bcf A |
524 | #endif |
525 | } | |
b75a7d8f | 526 | } |
b75a7d8f A |
527 | } |
528 | ||
73c04bcf A |
529 | #if !UCONFIG_NO_BREAK_ITERATION |
530 | // more tests for API coverage | |
531 | status=U_ZERO_ERROR; | |
532 | input=UNICODE_STRING_SIMPLE("sTrA\\u00dfE").unescape(); | |
533 | (result=input).toTitle(NULL); | |
534 | if(result!=UNICODE_STRING_SIMPLE("Stra\\u00dfe").unescape()) { | |
729e4ab9 | 535 | dataerrln("UnicodeString::toTitle(NULL) failed."); |
73c04bcf | 536 | } |
b75a7d8f A |
537 | #endif |
538 | } | |
4388f060 A |
539 | |
540 | void | |
541 | StringCaseTest::TestFullCaseFoldingIterator() { | |
542 | UnicodeString ffi=UNICODE_STRING_SIMPLE("ffi"); | |
543 | UnicodeString ss=UNICODE_STRING_SIMPLE("ss"); | |
544 | FullCaseFoldingIterator iter; | |
545 | int32_t count=0; | |
546 | int32_t countSpecific=0; | |
547 | UChar32 c; | |
548 | UnicodeString full; | |
549 | while((c=iter.next(full))>=0) { | |
550 | ++count; | |
551 | // Check that the full Case_Folding has more than 1 code point. | |
552 | if(!full.hasMoreChar32Than(0, 0x7fffffff, 1)) { | |
553 | errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding has at most 1 code point", (long)c); | |
554 | continue; | |
555 | } | |
556 | // Check that full == Case_Folding(c). | |
557 | UnicodeString cf(c); | |
558 | cf.foldCase(); | |
559 | if(full!=cf) { | |
560 | errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding != cf(c)", (long)c); | |
561 | continue; | |
562 | } | |
563 | // Spot-check a couple of specific cases. | |
564 | if((full==ffi && c==0xfb03) || (full==ss && (c==0xdf || c==0x1e9e))) { | |
565 | ++countSpecific; | |
566 | } | |
567 | } | |
568 | if(countSpecific!=3) { | |
569 | errln("error: FullCaseFoldingIterator did not yield exactly the expected specific cases"); | |
570 | } | |
571 | if(count<70) { | |
572 | errln("error: FullCaseFoldingIterator yielded only %d (cp, full) pairs", (int)count); | |
573 | } | |
574 | } | |
a62d09fc A |
575 | |
576 | void | |
577 | StringCaseTest::TestLongUpper() { | |
578 | if (quick) { | |
579 | logln("not exhaustive mode: skipping this test"); | |
580 | return; | |
581 | } | |
582 | // Ticket #12663, crash with an extremely long string where | |
583 | // U+0390 maps to 0399 0308 0301 so that the result is three times as long | |
584 | // and overflows an int32_t. | |
585 | int32_t length = 0x40000004; // more than 1G UChars | |
586 | UnicodeString s(length, (UChar32)0x390, length); | |
587 | UnicodeString result; | |
588 | UChar *dest = result.getBuffer(length + 1); | |
589 | if (s.isBogus() || dest == NULL) { | |
590 | logln("Out of memory, unable to run this test on this machine."); | |
591 | return; | |
592 | } | |
593 | IcuTestErrorCode errorCode(*this, "TestLongUpper"); | |
594 | int32_t destLength = u_strToUpper(dest, result.getCapacity(), | |
595 | s.getBuffer(), s.length(), "", errorCode); | |
596 | result.releaseBuffer(destLength); | |
597 | if (errorCode.reset() != U_INDEX_OUTOFBOUNDS_ERROR) { | |
598 | errln("expected U_INDEX_OUTOFBOUNDS_ERROR, got %s (destLength is undefined, got %ld)", | |
599 | errorCode.errorName(), (long)destLength); | |
600 | } | |
601 | } |