]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
b75a7d8f A |
3 | /* |
4 | ******************************************************************************* | |
5 | * | |
2ca993e8 | 6 | * Copyright (C) 2002-2016, International Business Machines |
b75a7d8f A |
7 | * Corporation and others. All Rights Reserved. |
8 | * | |
9 | ******************************************************************************* | |
10 | * file name: strcase.cpp | |
f3c0d7a5 | 11 | * encoding: UTF-8 |
b75a7d8f A |
12 | * tab size: 8 (not used) |
13 | * indentation:4 | |
14 | * | |
15 | * created on: 2002mar12 | |
16 | * created by: Markus W. Scherer | |
17 | * | |
18 | * Test file for string casing C++ API functions. | |
19 | */ | |
20 | ||
f3c0d7a5 A |
21 | #include "unicode/std_string.h" |
22 | #include "unicode/casemap.h" | |
23 | #include "unicode/edits.h" | |
b75a7d8f | 24 | #include "unicode/uchar.h" |
b75a7d8f A |
25 | #include "unicode/ures.h" |
26 | #include "unicode/uloc.h" | |
27 | #include "unicode/locid.h" | |
28 | #include "unicode/ubrk.h" | |
73c04bcf A |
29 | #include "unicode/unistr.h" |
30 | #include "unicode/ucasemap.h" | |
4388f060 | 31 | #include "ucase.h" |
b75a7d8f | 32 | #include "ustrtest.h" |
374ca955 | 33 | #include "unicode/tstdtmod.h" |
b331163b | 34 | #include "cmemory.h" |
46f4442e | 35 | |
f3c0d7a5 A |
36 | struct EditChange { |
37 | UBool change; | |
38 | int32_t oldLength, newLength; | |
39 | }; | |
40 | ||
41 | class StringCaseTest: public IntlTest { | |
42 | public: | |
43 | StringCaseTest(); | |
44 | virtual ~StringCaseTest(); | |
45 | ||
46 | void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0); | |
47 | ||
48 | void TestCaseConversion(); | |
49 | ||
50 | void TestCasingImpl(const UnicodeString &input, | |
51 | const UnicodeString &output, | |
52 | int32_t whichCase, | |
53 | void *iter, const char *localeID, uint32_t options); | |
54 | void TestCasing(); | |
55 | void TestFullCaseFoldingIterator(); | |
56 | void TestGreekUpper(); | |
57 | void TestLongUpper(); | |
58 | void TestMalformedUTF8(); | |
59 | void TestBufferOverflow(); | |
60 | void TestEdits(); | |
61 | void TestCaseMapWithEdits(); | |
62 | void TestCaseMapUTF8WithEdits(); | |
63 | void TestLongUnicodeString(); | |
64 | void TestBug13127(); | |
65 | ||
66 | private: | |
67 | void assertGreekUpper(const char16_t *s, const char16_t *expected); | |
68 | void checkEditsIter( | |
69 | const UnicodeString &name, Edits::Iterator ei1, Edits::Iterator ei2, // two equal iterators | |
70 | const EditChange expected[], int32_t expLength, UBool withUnchanged, | |
71 | UErrorCode &errorCode); | |
72 | ||
73 | Locale GREEK_LOCALE_; | |
74 | }; | |
75 | ||
76 | StringCaseTest::StringCaseTest() : GREEK_LOCALE_("el") {} | |
77 | ||
374ca955 | 78 | StringCaseTest::~StringCaseTest() {} |
b75a7d8f | 79 | |
f3c0d7a5 A |
80 | extern IntlTest *createStringCaseTest() { |
81 | return new StringCaseTest(); | |
82 | } | |
83 | ||
b75a7d8f A |
84 | void |
85 | StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { | |
4388f060 A |
86 | if(exec) { |
87 | logln("TestSuite StringCaseTest: "); | |
88 | } | |
89 | TESTCASE_AUTO_BEGIN; | |
90 | TESTCASE_AUTO(TestCaseConversion); | |
729e4ab9 | 91 | #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION |
4388f060 | 92 | TESTCASE_AUTO(TestCasing); |
f3c0d7a5 | 93 | TESTCASE_AUTO(TestBug13127); |
b75a7d8f | 94 | #endif |
4388f060 | 95 | TESTCASE_AUTO(TestFullCaseFoldingIterator); |
f3c0d7a5 | 96 | TESTCASE_AUTO(TestGreekUpper); |
a62d09fc | 97 | TESTCASE_AUTO(TestLongUpper); |
f3c0d7a5 A |
98 | TESTCASE_AUTO(TestMalformedUTF8); |
99 | TESTCASE_AUTO(TestBufferOverflow); | |
100 | TESTCASE_AUTO(TestEdits); | |
101 | TESTCASE_AUTO(TestCaseMapWithEdits); | |
102 | TESTCASE_AUTO(TestCaseMapUTF8WithEdits); | |
103 | TESTCASE_AUTO(TestLongUnicodeString); | |
4388f060 | 104 | TESTCASE_AUTO_END; |
b75a7d8f A |
105 | } |
106 | ||
107 | void | |
108 | StringCaseTest::TestCaseConversion() | |
109 | { | |
73c04bcf | 110 | static const UChar uppercaseGreek[] = |
b75a7d8f A |
111 | { 0x399, 0x395, 0x3a3, 0x3a5, 0x3a3, 0x20, 0x03a7, 0x3a1, 0x399, 0x3a3, 0x3a4, |
112 | 0x39f, 0x3a3, 0 }; | |
113 | // "IESUS CHRISTOS" | |
114 | ||
73c04bcf | 115 | static const UChar lowercaseGreek[] = |
b75a7d8f A |
116 | { 0x3b9, 0x3b5, 0x3c3, 0x3c5, 0x3c2, 0x20, 0x03c7, 0x3c1, 0x3b9, 0x3c3, 0x3c4, |
117 | 0x3bf, 0x3c2, 0 }; | |
118 | // "iesus christos" | |
119 | ||
73c04bcf | 120 | static const UChar lowercaseTurkish[] = |
b75a7d8f A |
121 | { 0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0x2c, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x63, 0x6f, |
122 | 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x0131, 0x6e, 0x6f, 0x70, 0x6c, 0x65, 0x21, 0 }; | |
123 | ||
73c04bcf | 124 | static const UChar uppercaseTurkish[] = |
b75a7d8f A |
125 | { 0x54, 0x4f, 0x50, 0x4b, 0x41, 0x50, 0x49, 0x20, 0x50, 0x41, 0x4c, 0x41, 0x43, 0x45, 0x2c, 0x20, |
126 | 0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4c, 0 }; | |
127 | ||
128 | UnicodeString expectedResult; | |
129 | UnicodeString test3; | |
130 | ||
131 | test3 += (UChar32)0x0130; | |
132 | test3 += "STANBUL, NOT CONSTANTINOPLE!"; | |
133 | ||
134 | UnicodeString test4(test3); | |
73c04bcf | 135 | test4.toLower(Locale("")); |
b75a7d8f A |
136 | expectedResult = UnicodeString("i\\u0307stanbul, not constantinople!", "").unescape(); |
137 | if (test4 != expectedResult) | |
138 | errln("1. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); | |
139 | ||
140 | test4 = test3; | |
141 | test4.toLower(Locale("tr", "TR")); | |
142 | expectedResult = lowercaseTurkish; | |
143 | if (test4 != expectedResult) | |
144 | errln("2. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); | |
145 | ||
146 | test3 = "topkap"; | |
147 | test3 += (UChar32)0x0131; | |
148 | test3 += " palace, istanbul"; | |
149 | test4 = test3; | |
150 | ||
73c04bcf | 151 | test4.toUpper(Locale("")); |
b75a7d8f A |
152 | expectedResult = "TOPKAPI PALACE, ISTANBUL"; |
153 | if (test4 != expectedResult) | |
154 | errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); | |
155 | ||
156 | test4 = test3; | |
157 | test4.toUpper(Locale("tr", "TR")); | |
158 | expectedResult = uppercaseTurkish; | |
159 | if (test4 != expectedResult) | |
160 | errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); | |
161 | ||
162 | test3 = CharsToUnicodeString("S\\u00FC\\u00DFmayrstra\\u00DFe"); | |
163 | ||
164 | test3.toUpper(Locale("de", "DE")); | |
165 | expectedResult = CharsToUnicodeString("S\\u00DCSSMAYRSTRASSE"); | |
166 | if (test3 != expectedResult) | |
167 | errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test3 + "\"."); | |
168 | ||
169 | test4.replace(0, test4.length(), uppercaseGreek); | |
170 | ||
171 | test4.toLower(Locale("el", "GR")); | |
172 | expectedResult = lowercaseGreek; | |
173 | if (test4 != expectedResult) | |
174 | errln("toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); | |
175 | ||
176 | test4.replace(0, test4.length(), lowercaseGreek); | |
177 | ||
178 | test4.toUpper(); | |
179 | expectedResult = uppercaseGreek; | |
180 | if (test4 != expectedResult) | |
181 | errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); | |
182 | ||
183 | // more string case mapping tests with the new implementation | |
184 | { | |
185 | static const UChar | |
186 | ||
187 | beforeLower[]= { 0x61, 0x42, 0x49, 0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff }, | |
188 | lowerRoot[]= { 0x61, 0x62, 0x69, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff }, | |
189 | lowerTurkish[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff }, | |
190 | ||
191 | beforeUpper[]= { 0x61, 0x42, 0x69, 0x3c2, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xfb03, 0xfb03, 0xd93f, 0xdfff }, | |
192 | upperRoot[]= { 0x41, 0x42, 0x49, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff }, | |
193 | upperTurkish[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff }, | |
194 | ||
195 | beforeMiniUpper[]= { 0xdf, 0x61 }, | |
196 | miniUpper[]= { 0x53, 0x53, 0x41 }; | |
197 | ||
198 | UnicodeString s; | |
199 | ||
200 | /* lowercase with root locale */ | |
2ca993e8 | 201 | s=UnicodeString(FALSE, beforeLower, UPRV_LENGTHOF(beforeLower)); |
b75a7d8f | 202 | s.toLower(""); |
2ca993e8 | 203 | if( s.length()!=UPRV_LENGTHOF(lowerRoot) || |
b75a7d8f A |
204 | s!=UnicodeString(FALSE, lowerRoot, s.length()) |
205 | ) { | |
2ca993e8 | 206 | errln("error in toLower(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerRoot, UPRV_LENGTHOF(lowerRoot)) + "\""); |
b75a7d8f A |
207 | } |
208 | ||
209 | /* lowercase with turkish locale */ | |
2ca993e8 | 210 | s=UnicodeString(FALSE, beforeLower, UPRV_LENGTHOF(beforeLower)); |
b75a7d8f | 211 | s.setCharAt(0, beforeLower[0]).toLower(Locale("tr")); |
2ca993e8 | 212 | if( s.length()!=UPRV_LENGTHOF(lowerTurkish) || |
b75a7d8f A |
213 | s!=UnicodeString(FALSE, lowerTurkish, s.length()) |
214 | ) { | |
2ca993e8 | 215 | errln("error in toLower(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerTurkish, UPRV_LENGTHOF(lowerTurkish)) + "\""); |
b75a7d8f A |
216 | } |
217 | ||
218 | /* uppercase with root locale */ | |
2ca993e8 | 219 | s=UnicodeString(FALSE, beforeUpper, UPRV_LENGTHOF(beforeUpper)); |
73c04bcf | 220 | s.setCharAt(0, beforeUpper[0]).toUpper(Locale("")); |
2ca993e8 | 221 | if( s.length()!=UPRV_LENGTHOF(upperRoot) || |
b75a7d8f A |
222 | s!=UnicodeString(FALSE, upperRoot, s.length()) |
223 | ) { | |
2ca993e8 | 224 | errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperRoot, UPRV_LENGTHOF(upperRoot)) + "\""); |
b75a7d8f A |
225 | } |
226 | ||
227 | /* uppercase with turkish locale */ | |
2ca993e8 | 228 | s=UnicodeString(FALSE, beforeUpper, UPRV_LENGTHOF(beforeUpper)); |
b75a7d8f | 229 | s.toUpper(Locale("tr")); |
2ca993e8 | 230 | if( s.length()!=UPRV_LENGTHOF(upperTurkish) || |
b75a7d8f A |
231 | s!=UnicodeString(FALSE, upperTurkish, s.length()) |
232 | ) { | |
2ca993e8 | 233 | errln("error in toUpper(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperTurkish, UPRV_LENGTHOF(upperTurkish)) + "\""); |
b75a7d8f A |
234 | } |
235 | ||
236 | /* uppercase a short string with root locale */ | |
2ca993e8 | 237 | s=UnicodeString(FALSE, beforeMiniUpper, UPRV_LENGTHOF(beforeMiniUpper)); |
b75a7d8f | 238 | s.setCharAt(0, beforeMiniUpper[0]).toUpper(""); |
2ca993e8 | 239 | if( s.length()!=UPRV_LENGTHOF(miniUpper) || |
b75a7d8f A |
240 | s!=UnicodeString(FALSE, miniUpper, s.length()) |
241 | ) { | |
2ca993e8 | 242 | errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, miniUpper, UPRV_LENGTHOF(miniUpper)) + "\""); |
b75a7d8f A |
243 | } |
244 | } | |
245 | ||
246 | // test some supplementary characters (>= Unicode 3.1) | |
247 | { | |
248 | UnicodeString t; | |
249 | ||
250 | UnicodeString | |
251 | deseretInput=UnicodeString("\\U0001043C\\U00010414", "").unescape(), | |
252 | deseretLower=UnicodeString("\\U0001043C\\U0001043C", "").unescape(), | |
253 | deseretUpper=UnicodeString("\\U00010414\\U00010414", "").unescape(); | |
254 | (t=deseretInput).toLower(); | |
255 | if(t!=deseretLower) { | |
256 | errln("error lowercasing Deseret (plane 1) characters"); | |
257 | } | |
258 | (t=deseretInput).toUpper(); | |
259 | if(t!=deseretUpper) { | |
260 | errln("error uppercasing Deseret (plane 1) characters"); | |
261 | } | |
262 | } | |
263 | ||
264 | // test some more cases that looked like problems | |
265 | { | |
266 | UnicodeString t; | |
267 | ||
268 | UnicodeString | |
269 | ljInput=UnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 \\U0001043C\\U00010414", "").unescape(), | |
270 | ljLower=UnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 \\U0001043C\\U0001043C", "").unescape(), | |
271 | ljUpper=UnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 \\U00010414\\U00010414", "").unescape(); | |
272 | (t=ljInput).toLower("en"); | |
273 | if(t!=ljLower) { | |
274 | errln("error lowercasing LJ characters"); | |
275 | } | |
276 | (t=ljInput).toUpper("en"); | |
277 | if(t!=ljUpper) { | |
278 | errln("error uppercasing LJ characters"); | |
279 | } | |
280 | } | |
281 | ||
282 | #if !UCONFIG_NO_NORMALIZATION | |
283 | // some context-sensitive casing depends on normalization data being present | |
284 | ||
285 | // Unicode 3.1.1 SpecialCasing tests | |
286 | { | |
287 | UnicodeString t; | |
288 | ||
289 | // sigmas preceded and/or followed by cased letters | |
290 | UnicodeString | |
291 | sigmas=UnicodeString("i\\u0307\\u03a3\\u0308j \\u0307\\u03a3\\u0308j i\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(), | |
292 | sigmasLower=UnicodeString("i\\u0307\\u03c3\\u0308j \\u0307\\u03c3\\u0308j i\\u00ad\\u03c2\\u0308 \\u0307\\u03c3\\u0308 ", "").unescape(), | |
293 | sigmasUpper=UnicodeString("I\\u0307\\u03a3\\u0308J \\u0307\\u03a3\\u0308J I\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(); | |
294 | ||
295 | (t=sigmas).toLower(); | |
296 | if(t!=sigmasLower) { | |
297 | errln("error in sigmas.toLower()=\"" + t + "\" expected \"" + sigmasLower + "\""); | |
298 | } | |
299 | ||
73c04bcf | 300 | (t=sigmas).toUpper(Locale("")); |
b75a7d8f A |
301 | if(t!=sigmasUpper) { |
302 | errln("error in sigmas.toUpper()=\"" + t + "\" expected \"" + sigmasUpper + "\""); | |
303 | } | |
304 | ||
305 | // turkish & azerbaijani dotless i & dotted I | |
306 | // remove dot above if there was a capital I before and there are no more accents above | |
307 | UnicodeString | |
308 | dots=UnicodeString("I \\u0130 I\\u0307 I\\u0327\\u0307 I\\u0301\\u0307 I\\u0327\\u0307\\u0301", "").unescape(), | |
309 | dotsTurkish=UnicodeString("\\u0131 i i i\\u0327 \\u0131\\u0301\\u0307 i\\u0327\\u0301", "").unescape(), | |
310 | dotsDefault=UnicodeString("i i\\u0307 i\\u0307 i\\u0327\\u0307 i\\u0301\\u0307 i\\u0327\\u0307\\u0301", "").unescape(); | |
311 | ||
312 | (t=dots).toLower("tr"); | |
313 | if(t!=dotsTurkish) { | |
314 | errln("error in dots.toLower(tr)=\"" + t + "\" expected \"" + dotsTurkish + "\""); | |
315 | } | |
316 | ||
317 | (t=dots).toLower("de"); | |
318 | if(t!=dotsDefault) { | |
319 | errln("error in dots.toLower(de)=\"" + t + "\" expected \"" + dotsDefault + "\""); | |
320 | } | |
321 | } | |
322 | ||
323 | // more Unicode 3.1.1 tests | |
324 | { | |
325 | UnicodeString t; | |
326 | ||
327 | // lithuanian dot above in uppercasing | |
328 | UnicodeString | |
329 | dots=UnicodeString("a\\u0307 \\u0307 i\\u0307 j\\u0327\\u0307 j\\u0301\\u0307", "").unescape(), | |
330 | dotsLithuanian=UnicodeString("A\\u0307 \\u0307 I J\\u0327 J\\u0301\\u0307", "").unescape(), | |
331 | dotsDefault=UnicodeString("A\\u0307 \\u0307 I\\u0307 J\\u0327\\u0307 J\\u0301\\u0307", "").unescape(); | |
332 | ||
333 | (t=dots).toUpper("lt"); | |
334 | if(t!=dotsLithuanian) { | |
335 | errln("error in dots.toUpper(lt)=\"" + t + "\" expected \"" + dotsLithuanian + "\""); | |
336 | } | |
337 | ||
338 | (t=dots).toUpper("de"); | |
339 | if(t!=dotsDefault) { | |
340 | errln("error in dots.toUpper(de)=\"" + t + "\" expected \"" + dotsDefault + "\""); | |
341 | } | |
342 | ||
343 | // lithuanian adds dot above to i in lowercasing if there are more above accents | |
344 | UnicodeString | |
345 | i=UnicodeString("I I\\u0301 J J\\u0301 \\u012e \\u012e\\u0301 \\u00cc\\u00cd\\u0128", "").unescape(), | |
346 | iLithuanian=UnicodeString("i i\\u0307\\u0301 j j\\u0307\\u0301 \\u012f \\u012f\\u0307\\u0301 i\\u0307\\u0300i\\u0307\\u0301i\\u0307\\u0303", "").unescape(), | |
347 | iDefault=UnicodeString("i i\\u0301 j j\\u0301 \\u012f \\u012f\\u0301 \\u00ec\\u00ed\\u0129", "").unescape(); | |
348 | ||
349 | (t=i).toLower("lt"); | |
350 | if(t!=iLithuanian) { | |
351 | errln("error in i.toLower(lt)=\"" + t + "\" expected \"" + iLithuanian + "\""); | |
352 | } | |
353 | ||
354 | (t=i).toLower("de"); | |
355 | if(t!=iDefault) { | |
356 | errln("error in i.toLower(de)=\"" + t + "\" expected \"" + iDefault + "\""); | |
357 | } | |
358 | } | |
359 | ||
360 | #endif | |
361 | ||
362 | // test case folding | |
363 | { | |
364 | UnicodeString | |
365 | s=UnicodeString("A\\u00df\\u00b5\\ufb03\\U0001040c\\u0130\\u0131", "").unescape(), | |
366 | f=UnicodeString("ass\\u03bcffi\\U00010434i\\u0307\\u0131", "").unescape(), | |
367 | g=UnicodeString("ass\\u03bcffi\\U00010434i\\u0131", "").unescape(), | |
368 | t; | |
369 | ||
370 | (t=s).foldCase(); | |
371 | if(f!=t) { | |
372 | errln("error in foldCase(\"" + s + "\", default)=\"" + t + "\" but expected \"" + f + "\""); | |
373 | } | |
374 | ||
375 | // alternate handling for dotted I/dotless i (U+0130, U+0131) | |
376 | (t=s).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I); | |
377 | if(g!=t) { | |
378 | errln("error in foldCase(\"" + s + "\", U_FOLD_CASE_EXCLUDE_SPECIAL_I)=\"" + t + "\" but expected \"" + g + "\""); | |
379 | } | |
380 | } | |
381 | } | |
382 | ||
73c04bcf A |
383 | // data-driven case mapping tests ------------------------------------------ *** |
384 | ||
// Which case mapping operation a data-driven test case exercises.
// The values index into dataNames[] below.
enum {
    TEST_LOWER = 0,
    TEST_UPPER = 1,
    TEST_TITLE = 2,
    TEST_FOLD = 3,
    TEST_COUNT = 4
};

// names of TestData children in casing.txt
// (indexed by the TEST_* values, with an empty string in the extra last slot)
static const char *const dataNames[TEST_COUNT+1]={
    "lowercasing",  // TEST_LOWER
    "uppercasing",  // TEST_UPPER
    "titlecasing",  // TEST_TITLE
    "casefolding",  // TEST_FOLD
    ""
};
374ca955 | 401 | |
73c04bcf A |
402 | void |
403 | StringCaseTest::TestCasingImpl(const UnicodeString &input, | |
404 | const UnicodeString &output, | |
405 | int32_t whichCase, | |
46f4442e | 406 | void *iter, const char *localeID, uint32_t options) { |
73c04bcf A |
407 | // UnicodeString |
408 | UnicodeString result; | |
409 | const char *name; | |
46f4442e | 410 | Locale locale(localeID); |
73c04bcf A |
411 | |
412 | result=input; | |
413 | switch(whichCase) { | |
414 | case TEST_LOWER: | |
415 | name="toLower"; | |
46f4442e | 416 | result.toLower(locale); |
73c04bcf A |
417 | break; |
418 | case TEST_UPPER: | |
419 | name="toUpper"; | |
46f4442e A |
420 | result.toUpper(locale); |
421 | break; | |
422 | #if !UCONFIG_NO_BREAK_ITERATION | |
423 | case TEST_TITLE: | |
424 | name="toTitle"; | |
425 | result.toTitle((BreakIterator *)iter, locale, options); | |
426 | break; | |
427 | #endif | |
428 | case TEST_FOLD: | |
429 | name="foldCase"; | |
430 | result.foldCase(options); | |
73c04bcf A |
431 | break; |
432 | default: | |
433 | name=""; | |
434 | break; // won't happen | |
b75a7d8f | 435 | } |
73c04bcf | 436 | if(result!=output) { |
729e4ab9 | 437 | dataerrln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name); |
b75a7d8f | 438 | } |
46f4442e A |
439 | #if !UCONFIG_NO_BREAK_ITERATION |
440 | if(whichCase==TEST_TITLE && options==0) { | |
441 | result=input; | |
442 | result.toTitle((BreakIterator *)iter, locale); | |
443 | if(result!=output) { | |
729e4ab9 | 444 | dataerrln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res"); |
46f4442e A |
445 | } |
446 | } | |
447 | #endif | |
b75a7d8f | 448 | |
73c04bcf A |
449 | // UTF-8 |
450 | char utf8In[100], utf8Out[100]; | |
451 | int32_t utf8InLength, utf8OutLength, resultLength; | |
452 | UChar *buffer; | |
453 | ||
729e4ab9 A |
454 | IcuTestErrorCode errorCode(*this, "TestCasingImpl"); |
455 | LocalUCaseMapPointer csm(ucasemap_open(localeID, options, errorCode)); | |
46f4442e A |
456 | #if !UCONFIG_NO_BREAK_ITERATION |
457 | if(iter!=NULL) { | |
458 | // Clone the break iterator so that the UCaseMap can safely adopt it. | |
57a6839d | 459 | UBreakIterator *clone=ubrk_safeClone((UBreakIterator *)iter, NULL, NULL, errorCode); |
729e4ab9 | 460 | ucasemap_setBreakIterator(csm.getAlias(), clone, errorCode); |
46f4442e A |
461 | } |
462 | #endif | |
73c04bcf | 463 | |
729e4ab9 | 464 | u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), errorCode); |
73c04bcf A |
465 | switch(whichCase) { |
466 | case TEST_LOWER: | |
467 | name="ucasemap_utf8ToLower"; | |
729e4ab9 | 468 | utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(), |
73c04bcf | 469 | utf8Out, (int32_t)sizeof(utf8Out), |
729e4ab9 | 470 | utf8In, utf8InLength, errorCode); |
73c04bcf A |
471 | break; |
472 | case TEST_UPPER: | |
473 | name="ucasemap_utf8ToUpper"; | |
729e4ab9 | 474 | utf8OutLength=ucasemap_utf8ToUpper(csm.getAlias(), |
73c04bcf | 475 | utf8Out, (int32_t)sizeof(utf8Out), |
729e4ab9 | 476 | utf8In, utf8InLength, errorCode); |
73c04bcf | 477 | break; |
46f4442e A |
478 | #if !UCONFIG_NO_BREAK_ITERATION |
479 | case TEST_TITLE: | |
480 | name="ucasemap_utf8ToTitle"; | |
729e4ab9 | 481 | utf8OutLength=ucasemap_utf8ToTitle(csm.getAlias(), |
46f4442e | 482 | utf8Out, (int32_t)sizeof(utf8Out), |
729e4ab9 | 483 | utf8In, utf8InLength, errorCode); |
46f4442e A |
484 | break; |
485 | #endif | |
486 | case TEST_FOLD: | |
487 | name="ucasemap_utf8FoldCase"; | |
729e4ab9 | 488 | utf8OutLength=ucasemap_utf8FoldCase(csm.getAlias(), |
46f4442e | 489 | utf8Out, (int32_t)sizeof(utf8Out), |
729e4ab9 | 490 | utf8In, utf8InLength, errorCode); |
46f4442e | 491 | break; |
73c04bcf A |
492 | default: |
493 | name=""; | |
494 | utf8OutLength=0; | |
495 | break; // won't happen | |
b75a7d8f | 496 | } |
73c04bcf | 497 | buffer=result.getBuffer(utf8OutLength); |
729e4ab9 A |
498 | u_strFromUTF8(buffer, result.getCapacity(), &resultLength, utf8Out, utf8OutLength, errorCode); |
499 | result.releaseBuffer(errorCode.isSuccess() ? resultLength : 0); | |
b75a7d8f | 500 | |
729e4ab9 A |
501 | if(errorCode.isFailure()) { |
502 | errcheckln(errorCode, "error: %s() got an error for a test case from casing.res - %s", name, u_errorName(errorCode)); | |
503 | errorCode.reset(); | |
73c04bcf A |
504 | } else if(result!=output) { |
505 | errln("error: %s() got a wrong result for a test case from casing.res", name); | |
46f4442e | 506 | errln("expected \"" + output + "\" got \"" + result + "\"" ); |
73c04bcf | 507 | } |
73c04bcf A |
508 | } |
509 | ||
73c04bcf A |
510 | void |
511 | StringCaseTest::TestCasing() { | |
512 | UErrorCode status = U_ZERO_ERROR; | |
729e4ab9 A |
513 | #if !UCONFIG_NO_BREAK_ITERATION |
514 | LocalUBreakIteratorPointer iter; | |
515 | #endif | |
73c04bcf | 516 | char cLocaleID[100]; |
46f4442e A |
517 | UnicodeString locale, input, output, optionsString, result; |
518 | uint32_t options; | |
73c04bcf | 519 | int32_t whichCase, type; |
729e4ab9 | 520 | LocalPointer<TestDataModule> driver(TestDataModule::getTestDataModule("casing", *this, status)); |
73c04bcf A |
521 | if(U_SUCCESS(status)) { |
522 | for(whichCase=0; whichCase<TEST_COUNT; ++whichCase) { | |
46f4442e A |
523 | #if UCONFIG_NO_BREAK_ITERATION |
524 | if(whichCase==TEST_TITLE) { | |
525 | continue; | |
526 | } | |
527 | #endif | |
729e4ab9 | 528 | LocalPointer<TestData> casingTest(driver->createTestData(dataNames[whichCase], status)); |
73c04bcf A |
529 | if(U_FAILURE(status)) { |
530 | errln("TestCasing failed to createTestData(%s) - %s", dataNames[whichCase], u_errorName(status)); | |
531 | break; | |
b75a7d8f | 532 | } |
73c04bcf A |
533 | const DataMap *myCase = NULL; |
534 | while(casingTest->nextCase(myCase, status)) { | |
73c04bcf A |
535 | input = myCase->getString("Input", status); |
536 | output = myCase->getString("Output", status); | |
b75a7d8f | 537 | |
46f4442e A |
538 | if(whichCase!=TEST_FOLD) { |
539 | locale = myCase->getString("Locale", status); | |
540 | } | |
541 | locale.extract(0, 0x7fffffff, cLocaleID, sizeof(cLocaleID), ""); | |
542 | ||
46f4442e | 543 | #if !UCONFIG_NO_BREAK_ITERATION |
73c04bcf A |
544 | if(whichCase==TEST_TITLE) { |
545 | type = myCase->getInt("Type", status); | |
546 | if(type>=0) { | |
729e4ab9 | 547 | iter.adoptInstead(ubrk_open((UBreakIteratorType)type, cLocaleID, NULL, 0, &status)); |
46f4442e A |
548 | } else if(type==-2) { |
549 | // Open a trivial break iterator that only delivers { 0, length } | |
550 | // or even just { 0 } as boundaries. | |
551 | static const UChar rules[] = { 0x2e, 0x2a, 0x3b }; // ".*;" | |
552 | UParseError parseError; | |
b331163b | 553 | iter.adoptInstead(ubrk_openRules(rules, UPRV_LENGTHOF(rules), NULL, 0, &parseError, &status)); |
73c04bcf A |
554 | } |
555 | } | |
556 | #endif | |
46f4442e A |
557 | options = 0; |
558 | if(whichCase==TEST_TITLE || whichCase==TEST_FOLD) { | |
559 | optionsString = myCase->getString("Options", status); | |
560 | if(optionsString.indexOf((UChar)0x54)>=0) { // T | |
561 | options|=U_FOLD_CASE_EXCLUDE_SPECIAL_I; | |
562 | } | |
563 | if(optionsString.indexOf((UChar)0x4c)>=0) { // L | |
564 | options|=U_TITLECASE_NO_LOWERCASE; | |
565 | } | |
566 | if(optionsString.indexOf((UChar)0x41)>=0) { // A | |
567 | options|=U_TITLECASE_NO_BREAK_ADJUSTMENT; | |
568 | } | |
569 | } | |
73c04bcf A |
570 | |
571 | if(U_FAILURE(status)) { | |
729e4ab9 | 572 | dataerrln("error: TestCasing() setup failed for %s test case from casing.res: %s", dataNames[whichCase], u_errorName(status)); |
73c04bcf A |
573 | status = U_ZERO_ERROR; |
574 | } else { | |
729e4ab9 A |
575 | #if UCONFIG_NO_BREAK_ITERATION |
576 | LocalPointer<UMemory> iter; | |
577 | #endif | |
578 | TestCasingImpl(input, output, whichCase, iter.getAlias(), cLocaleID, options); | |
b75a7d8f | 579 | } |
b75a7d8f | 580 | |
73c04bcf | 581 | #if !UCONFIG_NO_BREAK_ITERATION |
729e4ab9 | 582 | iter.adoptInstead(NULL); |
73c04bcf A |
583 | #endif |
584 | } | |
b75a7d8f | 585 | } |
b75a7d8f A |
586 | } |
587 | ||
73c04bcf A |
588 | #if !UCONFIG_NO_BREAK_ITERATION |
589 | // more tests for API coverage | |
590 | status=U_ZERO_ERROR; | |
591 | input=UNICODE_STRING_SIMPLE("sTrA\\u00dfE").unescape(); | |
592 | (result=input).toTitle(NULL); | |
593 | if(result!=UNICODE_STRING_SIMPLE("Stra\\u00dfe").unescape()) { | |
729e4ab9 | 594 | dataerrln("UnicodeString::toTitle(NULL) failed."); |
73c04bcf | 595 | } |
b75a7d8f A |
596 | #endif |
597 | } | |
4388f060 A |
598 | |
599 | void | |
600 | StringCaseTest::TestFullCaseFoldingIterator() { | |
601 | UnicodeString ffi=UNICODE_STRING_SIMPLE("ffi"); | |
602 | UnicodeString ss=UNICODE_STRING_SIMPLE("ss"); | |
603 | FullCaseFoldingIterator iter; | |
604 | int32_t count=0; | |
605 | int32_t countSpecific=0; | |
606 | UChar32 c; | |
607 | UnicodeString full; | |
608 | while((c=iter.next(full))>=0) { | |
609 | ++count; | |
610 | // Check that the full Case_Folding has more than 1 code point. | |
611 | if(!full.hasMoreChar32Than(0, 0x7fffffff, 1)) { | |
612 | errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding has at most 1 code point", (long)c); | |
613 | continue; | |
614 | } | |
615 | // Check that full == Case_Folding(c). | |
616 | UnicodeString cf(c); | |
617 | cf.foldCase(); | |
618 | if(full!=cf) { | |
619 | errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding != cf(c)", (long)c); | |
620 | continue; | |
621 | } | |
622 | // Spot-check a couple of specific cases. | |
623 | if((full==ffi && c==0xfb03) || (full==ss && (c==0xdf || c==0x1e9e))) { | |
624 | ++countSpecific; | |
625 | } | |
626 | } | |
627 | if(countSpecific!=3) { | |
628 | errln("error: FullCaseFoldingIterator did not yield exactly the expected specific cases"); | |
629 | } | |
630 | if(count<70) { | |
631 | errln("error: FullCaseFoldingIterator yielded only %d (cp, full) pairs", (int)count); | |
632 | } | |
633 | } | |
a62d09fc | 634 | |
f3c0d7a5 A |
635 | void |
636 | StringCaseTest::assertGreekUpper(const char16_t *s, const char16_t *expected) { | |
637 | UnicodeString s16(s); | |
638 | UnicodeString expected16(expected); | |
639 | UnicodeString msg = UnicodeString("UnicodeString::toUpper/Greek(\"") + s16 + "\")"; | |
640 | UnicodeString result16(s16); | |
641 | result16.toUpper(GREEK_LOCALE_); | |
642 | assertEquals(msg, expected16, result16); | |
643 | ||
644 | msg = UnicodeString("u_strToUpper/Greek(\"") + s16 + "\") cap="; | |
645 | int32_t length = expected16.length(); | |
646 | int32_t capacities[] = { | |
647 | // Keep in sync with the UTF-8 capacities near the bottom of this function. | |
648 | 0, length / 2, length - 1, length, length + 1 | |
649 | }; | |
650 | for (int32_t i = 0; i < UPRV_LENGTHOF(capacities); ++i) { | |
651 | int32_t cap = capacities[i]; | |
652 | UChar *dest16 = result16.getBuffer(expected16.length() + 1); | |
653 | u_memset(dest16, 0x55AA, result16.getCapacity()); | |
654 | UErrorCode errorCode = U_ZERO_ERROR; | |
655 | length = u_strToUpper(dest16, cap, s16.getBuffer(), s16.length(), "el", &errorCode); | |
656 | assertEquals(msg + cap, expected16.length(), length); | |
657 | UErrorCode expectedErrorCode; | |
658 | if (cap < expected16.length()) { | |
659 | expectedErrorCode = U_BUFFER_OVERFLOW_ERROR; | |
660 | } else if (cap == expected16.length()) { | |
661 | expectedErrorCode = U_STRING_NOT_TERMINATED_WARNING; | |
662 | } else { | |
663 | expectedErrorCode = U_ZERO_ERROR; | |
664 | assertEquals(msg + cap + " NUL", 0, dest16[length]); | |
665 | } | |
666 | assertEquals(msg + cap + " errorCode", expectedErrorCode, errorCode); | |
667 | result16.releaseBuffer(length); | |
668 | if (cap >= expected16.length()) { | |
669 | assertEquals(msg + cap, expected16, result16); | |
670 | } | |
671 | } | |
672 | ||
673 | UErrorCode errorCode = U_ZERO_ERROR; | |
674 | LocalUCaseMapPointer csm(ucasemap_open("el", 0, &errorCode)); | |
675 | assertSuccess("ucasemap_open", errorCode); | |
676 | std::string s8; | |
677 | s16.toUTF8String(s8); | |
678 | msg = UnicodeString("ucasemap_utf8ToUpper/Greek(\"") + s16 + "\")"; | |
679 | char dest8[1000]; | |
680 | length = ucasemap_utf8ToUpper(csm.getAlias(), dest8, UPRV_LENGTHOF(dest8), | |
681 | s8.data(), s8.length(), &errorCode); | |
682 | assertSuccess("ucasemap_utf8ToUpper", errorCode); | |
683 | StringPiece result8(dest8, length); | |
684 | UnicodeString result16From8 = UnicodeString::fromUTF8(result8); | |
685 | assertEquals(msg, expected16, result16From8); | |
686 | ||
687 | msg += " cap="; | |
688 | capacities[1] = length / 2; | |
689 | capacities[2] = length - 1; | |
690 | capacities[3] = length; | |
691 | capacities[4] = length + 1; | |
692 | char dest8b[1000]; | |
693 | int32_t expected8Length = length; // Assuming the previous call worked. | |
694 | for (int32_t i = 0; i < UPRV_LENGTHOF(capacities); ++i) { | |
695 | int32_t cap = capacities[i]; | |
696 | memset(dest8b, 0x5A, UPRV_LENGTHOF(dest8b)); | |
697 | UErrorCode errorCode = U_ZERO_ERROR; | |
698 | length = ucasemap_utf8ToUpper(csm.getAlias(), dest8b, cap, | |
699 | s8.data(), s8.length(), &errorCode); | |
700 | assertEquals(msg + cap, expected8Length, length); | |
701 | UErrorCode expectedErrorCode; | |
702 | if (cap < expected8Length) { | |
703 | expectedErrorCode = U_BUFFER_OVERFLOW_ERROR; | |
704 | } else if (cap == expected8Length) { | |
705 | expectedErrorCode = U_STRING_NOT_TERMINATED_WARNING; | |
706 | } else { | |
707 | expectedErrorCode = U_ZERO_ERROR; | |
708 | // Casts to int32_t to avoid matching UBool. | |
709 | assertEquals(msg + cap + " NUL", (int32_t)0, (int32_t)dest8b[length]); | |
710 | } | |
711 | assertEquals(msg + cap + " errorCode", expectedErrorCode, errorCode); | |
712 | if (cap >= expected8Length) { | |
713 | assertEquals(msg + cap + " (memcmp)", 0, memcmp(dest8, dest8b, expected8Length)); | |
714 | } | |
715 | } | |
716 | } | |
717 | ||
718 | void | |
719 | StringCaseTest::TestGreekUpper() { | |
720 | // http://bugs.icu-project.org/trac/ticket/5456 | |
721 | assertGreekUpper(u"άδικος, κείμενο, ίριδα", u"ΑΔΙΚΟΣ, ΚΕΙΜΕΝΟ, ΙΡΙΔΑ"); | |
722 | // https://bugzilla.mozilla.org/show_bug.cgi?id=307039 | |
723 | // https://bug307039.bmoattachments.org/attachment.cgi?id=194893 | |
724 | assertGreekUpper(u"Πατάτα", u"ΠΑΤΑΤΑ"); | |
725 | assertGreekUpper(u"Αέρας, Μυστήριο, Ωραίο", u"ΑΕΡΑΣ, ΜΥΣΤΗΡΙΟ, ΩΡΑΙΟ"); | |
726 | assertGreekUpper(u"Μαΐου, Πόρος, Ρύθμιση", u"ΜΑΪΟΥ, ΠΟΡΟΣ, ΡΥΘΜΙΣΗ"); | |
727 | assertGreekUpper(u"ΰ, Τηρώ, Μάιος", u"Ϋ, ΤΗΡΩ, ΜΑΪΟΣ"); | |
728 | assertGreekUpper(u"άυλος", u"ΑΫΛΟΣ"); | |
729 | assertGreekUpper(u"ΑΫΛΟΣ", u"ΑΫΛΟΣ"); | |
730 | assertGreekUpper(u"Άκλιτα ρήματα ή άκλιτες μετοχές", u"ΑΚΛΙΤΑ ΡΗΜΑΤΑ Ή ΑΚΛΙΤΕΣ ΜΕΤΟΧΕΣ"); | |
731 | // http://www.unicode.org/udhr/d/udhr_ell_monotonic.html | |
732 | assertGreekUpper(u"Επειδή η αναγνώριση της αξιοπρέπειας", u"ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ ΤΗΣ ΑΞΙΟΠΡΕΠΕΙΑΣ"); | |
733 | assertGreekUpper(u"νομικού ή διεθνούς", u"ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ"); | |
734 | // http://unicode.org/udhr/d/udhr_ell_polytonic.html | |
735 | assertGreekUpper(u"Ἐπειδὴ ἡ ἀναγνώριση", u"ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ"); | |
736 | assertGreekUpper(u"νομικοῦ ἢ διεθνοῦς", u"ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ"); | |
737 | // From Google bug report | |
738 | assertGreekUpper(u"Νέο, Δημιουργία", u"ΝΕΟ, ΔΗΜΙΟΥΡΓΙΑ"); | |
739 | // http://crbug.com/234797 | |
740 | assertGreekUpper(u"Ελάτε να φάτε τα καλύτερα παϊδάκια!", u"ΕΛΑΤΕ ΝΑ ΦΑΤΕ ΤΑ ΚΑΛΥΤΕΡΑ ΠΑΪΔΑΚΙΑ!"); | |
741 | assertGreekUpper(u"Μαΐου, τρόλεϊ", u"ΜΑΪΟΥ, ΤΡΟΛΕΪ"); | |
742 | assertGreekUpper(u"Το ένα ή το άλλο.", u"ΤΟ ΕΝΑ Ή ΤΟ ΑΛΛΟ."); | |
743 | // http://multilingualtypesetting.co.uk/blog/greek-typesetting-tips/ | |
744 | assertGreekUpper(u"ρωμέικα", u"ΡΩΜΕΪΚΑ"); | |
745 | } | |
746 | ||
a62d09fc A |
747 | void |
748 | StringCaseTest::TestLongUpper() { | |
749 | if (quick) { | |
750 | logln("not exhaustive mode: skipping this test"); | |
751 | return; | |
752 | } | |
753 | // Ticket #12663, crash with an extremely long string where | |
754 | // U+0390 maps to 0399 0308 0301 so that the result is three times as long | |
755 | // and overflows an int32_t. | |
756 | int32_t length = 0x40000004; // more than 1G UChars | |
757 | UnicodeString s(length, (UChar32)0x390, length); | |
758 | UnicodeString result; | |
759 | UChar *dest = result.getBuffer(length + 1); | |
760 | if (s.isBogus() || dest == NULL) { | |
761 | logln("Out of memory, unable to run this test on this machine."); | |
762 | return; | |
763 | } | |
764 | IcuTestErrorCode errorCode(*this, "TestLongUpper"); | |
765 | int32_t destLength = u_strToUpper(dest, result.getCapacity(), | |
766 | s.getBuffer(), s.length(), "", errorCode); | |
767 | result.releaseBuffer(destLength); | |
768 | if (errorCode.reset() != U_INDEX_OUTOFBOUNDS_ERROR) { | |
769 | errln("expected U_INDEX_OUTOFBOUNDS_ERROR, got %s (destLength is undefined, got %ld)", | |
770 | errorCode.errorName(), (long)destLength); | |
771 | } | |
772 | } | |
f3c0d7a5 A |
773 | |
774 | void StringCaseTest::TestMalformedUTF8() { | |
775 | // ticket #12639 | |
776 | IcuTestErrorCode errorCode(*this, "TestMalformedUTF8"); | |
777 | LocalUCaseMapPointer csm(ucasemap_open("en", U_TITLECASE_NO_BREAK_ADJUSTMENT, errorCode)); | |
778 | if (errorCode.isFailure()) { | |
779 | errln("ucasemap_open(English) failed - %s", errorCode.errorName()); | |
780 | return; | |
781 | } | |
782 | char src[1] = { (char)0x85 }; // malformed UTF-8 | |
783 | char dest[3] = { 0, 0, 0 }; | |
784 | int32_t destLength; | |
785 | #if !UCONFIG_NO_BREAK_ITERATION | |
786 | destLength = ucasemap_utf8ToTitle(csm.getAlias(), dest, 3, src, 1, errorCode); | |
787 | if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) { | |
788 | errln("ucasemap_utf8ToTitle(\\x85) failed: %s destLength=%d dest[0]=0x%02x", | |
789 | errorCode.errorName(), (int)destLength, dest[0]); | |
790 | } | |
791 | #endif | |
792 | ||
793 | errorCode.reset(); | |
794 | dest[0] = 0; | |
795 | destLength = ucasemap_utf8ToLower(csm.getAlias(), dest, 3, src, 1, errorCode); | |
796 | if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) { | |
797 | errln("ucasemap_utf8ToLower(\\x85) failed: %s destLength=%d dest[0]=0x%02x", | |
798 | errorCode.errorName(), (int)destLength, dest[0]); | |
799 | } | |
800 | ||
801 | errorCode.reset(); | |
802 | dest[0] = 0; | |
803 | destLength = ucasemap_utf8ToUpper(csm.getAlias(), dest, 3, src, 1, errorCode); | |
804 | if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) { | |
805 | errln("ucasemap_utf8ToUpper(\\x85) failed: %s destLength=%d dest[0]=0x%02x", | |
806 | errorCode.errorName(), (int)destLength, dest[0]); | |
807 | } | |
808 | ||
809 | errorCode.reset(); | |
810 | dest[0] = 0; | |
811 | destLength = ucasemap_utf8FoldCase(csm.getAlias(), dest, 3, src, 1, errorCode); | |
812 | if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) { | |
813 | errln("ucasemap_utf8FoldCase(\\x85) failed: %s destLength=%d dest[0]=0x%02x", | |
814 | errorCode.errorName(), (int)destLength, dest[0]); | |
815 | } | |
816 | } | |
817 | ||
818 | void StringCaseTest::TestBufferOverflow() { | |
819 | // Ticket #12849, incorrect result from Title Case preflight operation, | |
820 | // when buffer overflow error is expected. | |
821 | IcuTestErrorCode errorCode(*this, "TestBufferOverflow"); | |
822 | LocalUCaseMapPointer csm(ucasemap_open("en", 0, errorCode)); | |
823 | if (errorCode.isFailure()) { | |
824 | errln("ucasemap_open(English) failed - %s", errorCode.errorName()); | |
825 | return; | |
826 | } | |
827 | ||
828 | UnicodeString data("hello world"); | |
829 | int32_t result; | |
830 | #if !UCONFIG_NO_BREAK_ITERATION | |
831 | result = ucasemap_toTitle(csm.getAlias(), NULL, 0, data.getBuffer(), data.length(), errorCode); | |
832 | if (errorCode.get() != U_BUFFER_OVERFLOW_ERROR || result != data.length()) { | |
833 | errln("%s:%d ucasemap_toTitle(\"hello world\") failed: " | |
834 | "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)", | |
835 | __FILE__, __LINE__, data.length(), errorCode.errorName(), result); | |
836 | } | |
837 | #endif | |
838 | errorCode.reset(); | |
839 | ||
840 | std::string data_utf8; | |
841 | data.toUTF8String(data_utf8); | |
842 | #if !UCONFIG_NO_BREAK_ITERATION | |
843 | result = ucasemap_utf8ToTitle(csm.getAlias(), NULL, 0, data_utf8.c_str(), data_utf8.length(), errorCode); | |
844 | if (errorCode.get() != U_BUFFER_OVERFLOW_ERROR || result != (int32_t)data_utf8.length()) { | |
845 | errln("%s:%d ucasemap_toTitle(\"hello world\") failed: " | |
846 | "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)", | |
847 | __FILE__, __LINE__, data_utf8.length(), errorCode.errorName(), result); | |
848 | } | |
849 | #endif | |
850 | errorCode.reset(); | |
851 | } | |
852 | ||
853 | void StringCaseTest::checkEditsIter( | |
854 | const UnicodeString &name, | |
855 | Edits::Iterator ei1, Edits::Iterator ei2, // two equal iterators | |
856 | const EditChange expected[], int32_t expLength, UBool withUnchanged, | |
857 | UErrorCode &errorCode) { | |
858 | assertFalse(name, ei2.findSourceIndex(-1, errorCode)); | |
859 | ||
860 | int32_t expSrcIndex = 0; | |
861 | int32_t expDestIndex = 0; | |
862 | int32_t expReplIndex = 0; | |
863 | for (int32_t expIndex = 0; expIndex < expLength; ++expIndex) { | |
864 | const EditChange &expect = expected[expIndex]; | |
865 | UnicodeString msg = UnicodeString(name).append(u' ') + expIndex; | |
866 | if (withUnchanged || expect.change) { | |
867 | assertTrue(msg, ei1.next(errorCode)); | |
868 | assertEquals(msg, expect.change, ei1.hasChange()); | |
869 | assertEquals(msg, expect.oldLength, ei1.oldLength()); | |
870 | assertEquals(msg, expect.newLength, ei1.newLength()); | |
871 | assertEquals(msg, expSrcIndex, ei1.sourceIndex()); | |
872 | assertEquals(msg, expDestIndex, ei1.destinationIndex()); | |
873 | assertEquals(msg, expReplIndex, ei1.replacementIndex()); | |
874 | } | |
875 | ||
876 | if (expect.oldLength > 0) { | |
877 | assertTrue(msg, ei2.findSourceIndex(expSrcIndex, errorCode)); | |
878 | assertEquals(msg, expect.change, ei2.hasChange()); | |
879 | assertEquals(msg, expect.oldLength, ei2.oldLength()); | |
880 | assertEquals(msg, expect.newLength, ei2.newLength()); | |
881 | assertEquals(msg, expSrcIndex, ei2.sourceIndex()); | |
882 | assertEquals(msg, expDestIndex, ei2.destinationIndex()); | |
883 | assertEquals(msg, expReplIndex, ei2.replacementIndex()); | |
884 | if (!withUnchanged) { | |
885 | // For some iterators, move past the current range | |
886 | // so that findSourceIndex() has to look before the current index. | |
887 | ei2.next(errorCode); | |
888 | ei2.next(errorCode); | |
889 | } | |
890 | } | |
891 | ||
892 | expSrcIndex += expect.oldLength; | |
893 | expDestIndex += expect.newLength; | |
894 | if (expect.change) { | |
895 | expReplIndex += expect.newLength; | |
896 | } | |
897 | } | |
898 | // TODO: remove casts from u"" when merging into trunk | |
899 | UnicodeString msg = UnicodeString(name).append(u" end"); | |
900 | assertFalse(msg, ei1.next(errorCode)); | |
901 | assertFalse(msg, ei1.hasChange()); | |
902 | assertEquals(msg, 0, ei1.oldLength()); | |
903 | assertEquals(msg, 0, ei1.newLength()); | |
904 | assertEquals(msg, expSrcIndex, ei1.sourceIndex()); | |
905 | assertEquals(msg, expDestIndex, ei1.destinationIndex()); | |
906 | assertEquals(msg, expReplIndex, ei1.replacementIndex()); | |
907 | ||
908 | assertFalse(name, ei2.findSourceIndex(expSrcIndex, errorCode)); | |
909 | } | |
910 | ||
911 | void StringCaseTest::TestEdits() { | |
912 | IcuTestErrorCode errorCode(*this, "TestEdits"); | |
913 | Edits edits; | |
914 | assertFalse("new Edits", edits.hasChanges()); | |
915 | assertEquals("new Edits", 0, edits.lengthDelta()); | |
916 | edits.addUnchanged(1); // multiple unchanged ranges are combined | |
917 | edits.addUnchanged(10000); // too long, and they are split | |
918 | edits.addReplace(0, 0); | |
919 | edits.addUnchanged(2); | |
920 | assertFalse("unchanged 10003", edits.hasChanges()); | |
921 | assertEquals("unchanged 10003", 0, edits.lengthDelta()); | |
922 | edits.addReplace(1, 1); // multiple short equal-length edits are compressed | |
923 | edits.addUnchanged(0); | |
924 | edits.addReplace(1, 1); | |
925 | edits.addReplace(1, 1); | |
926 | edits.addReplace(0, 10); | |
927 | edits.addReplace(100, 0); | |
928 | edits.addReplace(3000, 4000); // variable-length encoding | |
929 | edits.addReplace(100000, 100000); | |
930 | assertTrue("some edits", edits.hasChanges()); | |
931 | assertEquals("some edits", 10 - 100 + 1000, edits.lengthDelta()); | |
932 | UErrorCode outErrorCode = U_ZERO_ERROR; | |
933 | assertFalse("edits done: copyErrorTo", edits.copyErrorTo(outErrorCode)); | |
934 | ||
935 | static const EditChange coarseExpectedChanges[] = { | |
936 | { FALSE, 10003, 10003 }, | |
937 | { TRUE, 103103, 104013 } | |
938 | }; | |
939 | checkEditsIter(u"coarse", | |
940 | edits.getCoarseIterator(), edits.getCoarseIterator(), | |
941 | coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), TRUE, errorCode); | |
942 | checkEditsIter(u"coarse changes", | |
943 | edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(), | |
944 | coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), FALSE, errorCode); | |
945 | ||
946 | static const EditChange fineExpectedChanges[] = { | |
947 | { FALSE, 10003, 10003 }, | |
948 | { TRUE, 1, 1 }, | |
949 | { TRUE, 1, 1 }, | |
950 | { TRUE, 1, 1 }, | |
951 | { TRUE, 0, 10 }, | |
952 | { TRUE, 100, 0 }, | |
953 | { TRUE, 3000, 4000 }, | |
954 | { TRUE, 100000, 100000 } | |
955 | }; | |
956 | checkEditsIter(u"fine", | |
957 | edits.getFineIterator(), edits.getFineIterator(), | |
958 | fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), TRUE, errorCode); | |
959 | checkEditsIter(u"fine changes", | |
960 | edits.getFineChangesIterator(), edits.getFineChangesIterator(), | |
961 | fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), FALSE, errorCode); | |
962 | ||
963 | edits.reset(); | |
964 | assertFalse("reset", edits.hasChanges()); | |
965 | assertEquals("reset", 0, edits.lengthDelta()); | |
966 | Edits::Iterator ei = edits.getCoarseChangesIterator(); | |
967 | assertFalse("reset then iterator", ei.next(errorCode)); | |
968 | } | |
969 | ||
970 | void StringCaseTest::TestCaseMapWithEdits() { | |
971 | IcuTestErrorCode errorCode(*this, "TestEdits"); | |
972 | UChar dest[20]; | |
973 | Edits edits; | |
974 | ||
975 | int32_t length = CaseMap::toLower("tr", UCASEMAP_OMIT_UNCHANGED_TEXT, | |
976 | u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode); | |
977 | assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"), UnicodeString(TRUE, dest, length)); | |
978 | static const EditChange lowerExpectedChanges[] = { | |
979 | { TRUE, 1, 1 }, | |
980 | { FALSE, 4, 4 }, | |
981 | { TRUE, 1, 1 }, | |
982 | { FALSE, 2, 2 } | |
983 | }; | |
984 | checkEditsIter(u"toLower(IstanBul)", | |
985 | edits.getFineIterator(), edits.getFineIterator(), | |
986 | lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges), | |
987 | TRUE, errorCode); | |
988 | ||
989 | edits.reset(); | |
990 | length = CaseMap::toUpper("el", UCASEMAP_OMIT_UNCHANGED_TEXT, | |
991 | u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode); | |
992 | assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"), UnicodeString(TRUE, dest, length)); | |
993 | static const EditChange upperExpectedChanges[] = { | |
994 | { FALSE, 1, 1 }, | |
995 | { TRUE, 1, 1 }, | |
996 | { TRUE, 1, 1 }, | |
997 | { TRUE, 1, 1 }, | |
998 | { TRUE, 1, 1 }, | |
999 | { TRUE, 1, 1 } | |
1000 | }; | |
1001 | checkEditsIter(u"toUpper(Πατάτα)", | |
1002 | edits.getFineIterator(), edits.getFineIterator(), | |
1003 | upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges), | |
1004 | TRUE, errorCode); | |
1005 | ||
1006 | edits.reset(); | |
1007 | ||
1008 | #if !UCONFIG_NO_BREAK_ITERATION | |
1009 | length = CaseMap::toTitle("nl", | |
1010 | UCASEMAP_OMIT_UNCHANGED_TEXT | | |
1011 | U_TITLECASE_NO_BREAK_ADJUSTMENT | | |
1012 | U_TITLECASE_NO_LOWERCASE, | |
1013 | NULL, u"IjssEL IglOo", 12, | |
1014 | dest, UPRV_LENGTHOF(dest), &edits, errorCode); | |
1015 | assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"), UnicodeString(TRUE, dest, length)); | |
1016 | static const EditChange titleExpectedChanges[] = { | |
1017 | { FALSE, 1, 1 }, | |
1018 | { TRUE, 1, 1 }, | |
1019 | { FALSE, 10, 10 } | |
1020 | }; | |
1021 | checkEditsIter(u"toTitle(IjssEL IglOo)", | |
1022 | edits.getFineIterator(), edits.getFineIterator(), | |
1023 | titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges), | |
1024 | TRUE, errorCode); | |
1025 | #endif | |
1026 | ||
1027 | edits.reset(); | |
1028 | length = CaseMap::fold(UCASEMAP_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I, | |
1029 | u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode); | |
1030 | assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"), UnicodeString(TRUE, dest, length)); | |
1031 | static const EditChange foldExpectedChanges[] = { | |
1032 | { TRUE, 1, 1 }, | |
1033 | { TRUE, 1, 2 }, | |
1034 | { FALSE, 3, 3 }, | |
1035 | { TRUE, 1, 1 }, | |
1036 | { FALSE, 2, 2 } | |
1037 | }; | |
1038 | checkEditsIter(u"foldCase(IßtanBul)", | |
1039 | edits.getFineIterator(), edits.getFineIterator(), | |
1040 | foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges), | |
1041 | TRUE, errorCode); | |
1042 | } | |
1043 | ||
1044 | void StringCaseTest::TestCaseMapUTF8WithEdits() { | |
1045 | IcuTestErrorCode errorCode(*this, "TestEdits"); | |
1046 | char dest[50]; | |
1047 | Edits edits; | |
1048 | ||
1049 | int32_t length = CaseMap::utf8ToLower("tr", UCASEMAP_OMIT_UNCHANGED_TEXT, | |
1050 | u8"IstanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode); | |
1051 | assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"), | |
1052 | UnicodeString::fromUTF8(StringPiece(dest, length))); | |
1053 | static const EditChange lowerExpectedChanges[] = { | |
1054 | { TRUE, 1, 2 }, | |
1055 | { FALSE, 4, 4 }, | |
1056 | { TRUE, 1, 1 }, | |
1057 | { FALSE, 2, 2 } | |
1058 | }; | |
1059 | checkEditsIter(u"toLower(IstanBul)", | |
1060 | edits.getFineIterator(), edits.getFineIterator(), | |
1061 | lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges), | |
1062 | TRUE, errorCode); | |
1063 | ||
1064 | edits.reset(); | |
1065 | length = CaseMap::utf8ToUpper("el", UCASEMAP_OMIT_UNCHANGED_TEXT, | |
1066 | u8"Πατάτα", 6 * 2, dest, UPRV_LENGTHOF(dest), &edits, errorCode); | |
1067 | assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"), | |
1068 | UnicodeString::fromUTF8(StringPiece(dest, length))); | |
1069 | static const EditChange upperExpectedChanges[] = { | |
1070 | { FALSE, 2, 2 }, | |
1071 | { TRUE, 2, 2 }, | |
1072 | { TRUE, 2, 2 }, | |
1073 | { TRUE, 2, 2 }, | |
1074 | { TRUE, 2, 2 }, | |
1075 | { TRUE, 2, 2 } | |
1076 | }; | |
1077 | checkEditsIter(u"toUpper(Πατάτα)", | |
1078 | edits.getFineIterator(), edits.getFineIterator(), | |
1079 | upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges), | |
1080 | TRUE, errorCode); | |
1081 | ||
1082 | edits.reset(); | |
1083 | #if !UCONFIG_NO_BREAK_ITERATION | |
1084 | length = CaseMap::utf8ToTitle("nl", | |
1085 | UCASEMAP_OMIT_UNCHANGED_TEXT | | |
1086 | U_TITLECASE_NO_BREAK_ADJUSTMENT | | |
1087 | U_TITLECASE_NO_LOWERCASE, | |
1088 | NULL, u8"IjssEL IglOo", 12, | |
1089 | dest, UPRV_LENGTHOF(dest), &edits, errorCode); | |
1090 | assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"), | |
1091 | UnicodeString::fromUTF8(StringPiece(dest, length))); | |
1092 | static const EditChange titleExpectedChanges[] = { | |
1093 | { FALSE, 1, 1 }, | |
1094 | { TRUE, 1, 1 }, | |
1095 | { FALSE, 10, 10 } | |
1096 | }; | |
1097 | checkEditsIter(u"toTitle(IjssEL IglOo)", | |
1098 | edits.getFineIterator(), edits.getFineIterator(), | |
1099 | titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges), | |
1100 | TRUE, errorCode); | |
1101 | #endif | |
1102 | ||
1103 | edits.reset(); | |
1104 | length = CaseMap::utf8Fold(UCASEMAP_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I, | |
1105 | u8"IßtanBul", 1 + 2 + 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode); | |
1106 | assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"), | |
1107 | UnicodeString::fromUTF8(StringPiece(dest, length))); | |
1108 | static const EditChange foldExpectedChanges[] = { | |
1109 | { TRUE, 1, 2 }, | |
1110 | { TRUE, 2, 2 }, | |
1111 | { FALSE, 3, 3 }, | |
1112 | { TRUE, 1, 1 }, | |
1113 | { FALSE, 2, 2 } | |
1114 | }; | |
1115 | checkEditsIter(u"foldCase(IßtanBul)", | |
1116 | edits.getFineIterator(), edits.getFineIterator(), | |
1117 | foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges), | |
1118 | TRUE, errorCode); | |
1119 | } | |
1120 | ||
1121 | void StringCaseTest::TestLongUnicodeString() { | |
1122 | // Code coverage for UnicodeString case mapping code handling | |
1123 | // long strings or many changes in a string. | |
1124 | UnicodeString s(TRUE, | |
1125 | (const UChar *) | |
1126 | u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF" | |
1127 | u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF" | |
1128 | u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF" | |
1129 | u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF" | |
1130 | u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF" | |
1131 | u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF", 6 * 51); | |
1132 | UnicodeString expected(TRUE, | |
1133 | (const UChar *) | |
1134 | u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF" | |
1135 | u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF" | |
1136 | u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF" | |
1137 | u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF" | |
1138 | u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF" | |
1139 | u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF", 6 * 51); | |
1140 | s.toUpper(Locale::getRoot()); | |
1141 | assertEquals("string length 306", expected, s); | |
1142 | } | |
1143 | ||
1144 | void StringCaseTest::TestBug13127() { | |
1145 | // Test case crashed when the bug was present. | |
1146 | const char16_t *s16 = u"日本語"; | |
1147 | UnicodeString s(TRUE, s16, -1); | |
1148 | s.toTitle(0, Locale::getEnglish()); | |
1149 | } |