]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
b75a7d8f A |
3 | /* |
4 | ******************************************************************************* | |
5 | * | |
2ca993e8 | 6 | * Copyright (C) 2002-2016, International Business Machines |
b75a7d8f A |
7 | * Corporation and others. All Rights Reserved. |
8 | * | |
9 | ******************************************************************************* | |
10 | * file name: strcase.cpp | |
f3c0d7a5 | 11 | * encoding: UTF-8 |
b75a7d8f A |
12 | * tab size: 8 (not used) |
13 | * indentation:4 | |
14 | * | |
15 | * created on: 2002mar12 | |
16 | * created by: Markus W. Scherer | |
17 | * | |
18 | * Test file for string casing C++ API functions. | |
19 | */ | |
20 | ||
f3c0d7a5 | 21 | #include "unicode/std_string.h" |
0f5d89e8 | 22 | #include "unicode/brkiter.h" |
f3c0d7a5 A |
23 | #include "unicode/casemap.h" |
24 | #include "unicode/edits.h" | |
b75a7d8f | 25 | #include "unicode/uchar.h" |
b75a7d8f A |
26 | #include "unicode/ures.h" |
27 | #include "unicode/uloc.h" | |
28 | #include "unicode/locid.h" | |
29 | #include "unicode/ubrk.h" | |
73c04bcf A |
30 | #include "unicode/unistr.h" |
31 | #include "unicode/ucasemap.h" | |
0f5d89e8 | 32 | #include "unicode/ustring.h" |
4388f060 | 33 | #include "ucase.h" |
b75a7d8f | 34 | #include "ustrtest.h" |
374ca955 | 35 | #include "unicode/tstdtmod.h" |
b331163b | 36 | #include "cmemory.h" |
0f5d89e8 | 37 | #include "testutil.h" |
f3c0d7a5 A |
38 | |
39 | class StringCaseTest: public IntlTest { | |
40 | public: | |
41 | StringCaseTest(); | |
42 | virtual ~StringCaseTest(); | |
43 | ||
44 | void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0); | |
45 | ||
46 | void TestCaseConversion(); | |
47 | ||
48 | void TestCasingImpl(const UnicodeString &input, | |
49 | const UnicodeString &output, | |
50 | int32_t whichCase, | |
51 | void *iter, const char *localeID, uint32_t options); | |
52 | void TestCasing(); | |
0f5d89e8 | 53 | void TestTitleOptions(); |
f3c0d7a5 A |
54 | void TestFullCaseFoldingIterator(); |
55 | void TestGreekUpper(); | |
56 | void TestLongUpper(); | |
57 | void TestMalformedUTF8(); | |
58 | void TestBufferOverflow(); | |
59 | void TestEdits(); | |
0f5d89e8 A |
60 | void TestCopyMoveEdits(); |
61 | void TestEditsFindFwdBwd(); | |
62 | void TestMergeEdits(); | |
f3c0d7a5 A |
63 | void TestCaseMapWithEdits(); |
64 | void TestCaseMapUTF8WithEdits(); | |
0f5d89e8 A |
65 | void TestCaseMapToString(); |
66 | void TestCaseMapUTF8ToString(); | |
f3c0d7a5 A |
67 | void TestLongUnicodeString(); |
68 | void TestBug13127(); | |
0f5d89e8 A |
69 | void TestInPlaceTitle(); |
70 | void TestCaseMapEditsIteratorDocs(); | |
71 | void TestCaseMapGreekExtended(); | |
f3c0d7a5 A |
72 | |
73 | private: | |
74 | void assertGreekUpper(const char16_t *s, const char16_t *expected); | |
f3c0d7a5 A |
75 | |
76 | Locale GREEK_LOCALE_; | |
77 | }; | |
78 | ||
79 | StringCaseTest::StringCaseTest() : GREEK_LOCALE_("el") {} | |
80 | ||
374ca955 | 81 | StringCaseTest::~StringCaseTest() {} |
b75a7d8f | 82 | |
f3c0d7a5 A |
83 | extern IntlTest *createStringCaseTest() { |
84 | return new StringCaseTest(); | |
85 | } | |
86 | ||
b75a7d8f A |
87 | void |
88 | StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { | |
4388f060 A |
89 | if(exec) { |
90 | logln("TestSuite StringCaseTest: "); | |
91 | } | |
92 | TESTCASE_AUTO_BEGIN; | |
93 | TESTCASE_AUTO(TestCaseConversion); | |
729e4ab9 | 94 | #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION |
4388f060 | 95 | TESTCASE_AUTO(TestCasing); |
0f5d89e8 | 96 | TESTCASE_AUTO(TestTitleOptions); |
b75a7d8f | 97 | #endif |
4388f060 | 98 | TESTCASE_AUTO(TestFullCaseFoldingIterator); |
f3c0d7a5 | 99 | TESTCASE_AUTO(TestGreekUpper); |
a62d09fc | 100 | TESTCASE_AUTO(TestLongUpper); |
f3c0d7a5 A |
101 | TESTCASE_AUTO(TestMalformedUTF8); |
102 | TESTCASE_AUTO(TestBufferOverflow); | |
103 | TESTCASE_AUTO(TestEdits); | |
0f5d89e8 A |
104 | TESTCASE_AUTO(TestCopyMoveEdits); |
105 | TESTCASE_AUTO(TestEditsFindFwdBwd); | |
106 | TESTCASE_AUTO(TestMergeEdits); | |
f3c0d7a5 A |
107 | TESTCASE_AUTO(TestCaseMapWithEdits); |
108 | TESTCASE_AUTO(TestCaseMapUTF8WithEdits); | |
0f5d89e8 A |
109 | TESTCASE_AUTO(TestCaseMapToString); |
110 | TESTCASE_AUTO(TestCaseMapUTF8ToString); | |
f3c0d7a5 | 111 | TESTCASE_AUTO(TestLongUnicodeString); |
0f5d89e8 A |
112 | #if !UCONFIG_NO_BREAK_ITERATION |
113 | TESTCASE_AUTO(TestBug13127); | |
114 | TESTCASE_AUTO(TestInPlaceTitle); | |
115 | #endif | |
116 | TESTCASE_AUTO(TestCaseMapEditsIteratorDocs); | |
117 | TESTCASE_AUTO(TestCaseMapGreekExtended); | |
4388f060 | 118 | TESTCASE_AUTO_END; |
b75a7d8f A |
119 | } |
120 | ||
121 | void | |
122 | StringCaseTest::TestCaseConversion() | |
123 | { | |
73c04bcf | 124 | static const UChar uppercaseGreek[] = |
b75a7d8f A |
125 | { 0x399, 0x395, 0x3a3, 0x3a5, 0x3a3, 0x20, 0x03a7, 0x3a1, 0x399, 0x3a3, 0x3a4, |
126 | 0x39f, 0x3a3, 0 }; | |
127 | // "IESUS CHRISTOS" | |
128 | ||
73c04bcf | 129 | static const UChar lowercaseGreek[] = |
b75a7d8f A |
130 | { 0x3b9, 0x3b5, 0x3c3, 0x3c5, 0x3c2, 0x20, 0x03c7, 0x3c1, 0x3b9, 0x3c3, 0x3c4, |
131 | 0x3bf, 0x3c2, 0 }; | |
132 | // "iesus christos" | |
133 | ||
73c04bcf | 134 | static const UChar lowercaseTurkish[] = |
b75a7d8f A |
135 | { 0x69, 0x73, 0x74, 0x61, 0x6e, 0x62, 0x75, 0x6c, 0x2c, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x63, 0x6f, |
136 | 0x6e, 0x73, 0x74, 0x61, 0x6e, 0x74, 0x0131, 0x6e, 0x6f, 0x70, 0x6c, 0x65, 0x21, 0 }; | |
137 | ||
73c04bcf | 138 | static const UChar uppercaseTurkish[] = |
b75a7d8f A |
139 | { 0x54, 0x4f, 0x50, 0x4b, 0x41, 0x50, 0x49, 0x20, 0x50, 0x41, 0x4c, 0x41, 0x43, 0x45, 0x2c, 0x20, |
140 | 0x0130, 0x53, 0x54, 0x41, 0x4e, 0x42, 0x55, 0x4c, 0 }; | |
141 | ||
142 | UnicodeString expectedResult; | |
143 | UnicodeString test3; | |
144 | ||
145 | test3 += (UChar32)0x0130; | |
146 | test3 += "STANBUL, NOT CONSTANTINOPLE!"; | |
147 | ||
148 | UnicodeString test4(test3); | |
73c04bcf | 149 | test4.toLower(Locale("")); |
b75a7d8f A |
150 | expectedResult = UnicodeString("i\\u0307stanbul, not constantinople!", "").unescape(); |
151 | if (test4 != expectedResult) | |
152 | errln("1. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); | |
153 | ||
154 | test4 = test3; | |
155 | test4.toLower(Locale("tr", "TR")); | |
156 | expectedResult = lowercaseTurkish; | |
157 | if (test4 != expectedResult) | |
158 | errln("2. toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); | |
159 | ||
160 | test3 = "topkap"; | |
161 | test3 += (UChar32)0x0131; | |
162 | test3 += " palace, istanbul"; | |
163 | test4 = test3; | |
164 | ||
73c04bcf | 165 | test4.toUpper(Locale("")); |
b75a7d8f A |
166 | expectedResult = "TOPKAPI PALACE, ISTANBUL"; |
167 | if (test4 != expectedResult) | |
168 | errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); | |
169 | ||
170 | test4 = test3; | |
171 | test4.toUpper(Locale("tr", "TR")); | |
172 | expectedResult = uppercaseTurkish; | |
173 | if (test4 != expectedResult) | |
174 | errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); | |
175 | ||
176 | test3 = CharsToUnicodeString("S\\u00FC\\u00DFmayrstra\\u00DFe"); | |
177 | ||
178 | test3.toUpper(Locale("de", "DE")); | |
179 | expectedResult = CharsToUnicodeString("S\\u00DCSSMAYRSTRASSE"); | |
180 | if (test3 != expectedResult) | |
181 | errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test3 + "\"."); | |
182 | ||
183 | test4.replace(0, test4.length(), uppercaseGreek); | |
184 | ||
185 | test4.toLower(Locale("el", "GR")); | |
186 | expectedResult = lowercaseGreek; | |
187 | if (test4 != expectedResult) | |
188 | errln("toLower failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); | |
189 | ||
190 | test4.replace(0, test4.length(), lowercaseGreek); | |
191 | ||
192 | test4.toUpper(); | |
193 | expectedResult = uppercaseGreek; | |
194 | if (test4 != expectedResult) | |
195 | errln("toUpper failed: expected \"" + expectedResult + "\", got \"" + test4 + "\"."); | |
196 | ||
197 | // more string case mapping tests with the new implementation | |
198 | { | |
199 | static const UChar | |
200 | ||
201 | beforeLower[]= { 0x61, 0x42, 0x49, 0x3a3, 0xdf, 0x3a3, 0x2f, 0xd93f, 0xdfff }, | |
202 | lowerRoot[]= { 0x61, 0x62, 0x69, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff }, | |
203 | lowerTurkish[]={ 0x61, 0x62, 0x131, 0x3c3, 0xdf, 0x3c2, 0x2f, 0xd93f, 0xdfff }, | |
204 | ||
205 | beforeUpper[]= { 0x61, 0x42, 0x69, 0x3c2, 0xdf, 0x3c3, 0x2f, 0xfb03, 0xfb03, 0xfb03, 0xd93f, 0xdfff }, | |
206 | upperRoot[]= { 0x41, 0x42, 0x49, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff }, | |
207 | upperTurkish[]={ 0x41, 0x42, 0x130, 0x3a3, 0x53, 0x53, 0x3a3, 0x2f, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0x46, 0x46, 0x49, 0xd93f, 0xdfff }, | |
208 | ||
209 | beforeMiniUpper[]= { 0xdf, 0x61 }, | |
210 | miniUpper[]= { 0x53, 0x53, 0x41 }; | |
211 | ||
212 | UnicodeString s; | |
213 | ||
214 | /* lowercase with root locale */ | |
2ca993e8 | 215 | s=UnicodeString(FALSE, beforeLower, UPRV_LENGTHOF(beforeLower)); |
b75a7d8f | 216 | s.toLower(""); |
2ca993e8 | 217 | if( s.length()!=UPRV_LENGTHOF(lowerRoot) || |
b75a7d8f A |
218 | s!=UnicodeString(FALSE, lowerRoot, s.length()) |
219 | ) { | |
2ca993e8 | 220 | errln("error in toLower(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerRoot, UPRV_LENGTHOF(lowerRoot)) + "\""); |
b75a7d8f A |
221 | } |
222 | ||
223 | /* lowercase with turkish locale */ | |
2ca993e8 | 224 | s=UnicodeString(FALSE, beforeLower, UPRV_LENGTHOF(beforeLower)); |
b75a7d8f | 225 | s.setCharAt(0, beforeLower[0]).toLower(Locale("tr")); |
2ca993e8 | 226 | if( s.length()!=UPRV_LENGTHOF(lowerTurkish) || |
b75a7d8f A |
227 | s!=UnicodeString(FALSE, lowerTurkish, s.length()) |
228 | ) { | |
2ca993e8 | 229 | errln("error in toLower(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, lowerTurkish, UPRV_LENGTHOF(lowerTurkish)) + "\""); |
b75a7d8f A |
230 | } |
231 | ||
232 | /* uppercase with root locale */ | |
2ca993e8 | 233 | s=UnicodeString(FALSE, beforeUpper, UPRV_LENGTHOF(beforeUpper)); |
73c04bcf | 234 | s.setCharAt(0, beforeUpper[0]).toUpper(Locale("")); |
2ca993e8 | 235 | if( s.length()!=UPRV_LENGTHOF(upperRoot) || |
b75a7d8f A |
236 | s!=UnicodeString(FALSE, upperRoot, s.length()) |
237 | ) { | |
2ca993e8 | 238 | errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperRoot, UPRV_LENGTHOF(upperRoot)) + "\""); |
b75a7d8f A |
239 | } |
240 | ||
241 | /* uppercase with turkish locale */ | |
2ca993e8 | 242 | s=UnicodeString(FALSE, beforeUpper, UPRV_LENGTHOF(beforeUpper)); |
b75a7d8f | 243 | s.toUpper(Locale("tr")); |
2ca993e8 | 244 | if( s.length()!=UPRV_LENGTHOF(upperTurkish) || |
b75a7d8f A |
245 | s!=UnicodeString(FALSE, upperTurkish, s.length()) |
246 | ) { | |
2ca993e8 | 247 | errln("error in toUpper(turkish locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, upperTurkish, UPRV_LENGTHOF(upperTurkish)) + "\""); |
b75a7d8f A |
248 | } |
249 | ||
250 | /* uppercase a short string with root locale */ | |
2ca993e8 | 251 | s=UnicodeString(FALSE, beforeMiniUpper, UPRV_LENGTHOF(beforeMiniUpper)); |
b75a7d8f | 252 | s.setCharAt(0, beforeMiniUpper[0]).toUpper(""); |
2ca993e8 | 253 | if( s.length()!=UPRV_LENGTHOF(miniUpper) || |
b75a7d8f A |
254 | s!=UnicodeString(FALSE, miniUpper, s.length()) |
255 | ) { | |
2ca993e8 | 256 | errln("error in toUpper(root locale)=\"" + s + "\" expected \"" + UnicodeString(FALSE, miniUpper, UPRV_LENGTHOF(miniUpper)) + "\""); |
b75a7d8f A |
257 | } |
258 | } | |
259 | ||
260 | // test some supplementary characters (>= Unicode 3.1) | |
261 | { | |
262 | UnicodeString t; | |
263 | ||
264 | UnicodeString | |
265 | deseretInput=UnicodeString("\\U0001043C\\U00010414", "").unescape(), | |
266 | deseretLower=UnicodeString("\\U0001043C\\U0001043C", "").unescape(), | |
267 | deseretUpper=UnicodeString("\\U00010414\\U00010414", "").unescape(); | |
268 | (t=deseretInput).toLower(); | |
269 | if(t!=deseretLower) { | |
270 | errln("error lowercasing Deseret (plane 1) characters"); | |
271 | } | |
272 | (t=deseretInput).toUpper(); | |
273 | if(t!=deseretUpper) { | |
274 | errln("error uppercasing Deseret (plane 1) characters"); | |
275 | } | |
276 | } | |
277 | ||
278 | // test some more cases that looked like problems | |
279 | { | |
280 | UnicodeString t; | |
281 | ||
282 | UnicodeString | |
283 | ljInput=UnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 \\U0001043C\\U00010414", "").unescape(), | |
284 | ljLower=UnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 \\U0001043C\\U0001043C", "").unescape(), | |
285 | ljUpper=UnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 \\U00010414\\U00010414", "").unescape(); | |
286 | (t=ljInput).toLower("en"); | |
287 | if(t!=ljLower) { | |
288 | errln("error lowercasing LJ characters"); | |
289 | } | |
290 | (t=ljInput).toUpper("en"); | |
291 | if(t!=ljUpper) { | |
292 | errln("error uppercasing LJ characters"); | |
293 | } | |
294 | } | |
295 | ||
296 | #if !UCONFIG_NO_NORMALIZATION | |
297 | // some context-sensitive casing depends on normalization data being present | |
298 | ||
299 | // Unicode 3.1.1 SpecialCasing tests | |
300 | { | |
301 | UnicodeString t; | |
302 | ||
303 | // sigmas preceded and/or followed by cased letters | |
304 | UnicodeString | |
305 | sigmas=UnicodeString("i\\u0307\\u03a3\\u0308j \\u0307\\u03a3\\u0308j i\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(), | |
306 | sigmasLower=UnicodeString("i\\u0307\\u03c3\\u0308j \\u0307\\u03c3\\u0308j i\\u00ad\\u03c2\\u0308 \\u0307\\u03c3\\u0308 ", "").unescape(), | |
307 | sigmasUpper=UnicodeString("I\\u0307\\u03a3\\u0308J \\u0307\\u03a3\\u0308J I\\u00ad\\u03a3\\u0308 \\u0307\\u03a3\\u0308 ", "").unescape(); | |
308 | ||
309 | (t=sigmas).toLower(); | |
310 | if(t!=sigmasLower) { | |
311 | errln("error in sigmas.toLower()=\"" + t + "\" expected \"" + sigmasLower + "\""); | |
312 | } | |
313 | ||
73c04bcf | 314 | (t=sigmas).toUpper(Locale("")); |
b75a7d8f A |
315 | if(t!=sigmasUpper) { |
316 | errln("error in sigmas.toUpper()=\"" + t + "\" expected \"" + sigmasUpper + "\""); | |
317 | } | |
318 | ||
319 | // turkish & azerbaijani dotless i & dotted I | |
320 | // remove dot above if there was a capital I before and there are no more accents above | |
321 | UnicodeString | |
322 | dots=UnicodeString("I \\u0130 I\\u0307 I\\u0327\\u0307 I\\u0301\\u0307 I\\u0327\\u0307\\u0301", "").unescape(), | |
323 | dotsTurkish=UnicodeString("\\u0131 i i i\\u0327 \\u0131\\u0301\\u0307 i\\u0327\\u0301", "").unescape(), | |
324 | dotsDefault=UnicodeString("i i\\u0307 i\\u0307 i\\u0327\\u0307 i\\u0301\\u0307 i\\u0327\\u0307\\u0301", "").unescape(); | |
325 | ||
326 | (t=dots).toLower("tr"); | |
327 | if(t!=dotsTurkish) { | |
328 | errln("error in dots.toLower(tr)=\"" + t + "\" expected \"" + dotsTurkish + "\""); | |
329 | } | |
330 | ||
331 | (t=dots).toLower("de"); | |
332 | if(t!=dotsDefault) { | |
333 | errln("error in dots.toLower(de)=\"" + t + "\" expected \"" + dotsDefault + "\""); | |
334 | } | |
335 | } | |
336 | ||
337 | // more Unicode 3.1.1 tests | |
338 | { | |
339 | UnicodeString t; | |
340 | ||
341 | // lithuanian dot above in uppercasing | |
342 | UnicodeString | |
343 | dots=UnicodeString("a\\u0307 \\u0307 i\\u0307 j\\u0327\\u0307 j\\u0301\\u0307", "").unescape(), | |
344 | dotsLithuanian=UnicodeString("A\\u0307 \\u0307 I J\\u0327 J\\u0301\\u0307", "").unescape(), | |
345 | dotsDefault=UnicodeString("A\\u0307 \\u0307 I\\u0307 J\\u0327\\u0307 J\\u0301\\u0307", "").unescape(); | |
346 | ||
347 | (t=dots).toUpper("lt"); | |
348 | if(t!=dotsLithuanian) { | |
349 | errln("error in dots.toUpper(lt)=\"" + t + "\" expected \"" + dotsLithuanian + "\""); | |
350 | } | |
351 | ||
352 | (t=dots).toUpper("de"); | |
353 | if(t!=dotsDefault) { | |
354 | errln("error in dots.toUpper(de)=\"" + t + "\" expected \"" + dotsDefault + "\""); | |
355 | } | |
356 | ||
357 | // lithuanian adds dot above to i in lowercasing if there are more above accents | |
358 | UnicodeString | |
359 | i=UnicodeString("I I\\u0301 J J\\u0301 \\u012e \\u012e\\u0301 \\u00cc\\u00cd\\u0128", "").unescape(), | |
360 | iLithuanian=UnicodeString("i i\\u0307\\u0301 j j\\u0307\\u0301 \\u012f \\u012f\\u0307\\u0301 i\\u0307\\u0300i\\u0307\\u0301i\\u0307\\u0303", "").unescape(), | |
361 | iDefault=UnicodeString("i i\\u0301 j j\\u0301 \\u012f \\u012f\\u0301 \\u00ec\\u00ed\\u0129", "").unescape(); | |
362 | ||
363 | (t=i).toLower("lt"); | |
364 | if(t!=iLithuanian) { | |
365 | errln("error in i.toLower(lt)=\"" + t + "\" expected \"" + iLithuanian + "\""); | |
366 | } | |
367 | ||
368 | (t=i).toLower("de"); | |
369 | if(t!=iDefault) { | |
370 | errln("error in i.toLower(de)=\"" + t + "\" expected \"" + iDefault + "\""); | |
371 | } | |
372 | } | |
373 | ||
374 | #endif | |
375 | ||
376 | // test case folding | |
377 | { | |
378 | UnicodeString | |
379 | s=UnicodeString("A\\u00df\\u00b5\\ufb03\\U0001040c\\u0130\\u0131", "").unescape(), | |
380 | f=UnicodeString("ass\\u03bcffi\\U00010434i\\u0307\\u0131", "").unescape(), | |
381 | g=UnicodeString("ass\\u03bcffi\\U00010434i\\u0131", "").unescape(), | |
382 | t; | |
383 | ||
384 | (t=s).foldCase(); | |
385 | if(f!=t) { | |
386 | errln("error in foldCase(\"" + s + "\", default)=\"" + t + "\" but expected \"" + f + "\""); | |
387 | } | |
388 | ||
389 | // alternate handling for dotted I/dotless i (U+0130, U+0131) | |
390 | (t=s).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I); | |
391 | if(g!=t) { | |
392 | errln("error in foldCase(\"" + s + "\", U_FOLD_CASE_EXCLUDE_SPECIAL_I)=\"" + t + "\" but expected \"" + g + "\""); | |
393 | } | |
394 | } | |
395 | } | |
396 | ||
73c04bcf A |
397 | // data-driven case mapping tests ------------------------------------------ *** |
398 | ||
// Kinds of case mapping exercised by the data-driven tests.
// The values double as indexes into dataNames[] below, so the two
// lists must stay in the same order.
enum {
    TEST_LOWER = 0,
    TEST_UPPER,
    TEST_TITLE,
    TEST_FOLD,
    TEST_COUNT  // number of kinds above; not a kind itself
};

// names of TestData children in casing.txt
// (one per TEST_* value, followed by an empty terminator entry)
static const char *const dataNames[TEST_COUNT+1]={
    "lowercasing",  // TEST_LOWER
    "uppercasing",  // TEST_UPPER
    "titlecasing",  // TEST_TITLE
    "casefolding",  // TEST_FOLD
    ""
};
374ca955 | 415 | |
73c04bcf A |
416 | void |
417 | StringCaseTest::TestCasingImpl(const UnicodeString &input, | |
418 | const UnicodeString &output, | |
419 | int32_t whichCase, | |
46f4442e | 420 | void *iter, const char *localeID, uint32_t options) { |
73c04bcf A |
421 | // UnicodeString |
422 | UnicodeString result; | |
423 | const char *name; | |
46f4442e | 424 | Locale locale(localeID); |
73c04bcf A |
425 | |
426 | result=input; | |
427 | switch(whichCase) { | |
428 | case TEST_LOWER: | |
429 | name="toLower"; | |
46f4442e | 430 | result.toLower(locale); |
73c04bcf A |
431 | break; |
432 | case TEST_UPPER: | |
433 | name="toUpper"; | |
46f4442e A |
434 | result.toUpper(locale); |
435 | break; | |
436 | #if !UCONFIG_NO_BREAK_ITERATION | |
437 | case TEST_TITLE: | |
438 | name="toTitle"; | |
439 | result.toTitle((BreakIterator *)iter, locale, options); | |
440 | break; | |
441 | #endif | |
442 | case TEST_FOLD: | |
443 | name="foldCase"; | |
444 | result.foldCase(options); | |
73c04bcf A |
445 | break; |
446 | default: | |
447 | name=""; | |
448 | break; // won't happen | |
b75a7d8f | 449 | } |
73c04bcf | 450 | if(result!=output) { |
729e4ab9 | 451 | dataerrln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name); |
b75a7d8f | 452 | } |
46f4442e A |
453 | #if !UCONFIG_NO_BREAK_ITERATION |
454 | if(whichCase==TEST_TITLE && options==0) { | |
455 | result=input; | |
456 | result.toTitle((BreakIterator *)iter, locale); | |
457 | if(result!=output) { | |
729e4ab9 | 458 | dataerrln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res"); |
46f4442e A |
459 | } |
460 | } | |
461 | #endif | |
b75a7d8f | 462 | |
73c04bcf A |
463 | // UTF-8 |
464 | char utf8In[100], utf8Out[100]; | |
465 | int32_t utf8InLength, utf8OutLength, resultLength; | |
466 | UChar *buffer; | |
467 | ||
729e4ab9 A |
468 | IcuTestErrorCode errorCode(*this, "TestCasingImpl"); |
469 | LocalUCaseMapPointer csm(ucasemap_open(localeID, options, errorCode)); | |
46f4442e A |
470 | #if !UCONFIG_NO_BREAK_ITERATION |
471 | if(iter!=NULL) { | |
472 | // Clone the break iterator so that the UCaseMap can safely adopt it. | |
57a6839d | 473 | UBreakIterator *clone=ubrk_safeClone((UBreakIterator *)iter, NULL, NULL, errorCode); |
729e4ab9 | 474 | ucasemap_setBreakIterator(csm.getAlias(), clone, errorCode); |
46f4442e A |
475 | } |
476 | #endif | |
73c04bcf | 477 | |
729e4ab9 | 478 | u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), errorCode); |
73c04bcf A |
479 | switch(whichCase) { |
480 | case TEST_LOWER: | |
481 | name="ucasemap_utf8ToLower"; | |
729e4ab9 | 482 | utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(), |
73c04bcf | 483 | utf8Out, (int32_t)sizeof(utf8Out), |
729e4ab9 | 484 | utf8In, utf8InLength, errorCode); |
73c04bcf A |
485 | break; |
486 | case TEST_UPPER: | |
487 | name="ucasemap_utf8ToUpper"; | |
729e4ab9 | 488 | utf8OutLength=ucasemap_utf8ToUpper(csm.getAlias(), |
73c04bcf | 489 | utf8Out, (int32_t)sizeof(utf8Out), |
729e4ab9 | 490 | utf8In, utf8InLength, errorCode); |
73c04bcf | 491 | break; |
46f4442e A |
492 | #if !UCONFIG_NO_BREAK_ITERATION |
493 | case TEST_TITLE: | |
494 | name="ucasemap_utf8ToTitle"; | |
729e4ab9 | 495 | utf8OutLength=ucasemap_utf8ToTitle(csm.getAlias(), |
46f4442e | 496 | utf8Out, (int32_t)sizeof(utf8Out), |
729e4ab9 | 497 | utf8In, utf8InLength, errorCode); |
46f4442e A |
498 | break; |
499 | #endif | |
500 | case TEST_FOLD: | |
501 | name="ucasemap_utf8FoldCase"; | |
729e4ab9 | 502 | utf8OutLength=ucasemap_utf8FoldCase(csm.getAlias(), |
46f4442e | 503 | utf8Out, (int32_t)sizeof(utf8Out), |
729e4ab9 | 504 | utf8In, utf8InLength, errorCode); |
46f4442e | 505 | break; |
73c04bcf A |
506 | default: |
507 | name=""; | |
508 | utf8OutLength=0; | |
509 | break; // won't happen | |
b75a7d8f | 510 | } |
73c04bcf | 511 | buffer=result.getBuffer(utf8OutLength); |
729e4ab9 A |
512 | u_strFromUTF8(buffer, result.getCapacity(), &resultLength, utf8Out, utf8OutLength, errorCode); |
513 | result.releaseBuffer(errorCode.isSuccess() ? resultLength : 0); | |
b75a7d8f | 514 | |
729e4ab9 A |
515 | if(errorCode.isFailure()) { |
516 | errcheckln(errorCode, "error: %s() got an error for a test case from casing.res - %s", name, u_errorName(errorCode)); | |
517 | errorCode.reset(); | |
73c04bcf A |
518 | } else if(result!=output) { |
519 | errln("error: %s() got a wrong result for a test case from casing.res", name); | |
46f4442e | 520 | errln("expected \"" + output + "\" got \"" + result + "\"" ); |
73c04bcf | 521 | } |
73c04bcf A |
522 | } |
523 | ||
73c04bcf A |
524 | void |
525 | StringCaseTest::TestCasing() { | |
526 | UErrorCode status = U_ZERO_ERROR; | |
729e4ab9 A |
527 | #if !UCONFIG_NO_BREAK_ITERATION |
528 | LocalUBreakIteratorPointer iter; | |
529 | #endif | |
73c04bcf | 530 | char cLocaleID[100]; |
46f4442e A |
531 | UnicodeString locale, input, output, optionsString, result; |
532 | uint32_t options; | |
73c04bcf | 533 | int32_t whichCase, type; |
729e4ab9 | 534 | LocalPointer<TestDataModule> driver(TestDataModule::getTestDataModule("casing", *this, status)); |
73c04bcf A |
535 | if(U_SUCCESS(status)) { |
536 | for(whichCase=0; whichCase<TEST_COUNT; ++whichCase) { | |
46f4442e A |
537 | #if UCONFIG_NO_BREAK_ITERATION |
538 | if(whichCase==TEST_TITLE) { | |
539 | continue; | |
540 | } | |
541 | #endif | |
729e4ab9 | 542 | LocalPointer<TestData> casingTest(driver->createTestData(dataNames[whichCase], status)); |
73c04bcf A |
543 | if(U_FAILURE(status)) { |
544 | errln("TestCasing failed to createTestData(%s) - %s", dataNames[whichCase], u_errorName(status)); | |
545 | break; | |
b75a7d8f | 546 | } |
73c04bcf A |
547 | const DataMap *myCase = NULL; |
548 | while(casingTest->nextCase(myCase, status)) { | |
73c04bcf A |
549 | input = myCase->getString("Input", status); |
550 | output = myCase->getString("Output", status); | |
b75a7d8f | 551 | |
46f4442e A |
552 | if(whichCase!=TEST_FOLD) { |
553 | locale = myCase->getString("Locale", status); | |
554 | } | |
555 | locale.extract(0, 0x7fffffff, cLocaleID, sizeof(cLocaleID), ""); | |
556 | ||
46f4442e | 557 | #if !UCONFIG_NO_BREAK_ITERATION |
73c04bcf A |
558 | if(whichCase==TEST_TITLE) { |
559 | type = myCase->getInt("Type", status); | |
560 | if(type>=0) { | |
729e4ab9 | 561 | iter.adoptInstead(ubrk_open((UBreakIteratorType)type, cLocaleID, NULL, 0, &status)); |
46f4442e A |
562 | } else if(type==-2) { |
563 | // Open a trivial break iterator that only delivers { 0, length } | |
564 | // or even just { 0 } as boundaries. | |
565 | static const UChar rules[] = { 0x2e, 0x2a, 0x3b }; // ".*;" | |
566 | UParseError parseError; | |
b331163b | 567 | iter.adoptInstead(ubrk_openRules(rules, UPRV_LENGTHOF(rules), NULL, 0, &parseError, &status)); |
73c04bcf A |
568 | } |
569 | } | |
570 | #endif | |
46f4442e A |
571 | options = 0; |
572 | if(whichCase==TEST_TITLE || whichCase==TEST_FOLD) { | |
573 | optionsString = myCase->getString("Options", status); | |
574 | if(optionsString.indexOf((UChar)0x54)>=0) { // T | |
575 | options|=U_FOLD_CASE_EXCLUDE_SPECIAL_I; | |
576 | } | |
577 | if(optionsString.indexOf((UChar)0x4c)>=0) { // L | |
578 | options|=U_TITLECASE_NO_LOWERCASE; | |
579 | } | |
580 | if(optionsString.indexOf((UChar)0x41)>=0) { // A | |
581 | options|=U_TITLECASE_NO_BREAK_ADJUSTMENT; | |
582 | } | |
583 | } | |
73c04bcf A |
584 | |
585 | if(U_FAILURE(status)) { | |
729e4ab9 | 586 | dataerrln("error: TestCasing() setup failed for %s test case from casing.res: %s", dataNames[whichCase], u_errorName(status)); |
73c04bcf A |
587 | status = U_ZERO_ERROR; |
588 | } else { | |
729e4ab9 A |
589 | #if UCONFIG_NO_BREAK_ITERATION |
590 | LocalPointer<UMemory> iter; | |
591 | #endif | |
592 | TestCasingImpl(input, output, whichCase, iter.getAlias(), cLocaleID, options); | |
b75a7d8f | 593 | } |
b75a7d8f | 594 | |
73c04bcf | 595 | #if !UCONFIG_NO_BREAK_ITERATION |
729e4ab9 | 596 | iter.adoptInstead(NULL); |
73c04bcf A |
597 | #endif |
598 | } | |
b75a7d8f | 599 | } |
b75a7d8f A |
600 | } |
601 | ||
73c04bcf A |
602 | #if !UCONFIG_NO_BREAK_ITERATION |
603 | // more tests for API coverage | |
604 | status=U_ZERO_ERROR; | |
605 | input=UNICODE_STRING_SIMPLE("sTrA\\u00dfE").unescape(); | |
606 | (result=input).toTitle(NULL); | |
607 | if(result!=UNICODE_STRING_SIMPLE("Stra\\u00dfe").unescape()) { | |
729e4ab9 | 608 | dataerrln("UnicodeString::toTitle(NULL) failed."); |
73c04bcf | 609 | } |
b75a7d8f A |
610 | #endif |
611 | } | |
4388f060 | 612 | |
0f5d89e8 A |
613 | void |
614 | StringCaseTest::TestTitleOptions() { | |
615 | // New options in ICU 60. | |
616 | TestCasingImpl(u"ʻcAt! ʻeTc.", u"ʻCat! ʻetc.", TEST_TITLE, | |
617 | nullptr, "", U_TITLECASE_WHOLE_STRING); | |
618 | TestCasingImpl(u"a ʻCaT. A ʻdOg! ʻeTc.", u"A ʻCaT. A ʻdOg! ʻETc.", TEST_TITLE, | |
619 | nullptr, "", U_TITLECASE_SENTENCES|U_TITLECASE_NO_LOWERCASE); | |
620 | TestCasingImpl(u"49eRs", u"49ers", TEST_TITLE, | |
621 | nullptr, "", U_TITLECASE_WHOLE_STRING); | |
622 | TestCasingImpl(u"«丰(aBc)»", u"«丰(abc)»", TEST_TITLE, | |
623 | nullptr, "", U_TITLECASE_WHOLE_STRING); | |
624 | TestCasingImpl(u"49eRs", u"49Ers", TEST_TITLE, | |
625 | nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_ADJUST_TO_CASED); | |
626 | TestCasingImpl(u"«丰(aBc)»", u"«丰(Abc)»", TEST_TITLE, | |
627 | nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_ADJUST_TO_CASED); | |
628 | TestCasingImpl(u" john. Smith", u" John. Smith", TEST_TITLE, | |
629 | nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_NO_LOWERCASE); | |
630 | TestCasingImpl(u" john. Smith", u" john. smith", TEST_TITLE, | |
631 | nullptr, "", U_TITLECASE_WHOLE_STRING|U_TITLECASE_NO_BREAK_ADJUSTMENT); | |
632 | TestCasingImpl(u"«ijs»", u"«IJs»", TEST_TITLE, | |
633 | nullptr, "nl-BE", U_TITLECASE_WHOLE_STRING); | |
634 | TestCasingImpl(u"«ijs»", u"«İjs»", TEST_TITLE, | |
635 | nullptr, "tr-DE", U_TITLECASE_WHOLE_STRING); | |
636 | ||
637 | #if !UCONFIG_NO_BREAK_ITERATION | |
638 | // Test conflicting settings. | |
639 | // If & when we add more options, then the ORed combinations may become | |
640 | // indistinguishable from valid values. | |
641 | IcuTestErrorCode errorCode(*this, "TestTitleOptions"); | |
642 | CaseMap::toTitle("", U_TITLECASE_NO_BREAK_ADJUSTMENT|U_TITLECASE_ADJUST_TO_CASED, nullptr, | |
643 | u"", 0, nullptr, 0, nullptr, errorCode); | |
644 | if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) { | |
645 | errln("CaseMap::toTitle(multiple adjustment options) -> %s not illegal argument", | |
646 | errorCode.errorName()); | |
647 | } | |
648 | errorCode.reset(); | |
649 | CaseMap::toTitle("", U_TITLECASE_WHOLE_STRING|U_TITLECASE_SENTENCES, nullptr, | |
650 | u"", 0, nullptr, 0, nullptr, errorCode); | |
651 | if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) { | |
652 | errln("CaseMap::toTitle(multiple iterator options) -> %s not illegal argument", | |
653 | errorCode.errorName()); | |
654 | } | |
655 | errorCode.reset(); | |
656 | LocalPointer<BreakIterator> iter( | |
657 | BreakIterator::createCharacterInstance(Locale::getRoot(), errorCode)); | |
658 | CaseMap::toTitle("", U_TITLECASE_WHOLE_STRING, iter.getAlias(), | |
659 | u"", 0, nullptr, 0, nullptr, errorCode); | |
660 | if (errorCode.get() != U_ILLEGAL_ARGUMENT_ERROR) { | |
661 | errln("CaseMap::toTitle(iterator option + iterator) -> %s not illegal argument", | |
662 | errorCode.errorName()); | |
663 | } | |
664 | errorCode.reset(); | |
665 | #endif | |
666 | } | |
667 | ||
4388f060 A |
668 | void |
669 | StringCaseTest::TestFullCaseFoldingIterator() { | |
670 | UnicodeString ffi=UNICODE_STRING_SIMPLE("ffi"); | |
671 | UnicodeString ss=UNICODE_STRING_SIMPLE("ss"); | |
672 | FullCaseFoldingIterator iter; | |
673 | int32_t count=0; | |
674 | int32_t countSpecific=0; | |
675 | UChar32 c; | |
676 | UnicodeString full; | |
677 | while((c=iter.next(full))>=0) { | |
678 | ++count; | |
679 | // Check that the full Case_Folding has more than 1 code point. | |
680 | if(!full.hasMoreChar32Than(0, 0x7fffffff, 1)) { | |
681 | errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding has at most 1 code point", (long)c); | |
682 | continue; | |
683 | } | |
684 | // Check that full == Case_Folding(c). | |
685 | UnicodeString cf(c); | |
686 | cf.foldCase(); | |
687 | if(full!=cf) { | |
688 | errln("error: FullCaseFoldingIterator.next()=U+%04lX full Case_Folding != cf(c)", (long)c); | |
689 | continue; | |
690 | } | |
691 | // Spot-check a couple of specific cases. | |
692 | if((full==ffi && c==0xfb03) || (full==ss && (c==0xdf || c==0x1e9e))) { | |
693 | ++countSpecific; | |
694 | } | |
695 | } | |
696 | if(countSpecific!=3) { | |
697 | errln("error: FullCaseFoldingIterator did not yield exactly the expected specific cases"); | |
698 | } | |
699 | if(count<70) { | |
700 | errln("error: FullCaseFoldingIterator yielded only %d (cp, full) pairs", (int)count); | |
701 | } | |
702 | } | |
a62d09fc | 703 | |
f3c0d7a5 A |
704 | void |
705 | StringCaseTest::assertGreekUpper(const char16_t *s, const char16_t *expected) { | |
706 | UnicodeString s16(s); | |
707 | UnicodeString expected16(expected); | |
708 | UnicodeString msg = UnicodeString("UnicodeString::toUpper/Greek(\"") + s16 + "\")"; | |
709 | UnicodeString result16(s16); | |
710 | result16.toUpper(GREEK_LOCALE_); | |
711 | assertEquals(msg, expected16, result16); | |
712 | ||
713 | msg = UnicodeString("u_strToUpper/Greek(\"") + s16 + "\") cap="; | |
714 | int32_t length = expected16.length(); | |
715 | int32_t capacities[] = { | |
716 | // Keep in sync with the UTF-8 capacities near the bottom of this function. | |
717 | 0, length / 2, length - 1, length, length + 1 | |
718 | }; | |
719 | for (int32_t i = 0; i < UPRV_LENGTHOF(capacities); ++i) { | |
720 | int32_t cap = capacities[i]; | |
721 | UChar *dest16 = result16.getBuffer(expected16.length() + 1); | |
722 | u_memset(dest16, 0x55AA, result16.getCapacity()); | |
723 | UErrorCode errorCode = U_ZERO_ERROR; | |
724 | length = u_strToUpper(dest16, cap, s16.getBuffer(), s16.length(), "el", &errorCode); | |
725 | assertEquals(msg + cap, expected16.length(), length); | |
726 | UErrorCode expectedErrorCode; | |
727 | if (cap < expected16.length()) { | |
728 | expectedErrorCode = U_BUFFER_OVERFLOW_ERROR; | |
729 | } else if (cap == expected16.length()) { | |
730 | expectedErrorCode = U_STRING_NOT_TERMINATED_WARNING; | |
731 | } else { | |
732 | expectedErrorCode = U_ZERO_ERROR; | |
733 | assertEquals(msg + cap + " NUL", 0, dest16[length]); | |
734 | } | |
735 | assertEquals(msg + cap + " errorCode", expectedErrorCode, errorCode); | |
736 | result16.releaseBuffer(length); | |
737 | if (cap >= expected16.length()) { | |
738 | assertEquals(msg + cap, expected16, result16); | |
739 | } | |
740 | } | |
741 | ||
742 | UErrorCode errorCode = U_ZERO_ERROR; | |
743 | LocalUCaseMapPointer csm(ucasemap_open("el", 0, &errorCode)); | |
744 | assertSuccess("ucasemap_open", errorCode); | |
745 | std::string s8; | |
746 | s16.toUTF8String(s8); | |
747 | msg = UnicodeString("ucasemap_utf8ToUpper/Greek(\"") + s16 + "\")"; | |
748 | char dest8[1000]; | |
749 | length = ucasemap_utf8ToUpper(csm.getAlias(), dest8, UPRV_LENGTHOF(dest8), | |
3d1f044b | 750 | s8.data(), static_cast<int32_t>(s8.length()), &errorCode); |
f3c0d7a5 A |
751 | assertSuccess("ucasemap_utf8ToUpper", errorCode); |
752 | StringPiece result8(dest8, length); | |
753 | UnicodeString result16From8 = UnicodeString::fromUTF8(result8); | |
754 | assertEquals(msg, expected16, result16From8); | |
755 | ||
756 | msg += " cap="; | |
757 | capacities[1] = length / 2; | |
758 | capacities[2] = length - 1; | |
759 | capacities[3] = length; | |
760 | capacities[4] = length + 1; | |
761 | char dest8b[1000]; | |
762 | int32_t expected8Length = length; // Assuming the previous call worked. | |
763 | for (int32_t i = 0; i < UPRV_LENGTHOF(capacities); ++i) { | |
764 | int32_t cap = capacities[i]; | |
765 | memset(dest8b, 0x5A, UPRV_LENGTHOF(dest8b)); | |
766 | UErrorCode errorCode = U_ZERO_ERROR; | |
767 | length = ucasemap_utf8ToUpper(csm.getAlias(), dest8b, cap, | |
3d1f044b | 768 | s8.data(), static_cast<int32_t>(s8.length()), &errorCode); |
f3c0d7a5 A |
769 | assertEquals(msg + cap, expected8Length, length); |
770 | UErrorCode expectedErrorCode; | |
771 | if (cap < expected8Length) { | |
772 | expectedErrorCode = U_BUFFER_OVERFLOW_ERROR; | |
773 | } else if (cap == expected8Length) { | |
774 | expectedErrorCode = U_STRING_NOT_TERMINATED_WARNING; | |
775 | } else { | |
776 | expectedErrorCode = U_ZERO_ERROR; | |
777 | // Casts to int32_t to avoid matching UBool. | |
778 | assertEquals(msg + cap + " NUL", (int32_t)0, (int32_t)dest8b[length]); | |
779 | } | |
780 | assertEquals(msg + cap + " errorCode", expectedErrorCode, errorCode); | |
781 | if (cap >= expected8Length) { | |
782 | assertEquals(msg + cap + " (memcmp)", 0, memcmp(dest8, dest8b, expected8Length)); | |
783 | } | |
784 | } | |
785 | } | |
786 | ||
787 | void | |
788 | StringCaseTest::TestGreekUpper() { | |
789 | // http://bugs.icu-project.org/trac/ticket/5456 | |
790 | assertGreekUpper(u"άδικος, κείμενο, ίριδα", u"ΑΔΙΚΟΣ, ΚΕΙΜΕΝΟ, ΙΡΙΔΑ"); | |
791 | // https://bugzilla.mozilla.org/show_bug.cgi?id=307039 | |
792 | // https://bug307039.bmoattachments.org/attachment.cgi?id=194893 | |
793 | assertGreekUpper(u"Πατάτα", u"ΠΑΤΑΤΑ"); | |
794 | assertGreekUpper(u"Αέρας, Μυστήριο, Ωραίο", u"ΑΕΡΑΣ, ΜΥΣΤΗΡΙΟ, ΩΡΑΙΟ"); | |
795 | assertGreekUpper(u"Μαΐου, Πόρος, Ρύθμιση", u"ΜΑΪΟΥ, ΠΟΡΟΣ, ΡΥΘΜΙΣΗ"); | |
796 | assertGreekUpper(u"ΰ, Τηρώ, Μάιος", u"Ϋ, ΤΗΡΩ, ΜΑΪΟΣ"); | |
797 | assertGreekUpper(u"άυλος", u"ΑΫΛΟΣ"); | |
798 | assertGreekUpper(u"ΑΫΛΟΣ", u"ΑΫΛΟΣ"); | |
799 | assertGreekUpper(u"Άκλιτα ρήματα ή άκλιτες μετοχές", u"ΑΚΛΙΤΑ ΡΗΜΑΤΑ Ή ΑΚΛΙΤΕΣ ΜΕΤΟΧΕΣ"); | |
800 | // http://www.unicode.org/udhr/d/udhr_ell_monotonic.html | |
801 | assertGreekUpper(u"Επειδή η αναγνώριση της αξιοπρέπειας", u"ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ ΤΗΣ ΑΞΙΟΠΡΕΠΕΙΑΣ"); | |
802 | assertGreekUpper(u"νομικού ή διεθνούς", u"ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ"); | |
803 | // http://unicode.org/udhr/d/udhr_ell_polytonic.html | |
804 | assertGreekUpper(u"Ἐπειδὴ ἡ ἀναγνώριση", u"ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ"); | |
805 | assertGreekUpper(u"νομικοῦ ἢ διεθνοῦς", u"ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ"); | |
806 | // From Google bug report | |
807 | assertGreekUpper(u"Νέο, Δημιουργία", u"ΝΕΟ, ΔΗΜΙΟΥΡΓΙΑ"); | |
808 | // http://crbug.com/234797 | |
809 | assertGreekUpper(u"Ελάτε να φάτε τα καλύτερα παϊδάκια!", u"ΕΛΑΤΕ ΝΑ ΦΑΤΕ ΤΑ ΚΑΛΥΤΕΡΑ ΠΑΪΔΑΚΙΑ!"); | |
810 | assertGreekUpper(u"Μαΐου, τρόλεϊ", u"ΜΑΪΟΥ, ΤΡΟΛΕΪ"); | |
811 | assertGreekUpper(u"Το ένα ή το άλλο.", u"ΤΟ ΕΝΑ Ή ΤΟ ΑΛΛΟ."); | |
812 | // http://multilingualtypesetting.co.uk/blog/greek-typesetting-tips/ | |
813 | assertGreekUpper(u"ρωμέικα", u"ΡΩΜΕΪΚΑ"); | |
0f5d89e8 | 814 | assertGreekUpper(u"ή.", u"Ή."); |
f3c0d7a5 A |
815 | } |
816 | ||
a62d09fc A |
817 | void |
818 | StringCaseTest::TestLongUpper() { | |
819 | if (quick) { | |
820 | logln("not exhaustive mode: skipping this test"); | |
821 | return; | |
822 | } | |
823 | // Ticket #12663, crash with an extremely long string where | |
824 | // U+0390 maps to 0399 0308 0301 so that the result is three times as long | |
825 | // and overflows an int32_t. | |
826 | int32_t length = 0x40000004; // more than 1G UChars | |
827 | UnicodeString s(length, (UChar32)0x390, length); | |
828 | UnicodeString result; | |
829 | UChar *dest = result.getBuffer(length + 1); | |
830 | if (s.isBogus() || dest == NULL) { | |
831 | logln("Out of memory, unable to run this test on this machine."); | |
832 | return; | |
833 | } | |
834 | IcuTestErrorCode errorCode(*this, "TestLongUpper"); | |
835 | int32_t destLength = u_strToUpper(dest, result.getCapacity(), | |
836 | s.getBuffer(), s.length(), "", errorCode); | |
837 | result.releaseBuffer(destLength); | |
838 | if (errorCode.reset() != U_INDEX_OUTOFBOUNDS_ERROR) { | |
839 | errln("expected U_INDEX_OUTOFBOUNDS_ERROR, got %s (destLength is undefined, got %ld)", | |
840 | errorCode.errorName(), (long)destLength); | |
841 | } | |
842 | } | |
f3c0d7a5 A |
843 | |
844 | void StringCaseTest::TestMalformedUTF8() { | |
845 | // ticket #12639 | |
846 | IcuTestErrorCode errorCode(*this, "TestMalformedUTF8"); | |
847 | LocalUCaseMapPointer csm(ucasemap_open("en", U_TITLECASE_NO_BREAK_ADJUSTMENT, errorCode)); | |
848 | if (errorCode.isFailure()) { | |
849 | errln("ucasemap_open(English) failed - %s", errorCode.errorName()); | |
850 | return; | |
851 | } | |
852 | char src[1] = { (char)0x85 }; // malformed UTF-8 | |
853 | char dest[3] = { 0, 0, 0 }; | |
854 | int32_t destLength; | |
855 | #if !UCONFIG_NO_BREAK_ITERATION | |
856 | destLength = ucasemap_utf8ToTitle(csm.getAlias(), dest, 3, src, 1, errorCode); | |
857 | if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) { | |
858 | errln("ucasemap_utf8ToTitle(\\x85) failed: %s destLength=%d dest[0]=0x%02x", | |
859 | errorCode.errorName(), (int)destLength, dest[0]); | |
860 | } | |
861 | #endif | |
862 | ||
863 | errorCode.reset(); | |
864 | dest[0] = 0; | |
865 | destLength = ucasemap_utf8ToLower(csm.getAlias(), dest, 3, src, 1, errorCode); | |
866 | if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) { | |
867 | errln("ucasemap_utf8ToLower(\\x85) failed: %s destLength=%d dest[0]=0x%02x", | |
868 | errorCode.errorName(), (int)destLength, dest[0]); | |
869 | } | |
870 | ||
871 | errorCode.reset(); | |
872 | dest[0] = 0; | |
873 | destLength = ucasemap_utf8ToUpper(csm.getAlias(), dest, 3, src, 1, errorCode); | |
874 | if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) { | |
875 | errln("ucasemap_utf8ToUpper(\\x85) failed: %s destLength=%d dest[0]=0x%02x", | |
876 | errorCode.errorName(), (int)destLength, dest[0]); | |
877 | } | |
878 | ||
879 | errorCode.reset(); | |
880 | dest[0] = 0; | |
881 | destLength = ucasemap_utf8FoldCase(csm.getAlias(), dest, 3, src, 1, errorCode); | |
882 | if (errorCode.isFailure() || destLength != 1 || dest[0] != src[0]) { | |
883 | errln("ucasemap_utf8FoldCase(\\x85) failed: %s destLength=%d dest[0]=0x%02x", | |
884 | errorCode.errorName(), (int)destLength, dest[0]); | |
885 | } | |
886 | } | |
887 | ||
888 | void StringCaseTest::TestBufferOverflow() { | |
889 | // Ticket #12849, incorrect result from Title Case preflight operation, | |
890 | // when buffer overflow error is expected. | |
891 | IcuTestErrorCode errorCode(*this, "TestBufferOverflow"); | |
892 | LocalUCaseMapPointer csm(ucasemap_open("en", 0, errorCode)); | |
893 | if (errorCode.isFailure()) { | |
894 | errln("ucasemap_open(English) failed - %s", errorCode.errorName()); | |
895 | return; | |
896 | } | |
897 | ||
898 | UnicodeString data("hello world"); | |
899 | int32_t result; | |
900 | #if !UCONFIG_NO_BREAK_ITERATION | |
901 | result = ucasemap_toTitle(csm.getAlias(), NULL, 0, data.getBuffer(), data.length(), errorCode); | |
902 | if (errorCode.get() != U_BUFFER_OVERFLOW_ERROR || result != data.length()) { | |
903 | errln("%s:%d ucasemap_toTitle(\"hello world\") failed: " | |
904 | "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)", | |
905 | __FILE__, __LINE__, data.length(), errorCode.errorName(), result); | |
906 | } | |
907 | #endif | |
908 | errorCode.reset(); | |
909 | ||
910 | std::string data_utf8; | |
911 | data.toUTF8String(data_utf8); | |
912 | #if !UCONFIG_NO_BREAK_ITERATION | |
3d1f044b | 913 | result = ucasemap_utf8ToTitle(csm.getAlias(), NULL, 0, data_utf8.c_str(), static_cast<int32_t>(data_utf8.length()), errorCode); |
f3c0d7a5 A |
914 | if (errorCode.get() != U_BUFFER_OVERFLOW_ERROR || result != (int32_t)data_utf8.length()) { |
915 | errln("%s:%d ucasemap_toTitle(\"hello world\") failed: " | |
916 | "expected (U_BUFFER_OVERFLOW_ERROR, %d), got (%s, %d)", | |
917 | __FILE__, __LINE__, data_utf8.length(), errorCode.errorName(), result); | |
918 | } | |
919 | #endif | |
920 | errorCode.reset(); | |
921 | } | |
922 | ||
f3c0d7a5 A |
923 | void StringCaseTest::TestEdits() { |
924 | IcuTestErrorCode errorCode(*this, "TestEdits"); | |
925 | Edits edits; | |
0f5d89e8 A |
926 | assertFalse("new Edits hasChanges", edits.hasChanges()); |
927 | assertEquals("new Edits numberOfChanges", 0, edits.numberOfChanges()); | |
f3c0d7a5 A |
928 | assertEquals("new Edits", 0, edits.lengthDelta()); |
929 | edits.addUnchanged(1); // multiple unchanged ranges are combined | |
930 | edits.addUnchanged(10000); // too long, and they are split | |
931 | edits.addReplace(0, 0); | |
932 | edits.addUnchanged(2); | |
0f5d89e8 A |
933 | assertFalse("unchanged 10003 hasChanges", edits.hasChanges()); |
934 | assertEquals("unchanged 10003 numberOfChanges", 0, edits.numberOfChanges()); | |
f3c0d7a5 | 935 | assertEquals("unchanged 10003", 0, edits.lengthDelta()); |
0f5d89e8 | 936 | edits.addReplace(2, 1); // multiple short equal-lengths edits are compressed |
f3c0d7a5 | 937 | edits.addUnchanged(0); |
0f5d89e8 A |
938 | edits.addReplace(2, 1); |
939 | edits.addReplace(2, 1); | |
f3c0d7a5 A |
940 | edits.addReplace(0, 10); |
941 | edits.addReplace(100, 0); | |
942 | edits.addReplace(3000, 4000); // variable-length encoding | |
943 | edits.addReplace(100000, 100000); | |
0f5d89e8 A |
944 | assertTrue("some edits hasChanges", edits.hasChanges()); |
945 | assertEquals("some edits numberOfChanges", 7, edits.numberOfChanges()); | |
946 | assertEquals("some edits", -3 + 10 - 100 + 1000, edits.lengthDelta()); | |
f3c0d7a5 A |
947 | UErrorCode outErrorCode = U_ZERO_ERROR; |
948 | assertFalse("edits done: copyErrorTo", edits.copyErrorTo(outErrorCode)); | |
949 | ||
950 | static const EditChange coarseExpectedChanges[] = { | |
951 | { FALSE, 10003, 10003 }, | |
0f5d89e8 | 952 | { TRUE, 103106, 104013 } |
f3c0d7a5 | 953 | }; |
0f5d89e8 | 954 | TestUtility::checkEditsIter(*this, u"coarse", |
f3c0d7a5 A |
955 | edits.getCoarseIterator(), edits.getCoarseIterator(), |
956 | coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), TRUE, errorCode); | |
0f5d89e8 | 957 | TestUtility::checkEditsIter(*this, u"coarse changes", |
f3c0d7a5 A |
958 | edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(), |
959 | coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), FALSE, errorCode); | |
960 | ||
961 | static const EditChange fineExpectedChanges[] = { | |
962 | { FALSE, 10003, 10003 }, | |
0f5d89e8 A |
963 | { TRUE, 2, 1 }, |
964 | { TRUE, 2, 1 }, | |
965 | { TRUE, 2, 1 }, | |
f3c0d7a5 A |
966 | { TRUE, 0, 10 }, |
967 | { TRUE, 100, 0 }, | |
968 | { TRUE, 3000, 4000 }, | |
969 | { TRUE, 100000, 100000 } | |
970 | }; | |
0f5d89e8 | 971 | TestUtility::checkEditsIter(*this, u"fine", |
f3c0d7a5 A |
972 | edits.getFineIterator(), edits.getFineIterator(), |
973 | fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), TRUE, errorCode); | |
0f5d89e8 | 974 | TestUtility::checkEditsIter(*this, u"fine changes", |
f3c0d7a5 A |
975 | edits.getFineChangesIterator(), edits.getFineChangesIterator(), |
976 | fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), FALSE, errorCode); | |
977 | ||
978 | edits.reset(); | |
0f5d89e8 A |
979 | assertFalse("reset hasChanges", edits.hasChanges()); |
980 | assertEquals("reset numberOfChanges", 0, edits.numberOfChanges()); | |
f3c0d7a5 A |
981 | assertEquals("reset", 0, edits.lengthDelta()); |
982 | Edits::Iterator ei = edits.getCoarseChangesIterator(); | |
983 | assertFalse("reset then iterator", ei.next(errorCode)); | |
984 | } | |
985 | ||
0f5d89e8 A |
986 | void StringCaseTest::TestCopyMoveEdits() { |
987 | IcuTestErrorCode errorCode(*this, "TestCopyMoveEdits"); | |
988 | // Exceed the stack array capacity. | |
989 | Edits a; | |
990 | for (int32_t i = 0; i < 250; ++i) { | |
991 | a.addReplace(i % 10, (i % 10) + 1); | |
992 | } | |
993 | assertEquals("a: many edits, length delta", 250, a.lengthDelta()); | |
994 | ||
995 | // copy | |
996 | Edits b(a); | |
997 | assertEquals("b: copy of many edits, length delta", 250, b.lengthDelta()); | |
998 | assertEquals("a remains: many edits, length delta", 250, a.lengthDelta()); | |
999 | TestUtility::checkEqualEdits(*this, u"b copy of a", a, b, errorCode); | |
1000 | ||
1001 | // assign | |
1002 | Edits c; | |
1003 | c.addUnchanged(99); | |
1004 | c.addReplace(88, 77); | |
1005 | c = b; | |
1006 | assertEquals("c: assigned many edits, length delta", 250, c.lengthDelta()); | |
1007 | assertEquals("b remains: many edits, length delta", 250, b.lengthDelta()); | |
1008 | TestUtility::checkEqualEdits(*this, u"c = b", b, c, errorCode); | |
1009 | ||
1010 | // std::move trouble on these platforms. | |
1011 | // See https://ssl.icu-project.org/trac/ticket/13393 | |
1012 | #if !(U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390) | |
1013 | // move constructor empties object with heap array | |
1014 | Edits d(std::move(a)); | |
1015 | assertEquals("d: move-constructed many edits, length delta", 250, d.lengthDelta()); | |
1016 | assertFalse("a moved away: no more hasChanges", a.hasChanges()); | |
1017 | TestUtility::checkEqualEdits(*this, u"d() <- a", d, b, errorCode); | |
1018 | Edits empty; | |
1019 | TestUtility::checkEqualEdits(*this, u"a moved away", empty, a, errorCode); | |
1020 | ||
1021 | // move assignment empties object with heap array | |
1022 | Edits e; | |
1023 | e.addReplace(0, 1000); | |
1024 | e = std::move(b); | |
1025 | assertEquals("e: move-assigned many edits, length delta", 250, e.lengthDelta()); | |
1026 | assertFalse("b moved away: no more hasChanges", b.hasChanges()); | |
1027 | TestUtility::checkEqualEdits(*this, u"e <- b", e, c, errorCode); | |
1028 | TestUtility::checkEqualEdits(*this, u"b moved away", empty, b, errorCode); | |
1029 | ||
1030 | // Edits::Iterator default constructor. | |
1031 | Edits::Iterator iter; | |
1032 | assertFalse("Edits::Iterator().next()", iter.next(errorCode)); | |
1033 | assertSuccess("Edits::Iterator().next()", errorCode); | |
1034 | iter = e.getFineChangesIterator(); | |
1035 | assertTrue("iter.next()", iter.next(errorCode)); | |
1036 | assertSuccess("iter.next()", errorCode); | |
1037 | assertTrue("iter.hasChange()", iter.hasChange()); | |
1038 | assertEquals("iter.newLength()", 1, iter.newLength()); | |
1039 | #endif | |
1040 | } | |
1041 | ||
1042 | void StringCaseTest::TestEditsFindFwdBwd() { | |
1043 | IcuTestErrorCode errorCode(*this, "TestEditsFindFwdBwd"); | |
1044 | // Some users need index mappings to be efficient when they are out of order. | |
1045 | // The most interesting failure case for this test is it taking a very long time. | |
1046 | Edits e; | |
1047 | constexpr int32_t N = 200000; | |
1048 | for (int32_t i = 0; i < N; ++i) { | |
1049 | e.addUnchanged(1); | |
1050 | e.addReplace(3, 1); | |
1051 | } | |
1052 | Edits::Iterator iter = e.getFineIterator(); | |
1053 | for (int32_t i = 0; i <= N; i += 2) { | |
1054 | assertEquals("ascending", i * 2, iter.sourceIndexFromDestinationIndex(i, errorCode)); | |
1055 | assertEquals("ascending", i * 2 + 1, iter.sourceIndexFromDestinationIndex(i + 1, errorCode)); | |
1056 | } | |
1057 | for (int32_t i = N; i >= 0; i -= 2) { | |
1058 | assertEquals("descending", i * 2 + 1, iter.sourceIndexFromDestinationIndex(i + 1, errorCode)); | |
1059 | assertEquals("descending", i * 2, iter.sourceIndexFromDestinationIndex(i, errorCode)); | |
1060 | } | |
1061 | } | |
1062 | ||
1063 | void StringCaseTest::TestMergeEdits() { | |
1064 | // For debugging, set -v to see matching edits up to a failure. | |
1065 | IcuTestErrorCode errorCode(*this, "TestMergeEdits"); | |
1066 | Edits ab, bc, ac, expected_ac; | |
1067 | ||
1068 | // Simple: Two parallel non-changes. | |
1069 | ab.addUnchanged(2); | |
1070 | bc.addUnchanged(2); | |
1071 | expected_ac.addUnchanged(2); | |
1072 | ||
1073 | // Simple: Two aligned changes. | |
1074 | ab.addReplace(3, 2); | |
1075 | bc.addReplace(2, 1); | |
1076 | expected_ac.addReplace(3, 1); | |
1077 | ||
1078 | // Unequal non-changes. | |
1079 | ab.addUnchanged(5); | |
1080 | bc.addUnchanged(3); | |
1081 | expected_ac.addUnchanged(3); | |
1082 | // ab ahead by 2 | |
1083 | ||
1084 | // Overlapping changes accumulate until they share a boundary. | |
1085 | ab.addReplace(4, 3); | |
1086 | bc.addReplace(3, 2); | |
1087 | ab.addReplace(4, 3); | |
1088 | bc.addReplace(3, 2); | |
1089 | ab.addReplace(4, 3); | |
1090 | bc.addReplace(3, 2); | |
1091 | bc.addUnchanged(4); | |
1092 | expected_ac.addReplace(14, 8); | |
1093 | // bc ahead by 2 | |
1094 | ||
1095 | // Balance out intermediate-string lengths. | |
1096 | ab.addUnchanged(2); | |
1097 | expected_ac.addUnchanged(2); | |
1098 | ||
1099 | // Insert something and delete it: Should disappear. | |
1100 | ab.addReplace(0, 5); | |
1101 | ab.addReplace(0, 2); | |
1102 | bc.addReplace(7, 0); | |
1103 | ||
1104 | // Parallel change to make a new boundary. | |
1105 | ab.addReplace(1, 2); | |
1106 | bc.addReplace(2, 3); | |
1107 | expected_ac.addReplace(1, 3); | |
1108 | ||
1109 | // Multiple ab deletions should remain separate at the boundary. | |
1110 | ab.addReplace(1, 0); | |
1111 | ab.addReplace(2, 0); | |
1112 | ab.addReplace(3, 0); | |
1113 | expected_ac.addReplace(1, 0); | |
1114 | expected_ac.addReplace(2, 0); | |
1115 | expected_ac.addReplace(3, 0); | |
1116 | ||
1117 | // Unequal non-changes can be split for another boundary. | |
1118 | ab.addUnchanged(2); | |
1119 | bc.addUnchanged(1); | |
1120 | expected_ac.addUnchanged(1); | |
1121 | // ab ahead by 1 | |
1122 | ||
1123 | // Multiple bc insertions should create a boundary and remain separate. | |
1124 | bc.addReplace(0, 4); | |
1125 | bc.addReplace(0, 5); | |
1126 | bc.addReplace(0, 6); | |
1127 | expected_ac.addReplace(0, 4); | |
1128 | expected_ac.addReplace(0, 5); | |
1129 | expected_ac.addReplace(0, 6); | |
1130 | // ab ahead by 1 | |
1131 | ||
1132 | // Multiple ab deletions in the middle of a bc change are merged. | |
1133 | bc.addReplace(2, 2); | |
1134 | // bc ahead by 1 | |
1135 | ab.addReplace(1, 0); | |
1136 | ab.addReplace(2, 0); | |
1137 | ab.addReplace(3, 0); | |
1138 | ab.addReplace(4, 1); | |
1139 | expected_ac.addReplace(11, 2); | |
1140 | ||
1141 | // Multiple bc insertions in the middle of an ab change are merged. | |
1142 | ab.addReplace(5, 6); | |
1143 | bc.addReplace(3, 3); | |
1144 | // ab ahead by 3 | |
1145 | bc.addReplace(0, 4); | |
1146 | bc.addReplace(0, 5); | |
1147 | bc.addReplace(0, 6); | |
1148 | bc.addReplace(3, 7); | |
1149 | expected_ac.addReplace(5, 25); | |
1150 | ||
1151 | // Delete around a deletion. | |
1152 | ab.addReplace(4, 4); | |
1153 | ab.addReplace(3, 0); | |
1154 | ab.addUnchanged(2); | |
1155 | bc.addReplace(2, 2); | |
1156 | bc.addReplace(4, 0); | |
1157 | expected_ac.addReplace(9, 2); | |
1158 | ||
1159 | // Insert into an insertion. | |
1160 | ab.addReplace(0, 2); | |
1161 | bc.addReplace(1, 1); | |
1162 | bc.addReplace(0, 8); | |
1163 | bc.addUnchanged(4); | |
1164 | expected_ac.addReplace(0, 10); | |
1165 | // bc ahead by 3 | |
1166 | ||
1167 | // Balance out intermediate-string lengths. | |
1168 | ab.addUnchanged(3); | |
1169 | expected_ac.addUnchanged(3); | |
1170 | ||
1171 | // Deletions meet insertions. | |
1172 | // Output order is arbitrary in principle, but we expect insertions first | |
1173 | // and want to keep it that way. | |
1174 | ab.addReplace(2, 0); | |
1175 | ab.addReplace(4, 0); | |
1176 | ab.addReplace(6, 0); | |
1177 | bc.addReplace(0, 1); | |
1178 | bc.addReplace(0, 3); | |
1179 | bc.addReplace(0, 5); | |
1180 | expected_ac.addReplace(0, 1); | |
1181 | expected_ac.addReplace(0, 3); | |
1182 | expected_ac.addReplace(0, 5); | |
1183 | expected_ac.addReplace(2, 0); | |
1184 | expected_ac.addReplace(4, 0); | |
1185 | expected_ac.addReplace(6, 0); | |
1186 | ||
1187 | // End with a non-change, so that further edits are never reordered. | |
1188 | ab.addUnchanged(1); | |
1189 | bc.addUnchanged(1); | |
1190 | expected_ac.addUnchanged(1); | |
1191 | ||
1192 | ac.mergeAndAppend(ab, bc, errorCode); | |
1193 | assertSuccess("ab+bc", errorCode); | |
1194 | if (!TestUtility::checkEqualEdits(*this, u"ab+bc", expected_ac, ac, errorCode)) { | |
1195 | return; | |
1196 | } | |
1197 | ||
1198 | // Append more Edits. | |
1199 | Edits ab2, bc2; | |
1200 | ab2.addUnchanged(5); | |
1201 | bc2.addReplace(1, 2); | |
1202 | bc2.addUnchanged(4); | |
1203 | expected_ac.addReplace(1, 2); | |
1204 | expected_ac.addUnchanged(4); | |
1205 | ac.mergeAndAppend(ab2, bc2, errorCode); | |
1206 | assertSuccess("ab2+bc2", errorCode); | |
1207 | if (!TestUtility::checkEqualEdits(*this, u"ab2+bc2", expected_ac, ac, errorCode)) { | |
1208 | return; | |
1209 | } | |
1210 | ||
1211 | // Append empty edits. | |
1212 | Edits empty; | |
1213 | ac.mergeAndAppend(empty, empty, errorCode); | |
1214 | assertSuccess("empty+empty", errorCode); | |
1215 | if (!TestUtility::checkEqualEdits(*this, u"empty+empty", expected_ac, ac, errorCode)) { | |
1216 | return; | |
1217 | } | |
1218 | ||
1219 | // Error: Append more edits with mismatched intermediate-string lengths. | |
1220 | Edits mismatch; | |
1221 | mismatch.addReplace(1, 1); | |
1222 | ac.mergeAndAppend(ab2, mismatch, errorCode); | |
1223 | assertEquals("ab2+mismatch", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get()); | |
1224 | errorCode.reset(); | |
1225 | ac.mergeAndAppend(mismatch, bc2, errorCode); | |
1226 | assertEquals("mismatch+bc2", U_ILLEGAL_ARGUMENT_ERROR, errorCode.get()); | |
1227 | errorCode.reset(); | |
1228 | } | |
1229 | ||
f3c0d7a5 | 1230 | void StringCaseTest::TestCaseMapWithEdits() { |
0f5d89e8 | 1231 | IcuTestErrorCode errorCode(*this, "TestCaseMapWithEdits"); |
f3c0d7a5 A |
1232 | UChar dest[20]; |
1233 | Edits edits; | |
1234 | ||
0f5d89e8 | 1235 | int32_t length = CaseMap::toLower("tr", U_OMIT_UNCHANGED_TEXT, |
f3c0d7a5 A |
1236 | u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode); |
1237 | assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"), UnicodeString(TRUE, dest, length)); | |
1238 | static const EditChange lowerExpectedChanges[] = { | |
1239 | { TRUE, 1, 1 }, | |
1240 | { FALSE, 4, 4 }, | |
1241 | { TRUE, 1, 1 }, | |
1242 | { FALSE, 2, 2 } | |
1243 | }; | |
0f5d89e8 | 1244 | TestUtility::checkEditsIter(*this, u"toLower(IstanBul)", |
f3c0d7a5 A |
1245 | edits.getFineIterator(), edits.getFineIterator(), |
1246 | lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges), | |
1247 | TRUE, errorCode); | |
1248 | ||
1249 | edits.reset(); | |
0f5d89e8 | 1250 | length = CaseMap::toUpper("el", U_OMIT_UNCHANGED_TEXT, |
f3c0d7a5 A |
1251 | u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode); |
1252 | assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"), UnicodeString(TRUE, dest, length)); | |
1253 | static const EditChange upperExpectedChanges[] = { | |
1254 | { FALSE, 1, 1 }, | |
1255 | { TRUE, 1, 1 }, | |
1256 | { TRUE, 1, 1 }, | |
1257 | { TRUE, 1, 1 }, | |
1258 | { TRUE, 1, 1 }, | |
1259 | { TRUE, 1, 1 } | |
1260 | }; | |
0f5d89e8 | 1261 | TestUtility::checkEditsIter(*this, u"toUpper(Πατάτα)", |
f3c0d7a5 A |
1262 | edits.getFineIterator(), edits.getFineIterator(), |
1263 | upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges), | |
1264 | TRUE, errorCode); | |
1265 | ||
1266 | edits.reset(); | |
1267 | ||
1268 | #if !UCONFIG_NO_BREAK_ITERATION | |
1269 | length = CaseMap::toTitle("nl", | |
0f5d89e8 | 1270 | U_OMIT_UNCHANGED_TEXT | |
f3c0d7a5 A |
1271 | U_TITLECASE_NO_BREAK_ADJUSTMENT | |
1272 | U_TITLECASE_NO_LOWERCASE, | |
0f5d89e8 | 1273 | nullptr, u"IjssEL IglOo", 12, |
f3c0d7a5 A |
1274 | dest, UPRV_LENGTHOF(dest), &edits, errorCode); |
1275 | assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"), UnicodeString(TRUE, dest, length)); | |
1276 | static const EditChange titleExpectedChanges[] = { | |
1277 | { FALSE, 1, 1 }, | |
1278 | { TRUE, 1, 1 }, | |
1279 | { FALSE, 10, 10 } | |
1280 | }; | |
0f5d89e8 | 1281 | TestUtility::checkEditsIter(*this, u"toTitle(IjssEL IglOo)", |
f3c0d7a5 A |
1282 | edits.getFineIterator(), edits.getFineIterator(), |
1283 | titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges), | |
1284 | TRUE, errorCode); | |
1285 | #endif | |
1286 | ||
0f5d89e8 A |
1287 | // No explicit nor automatic edits.reset(). Edits should be appended. |
1288 | length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_EDITS_NO_RESET | U_FOLD_CASE_EXCLUDE_SPECIAL_I, | |
f3c0d7a5 A |
1289 | u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode); |
1290 | assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"), UnicodeString(TRUE, dest, length)); | |
1291 | static const EditChange foldExpectedChanges[] = { | |
0f5d89e8 A |
1292 | #if !UCONFIG_NO_BREAK_ITERATION |
1293 | // From titlecasing. | |
1294 | { FALSE, 1, 1 }, | |
1295 | { TRUE, 1, 1 }, | |
1296 | { FALSE, 10, 10 }, | |
1297 | #endif | |
1298 | // From case folding. | |
f3c0d7a5 A |
1299 | { TRUE, 1, 1 }, |
1300 | { TRUE, 1, 2 }, | |
1301 | { FALSE, 3, 3 }, | |
1302 | { TRUE, 1, 1 }, | |
1303 | { FALSE, 2, 2 } | |
1304 | }; | |
0f5d89e8 | 1305 | TestUtility::checkEditsIter(*this, u"foldCase(no Edits reset, IßtanBul)", |
f3c0d7a5 A |
1306 | edits.getFineIterator(), edits.getFineIterator(), |
1307 | foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges), | |
1308 | TRUE, errorCode); | |
1309 | } | |
1310 | ||
1311 | void StringCaseTest::TestCaseMapUTF8WithEdits() { | |
0f5d89e8 | 1312 | IcuTestErrorCode errorCode(*this, "TestCaseMapUTF8WithEdits"); |
f3c0d7a5 A |
1313 | char dest[50]; |
1314 | Edits edits; | |
1315 | ||
0f5d89e8 | 1316 | int32_t length = CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT, |
340931cb | 1317 | reinterpret_cast<const char*>(u8"IstanBul"), 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode); |
f3c0d7a5 A |
1318 | assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"), |
1319 | UnicodeString::fromUTF8(StringPiece(dest, length))); | |
1320 | static const EditChange lowerExpectedChanges[] = { | |
1321 | { TRUE, 1, 2 }, | |
1322 | { FALSE, 4, 4 }, | |
1323 | { TRUE, 1, 1 }, | |
1324 | { FALSE, 2, 2 } | |
1325 | }; | |
0f5d89e8 | 1326 | TestUtility::checkEditsIter(*this, u"toLower(IstanBul)", |
f3c0d7a5 A |
1327 | edits.getFineIterator(), edits.getFineIterator(), |
1328 | lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges), | |
1329 | TRUE, errorCode); | |
1330 | ||
1331 | edits.reset(); | |
0f5d89e8 | 1332 | length = CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT, |
340931cb | 1333 | reinterpret_cast<const char*>(u8"Πατάτα"), 6 * 2, dest, UPRV_LENGTHOF(dest), &edits, errorCode); |
f3c0d7a5 A |
1334 | assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"), |
1335 | UnicodeString::fromUTF8(StringPiece(dest, length))); | |
1336 | static const EditChange upperExpectedChanges[] = { | |
1337 | { FALSE, 2, 2 }, | |
1338 | { TRUE, 2, 2 }, | |
1339 | { TRUE, 2, 2 }, | |
1340 | { TRUE, 2, 2 }, | |
1341 | { TRUE, 2, 2 }, | |
1342 | { TRUE, 2, 2 } | |
1343 | }; | |
0f5d89e8 | 1344 | TestUtility::checkEditsIter(*this, u"toUpper(Πατάτα)", |
f3c0d7a5 A |
1345 | edits.getFineIterator(), edits.getFineIterator(), |
1346 | upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges), | |
1347 | TRUE, errorCode); | |
1348 | ||
1349 | edits.reset(); | |
1350 | #if !UCONFIG_NO_BREAK_ITERATION | |
1351 | length = CaseMap::utf8ToTitle("nl", | |
0f5d89e8 | 1352 | U_OMIT_UNCHANGED_TEXT | |
f3c0d7a5 A |
1353 | U_TITLECASE_NO_BREAK_ADJUSTMENT | |
1354 | U_TITLECASE_NO_LOWERCASE, | |
340931cb | 1355 | nullptr, reinterpret_cast<const char*>(u8"IjssEL IglOo"), 12, |
f3c0d7a5 A |
1356 | dest, UPRV_LENGTHOF(dest), &edits, errorCode); |
1357 | assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"), | |
1358 | UnicodeString::fromUTF8(StringPiece(dest, length))); | |
1359 | static const EditChange titleExpectedChanges[] = { | |
1360 | { FALSE, 1, 1 }, | |
1361 | { TRUE, 1, 1 }, | |
1362 | { FALSE, 10, 10 } | |
1363 | }; | |
0f5d89e8 | 1364 | TestUtility::checkEditsIter(*this, u"toTitle(IjssEL IglOo)", |
f3c0d7a5 A |
1365 | edits.getFineIterator(), edits.getFineIterator(), |
1366 | titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges), | |
1367 | TRUE, errorCode); | |
1368 | #endif | |
1369 | ||
0f5d89e8 A |
1370 | // No explicit nor automatic edits.reset(). Edits should be appended. |
1371 | length = CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_EDITS_NO_RESET | | |
1372 | U_FOLD_CASE_EXCLUDE_SPECIAL_I, | |
340931cb | 1373 | reinterpret_cast<const char*>(u8"IßtanBul"), 1 + 2 + 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode); |
f3c0d7a5 A |
1374 | assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"), |
1375 | UnicodeString::fromUTF8(StringPiece(dest, length))); | |
1376 | static const EditChange foldExpectedChanges[] = { | |
0f5d89e8 A |
1377 | #if !UCONFIG_NO_BREAK_ITERATION |
1378 | // From titlecasing. | |
1379 | { FALSE, 1, 1 }, | |
1380 | { TRUE, 1, 1 }, | |
1381 | { FALSE, 10, 10 }, | |
1382 | #endif | |
1383 | // From case folding. | |
f3c0d7a5 A |
1384 | { TRUE, 1, 2 }, |
1385 | { TRUE, 2, 2 }, | |
1386 | { FALSE, 3, 3 }, | |
1387 | { TRUE, 1, 1 }, | |
1388 | { FALSE, 2, 2 } | |
1389 | }; | |
0f5d89e8 | 1390 | TestUtility::checkEditsIter(*this, u"foldCase(IßtanBul)", |
f3c0d7a5 A |
1391 | edits.getFineIterator(), edits.getFineIterator(), |
1392 | foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges), | |
1393 | TRUE, errorCode); | |
1394 | } | |
1395 | ||
0f5d89e8 A |
1396 | void StringCaseTest::TestCaseMapToString() { |
1397 | // This test function name is parallel with one in UCharacterCaseTest.java. | |
1398 | // It is a bit of a misnomer until we have CaseMap API that writes to | |
1399 | // a UnicodeString, at which point we should change this code here. | |
1400 | IcuTestErrorCode errorCode(*this, "TestCaseMapToString"); | |
1401 | UChar dest[20]; | |
1402 | ||
1403 | // Omit unchanged text. | |
1404 | int32_t length = CaseMap::toLower("tr", U_OMIT_UNCHANGED_TEXT, | |
1405 | u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); | |
1406 | assertEquals(u"toLower(IstanBul)", | |
1407 | UnicodeString(u"ıb"), UnicodeString(TRUE, dest, length)); | |
1408 | length = CaseMap::toUpper("el", U_OMIT_UNCHANGED_TEXT, | |
1409 | u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); | |
1410 | assertEquals(u"toUpper(Πατάτα)", | |
1411 | UnicodeString(u"ΑΤΑΤΑ"), UnicodeString(TRUE, dest, length)); | |
1412 | #if !UCONFIG_NO_BREAK_ITERATION | |
1413 | length = CaseMap::toTitle("nl", | |
1414 | U_OMIT_UNCHANGED_TEXT | | |
1415 | U_TITLECASE_NO_BREAK_ADJUSTMENT | | |
1416 | U_TITLECASE_NO_LOWERCASE, | |
1417 | nullptr, u"IjssEL IglOo", 12, | |
1418 | dest, UPRV_LENGTHOF(dest), nullptr, errorCode); | |
1419 | assertEquals(u"toTitle(IjssEL IglOo)", | |
1420 | UnicodeString(u"J"), UnicodeString(TRUE, dest, length)); | |
1421 | #endif | |
1422 | length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I, | |
1423 | u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); | |
1424 | assertEquals(u"foldCase(IßtanBul)", | |
1425 | UnicodeString(u"ıssb"), UnicodeString(TRUE, dest, length)); | |
1426 | ||
1427 | // Return the whole result string. | |
1428 | length = CaseMap::toLower("tr", 0, | |
1429 | u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); | |
1430 | assertEquals(u"toLower(IstanBul)", | |
1431 | UnicodeString(u"ıstanbul"), UnicodeString(TRUE, dest, length)); | |
1432 | length = CaseMap::toUpper("el", 0, | |
1433 | u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); | |
1434 | assertEquals(u"toUpper(Πατάτα)", | |
1435 | UnicodeString(u"ΠΑΤΑΤΑ"), UnicodeString(TRUE, dest, length)); | |
1436 | #if !UCONFIG_NO_BREAK_ITERATION | |
1437 | length = CaseMap::toTitle("nl", | |
1438 | U_TITLECASE_NO_BREAK_ADJUSTMENT | | |
1439 | U_TITLECASE_NO_LOWERCASE, | |
1440 | nullptr, u"IjssEL IglOo", 12, | |
1441 | dest, UPRV_LENGTHOF(dest), nullptr, errorCode); | |
1442 | assertEquals(u"toTitle(IjssEL IglOo)", | |
1443 | UnicodeString(u"IJssEL IglOo"), UnicodeString(TRUE, dest, length)); | |
1444 | #endif | |
1445 | length = CaseMap::fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I, | |
1446 | u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); | |
1447 | assertEquals(u"foldCase(IßtanBul)", | |
1448 | UnicodeString(u"ısstanbul"), UnicodeString(TRUE, dest, length)); | |
1449 | } | |
1450 | ||
1451 | void StringCaseTest::TestCaseMapUTF8ToString() { | |
1452 | IcuTestErrorCode errorCode(*this, "TestCaseMapUTF8ToString"); | |
1453 | std::string dest; | |
1454 | StringByteSink<std::string> sink(&dest); | |
1455 | ||
1456 | // Omit unchanged text. | |
340931cb | 1457 | CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT, reinterpret_cast<const char*>(u8"IstanBul"), sink, nullptr, errorCode); |
0f5d89e8 A |
1458 | assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"), UnicodeString::fromUTF8(dest)); |
1459 | dest.clear(); | |
340931cb | 1460 | CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT, reinterpret_cast<const char*>(u8"Πατάτα"), sink, nullptr, errorCode); |
0f5d89e8 A |
1461 | assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"), |
1462 | UnicodeString::fromUTF8(dest)); | |
1463 | #if !UCONFIG_NO_BREAK_ITERATION | |
1464 | dest.clear(); | |
1465 | CaseMap::utf8ToTitle( | |
1466 | "nl", U_OMIT_UNCHANGED_TEXT | U_TITLECASE_NO_BREAK_ADJUSTMENT | U_TITLECASE_NO_LOWERCASE, | |
340931cb | 1467 | nullptr, reinterpret_cast<const char*>(u8"IjssEL IglOo"), sink, nullptr, errorCode); |
0f5d89e8 A |
1468 | assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"), |
1469 | UnicodeString::fromUTF8(dest)); | |
1470 | #endif | |
1471 | dest.clear(); | |
1472 | CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I, | |
340931cb | 1473 | reinterpret_cast<const char*>(u8"IßtanBul"), sink, nullptr, errorCode); |
0f5d89e8 A |
1474 | assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"), |
1475 | UnicodeString::fromUTF8(dest)); | |
1476 | ||
1477 | // Return the whole result string. | |
1478 | dest.clear(); | |
340931cb | 1479 | CaseMap::utf8ToLower("tr", 0, reinterpret_cast<const char*>(u8"IstanBul"), sink, nullptr, errorCode); |
0f5d89e8 A |
1480 | assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıstanbul"), |
1481 | UnicodeString::fromUTF8(dest)); | |
1482 | dest.clear(); | |
340931cb | 1483 | CaseMap::utf8ToUpper("el", 0, reinterpret_cast<const char*>(u8"Πατάτα"), sink, nullptr, errorCode); |
0f5d89e8 A |
1484 | assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΠΑΤΑΤΑ"), |
1485 | UnicodeString::fromUTF8(dest)); | |
1486 | #if !UCONFIG_NO_BREAK_ITERATION | |
1487 | dest.clear(); | |
1488 | CaseMap::utf8ToTitle("nl", U_TITLECASE_NO_BREAK_ADJUSTMENT | U_TITLECASE_NO_LOWERCASE, | |
340931cb | 1489 | nullptr, reinterpret_cast<const char*>(u8"IjssEL IglOo"), sink, nullptr, errorCode); |
0f5d89e8 A |
1490 | assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"IJssEL IglOo"), |
1491 | UnicodeString::fromUTF8(dest)); | |
1492 | #endif | |
1493 | dest.clear(); | |
340931cb | 1494 | CaseMap::utf8Fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I, reinterpret_cast<const char*>(u8"IßtanBul"), sink, nullptr, errorCode); |
0f5d89e8 A |
1495 | assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ısstanbul"), |
1496 | UnicodeString::fromUTF8(dest)); | |
1497 | } | |
1498 | ||
f3c0d7a5 A |
1499 | void StringCaseTest::TestLongUnicodeString() { |
1500 | // Code coverage for UnicodeString case mapping code handling | |
1501 | // long strings or many changes in a string. | |
1502 | UnicodeString s(TRUE, | |
1503 | (const UChar *) | |
1504 | u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF" | |
1505 | u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF" | |
1506 | u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF" | |
1507 | u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF" | |
1508 | u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF" | |
1509 | u"aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeF", 6 * 51); | |
1510 | UnicodeString expected(TRUE, | |
1511 | (const UChar *) | |
1512 | u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF" | |
1513 | u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF" | |
1514 | u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF" | |
1515 | u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF" | |
1516 | u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF" | |
1517 | u"AAAAAAAAAABBBBBBBBBBCCCCCCCCCCDDDDDDDDDDEEEEEEEEEEF", 6 * 51); | |
1518 | s.toUpper(Locale::getRoot()); | |
1519 | assertEquals("string length 306", expected, s); | |
1520 | } | |
1521 | ||
0f5d89e8 | 1522 | #if !UCONFIG_NO_BREAK_ITERATION |
f3c0d7a5 A |
1523 | void StringCaseTest::TestBug13127() { |
1524 | // Test case crashed when the bug was present. | |
1525 | const char16_t *s16 = u"日本語"; | |
1526 | UnicodeString s(TRUE, s16, -1); | |
1527 | s.toTitle(0, Locale::getEnglish()); | |
1528 | } | |
0f5d89e8 A |
1529 | |
1530 | void StringCaseTest::TestInPlaceTitle() { | |
1531 | // Similar to TestBug13127. u_strToTitle() can modify the buffer in-place. | |
1532 | IcuTestErrorCode errorCode(*this, "TestInPlaceTitle"); | |
1533 | char16_t s[32] = u"ß ß ß日本語 abcdef"; | |
1534 | const char16_t *expected = u"Ss Ss Ss日本語 Abcdef"; | |
1535 | int32_t length = u_strToTitle(s, UPRV_LENGTHOF(s), s, -1, nullptr, "", errorCode); | |
1536 | assertEquals("u_strToTitle(in-place) length", u_strlen(expected), length); | |
1537 | assertEquals("u_strToTitle(in-place)", expected, s); | |
1538 | } | |
1539 | #endif | |
1540 | ||
1541 | void StringCaseTest::TestCaseMapEditsIteratorDocs() { | |
1542 | IcuTestErrorCode status(*this, "TestCaseMapEditsIteratorDocs"); | |
1543 | const char16_t* input = u"abcßDeF"; | |
1544 | int32_t inputLength = u_strlen(input); | |
1545 | // output: "abcssdef" | |
1546 | ||
1547 | char16_t output[10]; | |
1548 | Edits edits; | |
1549 | CaseMap::fold(0, input, -1, output, 10, &edits, status); | |
1550 | ||
1551 | static const char16_t* fineIteratorExpected[] = { | |
1552 | u"{ src[0..3] ≡ dest[0..3] (no-change) }", | |
1553 | u"{ src[3..4] ⇝ dest[3..5], repl[0..2] }", | |
1554 | u"{ src[4..5] ⇝ dest[5..6], repl[2..3] }", | |
1555 | u"{ src[5..6] ≡ dest[6..7] (no-change) }", | |
1556 | u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }", | |
1557 | }; | |
1558 | static const char16_t* fineChangesIteratorExpected[] = { | |
1559 | u"{ src[3..4] ⇝ dest[3..5], repl[0..2] }", | |
1560 | u"{ src[4..5] ⇝ dest[5..6], repl[2..3] }", | |
1561 | u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }", | |
1562 | }; | |
1563 | static const char16_t* coarseIteratorExpected[] = { | |
1564 | u"{ src[0..3] ≡ dest[0..3] (no-change) }", | |
1565 | u"{ src[3..5] ⇝ dest[3..6], repl[0..3] }", | |
1566 | u"{ src[5..6] ≡ dest[6..7] (no-change) }", | |
1567 | u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }", | |
1568 | }; | |
1569 | static const char16_t* coarseChangesIteratorExpected[] = { | |
1570 | u"{ src[3..5] ⇝ dest[3..6], repl[0..3] }", | |
1571 | u"{ src[6..7] ⇝ dest[7..8], repl[3..4] }", | |
1572 | }; | |
1573 | ||
1574 | // Expected destination indices when source index is queried | |
1575 | static int32_t expectedDestFineEditIndices[] = {0, 0, 0, 3, 5, 6, 7}; | |
1576 | static int32_t expectedDestCoarseEditIndices[] = {0, 0, 0, 3, 3, 6, 7}; | |
1577 | static int32_t expectedDestFineStringIndices[] = {0, 1, 2, 3, 5, 6, 7}; | |
1578 | static int32_t expectedDestCoarseStringIndices[] = {0, 1, 2, 3, 6, 6, 7}; | |
1579 | ||
1580 | // Expected source indices when destination index is queried | |
1581 | static int32_t expectedSrcFineEditIndices[] = { 0, 0, 0, 3, 3, 4, 5, 6 }; | |
1582 | static int32_t expectedSrcCoarseEditIndices[] = { 0, 0, 0, 3, 3, 3, 5, 6 }; | |
1583 | static int32_t expectedSrcFineStringIndices[] = { 0, 1, 2, 3, 4, 4, 5, 6 }; | |
1584 | static int32_t expectedSrcCoarseStringIndices[] = { 0, 1, 2, 3, 5, 5, 5, 6 }; | |
1585 | ||
1586 | // Demonstrate the iterator next() method: | |
1587 | Edits::Iterator fineIterator = edits.getFineIterator(); | |
1588 | int i = 0; | |
1589 | UnicodeString toString; | |
1590 | while (fineIterator.next(status)) { | |
1591 | UnicodeString expected = fineIteratorExpected[i++]; | |
1592 | assertEquals(UnicodeString(u"Iteration #") + i, | |
1593 | expected, | |
1594 | fineIterator.toString(toString.remove())); | |
1595 | } | |
1596 | Edits::Iterator fineChangesIterator = edits.getFineChangesIterator(); | |
1597 | i = 0; | |
1598 | while (fineChangesIterator.next(status)) { | |
1599 | UnicodeString expected = fineChangesIteratorExpected[i++]; | |
1600 | assertEquals(UnicodeString(u"Iteration #") + i, | |
1601 | expected, | |
1602 | fineChangesIterator.toString(toString.remove())); | |
1603 | } | |
1604 | Edits::Iterator coarseIterator = edits.getCoarseIterator(); | |
1605 | i = 0; | |
1606 | while (coarseIterator.next(status)) { | |
1607 | UnicodeString expected = coarseIteratorExpected[i++]; | |
1608 | assertEquals(UnicodeString(u"Iteration #") + i, | |
1609 | expected, | |
1610 | coarseIterator.toString(toString.remove())); | |
1611 | } | |
1612 | Edits::Iterator coarseChangesIterator = edits.getCoarseChangesIterator(); | |
1613 | i = 0; | |
1614 | while (coarseChangesIterator.next(status)) { | |
1615 | UnicodeString expected = coarseChangesIteratorExpected[i++]; | |
1616 | assertEquals(UnicodeString(u"Iteration #") + i, | |
1617 | expected, | |
1618 | coarseChangesIterator.toString(toString.remove())); | |
1619 | } | |
1620 | ||
1621 | // Demonstrate the iterator indexing methods: | |
1622 | // fineIterator should have the same behavior as fineChangesIterator, and | |
1623 | // coarseIterator should have the same behavior as coarseChangesIterator. | |
1624 | for (int32_t srcIndex=0; srcIndex<inputLength; srcIndex++) { | |
1625 | fineIterator.findSourceIndex(srcIndex, status); | |
1626 | fineChangesIterator.findSourceIndex(srcIndex, status); | |
1627 | coarseIterator.findSourceIndex(srcIndex, status); | |
1628 | coarseChangesIterator.findSourceIndex(srcIndex, status); | |
1629 | ||
1630 | assertEquals(UnicodeString("Source index: ") + srcIndex, | |
1631 | expectedDestFineEditIndices[srcIndex], | |
1632 | fineIterator.destinationIndex()); | |
1633 | assertEquals(UnicodeString("Source index: ") + srcIndex, | |
1634 | expectedDestFineEditIndices[srcIndex], | |
1635 | fineChangesIterator.destinationIndex()); | |
1636 | assertEquals(UnicodeString("Source index: ") + srcIndex, | |
1637 | expectedDestCoarseEditIndices[srcIndex], | |
1638 | coarseIterator.destinationIndex()); | |
1639 | assertEquals(UnicodeString("Source index: ") + srcIndex, | |
1640 | expectedDestCoarseEditIndices[srcIndex], | |
1641 | coarseChangesIterator.destinationIndex()); | |
1642 | ||
1643 | assertEquals(UnicodeString("Source index: ") + srcIndex, | |
1644 | expectedDestFineStringIndices[srcIndex], | |
1645 | fineIterator.destinationIndexFromSourceIndex(srcIndex, status)); | |
1646 | assertEquals(UnicodeString("Source index: ") + srcIndex, | |
1647 | expectedDestFineStringIndices[srcIndex], | |
1648 | fineChangesIterator.destinationIndexFromSourceIndex(srcIndex, status)); | |
1649 | assertEquals(UnicodeString("Source index: ") + srcIndex, | |
1650 | expectedDestCoarseStringIndices[srcIndex], | |
1651 | coarseIterator.destinationIndexFromSourceIndex(srcIndex, status)); | |
1652 | assertEquals(UnicodeString("Source index: ") + srcIndex, | |
1653 | expectedDestCoarseStringIndices[srcIndex], | |
1654 | coarseChangesIterator.destinationIndexFromSourceIndex(srcIndex, status)); | |
1655 | } | |
1656 | for (int32_t destIndex=0; destIndex<inputLength; destIndex++) { | |
1657 | fineIterator.findDestinationIndex(destIndex, status); | |
1658 | fineChangesIterator.findDestinationIndex(destIndex, status); | |
1659 | coarseIterator.findDestinationIndex(destIndex, status); | |
1660 | coarseChangesIterator.findDestinationIndex(destIndex, status); | |
1661 | ||
1662 | assertEquals(UnicodeString("Destination index: ") + destIndex, | |
1663 | expectedSrcFineEditIndices[destIndex], | |
1664 | fineIterator.sourceIndex()); | |
1665 | assertEquals(UnicodeString("Destination index: ") + destIndex, | |
1666 | expectedSrcFineEditIndices[destIndex], | |
1667 | fineChangesIterator.sourceIndex()); | |
1668 | assertEquals(UnicodeString("Destination index: ") + destIndex, | |
1669 | expectedSrcCoarseEditIndices[destIndex], | |
1670 | coarseIterator.sourceIndex()); | |
1671 | assertEquals(UnicodeString("Destination index: ") + destIndex, | |
1672 | expectedSrcCoarseEditIndices[destIndex], | |
1673 | coarseChangesIterator.sourceIndex()); | |
1674 | ||
1675 | assertEquals(UnicodeString("Destination index: ") + destIndex, | |
1676 | expectedSrcFineStringIndices[destIndex], | |
1677 | fineIterator.sourceIndexFromDestinationIndex(destIndex, status)); | |
1678 | assertEquals(UnicodeString("Destination index: ") + destIndex, | |
1679 | expectedSrcFineStringIndices[destIndex], | |
1680 | fineChangesIterator.sourceIndexFromDestinationIndex(destIndex, status)); | |
1681 | assertEquals(UnicodeString("Destination index: ") + destIndex, | |
1682 | expectedSrcCoarseStringIndices[destIndex], | |
1683 | coarseIterator.sourceIndexFromDestinationIndex(destIndex, status)); | |
1684 | assertEquals(UnicodeString("Destination index: ") + destIndex, | |
1685 | expectedSrcCoarseStringIndices[destIndex], | |
1686 | coarseChangesIterator.sourceIndexFromDestinationIndex(destIndex, status)); | |
1687 | } | |
1688 | } | |
1689 | ||
1690 | void StringCaseTest::TestCaseMapGreekExtended() { | |
1691 | // Ticket 13851 | |
1692 | UnicodeString s(u"\u1F80\u1F88\u1FFC"); | |
1693 | UnicodeString result(s); | |
1694 | result.toLower(Locale::getRoot()); | |
1695 | assertEquals(u"lower", u"\u1F80\u1F80\u1FF3", result); | |
1696 | #if !UCONFIG_NO_BREAK_ITERATION | |
1697 | result = s; | |
1698 | result.toTitle(nullptr, Locale::getRoot()); | |
1699 | assertEquals(u"title", u"\u1F88\u1F80\u1FF3", result); | |
1700 | #endif | |
1701 | } | |
1702 | ||
1703 | //#endif |