]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
b75a7d8f | 3 | /******************************************************************** |
729e4ab9 | 4 | * COPYRIGHT: |
2ca993e8 | 5 | * Copyright (c) 1997-2016, International Business Machines Corporation and |
b75a7d8f A |
6 | * others. All Rights Reserved. |
7 | ********************************************************************/ | |
8 | ||
9 | #include "unicode/utypes.h" | |
10 | ||
11 | #if !UCONFIG_NO_COLLATION | |
12 | ||
13 | #include "unicode/coll.h" | |
57a6839d | 14 | #include "unicode/localpointer.h" |
b75a7d8f A |
15 | #include "unicode/tblcoll.h" |
16 | #include "unicode/unistr.h" | |
17 | #include "unicode/sortkey.h" | |
18 | #include "regcoll.h" | |
19 | #include "sfwdchit.h" | |
729e4ab9 A |
20 | #include "testutil.h" |
21 | #include "cmemory.h" | |
b75a7d8f | 22 | |
b75a7d8f A |
23 | CollationRegressionTest::CollationRegressionTest() |
24 | { | |
25 | UErrorCode status = U_ZERO_ERROR; | |
26 | ||
27 | en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status); | |
28 | if(U_FAILURE(status)) { | |
29 | delete en_us; | |
30 | en_us = 0; | |
729e4ab9 | 31 | errcheckln(status, "Collator creation failed with %s", u_errorName(status)); |
b75a7d8f A |
32 | return; |
33 | } | |
34 | } | |
35 | ||
36 | CollationRegressionTest::~CollationRegressionTest() | |
37 | { | |
38 | delete en_us; | |
39 | } | |
40 | ||
41 | ||
42 | // @bug 4048446 | |
43 | // | |
44 | // CollationElementIterator.reset() doesn't work | |
45 | // | |
46 | void CollationRegressionTest::Test4048446(/* char* par */) | |
47 | { | |
48 | const UnicodeString test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?"; | |
49 | const UnicodeString test2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?"; | |
50 | CollationElementIterator *i1 = en_us->createCollationElementIterator(test1); | |
51 | CollationElementIterator *i2 = en_us->createCollationElementIterator(test1); | |
52 | UErrorCode status = U_ZERO_ERROR; | |
53 | ||
54 | if (i1 == NULL|| i2 == NULL) | |
55 | { | |
56 | errln("Could not create CollationElementIterator's"); | |
57 | delete i1; | |
58 | delete i2; | |
59 | return; | |
60 | } | |
61 | ||
62 | while (i1->next(status) != CollationElementIterator::NULLORDER) | |
63 | { | |
64 | if (U_FAILURE(status)) | |
65 | { | |
66 | errln("error calling next()"); | |
67 | ||
68 | delete i1; | |
69 | delete i2; | |
70 | return; | |
71 | } | |
72 | } | |
73 | ||
74 | i1->reset(); | |
75 | ||
76 | assertEqual(*i1, *i2); | |
77 | ||
78 | delete i1; | |
79 | delete i2; | |
80 | } | |
81 | ||
82 | // @bug 4051866 | |
83 | // | |
84 | // Collator -> rules -> Collator round-trip broken for expanding characters | |
85 | // | |
86 | void CollationRegressionTest::Test4051866(/* char* par */) | |
87 | { | |
b75a7d8f A |
88 | UnicodeString rules; |
89 | UErrorCode status = U_ZERO_ERROR; | |
90 | ||
57a6839d | 91 | rules += "&n < o "; |
b75a7d8f A |
92 | rules += "& oe ,o"; |
93 | rules += (UChar)0x3080; | |
94 | rules += "& oe ,"; | |
95 | rules += (UChar)0x1530; | |
96 | rules += " ,O"; | |
97 | rules += "& OE ,O"; | |
98 | rules += (UChar)0x3080; | |
99 | rules += "& OE ,"; | |
100 | rules += (UChar)0x1520; | |
101 | rules += "< p ,P"; | |
102 | ||
103 | // Build a collator containing expanding characters | |
b331163b A |
104 | LocalPointer<RuleBasedCollator> c1(new RuleBasedCollator(rules, status), status); |
105 | if (U_FAILURE(status)) { | |
106 | errln("RuleBasedCollator(rule string) failed - %s", u_errorName(status)); | |
107 | return; | |
108 | } | |
b75a7d8f A |
109 | |
110 | // Build another using the rules from the first | |
b331163b | 111 | LocalPointer<RuleBasedCollator> c2(new RuleBasedCollator(c1->getRules(), status), status); |
57a6839d | 112 | if (U_FAILURE(status)) { |
b331163b | 113 | errln("RuleBasedCollator(rule string from other RBC) failed - %s", u_errorName(status)); |
57a6839d A |
114 | return; |
115 | } | |
b75a7d8f A |
116 | |
117 | // Make sure they're the same | |
118 | if (!(c1->getRules() == c2->getRules())) | |
119 | { | |
120 | errln("Rules are not equal"); | |
121 | } | |
b75a7d8f A |
122 | } |
123 | ||
124 | // @bug 4053636 | |
125 | // | |
126 | // Collator thinks "black-bird" == "black" | |
127 | // | |
128 | void CollationRegressionTest::Test4053636(/* char* par */) | |
129 | { | |
130 | if (en_us->equals("black_bird", "black")) | |
131 | { | |
132 | errln("black-bird == black"); | |
133 | } | |
134 | } | |
135 | ||
136 | // @bug 4054238 | |
137 | // | |
138 | // CollationElementIterator will not work correctly if the associated | |
139 | // Collator object's mode is changed | |
140 | // | |
141 | void CollationRegressionTest::Test4054238(/* char* par */) | |
142 | { | |
143 | const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0}; | |
144 | const UnicodeString test3(chars3); | |
145 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
146 | ||
147 | // NOTE: The Java code uses en_us to create the CollationElementIterators | |
148 | // but I'm pretty sure that's wrong, so I've changed this to use c. | |
149 | UErrorCode status = U_ZERO_ERROR; | |
150 | c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
151 | CollationElementIterator *i1 = c->createCollationElementIterator(test3); | |
152 | delete i1; | |
153 | delete c; | |
154 | } | |
155 | ||
156 | // @bug 4054734 | |
157 | // | |
158 | // Collator::IDENTICAL documented but not implemented | |
159 | // | |
160 | void CollationRegressionTest::Test4054734(/* char* par */) | |
161 | { | |
162 | /* | |
163 | Here's the original Java: | |
164 | ||
165 | String[] decomp = { | |
166 | "\u0001", "<", "\u0002", | |
167 | "\u0001", "=", "\u0001", | |
168 | "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise | |
169 | "\u00C0", "=", "A\u0300" // Decomp should make these equal | |
170 | }; | |
729e4ab9 | 171 | |
b75a7d8f A |
172 | String[] nodecomp = { |
173 | "\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave | |
174 | }; | |
175 | */ | |
176 | ||
177 | static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
178 | { | |
179 | {0x0001, 0}, {0x3c, 0}, {0x0002, 0}, | |
180 | {0x0001, 0}, {0x3d, 0}, {0x0001, 0}, | |
181 | {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0}, | |
182 | {0x00c0, 0}, {0x3d, 0}, {0x41, 0x0300, 0} | |
183 | }; | |
184 | ||
729e4ab9 | 185 | |
b75a7d8f A |
186 | UErrorCode status = U_ZERO_ERROR; |
187 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
188 | ||
189 | c->setStrength(Collator::IDENTICAL); | |
190 | ||
191 | c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
2ca993e8 | 192 | compareArray(*c, decomp, UPRV_LENGTHOF(decomp)); |
b75a7d8f A |
193 | |
194 | delete c; | |
195 | } | |
196 | ||
197 | // @bug 4054736 | |
198 | // | |
199 | // Full Decomposition mode not implemented | |
200 | // | |
201 | void CollationRegressionTest::Test4054736(/* char* par */) | |
202 | { | |
203 | UErrorCode status = U_ZERO_ERROR; | |
204 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
205 | ||
206 | c->setStrength(Collator::SECONDARY); | |
207 | c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
208 | ||
209 | static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
210 | { | |
211 | {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC} // Alef-Lamed vs. Alef, Lamed | |
212 | }; | |
213 | ||
2ca993e8 | 214 | compareArray(*c, tests, UPRV_LENGTHOF(tests)); |
b75a7d8f A |
215 | |
216 | delete c; | |
217 | } | |
218 | ||
219 | // @bug 4058613 | |
220 | // | |
729e4ab9 | 221 | // Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean |
b75a7d8f A |
222 | // |
223 | void CollationRegressionTest::Test4058613(/* char* par */) | |
224 | { | |
225 | // Creating a default collator doesn't work when Korean is the default | |
226 | // locale | |
729e4ab9 | 227 | |
b75a7d8f A |
228 | Locale oldDefault = Locale::getDefault(); |
229 | UErrorCode status = U_ZERO_ERROR; | |
729e4ab9 | 230 | |
b75a7d8f A |
231 | Locale::setDefault(Locale::getKorean(), status); |
232 | ||
233 | if (U_FAILURE(status)) | |
234 | { | |
235 | errln("Could not set default locale to Locale::KOREAN"); | |
236 | return; | |
237 | } | |
238 | ||
239 | Collator *c = NULL; | |
729e4ab9 | 240 | |
b75a7d8f A |
241 | c = Collator::createInstance("en_US", status); |
242 | ||
243 | if (c == NULL || U_FAILURE(status)) | |
244 | { | |
245 | errln("Could not create a Korean collator"); | |
246 | Locale::setDefault(oldDefault, status); | |
247 | delete c; | |
248 | return; | |
249 | } | |
729e4ab9 | 250 | |
b75a7d8f A |
251 | // Since the fix to this bug was to turn off decomposition for Korean collators, |
252 | // ensure that's what we got | |
253 | if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF) | |
254 | { | |
255 | errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator"); | |
256 | } | |
257 | ||
258 | delete c; | |
259 | ||
260 | Locale::setDefault(oldDefault, status); | |
261 | } | |
262 | ||
263 | // @bug 4059820 | |
264 | // | |
265 | // RuleBasedCollator.getRules does not return the exact pattern as input | |
266 | // for expanding character sequences | |
267 | // | |
268 | void CollationRegressionTest::Test4059820(/* char* par */) | |
269 | { | |
270 | UErrorCode status = U_ZERO_ERROR; | |
271 | ||
272 | RuleBasedCollator *c = NULL; | |
57a6839d | 273 | UnicodeString rules = "&9 < a < b , c/a < d < z"; |
729e4ab9 | 274 | |
b75a7d8f A |
275 | c = new RuleBasedCollator(rules, status); |
276 | ||
277 | if (c == NULL || U_FAILURE(status)) | |
278 | { | |
279 | errln("Failure building a collator."); | |
280 | delete c; | |
281 | return; | |
282 | } | |
283 | ||
284 | if ( c->getRules().indexOf("c/a") == -1) | |
285 | { | |
286 | errln("returned rules do not contain 'c/a'"); | |
287 | } | |
288 | ||
289 | delete c; | |
290 | } | |
291 | ||
292 | // @bug 4060154 | |
293 | // | |
294 | // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I" | |
295 | // | |
296 | void CollationRegressionTest::Test4060154(/* char* par */) | |
297 | { | |
298 | UErrorCode status = U_ZERO_ERROR; | |
299 | UnicodeString rules; | |
300 | ||
57a6839d | 301 | rules += "&f < g, G < h, H < i, I < j, J"; |
b75a7d8f A |
302 | rules += " & H < "; |
303 | rules += (UChar)0x0131; | |
304 | rules += ", "; | |
305 | rules += (UChar)0x0130; | |
306 | rules += ", i, I"; | |
307 | ||
308 | RuleBasedCollator *c = NULL; | |
729e4ab9 | 309 | |
b75a7d8f A |
310 | c = new RuleBasedCollator(rules, status); |
311 | ||
312 | if (c == NULL || U_FAILURE(status)) | |
313 | { | |
314 | errln("failure building collator."); | |
315 | delete c; | |
316 | return; | |
317 | } | |
318 | ||
319 | c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
320 | ||
321 | /* | |
322 | String[] tertiary = { | |
323 | "A", "<", "B", | |
324 | "H", "<", "\u0131", | |
325 | "H", "<", "I", | |
326 | "\u0131", "<", "\u0130", | |
327 | "\u0130", "<", "i", | |
328 | "\u0130", ">", "H", | |
329 | }; | |
330 | */ | |
331 | ||
332 | static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
333 | { | |
334 | {0x41, 0}, {0x3c, 0}, {0x42, 0}, | |
335 | {0x48, 0}, {0x3c, 0}, {0x0131, 0}, | |
336 | {0x48, 0}, {0x3c, 0}, {0x49, 0}, | |
337 | {0x0131, 0}, {0x3c, 0}, {0x0130, 0}, | |
338 | {0x0130, 0}, {0x3c, 0}, {0x69, 0}, | |
339 | {0x0130, 0}, {0x3e, 0}, {0x48, 0} | |
340 | }; | |
341 | ||
342 | c->setStrength(Collator::TERTIARY); | |
2ca993e8 | 343 | compareArray(*c, tertiary, UPRV_LENGTHOF(tertiary)); |
b75a7d8f A |
344 | |
345 | /* | |
346 | String[] secondary = { | |
347 | "H", "<", "I", | |
348 | "\u0131", "=", "\u0130", | |
349 | }; | |
350 | */ | |
351 | static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
352 | { | |
353 | {0x48, 0}, {0x3c, 0}, {0x49, 0}, | |
354 | {0x0131, 0}, {0x3d, 0}, {0x0130, 0} | |
355 | }; | |
356 | ||
357 | c->setStrength(Collator::PRIMARY); | |
2ca993e8 | 358 | compareArray(*c, secondary, UPRV_LENGTHOF(secondary)); |
b75a7d8f A |
359 | |
360 | delete c; | |
73c04bcf | 361 | } |
b75a7d8f A |
362 | |
363 | // @bug 4062418 | |
364 | // | |
365 | // Secondary/Tertiary comparison incorrect in French Secondary | |
366 | // | |
367 | void CollationRegressionTest::Test4062418(/* char* par */) | |
368 | { | |
369 | UErrorCode status = U_ZERO_ERROR; | |
370 | ||
371 | RuleBasedCollator *c = NULL; | |
729e4ab9 A |
372 | |
373 | c = (RuleBasedCollator *) Collator::createInstance(Locale::getCanadaFrench(), status); | |
b75a7d8f A |
374 | |
375 | if (c == NULL || U_FAILURE(status)) | |
376 | { | |
729e4ab9 | 377 | errln("Failed to create collator for Locale::getCanadaFrench()"); |
b75a7d8f A |
378 | delete c; |
379 | return; | |
380 | } | |
381 | ||
382 | c->setStrength(Collator::SECONDARY); | |
383 | ||
384 | /* | |
385 | String[] tests = { | |
386 | "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater | |
387 | }; | |
388 | */ | |
389 | static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
390 | { | |
391 | {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0} | |
392 | }; | |
393 | ||
2ca993e8 | 394 | compareArray(*c, tests, UPRV_LENGTHOF(tests)); |
b75a7d8f A |
395 | |
396 | delete c; | |
397 | } | |
398 | ||
399 | // @bug 4065540 | |
400 | // | |
401 | // Collator::compare() method broken if either string contains spaces | |
402 | // | |
403 | void CollationRegressionTest::Test4065540(/* char* par */) | |
404 | { | |
405 | if (en_us->compare("abcd e", "abcd f") == 0) | |
406 | { | |
407 | errln("'abcd e' == 'abcd f'"); | |
408 | } | |
409 | } | |
410 | ||
411 | // @bug 4066189 | |
412 | // | |
413 | // Unicode characters need to be recursively decomposed to get the | |
414 | // correct result. For example, | |
415 | // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300. | |
416 | // | |
417 | void CollationRegressionTest::Test4066189(/* char* par */) | |
418 | { | |
419 | static const UChar chars1[] = {0x1EB1, 0}; | |
420 | static const UChar chars2[] = {0x61, 0x0306, 0x0300, 0}; | |
421 | const UnicodeString test1(chars1); | |
422 | const UnicodeString test2(chars2); | |
423 | UErrorCode status = U_ZERO_ERROR; | |
424 | ||
425 | // NOTE: The java code used en_us to create the | |
426 | // CollationElementIterator's. I'm pretty sure that | |
427 | // was wrong, so I've change the code to use c1 and c2 | |
428 | RuleBasedCollator *c1 = (RuleBasedCollator *) en_us->clone(); | |
429 | c1->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
430 | CollationElementIterator *i1 = c1->createCollationElementIterator(test1); | |
431 | ||
432 | RuleBasedCollator *c2 = (RuleBasedCollator *) en_us->clone(); | |
433 | c2->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); | |
434 | CollationElementIterator *i2 = c2->createCollationElementIterator(test2); | |
435 | ||
436 | assertEqual(*i1, *i2); | |
437 | ||
438 | delete i2; | |
439 | delete c2; | |
440 | delete i1; | |
441 | delete c1; | |
442 | } | |
443 | ||
444 | // @bug 4066696 | |
445 | // | |
446 | // French secondary collation checking at the end of compare iteration fails | |
447 | // | |
448 | void CollationRegressionTest::Test4066696(/* char* par */) | |
449 | { | |
450 | UErrorCode status = U_ZERO_ERROR; | |
451 | RuleBasedCollator *c = NULL; | |
729e4ab9 A |
452 | |
453 | c = (RuleBasedCollator *)Collator::createInstance(Locale::getCanadaFrench(), status); | |
b75a7d8f A |
454 | |
455 | if (c == NULL || U_FAILURE(status)) | |
456 | { | |
729e4ab9 | 457 | errln("Failure creating collator for Locale::getCanadaFrench()"); |
b75a7d8f A |
458 | delete c; |
459 | return; | |
460 | } | |
461 | ||
462 | c->setStrength(Collator::SECONDARY); | |
463 | ||
464 | /* | |
465 | String[] tests = { | |
466 | "\u00e0", "<", "\u01fa", // a-grave < A-ring-acute | |
467 | }; | |
468 | ||
469 | should be: | |
470 | ||
471 | String[] tests = { | |
472 | "\u00e0", ">", "\u01fa", // a-grave < A-ring-acute | |
473 | }; | |
474 | ||
475 | */ | |
476 | ||
477 | static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
478 | { | |
479 | {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0} | |
480 | }; | |
481 | ||
2ca993e8 | 482 | compareArray(*c, tests, UPRV_LENGTHOF(tests)); |
b75a7d8f A |
483 | |
484 | delete c; | |
485 | } | |
486 | ||
487 | // @bug 4076676 | |
488 | // | |
489 | // Bad canonicalization of same-class combining characters | |
490 | // | |
491 | void CollationRegressionTest::Test4076676(/* char* par */) | |
492 | { | |
493 | // These combining characters are all in the same class, so they should not | |
494 | // be reordered, and they should compare as unequal. | |
495 | static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0}; | |
496 | static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0}; | |
497 | ||
498 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
499 | c->setStrength(Collator::TERTIARY); | |
500 | ||
501 | if (c->compare(s1,s2) == 0) | |
502 | { | |
503 | errln("Same-class combining chars were reordered"); | |
504 | } | |
505 | ||
506 | delete c; | |
507 | } | |
508 | ||
509 | // @bug 4079231 | |
510 | // | |
511 | // RuleBasedCollator::operator==(NULL) throws NullPointerException | |
512 | // | |
513 | void CollationRegressionTest::Test4079231(/* char* par */) | |
514 | { | |
515 | // I don't think there's any way to write this test | |
516 | // in C++. The following is equivalent to the Java, | |
517 | // but doesn't compile 'cause NULL can't be converted | |
518 | // to Collator& | |
519 | // | |
520 | // if (en_us->operator==(NULL)) | |
521 | // { | |
522 | // errln("en_us->operator==(NULL) returned TRUE"); | |
523 | // } | |
524 | ||
525 | /* | |
526 | try { | |
527 | if (en_us->equals(null)) { | |
528 | errln("en_us->equals(null) returned true"); | |
529 | } | |
530 | } | |
531 | catch (Exception e) { | |
532 | errln("en_us->equals(null) threw " + e.toString()); | |
533 | } | |
534 | */ | |
535 | } | |
536 | ||
537 | // @bug 4078588 | |
538 | // | |
539 | // RuleBasedCollator breaks on "< a < bb" rule | |
540 | // | |
541 | void CollationRegressionTest::Test4078588(/* char *par */) | |
542 | { | |
543 | UErrorCode status = U_ZERO_ERROR; | |
57a6839d | 544 | RuleBasedCollator *rbc = new RuleBasedCollator("&9 < a < bb", status); |
b75a7d8f A |
545 | |
546 | if (rbc == NULL || U_FAILURE(status)) | |
547 | { | |
548 | errln("Failed to create RuleBasedCollator."); | |
549 | delete rbc; | |
550 | return; | |
551 | } | |
552 | ||
553 | Collator::EComparisonResult result = rbc->compare("a","bb"); | |
554 | ||
555 | if (result != Collator::LESS) | |
556 | { | |
557 | errln((UnicodeString)"Compare(a,bb) returned " + (int)result | |
558 | + (UnicodeString)"; expected -1"); | |
559 | } | |
560 | ||
561 | delete rbc; | |
562 | } | |
563 | ||
564 | // @bug 4081866 | |
565 | // | |
566 | // Combining characters in different classes not reordered properly. | |
567 | // | |
568 | void CollationRegressionTest::Test4081866(/* char* par */) | |
569 | { | |
570 | // These combining characters are all in different classes, | |
571 | // so they should be reordered and the strings should compare as equal. | |
572 | static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0}; | |
573 | static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0}; | |
574 | ||
575 | UErrorCode status = U_ZERO_ERROR; | |
576 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
577 | c->setStrength(Collator::TERTIARY); | |
729e4ab9 | 578 | |
b75a7d8f A |
579 | // Now that the default collators are set to NO_DECOMPOSITION |
580 | // (as a result of fixing bug 4114077), we must set it explicitly | |
581 | // when we're testing reordering behavior. -- lwerner, 5/5/98 | |
582 | c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
583 | ||
584 | if (c->compare(s1,s2) != 0) | |
585 | { | |
586 | errln("Combining chars were not reordered"); | |
587 | } | |
588 | ||
589 | delete c; | |
590 | } | |
591 | ||
592 | // @bug 4087241 | |
593 | // | |
594 | // string comparison errors in Scandinavian collators | |
595 | // | |
596 | void CollationRegressionTest::Test4087241(/* char* par */) | |
597 | { | |
598 | UErrorCode status = U_ZERO_ERROR; | |
599 | Locale da_DK("da", "DK"); | |
600 | RuleBasedCollator *c = NULL; | |
729e4ab9 | 601 | |
b75a7d8f A |
602 | c = (RuleBasedCollator *) Collator::createInstance(da_DK, status); |
603 | ||
604 | if (c == NULL || U_FAILURE(status)) | |
605 | { | |
606 | errln("Failed to create collator for da_DK locale"); | |
607 | delete c; | |
608 | return; | |
609 | } | |
610 | ||
611 | c->setStrength(Collator::SECONDARY); | |
612 | ||
613 | static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
614 | { | |
615 | {0x7a, 0}, {0x3c, 0}, {0x00E6, 0}, // z < ae | |
57a6839d | 616 | {0x61, 0x0308, 0}, {0x3c, 0}, {0x61, 0x030A, 0}, // a-umlaut < a-ring |
b75a7d8f A |
617 | {0x59, 0}, {0x3c, 0}, {0x75, 0x0308, 0}, // Y < u-umlaut |
618 | }; | |
619 | ||
2ca993e8 | 620 | compareArray(*c, tests, UPRV_LENGTHOF(tests)); |
b75a7d8f A |
621 | |
622 | delete c; | |
623 | } | |
624 | ||
625 | // @bug 4087243 | |
626 | // | |
627 | // CollationKey takes ignorable strings into account when it shouldn't | |
628 | // | |
629 | void CollationRegressionTest::Test4087243(/* char* par */) | |
630 | { | |
631 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
632 | c->setStrength(Collator::TERTIARY); | |
633 | ||
634 | static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
635 | { | |
636 | {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0} // 1 2 3 = 1 2 3 ctrl-A | |
637 | }; | |
638 | ||
2ca993e8 | 639 | compareArray(*c, tests, UPRV_LENGTHOF(tests)); |
b75a7d8f A |
640 | |
641 | delete c; | |
642 | } | |
643 | ||
644 | // @bug 4092260 | |
645 | // | |
646 | // Mu/micro conflict | |
647 | // Micro symbol and greek lowercase letter Mu should sort identically | |
648 | // | |
649 | void CollationRegressionTest::Test4092260(/* char* par */) | |
650 | { | |
651 | UErrorCode status = U_ZERO_ERROR; | |
652 | Locale el("el", ""); | |
653 | Collator *c = NULL; | |
729e4ab9 | 654 | |
b75a7d8f A |
655 | c = Collator::createInstance(el, status); |
656 | ||
657 | if (c == NULL || U_FAILURE(status)) | |
658 | { | |
659 | errln("Failed to create collator for el locale."); | |
660 | delete c; | |
661 | return; | |
662 | } | |
663 | ||
664 | // These now have tertiary differences in UCA | |
665 | c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status); | |
666 | ||
667 | static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
668 | { | |
669 | {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0} | |
670 | }; | |
671 | ||
2ca993e8 | 672 | compareArray(*c, tests, UPRV_LENGTHOF(tests)); |
b75a7d8f A |
673 | |
674 | delete c; | |
675 | } | |
676 | ||
677 | // @bug 4095316 | |
678 | // | |
679 | void CollationRegressionTest::Test4095316(/* char* par */) | |
680 | { | |
681 | UErrorCode status = U_ZERO_ERROR; | |
682 | Locale el_GR("el", "GR"); | |
683 | Collator *c = Collator::createInstance(el_GR, status); | |
684 | ||
685 | if (c == NULL || U_FAILURE(status)) | |
686 | { | |
687 | errln("Failed to create collator for el_GR locale"); | |
688 | delete c; | |
689 | return; | |
690 | } | |
691 | // These now have tertiary differences in UCA | |
692 | //c->setStrength(Collator::TERTIARY); | |
693 | c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status); | |
694 | ||
695 | static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
696 | { | |
697 | {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0} | |
698 | }; | |
699 | ||
2ca993e8 | 700 | compareArray(*c, tests, UPRV_LENGTHOF(tests)); |
b75a7d8f A |
701 | |
702 | delete c; | |
703 | } | |
704 | ||
705 | // @bug 4101940 | |
706 | // | |
707 | void CollationRegressionTest::Test4101940(/* char* par */) | |
708 | { | |
709 | UErrorCode status = U_ZERO_ERROR; | |
710 | RuleBasedCollator *c = NULL; | |
57a6839d | 711 | UnicodeString rules = "&9 < a < b"; |
b75a7d8f | 712 | UnicodeString nothing = ""; |
729e4ab9 | 713 | |
b75a7d8f A |
714 | c = new RuleBasedCollator(rules, status); |
715 | ||
716 | if (c == NULL || U_FAILURE(status)) | |
717 | { | |
718 | errln("Failed to create RuleBasedCollator"); | |
719 | delete c; | |
720 | return; | |
721 | } | |
722 | ||
723 | CollationElementIterator *i = c->createCollationElementIterator(nothing); | |
724 | i->reset(); | |
725 | ||
726 | if (i->next(status) != CollationElementIterator::NULLORDER) | |
727 | { | |
728 | errln("next did not return NULLORDER"); | |
729 | } | |
730 | ||
731 | delete i; | |
732 | delete c; | |
733 | } | |
734 | ||
735 | // @bug 4103436 | |
736 | // | |
737 | // Collator::compare not handling spaces properly | |
738 | // | |
739 | void CollationRegressionTest::Test4103436(/* char* par */) | |
740 | { | |
741 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
742 | c->setStrength(Collator::TERTIARY); | |
743 | ||
744 | static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
745 | { | |
746 | {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}, | |
747 | {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0} | |
748 | }; | |
749 | ||
2ca993e8 | 750 | compareArray(*c, tests, UPRV_LENGTHOF(tests)); |
b75a7d8f A |
751 | |
752 | delete c; | |
753 | } | |
754 | ||
755 | // @bug 4114076 | |
756 | // | |
757 | // Collation not Unicode conformant with Hangul syllables | |
758 | // | |
759 | void CollationRegressionTest::Test4114076(/* char* par */) | |
760 | { | |
761 | UErrorCode status = U_ZERO_ERROR; | |
762 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
763 | c->setStrength(Collator::TERTIARY); | |
764 | ||
765 | // | |
766 | // With Canonical decomposition, Hangul syllables should get decomposed | |
767 | // into Jamo, but Jamo characters should not be decomposed into | |
768 | // conjoining Jamo | |
769 | // | |
770 | static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
771 | { | |
772 | {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0} | |
773 | }; | |
774 | ||
775 | c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
2ca993e8 | 776 | compareArray(*c, test1, UPRV_LENGTHOF(test1)); |
b75a7d8f A |
777 | |
778 | // From UTR #15: | |
779 | // *In earlier versions of Unicode, jamo characters like ksf | |
729e4ab9 | 780 | // had compatibility mappings to kf + sf. These mappings were |
b75a7d8f A |
781 | // removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.) |
782 | // That is, the following test is obsolete as of 2.1.9 | |
783 | ||
784 | //obsolete- // With Full decomposition, it should go all the way down to | |
785 | //obsolete- // conjoining Jamo characters. | |
786 | //obsolete- // | |
787 | //obsolete- static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
788 | //obsolete- { | |
789 | //obsolete- {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0} | |
790 | //obsolete- }; | |
791 | //obsolete- | |
792 | //obsolete- c->setDecomposition(Normalizer::DECOMP_COMPAT); | |
2ca993e8 | 793 | //obsolete- compareArray(*c, test2, UPRV_LENGTHOF(test2)); |
b75a7d8f A |
794 | |
795 | delete c; | |
796 | } | |
797 | ||
798 | ||
799 | // @bug 4124632 | |
800 | // | |
801 | // Collator::getCollationKey was hanging on certain character sequences | |
802 | // | |
803 | void CollationRegressionTest::Test4124632(/* char* par */) | |
804 | { | |
805 | UErrorCode status = U_ZERO_ERROR; | |
806 | Collator *coll = NULL; | |
729e4ab9 | 807 | |
b75a7d8f | 808 | coll = Collator::createInstance(Locale::getJapan(), status); |
729e4ab9 | 809 | |
b75a7d8f A |
810 | if (coll == NULL || U_FAILURE(status)) |
811 | { | |
812 | errln("Failed to create collator for Locale::JAPAN"); | |
813 | delete coll; | |
46f4442e | 814 | return; |
b75a7d8f A |
815 | } |
816 | ||
817 | static const UChar test[] = {0x41, 0x0308, 0x62, 0x63, 0}; | |
818 | CollationKey key; | |
729e4ab9 | 819 | |
b75a7d8f A |
820 | coll->getCollationKey(test, key, status); |
821 | ||
822 | if (key.isBogus() || U_FAILURE(status)) | |
823 | { | |
824 | errln("CollationKey creation failed."); | |
825 | } | |
826 | ||
827 | delete coll; | |
828 | } | |
829 | ||
830 | // @bug 4132736 | |
831 | // | |
832 | // sort order of french words with multiple accents has errors | |
833 | // | |
834 | void CollationRegressionTest::Test4132736(/* char* par */) | |
835 | { | |
836 | UErrorCode status = U_ZERO_ERROR; | |
837 | ||
838 | Collator *c = NULL; | |
729e4ab9 A |
839 | |
840 | c = Collator::createInstance(Locale::getCanadaFrench(), status); | |
b75a7d8f | 841 | c->setStrength(Collator::TERTIARY); |
729e4ab9 | 842 | |
b75a7d8f A |
843 | if (c == NULL || U_FAILURE(status)) |
844 | { | |
729e4ab9 | 845 | errln("Failed to create a collator for Locale::getCanadaFrench()"); |
b75a7d8f | 846 | delete c; |
46f4442e | 847 | return; |
b75a7d8f A |
848 | } |
849 | ||
850 | static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
851 | { | |
852 | {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0}, | |
853 | {0x65, 0x0300, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x0300, 0} | |
854 | }; | |
855 | ||
2ca993e8 | 856 | compareArray(*c, test1, UPRV_LENGTHOF(test1)); |
b75a7d8f A |
857 | |
858 | delete c; | |
859 | } | |
860 | ||
861 | // @bug 4133509 | |
862 | // | |
863 | // The sorting using java.text.CollationKey is not in the exact order | |
864 | // | |
865 | void CollationRegressionTest::Test4133509(/* char* par */) | |
866 | { | |
867 | static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
868 | { | |
869 | {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0}, | |
870 | {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0}, {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0}, | |
871 | {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0}, {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0} | |
872 | }; | |
873 | ||
2ca993e8 | 874 | compareArray(*en_us, test1, UPRV_LENGTHOF(test1)); |
b75a7d8f A |
875 | } |
876 | ||
877 | // @bug 4114077 | |
878 | // | |
729e4ab9 | 879 | // Collation with decomposition off doesn't work for Europe |
b75a7d8f A |
880 | // |
881 | void CollationRegressionTest::Test4114077(/* char* par */) | |
882 | { | |
883 | // Ensure that we get the same results with decomposition off | |
884 | // as we do with it on.... | |
729e4ab9 | 885 | |
b75a7d8f A |
886 | UErrorCode status = U_ZERO_ERROR; |
887 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
888 | c->setStrength(Collator::TERTIARY); | |
729e4ab9 | 889 | |
b75a7d8f A |
890 | static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = |
891 | { | |
892 | {0x00C0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}, // Should be equivalent | |
893 | {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0}, | |
894 | {0x0204, 0}, {0x3d, 0}, {0x45, 0x030F, 0}, | |
895 | {0x01fa, 0}, {0x3d, 0}, {0x41, 0x030a, 0x0301, 0}, // a-ring-acute -> a-ring, acute | |
896 | // -> a, ring, acute | |
897 | {0x41, 0x0300, 0x0316, 0}, {0x3c, 0}, {0x41, 0x0316, 0x0300, 0} // No reordering --> unequal | |
898 | }; | |
899 | ||
900 | c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); | |
2ca993e8 | 901 | compareArray(*c, test1, UPRV_LENGTHOF(test1)); |
b75a7d8f A |
902 | |
903 | static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
904 | { | |
905 | {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0} // Reordering --> equal | |
906 | }; | |
907 | ||
908 | c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
2ca993e8 | 909 | compareArray(*c, test2, UPRV_LENGTHOF(test2)); |
b75a7d8f A |
910 | |
911 | delete c; | |
912 | } | |
913 | ||
914 | // @bug 4141640 | |
915 | // | |
729e4ab9 | 916 | // Support for Swedish gone in 1.1.6 (Can't create Swedish collator) |
b75a7d8f A |
917 | // |
918 | void CollationRegressionTest::Test4141640(/* char* par */) | |
919 | { | |
920 | // | |
921 | // Rather than just creating a Swedish collator, we might as well | |
922 | // try to instantiate one for every locale available on the system | |
923 | // in order to prevent this sort of bug from cropping up in the future | |
924 | // | |
925 | UErrorCode status = U_ZERO_ERROR; | |
926 | int32_t i, localeCount; | |
927 | const Locale *locales = Locale::getAvailableLocales(localeCount); | |
729e4ab9 | 928 | |
b75a7d8f A |
929 | for (i = 0; i < localeCount; i += 1) |
930 | { | |
931 | Collator *c = NULL; | |
932 | ||
933 | status = U_ZERO_ERROR; | |
934 | c = Collator::createInstance(locales[i], status); | |
935 | ||
936 | if (c == NULL || U_FAILURE(status)) | |
937 | { | |
938 | UnicodeString msg, localeName; | |
939 | ||
940 | msg += "Could not create collator for locale "; | |
941 | msg += locales[i].getName(); | |
942 | ||
943 | errln(msg); | |
944 | } | |
945 | ||
946 | delete c; | |
947 | } | |
948 | } | |
949 | ||
950 | // @bug 4139572 | |
951 | // | |
729e4ab9 | 952 | // getCollationKey throws exception for spanish text |
b75a7d8f A |
953 | // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6 |
954 | // | |
955 | void CollationRegressionTest::Test4139572(/* char* par */) | |
956 | { | |
957 | // | |
958 | // Code pasted straight from the bug report | |
959 | // (and then translated to C++ ;-) | |
960 | // | |
961 | // create spanish locale and collator | |
962 | UErrorCode status = U_ZERO_ERROR; | |
963 | Locale l("es", "es"); | |
964 | Collator *col = NULL; | |
729e4ab9 | 965 | |
b75a7d8f A |
966 | col = Collator::createInstance(l, status); |
967 | ||
968 | if (col == NULL || U_FAILURE(status)) | |
969 | { | |
970 | errln("Failed to create a collator for es_es locale."); | |
971 | delete col; | |
972 | return; | |
973 | } | |
974 | ||
975 | CollationKey key; | |
976 | ||
977 | // this spanish phrase kills it! | |
978 | col->getCollationKey("Nombre De Objeto", key, status); | |
979 | ||
980 | if (key.isBogus() || U_FAILURE(status)) | |
981 | { | |
982 | errln("Error creating CollationKey for \"Nombre De Ojbeto\""); | |
983 | } | |
984 | ||
985 | delete col; | |
986 | } | |
987 | /* HSYS : RuleBasedCollator::compare() performance enhancements | |
988 | compare() does not create CollationElementIterator() anymore.*/ | |
729e4ab9 | 989 | |
b75a7d8f A |
990 | class My4146160Collator : public RuleBasedCollator |
991 | { | |
992 | public: | |
993 | My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status); | |
994 | ~My4146160Collator(); | |
995 | ||
996 | CollationElementIterator *createCollationElementIterator(const UnicodeString &text) const; | |
997 | ||
998 | CollationElementIterator *createCollationElementIterator(const CharacterIterator &text) const; | |
729e4ab9 | 999 | |
b75a7d8f A |
1000 | static int32_t count; |
1001 | }; | |
1002 | ||
1003 | int32_t My4146160Collator::count = 0; | |
1004 | ||
1005 | My4146160Collator::My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status) | |
1006 | : RuleBasedCollator(rbc.getRules(), status) | |
1007 | { | |
1008 | } | |
1009 | ||
1010 | My4146160Collator::~My4146160Collator() | |
1011 | { | |
1012 | } | |
1013 | ||
1014 | CollationElementIterator *My4146160Collator::createCollationElementIterator(const UnicodeString &text) const | |
1015 | { | |
1016 | count += 1; | |
1017 | return RuleBasedCollator::createCollationElementIterator(text); | |
1018 | } | |
1019 | ||
1020 | CollationElementIterator *My4146160Collator::createCollationElementIterator(const CharacterIterator &text) const | |
1021 | { | |
1022 | count += 1; | |
1023 | return RuleBasedCollator::createCollationElementIterator(text); | |
1024 | } | |
1025 | ||
1026 | // @bug 4146160 | |
1027 | // | |
1028 | // RuleBasedCollator doesn't use createCollationElementIterator internally | |
1029 | // | |
1030 | void CollationRegressionTest::Test4146160(/* char* par */) | |
1031 | { | |
1032 | #if 0 | |
1033 | // | |
1034 | // Use a custom collator class whose createCollationElementIterator | |
1035 | // methods increment a count.... | |
1036 | // | |
1037 | UErrorCode status = U_ZERO_ERROR; | |
1038 | CollationKey key; | |
1039 | ||
1040 | My4146160Collator::count = 0; | |
1041 | My4146160Collator *mc = NULL; | |
729e4ab9 | 1042 | |
b75a7d8f A |
1043 | mc = new My4146160Collator(*en_us, status); |
1044 | ||
1045 | if (mc == NULL || U_FAILURE(status)) | |
1046 | { | |
1047 | errln("Failed to create a My4146160Collator."); | |
1048 | delete mc; | |
1049 | return; | |
1050 | } | |
1051 | ||
1052 | mc->getCollationKey("1", key, status); | |
1053 | ||
1054 | if (key.isBogus() || U_FAILURE(status)) | |
1055 | { | |
1056 | errln("Failure to get a CollationKey from a My4146160Collator."); | |
1057 | delete mc; | |
1058 | return; | |
1059 | } | |
1060 | ||
1061 | if (My4146160Collator::count < 1) | |
1062 | { | |
1063 | errln("My4146160Collator::createCollationElementIterator not called for getCollationKey"); | |
1064 | } | |
1065 | ||
1066 | My4146160Collator::count = 0; | |
1067 | mc->compare("1", "2"); | |
1068 | ||
1069 | if (My4146160Collator::count < 1) | |
1070 | { | |
1071 | errln("My4146160Collator::createtCollationElementIterator not called for compare"); | |
1072 | } | |
1073 | ||
1074 | delete mc; | |
1075 | #endif | |
1076 | } | |
729e4ab9 | 1077 | |
57a6839d A |
1078 | void CollationRegressionTest::Test4179216() { |
1079 | // you can position a CollationElementIterator in the middle of | |
1080 | // a contracting character sequence, yielding a bogus collation | |
1081 | // element | |
1082 | IcuTestErrorCode errorCode(*this, "Test4179216"); | |
1083 | RuleBasedCollator coll(en_us->getRules() + " & C < ch , cH , Ch , CH < cat < crunchy", errorCode); | |
1084 | UnicodeString testText = "church church catcatcher runcrunchynchy"; | |
1085 | CollationElementIterator *iter = coll.createCollationElementIterator(testText); | |
1086 | ||
1087 | // test that the "ch" combination works properly | |
1088 | iter->setOffset(4, errorCode); | |
1089 | int32_t elt4 = CollationElementIterator::primaryOrder(iter->next(errorCode)); | |
1090 | ||
1091 | iter->reset(); | |
1092 | int32_t elt0 = CollationElementIterator::primaryOrder(iter->next(errorCode)); | |
1093 | ||
1094 | iter->setOffset(5, errorCode); | |
1095 | int32_t elt5 = CollationElementIterator::primaryOrder(iter->next(errorCode)); | |
1096 | ||
1097 | // Compares and prints only 16-bit primary weights. | |
1098 | if (elt4 != elt0 || elt5 != elt0) { | |
1099 | errln("The collation elements at positions 0 (0x%04x), " | |
1100 | "4 (0x%04x), and 5 (0x%04x) don't match.", | |
1101 | elt0, elt4, elt5); | |
1102 | } | |
1103 | ||
1104 | // test that the "cat" combination works properly | |
1105 | iter->setOffset(14, errorCode); | |
1106 | int32_t elt14 = CollationElementIterator::primaryOrder(iter->next(errorCode)); | |
1107 | ||
1108 | iter->setOffset(15, errorCode); | |
1109 | int32_t elt15 = CollationElementIterator::primaryOrder(iter->next(errorCode)); | |
1110 | ||
1111 | iter->setOffset(16, errorCode); | |
1112 | int32_t elt16 = CollationElementIterator::primaryOrder(iter->next(errorCode)); | |
1113 | ||
1114 | iter->setOffset(17, errorCode); | |
1115 | int32_t elt17 = CollationElementIterator::primaryOrder(iter->next(errorCode)); | |
1116 | ||
1117 | iter->setOffset(18, errorCode); | |
1118 | int32_t elt18 = CollationElementIterator::primaryOrder(iter->next(errorCode)); | |
1119 | ||
1120 | iter->setOffset(19, errorCode); | |
1121 | int32_t elt19 = CollationElementIterator::primaryOrder(iter->next(errorCode)); | |
1122 | ||
1123 | // Compares and prints only 16-bit primary weights. | |
1124 | if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17 | |
1125 | || elt14 != elt18 || elt14 != elt19) { | |
1126 | errln("\"cat\" elements don't match: elt14 = 0x%04x, " | |
1127 | "elt15 = 0x%04x, elt16 = 0x%04x, elt17 = 0x%04x, " | |
1128 | "elt18 = 0x%04x, elt19 = 0x%04x", | |
1129 | elt14, elt15, elt16, elt17, elt18, elt19); | |
1130 | } | |
1131 | ||
1132 | // now generate a complete list of the collation elements, | |
1133 | // first using next() and then using setOffset(), and | |
1134 | // make sure both interfaces return the same set of elements | |
1135 | iter->reset(); | |
1136 | ||
1137 | int32_t elt = iter->next(errorCode); | |
1138 | int32_t count = 0; | |
1139 | while (elt != CollationElementIterator::NULLORDER) { | |
1140 | ++count; | |
1141 | elt = iter->next(errorCode); | |
1142 | } | |
1143 | ||
1144 | LocalArray<UnicodeString> nextElements(new UnicodeString[count]); | |
1145 | LocalArray<UnicodeString> setOffsetElements(new UnicodeString[count]); | |
1146 | int32_t lastPos = 0; | |
1147 | ||
1148 | iter->reset(); | |
1149 | elt = iter->next(errorCode); | |
1150 | count = 0; | |
1151 | while (elt != CollationElementIterator::NULLORDER) { | |
1152 | nextElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset()); | |
1153 | lastPos = iter->getOffset(); | |
1154 | elt = iter->next(errorCode); | |
1155 | } | |
1156 | int32_t nextElementsLength = count; | |
1157 | count = 0; | |
1158 | for (int32_t i = 0; i < testText.length(); ) { | |
1159 | iter->setOffset(i, errorCode); | |
1160 | lastPos = iter->getOffset(); | |
1161 | elt = iter->next(errorCode); | |
1162 | setOffsetElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset()); | |
1163 | i = iter->getOffset(); | |
1164 | } | |
1165 | for (int32_t i = 0; i < nextElementsLength; i++) { | |
1166 | if (nextElements[i] == setOffsetElements[i]) { | |
1167 | logln(nextElements[i]); | |
1168 | } else { | |
1169 | errln(UnicodeString("Error: next() yielded ") + nextElements[i] + | |
1170 | ", but setOffset() yielded " + setOffsetElements[i]); | |
1171 | } | |
1172 | } | |
1173 | delete iter; | |
1174 | } | |
1175 | ||
729e4ab9 A |
1176 | // Ticket 7189 |
1177 | // | |
1178 | // nextSortKeyPart incorrect for EO_S1 collation | |
1179 | static int32_t calcKeyIncremental(UCollator *coll, const UChar* text, int32_t len, uint8_t *keyBuf, int32_t /*keyBufLen*/, UErrorCode& status) { | |
1180 | UCharIterator uiter; | |
1181 | uint32_t state[2] = { 0, 0 }; | |
1182 | int32_t keyLen; | |
1183 | int32_t count = 8; | |
1184 | ||
1185 | uiter_setString(&uiter, text, len); | |
1186 | keyLen = 0; | |
1187 | while (TRUE) { | |
1188 | int32_t keyPartLen = ucol_nextSortKeyPart(coll, &uiter, state, &keyBuf[keyLen], count, &status); | |
1189 | if (U_FAILURE(status)) { | |
1190 | return -1; | |
1191 | } | |
1192 | if (keyPartLen == 0) { | |
1193 | break; | |
1194 | } | |
1195 | keyLen += keyPartLen; | |
1196 | } | |
1197 | return keyLen; | |
1198 | } | |
1199 | ||
1200 | void CollationRegressionTest::TestT7189() { | |
1201 | UErrorCode status = U_ZERO_ERROR; | |
1202 | UCollator *coll; | |
1203 | uint32_t i; | |
1204 | ||
1205 | static const UChar text1[][CollationRegressionTest::MAX_TOKEN_LEN] = { | |
1206 | // "Achter De Hoven" | |
1207 | { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 }, | |
1208 | // "ABC" | |
1209 | { 0x41, 0x42, 0x43, 0x00 }, | |
1210 | // "HELLO world!" | |
1211 | { 0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 } | |
1212 | }; | |
1213 | ||
1214 | static const UChar text2[][CollationRegressionTest::MAX_TOKEN_LEN] = { | |
1215 | // "Achter de Hoven" | |
1216 | { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 }, | |
1217 | // "abc" | |
1218 | { 0x61, 0x62, 0x63, 0x00 }, | |
1219 | // "hello world!" | |
1220 | { 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 } | |
1221 | }; | |
1222 | ||
1223 | // Open the collator | |
1224 | coll = ucol_openFromShortString("EO_S1", FALSE, NULL, &status); | |
1225 | if (U_FAILURE(status)) { | |
1226 | errln("Failed to create a collator for short string EO_S1"); | |
1227 | return; | |
1228 | } | |
1229 | ||
2ca993e8 | 1230 | for (i = 0; i < UPRV_LENGTHOF(text1); i++) { |
729e4ab9 A |
1231 | uint8_t key1[100], key2[100]; |
1232 | int32_t len1, len2; | |
1233 | ||
1234 | len1 = calcKeyIncremental(coll, text1[i], -1, key1, sizeof(key1), status); | |
1235 | if (U_FAILURE(status)) { | |
1236 | errln(UnicodeString("Failed to get a partial collation key for ") + text1[i]); | |
1237 | break; | |
1238 | } | |
1239 | len2 = calcKeyIncremental(coll, text2[i], -1, key2, sizeof(key2), status); | |
1240 | if (U_FAILURE(status)) { | |
1241 | errln(UnicodeString("Failed to get a partial collation key for ") + text2[i]); | |
1242 | break; | |
1243 | } | |
1244 | ||
1245 | if (len1 == len2 && uprv_memcmp(key1, key2, len1) == 0) { | |
1246 | errln(UnicodeString("Failed: Identical key\n") + " text1: " + text1[i] + "\n" + " text2: " + text2[i] + "\n" + " key : " + TestUtility::hex(key1, len1)); | |
1247 | } else { | |
1248 | logln(UnicodeString("Keys produced -\n") + " text1: " + text1[i] + "\n" + " key1 : " + TestUtility::hex(key1, len1) + "\n" + " text2: " + text2[i] + "\n" + " key2 : " | |
1249 | + TestUtility::hex(key2, len2)); | |
1250 | } | |
1251 | } | |
1252 | ucol_close(coll); | |
1253 | } | |
1254 | ||
4388f060 A |
1255 | void CollationRegressionTest::TestCaseFirstCompression() { |
1256 | RuleBasedCollator *col = (RuleBasedCollator *) en_us->clone(); | |
1257 | UErrorCode status = U_ZERO_ERROR; | |
1258 | ||
1259 | // default | |
1260 | caseFirstCompressionSub(col, "default"); | |
1261 | ||
1262 | // Upper first | |
1263 | col->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status); | |
1264 | if (U_FAILURE(status)) { | |
1265 | errln("Failed to set UCOL_UPPER_FIRST"); | |
1266 | return; | |
1267 | } | |
1268 | caseFirstCompressionSub(col, "upper first"); | |
1269 | ||
1270 | // Lower first | |
1271 | col->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status); | |
1272 | if (U_FAILURE(status)) { | |
1273 | errln("Failed to set UCOL_LOWER_FIRST"); | |
1274 | return; | |
1275 | } | |
1276 | caseFirstCompressionSub(col, "lower first"); | |
1277 | ||
1278 | delete col; | |
1279 | } | |
1280 | ||
1281 | void CollationRegressionTest::caseFirstCompressionSub(Collator *col, UnicodeString opt) { | |
1282 | const int32_t maxLength = 50; | |
1283 | ||
1284 | UChar str1[maxLength]; | |
1285 | UChar str2[maxLength]; | |
1286 | ||
1287 | CollationKey key1, key2; | |
1288 | ||
1289 | for (int32_t len = 1; len <= maxLength; len++) { | |
1290 | int32_t i = 0; | |
1291 | for (; i < len - 1; i++) { | |
1292 | str1[i] = str2[i] = (UChar)0x61; // 'a' | |
1293 | } | |
1294 | str1[i] = (UChar)0x41; // 'A' | |
1295 | str2[i] = (UChar)0x61; // 'a' | |
1296 | ||
1297 | UErrorCode status = U_ZERO_ERROR; | |
1298 | col->getCollationKey(str1, len, key1, status); | |
1299 | col->getCollationKey(str2, len, key2, status); | |
1300 | ||
1301 | UCollationResult cmpKey = key1.compareTo(key2, status); | |
1302 | UCollationResult cmpCol = col->compare(str1, len, str2, len, status); | |
1303 | ||
1304 | if (U_FAILURE(status)) { | |
1305 | errln("Error in caseFirstCompressionSub"); | |
1306 | } else if (cmpKey != cmpCol) { | |
1307 | errln((UnicodeString)"Inconsistent comparison(" + opt | |
1308 | + "): str1=" + UnicodeString(str1, len) + ", str2=" + UnicodeString(str2, len) | |
1309 | + ", cmpKey=" + cmpKey + ", cmpCol=" + cmpCol); | |
1310 | } | |
1311 | } | |
1312 | } | |
1313 | ||
57a6839d A |
1314 | void CollationRegressionTest::TestTrailingComment() { |
1315 | // ICU ticket #8070: | |
1316 | // Check that the rule parser handles a comment without terminating end-of-line. | |
1317 | IcuTestErrorCode errorCode(*this, "TestTrailingComment"); | |
1318 | RuleBasedCollator coll(UNICODE_STRING_SIMPLE("&c<b#comment1\n<a#comment2"), errorCode); | |
1319 | UnicodeString a((UChar)0x61), b((UChar)0x62), c((UChar)0x63); | |
1320 | assertTrue("c<b", coll.compare(c, b) < 0); | |
1321 | assertTrue("b<a", coll.compare(b, a) < 0); | |
1322 | } | |
4388f060 | 1323 | |
57a6839d A |
1324 | void CollationRegressionTest::TestBeforeWithTooStrongAfter() { |
1325 | // ICU ticket #9959: | |
1326 | // Forbid rules with a before-reset followed by a stronger relation. | |
1327 | IcuTestErrorCode errorCode(*this, "TestBeforeWithTooStrongAfter"); | |
1328 | RuleBasedCollator before2(UNICODE_STRING_SIMPLE("&[before 2]x<<q<p"), errorCode); | |
1329 | if(errorCode.isSuccess()) { | |
1330 | errln("should forbid before-2-reset followed by primary relation"); | |
1331 | } else { | |
1332 | errorCode.reset(); | |
1333 | } | |
1334 | RuleBasedCollator before3(UNICODE_STRING_SIMPLE("&[before 3]x<<<q<<s<p"), errorCode); | |
1335 | if(errorCode.isSuccess()) { | |
1336 | errln("should forbid before-3-reset followed by primary or secondary relation"); | |
1337 | } else { | |
1338 | errorCode.reset(); | |
1339 | } | |
1340 | } | |
4388f060 | 1341 | |
b75a7d8f A |
1342 | void CollationRegressionTest::compareArray(Collator &c, |
1343 | const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN], | |
1344 | int32_t testCount) | |
1345 | { | |
1346 | int32_t i; | |
1347 | Collator::EComparisonResult expectedResult = Collator::EQUAL; | |
1348 | ||
1349 | for (i = 0; i < testCount; i += 3) | |
1350 | { | |
1351 | UnicodeString source(tests[i]); | |
1352 | UnicodeString comparison(tests[i + 1]); | |
1353 | UnicodeString target(tests[i + 2]); | |
1354 | ||
1355 | if (comparison == "<") | |
1356 | { | |
1357 | expectedResult = Collator::LESS; | |
1358 | } | |
1359 | else if (comparison == ">") | |
1360 | { | |
1361 | expectedResult = Collator::GREATER; | |
1362 | } | |
1363 | else if (comparison == "=") | |
1364 | { | |
1365 | expectedResult = Collator::EQUAL; | |
1366 | } | |
1367 | else | |
1368 | { | |
1369 | UnicodeString bogus1("Bogus comparison string \""); | |
1370 | UnicodeString bogus2("\""); | |
1371 | errln(bogus1 + comparison + bogus2); | |
1372 | } | |
1373 | ||
1374 | Collator::EComparisonResult compareResult = c.compare(source, target); | |
1375 | ||
1376 | CollationKey sourceKey, targetKey; | |
1377 | UErrorCode status = U_ZERO_ERROR; | |
1378 | ||
1379 | c.getCollationKey(source, sourceKey, status); | |
1380 | ||
1381 | if (U_FAILURE(status)) | |
1382 | { | |
1383 | errln("Couldn't get collationKey for source"); | |
1384 | continue; | |
1385 | } | |
1386 | ||
1387 | c.getCollationKey(target, targetKey, status); | |
1388 | ||
1389 | if (U_FAILURE(status)) | |
1390 | { | |
1391 | errln("Couldn't get collationKey for target"); | |
1392 | continue; | |
1393 | } | |
1394 | ||
1395 | Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey); | |
1396 | ||
1397 | reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult ); | |
1398 | ||
1399 | } | |
1400 | } | |
1401 | ||
1402 | void CollationRegressionTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2) | |
1403 | { | |
1404 | int32_t c1, c2, count = 0; | |
1405 | UErrorCode status = U_ZERO_ERROR; | |
1406 | ||
1407 | do | |
1408 | { | |
1409 | c1 = i1.next(status); | |
1410 | c2 = i2.next(status); | |
1411 | ||
1412 | if (c1 != c2) | |
1413 | { | |
1414 | UnicodeString msg, msg1(" "); | |
729e4ab9 | 1415 | |
b75a7d8f A |
1416 | msg += msg1 + count; |
1417 | msg += ": strength(0x"; | |
1418 | appendHex(c1, 8, msg); | |
1419 | msg += ") != strength(0x"; | |
1420 | appendHex(c2, 8, msg); | |
1421 | msg += ")"; | |
1422 | ||
1423 | errln(msg); | |
1424 | break; | |
1425 | } | |
1426 | ||
1427 | count += 1; | |
1428 | } | |
1429 | while (c1 != CollationElementIterator::NULLORDER); | |
1430 | } | |
1431 | ||
1432 | void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /* par */) | |
1433 | { | |
1434 | if (exec) | |
1435 | { | |
1436 | logln("Collation Regression Tests: "); | |
1437 | } | |
1438 | ||
57a6839d A |
1439 | if(en_us == NULL) { |
1440 | dataerrln("Class collator not instantiated"); | |
1441 | name = ""; | |
1442 | return; | |
b75a7d8f | 1443 | } |
57a6839d A |
1444 | TESTCASE_AUTO_BEGIN; |
1445 | TESTCASE_AUTO(Test4048446); | |
1446 | TESTCASE_AUTO(Test4051866); | |
1447 | TESTCASE_AUTO(Test4053636); | |
1448 | TESTCASE_AUTO(Test4054238); | |
1449 | TESTCASE_AUTO(Test4054734); | |
1450 | TESTCASE_AUTO(Test4054736); | |
1451 | TESTCASE_AUTO(Test4058613); | |
1452 | TESTCASE_AUTO(Test4059820); | |
1453 | TESTCASE_AUTO(Test4060154); | |
1454 | TESTCASE_AUTO(Test4062418); | |
1455 | TESTCASE_AUTO(Test4065540); | |
1456 | TESTCASE_AUTO(Test4066189); | |
1457 | TESTCASE_AUTO(Test4066696); | |
1458 | TESTCASE_AUTO(Test4076676); | |
1459 | TESTCASE_AUTO(Test4078588); | |
1460 | TESTCASE_AUTO(Test4079231); | |
1461 | TESTCASE_AUTO(Test4081866); | |
1462 | TESTCASE_AUTO(Test4087241); | |
1463 | TESTCASE_AUTO(Test4087243); | |
1464 | TESTCASE_AUTO(Test4092260); | |
1465 | TESTCASE_AUTO(Test4095316); | |
1466 | TESTCASE_AUTO(Test4101940); | |
1467 | TESTCASE_AUTO(Test4103436); | |
1468 | TESTCASE_AUTO(Test4114076); | |
1469 | TESTCASE_AUTO(Test4114077); | |
1470 | TESTCASE_AUTO(Test4124632); | |
1471 | TESTCASE_AUTO(Test4132736); | |
1472 | TESTCASE_AUTO(Test4133509); | |
1473 | TESTCASE_AUTO(Test4139572); | |
1474 | TESTCASE_AUTO(Test4141640); | |
1475 | TESTCASE_AUTO(Test4146160); | |
1476 | TESTCASE_AUTO(Test4179216); | |
1477 | TESTCASE_AUTO(TestT7189); | |
1478 | TESTCASE_AUTO(TestCaseFirstCompression); | |
1479 | TESTCASE_AUTO(TestTrailingComment); | |
1480 | TESTCASE_AUTO(TestBeforeWithTooStrongAfter); | |
1481 | TESTCASE_AUTO_END; | |
b75a7d8f A |
1482 | } |
1483 | ||
1484 | #endif /* #if !UCONFIG_NO_COLLATION */ |