]>
Commit | Line | Data |
---|---|---|
b75a7d8f | 1 | /******************************************************************** |
729e4ab9 | 2 | * COPYRIGHT: |
4388f060 | 3 | * Copyright (c) 1997-2011, International Business Machines Corporation and |
b75a7d8f A |
4 | * others. All Rights Reserved. |
5 | ********************************************************************/ | |
6 | ||
7 | #include "unicode/utypes.h" | |
8 | ||
9 | #if !UCONFIG_NO_COLLATION | |
10 | ||
11 | #include "unicode/coll.h" | |
12 | #include "unicode/tblcoll.h" | |
13 | #include "unicode/unistr.h" | |
14 | #include "unicode/sortkey.h" | |
15 | #include "regcoll.h" | |
16 | #include "sfwdchit.h" | |
729e4ab9 A |
17 | #include "testutil.h" |
18 | #include "cmemory.h" | |
b75a7d8f A |
19 | |
// Element count of a true array (not a pointer), as a signed 32-bit value.
// The argument is parenthesized so an expression argument is measured as a whole.
#define ARRAY_LENGTH(array) ((int32_t)(sizeof(array) / sizeof((array)[0])))
21 | ||
22 | CollationRegressionTest::CollationRegressionTest() | |
23 | { | |
24 | UErrorCode status = U_ZERO_ERROR; | |
25 | ||
26 | en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status); | |
27 | if(U_FAILURE(status)) { | |
28 | delete en_us; | |
29 | en_us = 0; | |
729e4ab9 | 30 | errcheckln(status, "Collator creation failed with %s", u_errorName(status)); |
b75a7d8f A |
31 | return; |
32 | } | |
33 | } | |
34 | ||
35 | CollationRegressionTest::~CollationRegressionTest() | |
36 | { | |
37 | delete en_us; | |
38 | } | |
39 | ||
40 | ||
41 | // @bug 4048446 | |
42 | // | |
43 | // CollationElementIterator.reset() doesn't work | |
44 | // | |
45 | void CollationRegressionTest::Test4048446(/* char* par */) | |
46 | { | |
47 | const UnicodeString test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?"; | |
48 | const UnicodeString test2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?"; | |
49 | CollationElementIterator *i1 = en_us->createCollationElementIterator(test1); | |
50 | CollationElementIterator *i2 = en_us->createCollationElementIterator(test1); | |
51 | UErrorCode status = U_ZERO_ERROR; | |
52 | ||
53 | if (i1 == NULL|| i2 == NULL) | |
54 | { | |
55 | errln("Could not create CollationElementIterator's"); | |
56 | delete i1; | |
57 | delete i2; | |
58 | return; | |
59 | } | |
60 | ||
61 | while (i1->next(status) != CollationElementIterator::NULLORDER) | |
62 | { | |
63 | if (U_FAILURE(status)) | |
64 | { | |
65 | errln("error calling next()"); | |
66 | ||
67 | delete i1; | |
68 | delete i2; | |
69 | return; | |
70 | } | |
71 | } | |
72 | ||
73 | i1->reset(); | |
74 | ||
75 | assertEqual(*i1, *i2); | |
76 | ||
77 | delete i1; | |
78 | delete i2; | |
79 | } | |
80 | ||
81 | // @bug 4051866 | |
82 | // | |
83 | // Collator -> rules -> Collator round-trip broken for expanding characters | |
84 | // | |
85 | void CollationRegressionTest::Test4051866(/* char* par */) | |
86 | { | |
87 | /* | |
88 | RuleBasedCollator c1 = new RuleBasedCollator("< o " | |
89 | +"& oe ,o\u3080" | |
90 | +"& oe ,\u1530 ,O" | |
91 | +"& OE ,O\u3080" | |
92 | +"& OE ,\u1520" | |
93 | +"< p ,P"); | |
94 | */ | |
95 | ||
96 | UnicodeString rules; | |
97 | UErrorCode status = U_ZERO_ERROR; | |
98 | ||
99 | rules += "< o "; | |
100 | rules += "& oe ,o"; | |
101 | rules += (UChar)0x3080; | |
102 | rules += "& oe ,"; | |
103 | rules += (UChar)0x1530; | |
104 | rules += " ,O"; | |
105 | rules += "& OE ,O"; | |
106 | rules += (UChar)0x3080; | |
107 | rules += "& OE ,"; | |
108 | rules += (UChar)0x1520; | |
109 | rules += "< p ,P"; | |
110 | ||
111 | // Build a collator containing expanding characters | |
112 | RuleBasedCollator *c1 = new RuleBasedCollator(rules, status); | |
113 | ||
114 | // Build another using the rules from the first | |
115 | RuleBasedCollator *c2 = new RuleBasedCollator(c1->getRules(), status); | |
116 | ||
117 | // Make sure they're the same | |
118 | if (!(c1->getRules() == c2->getRules())) | |
119 | { | |
120 | errln("Rules are not equal"); | |
121 | } | |
122 | ||
123 | delete c2; | |
124 | delete c1; | |
125 | } | |
126 | ||
127 | // @bug 4053636 | |
128 | // | |
129 | // Collator thinks "black-bird" == "black" | |
130 | // | |
131 | void CollationRegressionTest::Test4053636(/* char* par */) | |
132 | { | |
133 | if (en_us->equals("black_bird", "black")) | |
134 | { | |
135 | errln("black-bird == black"); | |
136 | } | |
137 | } | |
138 | ||
139 | // @bug 4054238 | |
140 | // | |
141 | // CollationElementIterator will not work correctly if the associated | |
142 | // Collator object's mode is changed | |
143 | // | |
144 | void CollationRegressionTest::Test4054238(/* char* par */) | |
145 | { | |
146 | const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0}; | |
147 | const UnicodeString test3(chars3); | |
148 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
149 | ||
150 | // NOTE: The Java code uses en_us to create the CollationElementIterators | |
151 | // but I'm pretty sure that's wrong, so I've changed this to use c. | |
152 | UErrorCode status = U_ZERO_ERROR; | |
153 | c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
154 | CollationElementIterator *i1 = c->createCollationElementIterator(test3); | |
155 | delete i1; | |
156 | delete c; | |
157 | } | |
158 | ||
159 | // @bug 4054734 | |
160 | // | |
161 | // Collator::IDENTICAL documented but not implemented | |
162 | // | |
163 | void CollationRegressionTest::Test4054734(/* char* par */) | |
164 | { | |
165 | /* | |
166 | Here's the original Java: | |
167 | ||
168 | String[] decomp = { | |
169 | "\u0001", "<", "\u0002", | |
170 | "\u0001", "=", "\u0001", | |
171 | "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise | |
172 | "\u00C0", "=", "A\u0300" // Decomp should make these equal | |
173 | }; | |
729e4ab9 | 174 | |
b75a7d8f A |
175 | String[] nodecomp = { |
176 | "\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave | |
177 | }; | |
178 | */ | |
179 | ||
180 | static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
181 | { | |
182 | {0x0001, 0}, {0x3c, 0}, {0x0002, 0}, | |
183 | {0x0001, 0}, {0x3d, 0}, {0x0001, 0}, | |
184 | {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0}, | |
185 | {0x00c0, 0}, {0x3d, 0}, {0x41, 0x0300, 0} | |
186 | }; | |
187 | ||
729e4ab9 | 188 | |
b75a7d8f A |
189 | UErrorCode status = U_ZERO_ERROR; |
190 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
191 | ||
192 | c->setStrength(Collator::IDENTICAL); | |
193 | ||
194 | c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
195 | compareArray(*c, decomp, ARRAY_LENGTH(decomp)); | |
196 | ||
197 | delete c; | |
198 | } | |
199 | ||
200 | // @bug 4054736 | |
201 | // | |
202 | // Full Decomposition mode not implemented | |
203 | // | |
204 | void CollationRegressionTest::Test4054736(/* char* par */) | |
205 | { | |
206 | UErrorCode status = U_ZERO_ERROR; | |
207 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
208 | ||
209 | c->setStrength(Collator::SECONDARY); | |
210 | c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
211 | ||
212 | static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
213 | { | |
214 | {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC} // Alef-Lamed vs. Alef, Lamed | |
215 | }; | |
216 | ||
217 | compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
218 | ||
219 | delete c; | |
220 | } | |
221 | ||
222 | // @bug 4058613 | |
223 | // | |
729e4ab9 | 224 | // Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean |
b75a7d8f A |
225 | // |
226 | void CollationRegressionTest::Test4058613(/* char* par */) | |
227 | { | |
228 | // Creating a default collator doesn't work when Korean is the default | |
229 | // locale | |
729e4ab9 | 230 | |
b75a7d8f A |
231 | Locale oldDefault = Locale::getDefault(); |
232 | UErrorCode status = U_ZERO_ERROR; | |
729e4ab9 | 233 | |
b75a7d8f A |
234 | Locale::setDefault(Locale::getKorean(), status); |
235 | ||
236 | if (U_FAILURE(status)) | |
237 | { | |
238 | errln("Could not set default locale to Locale::KOREAN"); | |
239 | return; | |
240 | } | |
241 | ||
242 | Collator *c = NULL; | |
729e4ab9 | 243 | |
b75a7d8f A |
244 | c = Collator::createInstance("en_US", status); |
245 | ||
246 | if (c == NULL || U_FAILURE(status)) | |
247 | { | |
248 | errln("Could not create a Korean collator"); | |
249 | Locale::setDefault(oldDefault, status); | |
250 | delete c; | |
251 | return; | |
252 | } | |
729e4ab9 | 253 | |
b75a7d8f A |
254 | // Since the fix to this bug was to turn off decomposition for Korean collators, |
255 | // ensure that's what we got | |
256 | if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF) | |
257 | { | |
258 | errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator"); | |
259 | } | |
260 | ||
261 | delete c; | |
262 | ||
263 | Locale::setDefault(oldDefault, status); | |
264 | } | |
265 | ||
266 | // @bug 4059820 | |
267 | // | |
268 | // RuleBasedCollator.getRules does not return the exact pattern as input | |
269 | // for expanding character sequences | |
270 | // | |
271 | void CollationRegressionTest::Test4059820(/* char* par */) | |
272 | { | |
273 | UErrorCode status = U_ZERO_ERROR; | |
274 | ||
275 | RuleBasedCollator *c = NULL; | |
276 | UnicodeString rules = "< a < b , c/a < d < z"; | |
729e4ab9 | 277 | |
b75a7d8f A |
278 | c = new RuleBasedCollator(rules, status); |
279 | ||
280 | if (c == NULL || U_FAILURE(status)) | |
281 | { | |
282 | errln("Failure building a collator."); | |
283 | delete c; | |
284 | return; | |
285 | } | |
286 | ||
287 | if ( c->getRules().indexOf("c/a") == -1) | |
288 | { | |
289 | errln("returned rules do not contain 'c/a'"); | |
290 | } | |
291 | ||
292 | delete c; | |
293 | } | |
294 | ||
295 | // @bug 4060154 | |
296 | // | |
297 | // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I" | |
298 | // | |
299 | void CollationRegressionTest::Test4060154(/* char* par */) | |
300 | { | |
301 | UErrorCode status = U_ZERO_ERROR; | |
302 | UnicodeString rules; | |
303 | ||
304 | rules += "< g, G < h, H < i, I < j, J"; | |
305 | rules += " & H < "; | |
306 | rules += (UChar)0x0131; | |
307 | rules += ", "; | |
308 | rules += (UChar)0x0130; | |
309 | rules += ", i, I"; | |
310 | ||
311 | RuleBasedCollator *c = NULL; | |
729e4ab9 | 312 | |
b75a7d8f A |
313 | c = new RuleBasedCollator(rules, status); |
314 | ||
315 | if (c == NULL || U_FAILURE(status)) | |
316 | { | |
317 | errln("failure building collator."); | |
318 | delete c; | |
319 | return; | |
320 | } | |
321 | ||
322 | c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
323 | ||
324 | /* | |
325 | String[] tertiary = { | |
326 | "A", "<", "B", | |
327 | "H", "<", "\u0131", | |
328 | "H", "<", "I", | |
329 | "\u0131", "<", "\u0130", | |
330 | "\u0130", "<", "i", | |
331 | "\u0130", ">", "H", | |
332 | }; | |
333 | */ | |
334 | ||
335 | static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
336 | { | |
337 | {0x41, 0}, {0x3c, 0}, {0x42, 0}, | |
338 | {0x48, 0}, {0x3c, 0}, {0x0131, 0}, | |
339 | {0x48, 0}, {0x3c, 0}, {0x49, 0}, | |
340 | {0x0131, 0}, {0x3c, 0}, {0x0130, 0}, | |
341 | {0x0130, 0}, {0x3c, 0}, {0x69, 0}, | |
342 | {0x0130, 0}, {0x3e, 0}, {0x48, 0} | |
343 | }; | |
344 | ||
345 | c->setStrength(Collator::TERTIARY); | |
346 | compareArray(*c, tertiary, ARRAY_LENGTH(tertiary)); | |
347 | ||
348 | /* | |
349 | String[] secondary = { | |
350 | "H", "<", "I", | |
351 | "\u0131", "=", "\u0130", | |
352 | }; | |
353 | */ | |
354 | static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
355 | { | |
356 | {0x48, 0}, {0x3c, 0}, {0x49, 0}, | |
357 | {0x0131, 0}, {0x3d, 0}, {0x0130, 0} | |
358 | }; | |
359 | ||
360 | c->setStrength(Collator::PRIMARY); | |
361 | compareArray(*c, secondary, ARRAY_LENGTH(secondary)); | |
362 | ||
363 | delete c; | |
73c04bcf | 364 | } |
b75a7d8f A |
365 | |
366 | // @bug 4062418 | |
367 | // | |
368 | // Secondary/Tertiary comparison incorrect in French Secondary | |
369 | // | |
370 | void CollationRegressionTest::Test4062418(/* char* par */) | |
371 | { | |
372 | UErrorCode status = U_ZERO_ERROR; | |
373 | ||
374 | RuleBasedCollator *c = NULL; | |
729e4ab9 A |
375 | |
376 | c = (RuleBasedCollator *) Collator::createInstance(Locale::getCanadaFrench(), status); | |
b75a7d8f A |
377 | |
378 | if (c == NULL || U_FAILURE(status)) | |
379 | { | |
729e4ab9 | 380 | errln("Failed to create collator for Locale::getCanadaFrench()"); |
b75a7d8f A |
381 | delete c; |
382 | return; | |
383 | } | |
384 | ||
385 | c->setStrength(Collator::SECONDARY); | |
386 | ||
387 | /* | |
388 | String[] tests = { | |
389 | "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater | |
390 | }; | |
391 | */ | |
392 | static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
393 | { | |
394 | {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0} | |
395 | }; | |
396 | ||
397 | compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
398 | ||
399 | delete c; | |
400 | } | |
401 | ||
402 | // @bug 4065540 | |
403 | // | |
404 | // Collator::compare() method broken if either string contains spaces | |
405 | // | |
406 | void CollationRegressionTest::Test4065540(/* char* par */) | |
407 | { | |
408 | if (en_us->compare("abcd e", "abcd f") == 0) | |
409 | { | |
410 | errln("'abcd e' == 'abcd f'"); | |
411 | } | |
412 | } | |
413 | ||
414 | // @bug 4066189 | |
415 | // | |
416 | // Unicode characters need to be recursively decomposed to get the | |
417 | // correct result. For example, | |
418 | // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300. | |
419 | // | |
420 | void CollationRegressionTest::Test4066189(/* char* par */) | |
421 | { | |
422 | static const UChar chars1[] = {0x1EB1, 0}; | |
423 | static const UChar chars2[] = {0x61, 0x0306, 0x0300, 0}; | |
424 | const UnicodeString test1(chars1); | |
425 | const UnicodeString test2(chars2); | |
426 | UErrorCode status = U_ZERO_ERROR; | |
427 | ||
428 | // NOTE: The java code used en_us to create the | |
429 | // CollationElementIterator's. I'm pretty sure that | |
430 | // was wrong, so I've change the code to use c1 and c2 | |
431 | RuleBasedCollator *c1 = (RuleBasedCollator *) en_us->clone(); | |
432 | c1->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
433 | CollationElementIterator *i1 = c1->createCollationElementIterator(test1); | |
434 | ||
435 | RuleBasedCollator *c2 = (RuleBasedCollator *) en_us->clone(); | |
436 | c2->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); | |
437 | CollationElementIterator *i2 = c2->createCollationElementIterator(test2); | |
438 | ||
439 | assertEqual(*i1, *i2); | |
440 | ||
441 | delete i2; | |
442 | delete c2; | |
443 | delete i1; | |
444 | delete c1; | |
445 | } | |
446 | ||
447 | // @bug 4066696 | |
448 | // | |
449 | // French secondary collation checking at the end of compare iteration fails | |
450 | // | |
451 | void CollationRegressionTest::Test4066696(/* char* par */) | |
452 | { | |
453 | UErrorCode status = U_ZERO_ERROR; | |
454 | RuleBasedCollator *c = NULL; | |
729e4ab9 A |
455 | |
456 | c = (RuleBasedCollator *)Collator::createInstance(Locale::getCanadaFrench(), status); | |
b75a7d8f A |
457 | |
458 | if (c == NULL || U_FAILURE(status)) | |
459 | { | |
729e4ab9 | 460 | errln("Failure creating collator for Locale::getCanadaFrench()"); |
b75a7d8f A |
461 | delete c; |
462 | return; | |
463 | } | |
464 | ||
465 | c->setStrength(Collator::SECONDARY); | |
466 | ||
467 | /* | |
468 | String[] tests = { | |
469 | "\u00e0", "<", "\u01fa", // a-grave < A-ring-acute | |
470 | }; | |
471 | ||
472 | should be: | |
473 | ||
474 | String[] tests = { | |
475 | "\u00e0", ">", "\u01fa", // a-grave < A-ring-acute | |
476 | }; | |
477 | ||
478 | */ | |
479 | ||
480 | static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
481 | { | |
482 | {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0} | |
483 | }; | |
484 | ||
485 | compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
486 | ||
487 | delete c; | |
488 | } | |
489 | ||
490 | // @bug 4076676 | |
491 | // | |
492 | // Bad canonicalization of same-class combining characters | |
493 | // | |
494 | void CollationRegressionTest::Test4076676(/* char* par */) | |
495 | { | |
496 | // These combining characters are all in the same class, so they should not | |
497 | // be reordered, and they should compare as unequal. | |
498 | static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0}; | |
499 | static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0}; | |
500 | ||
501 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
502 | c->setStrength(Collator::TERTIARY); | |
503 | ||
504 | if (c->compare(s1,s2) == 0) | |
505 | { | |
506 | errln("Same-class combining chars were reordered"); | |
507 | } | |
508 | ||
509 | delete c; | |
510 | } | |
511 | ||
512 | // @bug 4079231 | |
513 | // | |
514 | // RuleBasedCollator::operator==(NULL) throws NullPointerException | |
515 | // | |
516 | void CollationRegressionTest::Test4079231(/* char* par */) | |
517 | { | |
518 | // I don't think there's any way to write this test | |
519 | // in C++. The following is equivalent to the Java, | |
520 | // but doesn't compile 'cause NULL can't be converted | |
521 | // to Collator& | |
522 | // | |
523 | // if (en_us->operator==(NULL)) | |
524 | // { | |
525 | // errln("en_us->operator==(NULL) returned TRUE"); | |
526 | // } | |
527 | ||
528 | /* | |
529 | try { | |
530 | if (en_us->equals(null)) { | |
531 | errln("en_us->equals(null) returned true"); | |
532 | } | |
533 | } | |
534 | catch (Exception e) { | |
535 | errln("en_us->equals(null) threw " + e.toString()); | |
536 | } | |
537 | */ | |
538 | } | |
539 | ||
540 | // @bug 4078588 | |
541 | // | |
542 | // RuleBasedCollator breaks on "< a < bb" rule | |
543 | // | |
544 | void CollationRegressionTest::Test4078588(/* char *par */) | |
545 | { | |
546 | UErrorCode status = U_ZERO_ERROR; | |
547 | RuleBasedCollator *rbc = new RuleBasedCollator((UnicodeString)"< a < bb", status); | |
548 | ||
549 | if (rbc == NULL || U_FAILURE(status)) | |
550 | { | |
551 | errln("Failed to create RuleBasedCollator."); | |
552 | delete rbc; | |
553 | return; | |
554 | } | |
555 | ||
556 | Collator::EComparisonResult result = rbc->compare("a","bb"); | |
557 | ||
558 | if (result != Collator::LESS) | |
559 | { | |
560 | errln((UnicodeString)"Compare(a,bb) returned " + (int)result | |
561 | + (UnicodeString)"; expected -1"); | |
562 | } | |
563 | ||
564 | delete rbc; | |
565 | } | |
566 | ||
567 | // @bug 4081866 | |
568 | // | |
569 | // Combining characters in different classes not reordered properly. | |
570 | // | |
571 | void CollationRegressionTest::Test4081866(/* char* par */) | |
572 | { | |
573 | // These combining characters are all in different classes, | |
574 | // so they should be reordered and the strings should compare as equal. | |
575 | static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0}; | |
576 | static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0}; | |
577 | ||
578 | UErrorCode status = U_ZERO_ERROR; | |
579 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
580 | c->setStrength(Collator::TERTIARY); | |
729e4ab9 | 581 | |
b75a7d8f A |
582 | // Now that the default collators are set to NO_DECOMPOSITION |
583 | // (as a result of fixing bug 4114077), we must set it explicitly | |
584 | // when we're testing reordering behavior. -- lwerner, 5/5/98 | |
585 | c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
586 | ||
587 | if (c->compare(s1,s2) != 0) | |
588 | { | |
589 | errln("Combining chars were not reordered"); | |
590 | } | |
591 | ||
592 | delete c; | |
593 | } | |
594 | ||
595 | // @bug 4087241 | |
596 | // | |
597 | // string comparison errors in Scandinavian collators | |
598 | // | |
599 | void CollationRegressionTest::Test4087241(/* char* par */) | |
600 | { | |
601 | UErrorCode status = U_ZERO_ERROR; | |
602 | Locale da_DK("da", "DK"); | |
603 | RuleBasedCollator *c = NULL; | |
729e4ab9 | 604 | |
b75a7d8f A |
605 | c = (RuleBasedCollator *) Collator::createInstance(da_DK, status); |
606 | ||
607 | if (c == NULL || U_FAILURE(status)) | |
608 | { | |
609 | errln("Failed to create collator for da_DK locale"); | |
610 | delete c; | |
611 | return; | |
612 | } | |
613 | ||
614 | c->setStrength(Collator::SECONDARY); | |
615 | ||
616 | static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
617 | { | |
618 | {0x7a, 0}, {0x3c, 0}, {0x00E6, 0}, // z < ae | |
619 | {0x61, 0x0308, 0}, {0x3c, 0}, {0x61, 0x030A, 0}, // a-unlaut < a-ring | |
620 | {0x59, 0}, {0x3c, 0}, {0x75, 0x0308, 0}, // Y < u-umlaut | |
621 | }; | |
622 | ||
623 | compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
624 | ||
625 | delete c; | |
626 | } | |
627 | ||
628 | // @bug 4087243 | |
629 | // | |
630 | // CollationKey takes ignorable strings into account when it shouldn't | |
631 | // | |
632 | void CollationRegressionTest::Test4087243(/* char* par */) | |
633 | { | |
634 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
635 | c->setStrength(Collator::TERTIARY); | |
636 | ||
637 | static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
638 | { | |
639 | {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0} // 1 2 3 = 1 2 3 ctrl-A | |
640 | }; | |
641 | ||
642 | compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
643 | ||
644 | delete c; | |
645 | } | |
646 | ||
647 | // @bug 4092260 | |
648 | // | |
649 | // Mu/micro conflict | |
650 | // Micro symbol and greek lowercase letter Mu should sort identically | |
651 | // | |
652 | void CollationRegressionTest::Test4092260(/* char* par */) | |
653 | { | |
654 | UErrorCode status = U_ZERO_ERROR; | |
655 | Locale el("el", ""); | |
656 | Collator *c = NULL; | |
729e4ab9 | 657 | |
b75a7d8f A |
658 | c = Collator::createInstance(el, status); |
659 | ||
660 | if (c == NULL || U_FAILURE(status)) | |
661 | { | |
662 | errln("Failed to create collator for el locale."); | |
663 | delete c; | |
664 | return; | |
665 | } | |
666 | ||
667 | // These now have tertiary differences in UCA | |
668 | c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status); | |
669 | ||
670 | static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
671 | { | |
672 | {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0} | |
673 | }; | |
674 | ||
675 | compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
676 | ||
677 | delete c; | |
678 | } | |
679 | ||
680 | // @bug 4095316 | |
681 | // | |
682 | void CollationRegressionTest::Test4095316(/* char* par */) | |
683 | { | |
684 | UErrorCode status = U_ZERO_ERROR; | |
685 | Locale el_GR("el", "GR"); | |
686 | Collator *c = Collator::createInstance(el_GR, status); | |
687 | ||
688 | if (c == NULL || U_FAILURE(status)) | |
689 | { | |
690 | errln("Failed to create collator for el_GR locale"); | |
691 | delete c; | |
692 | return; | |
693 | } | |
694 | // These now have tertiary differences in UCA | |
695 | //c->setStrength(Collator::TERTIARY); | |
696 | c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status); | |
697 | ||
698 | static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
699 | { | |
700 | {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0} | |
701 | }; | |
702 | ||
703 | compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
704 | ||
705 | delete c; | |
706 | } | |
707 | ||
708 | // @bug 4101940 | |
709 | // | |
710 | void CollationRegressionTest::Test4101940(/* char* par */) | |
711 | { | |
712 | UErrorCode status = U_ZERO_ERROR; | |
713 | RuleBasedCollator *c = NULL; | |
714 | UnicodeString rules = "< a < b"; | |
715 | UnicodeString nothing = ""; | |
729e4ab9 | 716 | |
b75a7d8f A |
717 | c = new RuleBasedCollator(rules, status); |
718 | ||
719 | if (c == NULL || U_FAILURE(status)) | |
720 | { | |
721 | errln("Failed to create RuleBasedCollator"); | |
722 | delete c; | |
723 | return; | |
724 | } | |
725 | ||
726 | CollationElementIterator *i = c->createCollationElementIterator(nothing); | |
727 | i->reset(); | |
728 | ||
729 | if (i->next(status) != CollationElementIterator::NULLORDER) | |
730 | { | |
731 | errln("next did not return NULLORDER"); | |
732 | } | |
733 | ||
734 | delete i; | |
735 | delete c; | |
736 | } | |
737 | ||
738 | // @bug 4103436 | |
739 | // | |
740 | // Collator::compare not handling spaces properly | |
741 | // | |
742 | void CollationRegressionTest::Test4103436(/* char* par */) | |
743 | { | |
744 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
745 | c->setStrength(Collator::TERTIARY); | |
746 | ||
747 | static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
748 | { | |
749 | {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}, | |
750 | {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0} | |
751 | }; | |
752 | ||
753 | compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
754 | ||
755 | delete c; | |
756 | } | |
757 | ||
758 | // @bug 4114076 | |
759 | // | |
760 | // Collation not Unicode conformant with Hangul syllables | |
761 | // | |
762 | void CollationRegressionTest::Test4114076(/* char* par */) | |
763 | { | |
764 | UErrorCode status = U_ZERO_ERROR; | |
765 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
766 | c->setStrength(Collator::TERTIARY); | |
767 | ||
768 | // | |
769 | // With Canonical decomposition, Hangul syllables should get decomposed | |
770 | // into Jamo, but Jamo characters should not be decomposed into | |
771 | // conjoining Jamo | |
772 | // | |
773 | static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
774 | { | |
775 | {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0} | |
776 | }; | |
777 | ||
778 | c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
779 | compareArray(*c, test1, ARRAY_LENGTH(test1)); | |
780 | ||
781 | // From UTR #15: | |
782 | // *In earlier versions of Unicode, jamo characters like ksf | |
729e4ab9 | 783 | // had compatibility mappings to kf + sf. These mappings were |
b75a7d8f A |
784 | // removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.) |
785 | // That is, the following test is obsolete as of 2.1.9 | |
786 | ||
787 | //obsolete- // With Full decomposition, it should go all the way down to | |
788 | //obsolete- // conjoining Jamo characters. | |
789 | //obsolete- // | |
790 | //obsolete- static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
791 | //obsolete- { | |
792 | //obsolete- {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0} | |
793 | //obsolete- }; | |
794 | //obsolete- | |
795 | //obsolete- c->setDecomposition(Normalizer::DECOMP_COMPAT); | |
796 | //obsolete- compareArray(*c, test2, ARRAY_LENGTH(test2)); | |
797 | ||
798 | delete c; | |
799 | } | |
800 | ||
801 | ||
802 | // @bug 4124632 | |
803 | // | |
804 | // Collator::getCollationKey was hanging on certain character sequences | |
805 | // | |
806 | void CollationRegressionTest::Test4124632(/* char* par */) | |
807 | { | |
808 | UErrorCode status = U_ZERO_ERROR; | |
809 | Collator *coll = NULL; | |
729e4ab9 | 810 | |
b75a7d8f | 811 | coll = Collator::createInstance(Locale::getJapan(), status); |
729e4ab9 | 812 | |
b75a7d8f A |
813 | if (coll == NULL || U_FAILURE(status)) |
814 | { | |
815 | errln("Failed to create collator for Locale::JAPAN"); | |
816 | delete coll; | |
46f4442e | 817 | return; |
b75a7d8f A |
818 | } |
819 | ||
820 | static const UChar test[] = {0x41, 0x0308, 0x62, 0x63, 0}; | |
821 | CollationKey key; | |
729e4ab9 | 822 | |
b75a7d8f A |
823 | coll->getCollationKey(test, key, status); |
824 | ||
825 | if (key.isBogus() || U_FAILURE(status)) | |
826 | { | |
827 | errln("CollationKey creation failed."); | |
828 | } | |
829 | ||
830 | delete coll; | |
831 | } | |
832 | ||
833 | // @bug 4132736 | |
834 | // | |
835 | // sort order of french words with multiple accents has errors | |
836 | // | |
837 | void CollationRegressionTest::Test4132736(/* char* par */) | |
838 | { | |
839 | UErrorCode status = U_ZERO_ERROR; | |
840 | ||
841 | Collator *c = NULL; | |
729e4ab9 A |
842 | |
843 | c = Collator::createInstance(Locale::getCanadaFrench(), status); | |
b75a7d8f | 844 | c->setStrength(Collator::TERTIARY); |
729e4ab9 | 845 | |
b75a7d8f A |
846 | if (c == NULL || U_FAILURE(status)) |
847 | { | |
729e4ab9 | 848 | errln("Failed to create a collator for Locale::getCanadaFrench()"); |
b75a7d8f | 849 | delete c; |
46f4442e | 850 | return; |
b75a7d8f A |
851 | } |
852 | ||
853 | static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
854 | { | |
855 | {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0}, | |
856 | {0x65, 0x0300, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x0300, 0} | |
857 | }; | |
858 | ||
859 | compareArray(*c, test1, ARRAY_LENGTH(test1)); | |
860 | ||
861 | delete c; | |
862 | } | |
863 | ||
864 | // @bug 4133509 | |
865 | // | |
866 | // The sorting using java.text.CollationKey is not in the exact order | |
867 | // | |
868 | void CollationRegressionTest::Test4133509(/* char* par */) | |
869 | { | |
870 | static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
871 | { | |
872 | {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0}, | |
873 | {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0}, {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0}, | |
874 | {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0}, {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0} | |
875 | }; | |
876 | ||
877 | compareArray(*en_us, test1, ARRAY_LENGTH(test1)); | |
878 | } | |
879 | ||
880 | // @bug 4114077 | |
881 | // | |
729e4ab9 | 882 | // Collation with decomposition off doesn't work for Europe |
b75a7d8f A |
883 | // |
884 | void CollationRegressionTest::Test4114077(/* char* par */) | |
885 | { | |
886 | // Ensure that we get the same results with decomposition off | |
887 | // as we do with it on.... | |
729e4ab9 | 888 | |
b75a7d8f A |
889 | UErrorCode status = U_ZERO_ERROR; |
890 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
891 | c->setStrength(Collator::TERTIARY); | |
729e4ab9 | 892 | |
b75a7d8f A |
893 | static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = |
894 | { | |
895 | {0x00C0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}, // Should be equivalent | |
896 | {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0}, | |
897 | {0x0204, 0}, {0x3d, 0}, {0x45, 0x030F, 0}, | |
898 | {0x01fa, 0}, {0x3d, 0}, {0x41, 0x030a, 0x0301, 0}, // a-ring-acute -> a-ring, acute | |
899 | // -> a, ring, acute | |
900 | {0x41, 0x0300, 0x0316, 0}, {0x3c, 0}, {0x41, 0x0316, 0x0300, 0} // No reordering --> unequal | |
901 | }; | |
902 | ||
903 | c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); | |
904 | compareArray(*c, test1, ARRAY_LENGTH(test1)); | |
905 | ||
906 | static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
907 | { | |
908 | {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0} // Reordering --> equal | |
909 | }; | |
910 | ||
911 | c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
912 | compareArray(*c, test2, ARRAY_LENGTH(test2)); | |
913 | ||
914 | delete c; | |
915 | } | |
916 | ||
917 | // @bug 4141640 | |
918 | // | |
729e4ab9 | 919 | // Support for Swedish gone in 1.1.6 (Can't create Swedish collator) |
b75a7d8f A |
920 | // |
921 | void CollationRegressionTest::Test4141640(/* char* par */) | |
922 | { | |
923 | // | |
924 | // Rather than just creating a Swedish collator, we might as well | |
925 | // try to instantiate one for every locale available on the system | |
926 | // in order to prevent this sort of bug from cropping up in the future | |
927 | // | |
928 | UErrorCode status = U_ZERO_ERROR; | |
929 | int32_t i, localeCount; | |
930 | const Locale *locales = Locale::getAvailableLocales(localeCount); | |
729e4ab9 | 931 | |
b75a7d8f A |
932 | for (i = 0; i < localeCount; i += 1) |
933 | { | |
934 | Collator *c = NULL; | |
935 | ||
936 | status = U_ZERO_ERROR; | |
937 | c = Collator::createInstance(locales[i], status); | |
938 | ||
939 | if (c == NULL || U_FAILURE(status)) | |
940 | { | |
941 | UnicodeString msg, localeName; | |
942 | ||
943 | msg += "Could not create collator for locale "; | |
944 | msg += locales[i].getName(); | |
945 | ||
946 | errln(msg); | |
947 | } | |
948 | ||
949 | delete c; | |
950 | } | |
951 | } | |
952 | ||
953 | // @bug 4139572 | |
954 | // | |
729e4ab9 | 955 | // getCollationKey throws exception for spanish text |
b75a7d8f A |
956 | // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6 |
957 | // | |
958 | void CollationRegressionTest::Test4139572(/* char* par */) | |
959 | { | |
960 | // | |
961 | // Code pasted straight from the bug report | |
962 | // (and then translated to C++ ;-) | |
963 | // | |
964 | // create spanish locale and collator | |
965 | UErrorCode status = U_ZERO_ERROR; | |
966 | Locale l("es", "es"); | |
967 | Collator *col = NULL; | |
729e4ab9 | 968 | |
b75a7d8f A |
969 | col = Collator::createInstance(l, status); |
970 | ||
971 | if (col == NULL || U_FAILURE(status)) | |
972 | { | |
973 | errln("Failed to create a collator for es_es locale."); | |
974 | delete col; | |
975 | return; | |
976 | } | |
977 | ||
978 | CollationKey key; | |
979 | ||
980 | // this spanish phrase kills it! | |
981 | col->getCollationKey("Nombre De Objeto", key, status); | |
982 | ||
983 | if (key.isBogus() || U_FAILURE(status)) | |
984 | { | |
985 | errln("Error creating CollationKey for \"Nombre De Ojbeto\""); | |
986 | } | |
987 | ||
988 | delete col; | |
989 | } | |
990 | /* HSYS : RuleBasedCollator::compare() performance enhancements | |
991 | compare() does not create CollationElementIterator() anymore.*/ | |
729e4ab9 | 992 | |
b75a7d8f A |
993 | class My4146160Collator : public RuleBasedCollator |
994 | { | |
995 | public: | |
996 | My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status); | |
997 | ~My4146160Collator(); | |
998 | ||
999 | CollationElementIterator *createCollationElementIterator(const UnicodeString &text) const; | |
1000 | ||
1001 | CollationElementIterator *createCollationElementIterator(const CharacterIterator &text) const; | |
729e4ab9 | 1002 | |
b75a7d8f A |
1003 | static int32_t count; |
1004 | }; | |
1005 | ||
1006 | int32_t My4146160Collator::count = 0; | |
1007 | ||
1008 | My4146160Collator::My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status) | |
1009 | : RuleBasedCollator(rbc.getRules(), status) | |
1010 | { | |
1011 | } | |
1012 | ||
1013 | My4146160Collator::~My4146160Collator() | |
1014 | { | |
1015 | } | |
1016 | ||
1017 | CollationElementIterator *My4146160Collator::createCollationElementIterator(const UnicodeString &text) const | |
1018 | { | |
1019 | count += 1; | |
1020 | return RuleBasedCollator::createCollationElementIterator(text); | |
1021 | } | |
1022 | ||
1023 | CollationElementIterator *My4146160Collator::createCollationElementIterator(const CharacterIterator &text) const | |
1024 | { | |
1025 | count += 1; | |
1026 | return RuleBasedCollator::createCollationElementIterator(text); | |
1027 | } | |
1028 | ||
1029 | // @bug 4146160 | |
1030 | // | |
1031 | // RuleBasedCollator doesn't use createCollationElementIterator internally | |
1032 | // | |
1033 | void CollationRegressionTest::Test4146160(/* char* par */) | |
1034 | { | |
1035 | #if 0 | |
1036 | // | |
1037 | // Use a custom collator class whose createCollationElementIterator | |
1038 | // methods increment a count.... | |
1039 | // | |
1040 | UErrorCode status = U_ZERO_ERROR; | |
1041 | CollationKey key; | |
1042 | ||
1043 | My4146160Collator::count = 0; | |
1044 | My4146160Collator *mc = NULL; | |
729e4ab9 | 1045 | |
b75a7d8f A |
1046 | mc = new My4146160Collator(*en_us, status); |
1047 | ||
1048 | if (mc == NULL || U_FAILURE(status)) | |
1049 | { | |
1050 | errln("Failed to create a My4146160Collator."); | |
1051 | delete mc; | |
1052 | return; | |
1053 | } | |
1054 | ||
1055 | mc->getCollationKey("1", key, status); | |
1056 | ||
1057 | if (key.isBogus() || U_FAILURE(status)) | |
1058 | { | |
1059 | errln("Failure to get a CollationKey from a My4146160Collator."); | |
1060 | delete mc; | |
1061 | return; | |
1062 | } | |
1063 | ||
1064 | if (My4146160Collator::count < 1) | |
1065 | { | |
1066 | errln("My4146160Collator::createCollationElementIterator not called for getCollationKey"); | |
1067 | } | |
1068 | ||
1069 | My4146160Collator::count = 0; | |
1070 | mc->compare("1", "2"); | |
1071 | ||
1072 | if (My4146160Collator::count < 1) | |
1073 | { | |
1074 | errln("My4146160Collator::createtCollationElementIterator not called for compare"); | |
1075 | } | |
1076 | ||
1077 | delete mc; | |
1078 | #endif | |
1079 | } | |
729e4ab9 A |
1080 | |
1081 | // Ticket 7189 | |
1082 | // | |
1083 | // nextSortKeyPart incorrect for EO_S1 collation | |
1084 | static int32_t calcKeyIncremental(UCollator *coll, const UChar* text, int32_t len, uint8_t *keyBuf, int32_t /*keyBufLen*/, UErrorCode& status) { | |
1085 | UCharIterator uiter; | |
1086 | uint32_t state[2] = { 0, 0 }; | |
1087 | int32_t keyLen; | |
1088 | int32_t count = 8; | |
1089 | ||
1090 | uiter_setString(&uiter, text, len); | |
1091 | keyLen = 0; | |
1092 | while (TRUE) { | |
1093 | int32_t keyPartLen = ucol_nextSortKeyPart(coll, &uiter, state, &keyBuf[keyLen], count, &status); | |
1094 | if (U_FAILURE(status)) { | |
1095 | return -1; | |
1096 | } | |
1097 | if (keyPartLen == 0) { | |
1098 | break; | |
1099 | } | |
1100 | keyLen += keyPartLen; | |
1101 | } | |
1102 | return keyLen; | |
1103 | } | |
1104 | ||
1105 | void CollationRegressionTest::TestT7189() { | |
1106 | UErrorCode status = U_ZERO_ERROR; | |
1107 | UCollator *coll; | |
1108 | uint32_t i; | |
1109 | ||
1110 | static const UChar text1[][CollationRegressionTest::MAX_TOKEN_LEN] = { | |
1111 | // "Achter De Hoven" | |
1112 | { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 }, | |
1113 | // "ABC" | |
1114 | { 0x41, 0x42, 0x43, 0x00 }, | |
1115 | // "HELLO world!" | |
1116 | { 0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 } | |
1117 | }; | |
1118 | ||
1119 | static const UChar text2[][CollationRegressionTest::MAX_TOKEN_LEN] = { | |
1120 | // "Achter de Hoven" | |
1121 | { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 }, | |
1122 | // "abc" | |
1123 | { 0x61, 0x62, 0x63, 0x00 }, | |
1124 | // "hello world!" | |
1125 | { 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 } | |
1126 | }; | |
1127 | ||
1128 | // Open the collator | |
1129 | coll = ucol_openFromShortString("EO_S1", FALSE, NULL, &status); | |
1130 | if (U_FAILURE(status)) { | |
1131 | errln("Failed to create a collator for short string EO_S1"); | |
1132 | return; | |
1133 | } | |
1134 | ||
1135 | for (i = 0; i < sizeof(text1) / (CollationRegressionTest::MAX_TOKEN_LEN * sizeof(UChar)); i++) { | |
1136 | uint8_t key1[100], key2[100]; | |
1137 | int32_t len1, len2; | |
1138 | ||
1139 | len1 = calcKeyIncremental(coll, text1[i], -1, key1, sizeof(key1), status); | |
1140 | if (U_FAILURE(status)) { | |
1141 | errln(UnicodeString("Failed to get a partial collation key for ") + text1[i]); | |
1142 | break; | |
1143 | } | |
1144 | len2 = calcKeyIncremental(coll, text2[i], -1, key2, sizeof(key2), status); | |
1145 | if (U_FAILURE(status)) { | |
1146 | errln(UnicodeString("Failed to get a partial collation key for ") + text2[i]); | |
1147 | break; | |
1148 | } | |
1149 | ||
1150 | if (len1 == len2 && uprv_memcmp(key1, key2, len1) == 0) { | |
1151 | errln(UnicodeString("Failed: Identical key\n") + " text1: " + text1[i] + "\n" + " text2: " + text2[i] + "\n" + " key : " + TestUtility::hex(key1, len1)); | |
1152 | } else { | |
1153 | logln(UnicodeString("Keys produced -\n") + " text1: " + text1[i] + "\n" + " key1 : " + TestUtility::hex(key1, len1) + "\n" + " text2: " + text2[i] + "\n" + " key2 : " | |
1154 | + TestUtility::hex(key2, len2)); | |
1155 | } | |
1156 | } | |
1157 | ucol_close(coll); | |
1158 | } | |
1159 | ||
4388f060 A |
1160 | void CollationRegressionTest::TestCaseFirstCompression() { |
1161 | RuleBasedCollator *col = (RuleBasedCollator *) en_us->clone(); | |
1162 | UErrorCode status = U_ZERO_ERROR; | |
1163 | ||
1164 | // default | |
1165 | caseFirstCompressionSub(col, "default"); | |
1166 | ||
1167 | // Upper first | |
1168 | col->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status); | |
1169 | if (U_FAILURE(status)) { | |
1170 | errln("Failed to set UCOL_UPPER_FIRST"); | |
1171 | return; | |
1172 | } | |
1173 | caseFirstCompressionSub(col, "upper first"); | |
1174 | ||
1175 | // Lower first | |
1176 | col->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status); | |
1177 | if (U_FAILURE(status)) { | |
1178 | errln("Failed to set UCOL_LOWER_FIRST"); | |
1179 | return; | |
1180 | } | |
1181 | caseFirstCompressionSub(col, "lower first"); | |
1182 | ||
1183 | delete col; | |
1184 | } | |
1185 | ||
1186 | void CollationRegressionTest::caseFirstCompressionSub(Collator *col, UnicodeString opt) { | |
1187 | const int32_t maxLength = 50; | |
1188 | ||
1189 | UChar str1[maxLength]; | |
1190 | UChar str2[maxLength]; | |
1191 | ||
1192 | CollationKey key1, key2; | |
1193 | ||
1194 | for (int32_t len = 1; len <= maxLength; len++) { | |
1195 | int32_t i = 0; | |
1196 | for (; i < len - 1; i++) { | |
1197 | str1[i] = str2[i] = (UChar)0x61; // 'a' | |
1198 | } | |
1199 | str1[i] = (UChar)0x41; // 'A' | |
1200 | str2[i] = (UChar)0x61; // 'a' | |
1201 | ||
1202 | UErrorCode status = U_ZERO_ERROR; | |
1203 | col->getCollationKey(str1, len, key1, status); | |
1204 | col->getCollationKey(str2, len, key2, status); | |
1205 | ||
1206 | UCollationResult cmpKey = key1.compareTo(key2, status); | |
1207 | UCollationResult cmpCol = col->compare(str1, len, str2, len, status); | |
1208 | ||
1209 | if (U_FAILURE(status)) { | |
1210 | errln("Error in caseFirstCompressionSub"); | |
1211 | } else if (cmpKey != cmpCol) { | |
1212 | errln((UnicodeString)"Inconsistent comparison(" + opt | |
1213 | + "): str1=" + UnicodeString(str1, len) + ", str2=" + UnicodeString(str2, len) | |
1214 | + ", cmpKey=" + cmpKey + ", cmpCol=" + cmpCol); | |
1215 | } | |
1216 | } | |
1217 | } | |
1218 | ||
1219 | ||
1220 | ||
b75a7d8f A |
1221 | void CollationRegressionTest::compareArray(Collator &c, |
1222 | const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN], | |
1223 | int32_t testCount) | |
1224 | { | |
1225 | int32_t i; | |
1226 | Collator::EComparisonResult expectedResult = Collator::EQUAL; | |
1227 | ||
1228 | for (i = 0; i < testCount; i += 3) | |
1229 | { | |
1230 | UnicodeString source(tests[i]); | |
1231 | UnicodeString comparison(tests[i + 1]); | |
1232 | UnicodeString target(tests[i + 2]); | |
1233 | ||
1234 | if (comparison == "<") | |
1235 | { | |
1236 | expectedResult = Collator::LESS; | |
1237 | } | |
1238 | else if (comparison == ">") | |
1239 | { | |
1240 | expectedResult = Collator::GREATER; | |
1241 | } | |
1242 | else if (comparison == "=") | |
1243 | { | |
1244 | expectedResult = Collator::EQUAL; | |
1245 | } | |
1246 | else | |
1247 | { | |
1248 | UnicodeString bogus1("Bogus comparison string \""); | |
1249 | UnicodeString bogus2("\""); | |
1250 | errln(bogus1 + comparison + bogus2); | |
1251 | } | |
1252 | ||
1253 | Collator::EComparisonResult compareResult = c.compare(source, target); | |
1254 | ||
1255 | CollationKey sourceKey, targetKey; | |
1256 | UErrorCode status = U_ZERO_ERROR; | |
1257 | ||
1258 | c.getCollationKey(source, sourceKey, status); | |
1259 | ||
1260 | if (U_FAILURE(status)) | |
1261 | { | |
1262 | errln("Couldn't get collationKey for source"); | |
1263 | continue; | |
1264 | } | |
1265 | ||
1266 | c.getCollationKey(target, targetKey, status); | |
1267 | ||
1268 | if (U_FAILURE(status)) | |
1269 | { | |
1270 | errln("Couldn't get collationKey for target"); | |
1271 | continue; | |
1272 | } | |
1273 | ||
1274 | Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey); | |
1275 | ||
1276 | reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult ); | |
1277 | ||
1278 | } | |
1279 | } | |
1280 | ||
1281 | void CollationRegressionTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2) | |
1282 | { | |
1283 | int32_t c1, c2, count = 0; | |
1284 | UErrorCode status = U_ZERO_ERROR; | |
1285 | ||
1286 | do | |
1287 | { | |
1288 | c1 = i1.next(status); | |
1289 | c2 = i2.next(status); | |
1290 | ||
1291 | if (c1 != c2) | |
1292 | { | |
1293 | UnicodeString msg, msg1(" "); | |
729e4ab9 | 1294 | |
b75a7d8f A |
1295 | msg += msg1 + count; |
1296 | msg += ": strength(0x"; | |
1297 | appendHex(c1, 8, msg); | |
1298 | msg += ") != strength(0x"; | |
1299 | appendHex(c2, 8, msg); | |
1300 | msg += ")"; | |
1301 | ||
1302 | errln(msg); | |
1303 | break; | |
1304 | } | |
1305 | ||
1306 | count += 1; | |
1307 | } | |
1308 | while (c1 != CollationElementIterator::NULLORDER); | |
1309 | } | |
1310 | ||
1311 | void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /* par */) | |
1312 | { | |
1313 | if (exec) | |
1314 | { | |
1315 | logln("Collation Regression Tests: "); | |
1316 | } | |
1317 | ||
1318 | if(en_us) { | |
1319 | switch (index) | |
1320 | { | |
1321 | case 0: name = "Test4048446"; if (exec) Test4048446(/* par */); break; | |
1322 | case 1: name = "Test4051866"; if (exec) Test4051866(/* par */); break; | |
1323 | case 2: name = "Test4053636"; if (exec) Test4053636(/* par */); break; | |
1324 | case 3: name = "Test4054238"; if (exec) Test4054238(/* par */); break; | |
1325 | case 4: name = "Test4054734"; if (exec) Test4054734(/* par */); break; | |
1326 | case 5: name = "Test4054736"; if (exec) Test4054736(/* par */); break; | |
1327 | case 6: name = "Test4058613"; if (exec) Test4058613(/* par */); break; | |
1328 | case 7: name = "Test4059820"; if (exec) Test4059820(/* par */); break; | |
1329 | case 8: name = "Test4060154"; if (exec) Test4060154(/* par */); break; | |
1330 | case 9: name = "Test4062418"; if (exec) Test4062418(/* par */); break; | |
1331 | case 10: name = "Test4065540"; if (exec) Test4065540(/* par */); break; | |
1332 | case 11: name = "Test4066189"; if (exec) Test4066189(/* par */); break; | |
1333 | case 12: name = "Test4066696"; if (exec) Test4066696(/* par */); break; | |
1334 | case 13: name = "Test4076676"; if (exec) Test4076676(/* par */); break; | |
1335 | case 14: name = "Test4078588"; if (exec) Test4078588(/* par */); break; | |
1336 | case 15: name = "Test4079231"; if (exec) Test4079231(/* par */); break; | |
1337 | case 16: name = "Test4081866"; if (exec) Test4081866(/* par */); break; | |
1338 | case 17: name = "Test4087241"; if (exec) Test4087241(/* par */); break; | |
1339 | case 18: name = "Test4087243"; if (exec) Test4087243(/* par */); break; | |
1340 | case 19: name = "Test4092260"; if (exec) Test4092260(/* par */); break; | |
1341 | case 20: name = "Test4095316"; if (exec) Test4095316(/* par */); break; | |
1342 | case 21: name = "Test4101940"; if (exec) Test4101940(/* par */); break; | |
1343 | case 22: name = "Test4103436"; if (exec) Test4103436(/* par */); break; | |
1344 | case 23: name = "Test4114076"; if (exec) Test4114076(/* par */); break; | |
1345 | case 24: name = "Test4114077"; if (exec) Test4114077(/* par */); break; | |
1346 | case 25: name = "Test4124632"; if (exec) Test4124632(/* par */); break; | |
1347 | case 26: name = "Test4132736"; if (exec) Test4132736(/* par */); break; | |
1348 | case 27: name = "Test4133509"; if (exec) Test4133509(/* par */); break; | |
1349 | case 28: name = "Test4139572"; if (exec) Test4139572(/* par */); break; | |
1350 | case 29: name = "Test4141640"; if (exec) Test4141640(/* par */); break; | |
1351 | case 30: name = "Test4146160"; if (exec) Test4146160(/* par */); break; | |
4388f060 A |
1352 | case 31: name = "TestT7189"; if (exec) TestT7189(); break; |
1353 | case 32: name = "TestCaseFirstCompression"; if (exec) TestCaseFirstCompression(); break; | |
b75a7d8f A |
1354 | default: name = ""; break; |
1355 | } | |
1356 | } else { | |
729e4ab9 | 1357 | dataerrln("Class collator not instantiated"); |
b75a7d8f A |
1358 | name = ""; |
1359 | } | |
1360 | } | |
1361 | ||
1362 | #endif /* #if !UCONFIG_NO_COLLATION */ |