]>
Commit | Line | Data |
---|---|---|
1 | /******************************************************************** | |
2 | * COPYRIGHT: | |
3 | * Copyright (c) 1997-2014, International Business Machines Corporation and | |
4 | * others. All Rights Reserved. | |
5 | ********************************************************************/ | |
6 | ||
7 | #include "unicode/utypes.h" | |
8 | ||
9 | #if !UCONFIG_NO_COLLATION | |
10 | ||
11 | #include "unicode/coll.h" | |
12 | #include "unicode/localpointer.h" | |
13 | #include "unicode/tblcoll.h" | |
14 | #include "unicode/unistr.h" | |
15 | #include "unicode/sortkey.h" | |
16 | #include "regcoll.h" | |
17 | #include "sfwdchit.h" | |
18 | #include "testutil.h" | |
19 | #include "cmemory.h" | |
20 | ||
// Element count of a true array (not a pointer), converted to int32_t.
#define ARRAY_LENGTH(array) ((int32_t)(sizeof(array) / sizeof((array)[0])))
22 | ||
23 | CollationRegressionTest::CollationRegressionTest() | |
24 | { | |
25 | UErrorCode status = U_ZERO_ERROR; | |
26 | ||
27 | en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status); | |
28 | if(U_FAILURE(status)) { | |
29 | delete en_us; | |
30 | en_us = 0; | |
31 | errcheckln(status, "Collator creation failed with %s", u_errorName(status)); | |
32 | return; | |
33 | } | |
34 | } | |
35 | ||
36 | CollationRegressionTest::~CollationRegressionTest() | |
37 | { | |
38 | delete en_us; | |
39 | } | |
40 | ||
41 | ||
42 | // @bug 4048446 | |
43 | // | |
44 | // CollationElementIterator.reset() doesn't work | |
45 | // | |
46 | void CollationRegressionTest::Test4048446(/* char* par */) | |
47 | { | |
48 | const UnicodeString test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?"; | |
49 | const UnicodeString test2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?"; | |
50 | CollationElementIterator *i1 = en_us->createCollationElementIterator(test1); | |
51 | CollationElementIterator *i2 = en_us->createCollationElementIterator(test1); | |
52 | UErrorCode status = U_ZERO_ERROR; | |
53 | ||
54 | if (i1 == NULL|| i2 == NULL) | |
55 | { | |
56 | errln("Could not create CollationElementIterator's"); | |
57 | delete i1; | |
58 | delete i2; | |
59 | return; | |
60 | } | |
61 | ||
62 | while (i1->next(status) != CollationElementIterator::NULLORDER) | |
63 | { | |
64 | if (U_FAILURE(status)) | |
65 | { | |
66 | errln("error calling next()"); | |
67 | ||
68 | delete i1; | |
69 | delete i2; | |
70 | return; | |
71 | } | |
72 | } | |
73 | ||
74 | i1->reset(); | |
75 | ||
76 | assertEqual(*i1, *i2); | |
77 | ||
78 | delete i1; | |
79 | delete i2; | |
80 | } | |
81 | ||
82 | // @bug 4051866 | |
83 | // | |
84 | // Collator -> rules -> Collator round-trip broken for expanding characters | |
85 | // | |
86 | void CollationRegressionTest::Test4051866(/* char* par */) | |
87 | { | |
88 | UnicodeString rules; | |
89 | UErrorCode status = U_ZERO_ERROR; | |
90 | ||
91 | rules += "&n < o "; | |
92 | rules += "& oe ,o"; | |
93 | rules += (UChar)0x3080; | |
94 | rules += "& oe ,"; | |
95 | rules += (UChar)0x1530; | |
96 | rules += " ,O"; | |
97 | rules += "& OE ,O"; | |
98 | rules += (UChar)0x3080; | |
99 | rules += "& OE ,"; | |
100 | rules += (UChar)0x1520; | |
101 | rules += "< p ,P"; | |
102 | ||
103 | // Build a collator containing expanding characters | |
104 | LocalPointer<RuleBasedCollator> c1(new RuleBasedCollator(rules, status)); | |
105 | ||
106 | // Build another using the rules from the first | |
107 | LocalPointer<RuleBasedCollator> c2(new RuleBasedCollator(c1->getRules(), status)); | |
108 | if (U_FAILURE(status)) { | |
109 | errln("RuleBasedCollator(rule string) failed - %s", u_errorName(status)); | |
110 | return; | |
111 | } | |
112 | ||
113 | // Make sure they're the same | |
114 | if (!(c1->getRules() == c2->getRules())) | |
115 | { | |
116 | errln("Rules are not equal"); | |
117 | } | |
118 | } | |
119 | ||
120 | // @bug 4053636 | |
121 | // | |
122 | // Collator thinks "black-bird" == "black" | |
123 | // | |
124 | void CollationRegressionTest::Test4053636(/* char* par */) | |
125 | { | |
126 | if (en_us->equals("black_bird", "black")) | |
127 | { | |
128 | errln("black-bird == black"); | |
129 | } | |
130 | } | |
131 | ||
132 | // @bug 4054238 | |
133 | // | |
134 | // CollationElementIterator will not work correctly if the associated | |
135 | // Collator object's mode is changed | |
136 | // | |
137 | void CollationRegressionTest::Test4054238(/* char* par */) | |
138 | { | |
139 | const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0}; | |
140 | const UnicodeString test3(chars3); | |
141 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
142 | ||
143 | // NOTE: The Java code uses en_us to create the CollationElementIterators | |
144 | // but I'm pretty sure that's wrong, so I've changed this to use c. | |
145 | UErrorCode status = U_ZERO_ERROR; | |
146 | c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
147 | CollationElementIterator *i1 = c->createCollationElementIterator(test3); | |
148 | delete i1; | |
149 | delete c; | |
150 | } | |
151 | ||
152 | // @bug 4054734 | |
153 | // | |
154 | // Collator::IDENTICAL documented but not implemented | |
155 | // | |
156 | void CollationRegressionTest::Test4054734(/* char* par */) | |
157 | { | |
158 | /* | |
159 | Here's the original Java: | |
160 | ||
161 | String[] decomp = { | |
162 | "\u0001", "<", "\u0002", | |
163 | "\u0001", "=", "\u0001", | |
164 | "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise | |
165 | "\u00C0", "=", "A\u0300" // Decomp should make these equal | |
166 | }; | |
167 | ||
168 | String[] nodecomp = { | |
169 | "\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave | |
170 | }; | |
171 | */ | |
172 | ||
173 | static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
174 | { | |
175 | {0x0001, 0}, {0x3c, 0}, {0x0002, 0}, | |
176 | {0x0001, 0}, {0x3d, 0}, {0x0001, 0}, | |
177 | {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0}, | |
178 | {0x00c0, 0}, {0x3d, 0}, {0x41, 0x0300, 0} | |
179 | }; | |
180 | ||
181 | ||
182 | UErrorCode status = U_ZERO_ERROR; | |
183 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
184 | ||
185 | c->setStrength(Collator::IDENTICAL); | |
186 | ||
187 | c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
188 | compareArray(*c, decomp, ARRAY_LENGTH(decomp)); | |
189 | ||
190 | delete c; | |
191 | } | |
192 | ||
193 | // @bug 4054736 | |
194 | // | |
195 | // Full Decomposition mode not implemented | |
196 | // | |
197 | void CollationRegressionTest::Test4054736(/* char* par */) | |
198 | { | |
199 | UErrorCode status = U_ZERO_ERROR; | |
200 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
201 | ||
202 | c->setStrength(Collator::SECONDARY); | |
203 | c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
204 | ||
205 | static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
206 | { | |
207 | {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC} // Alef-Lamed vs. Alef, Lamed | |
208 | }; | |
209 | ||
210 | compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
211 | ||
212 | delete c; | |
213 | } | |
214 | ||
215 | // @bug 4058613 | |
216 | // | |
217 | // Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean | |
218 | // | |
219 | void CollationRegressionTest::Test4058613(/* char* par */) | |
220 | { | |
221 | // Creating a default collator doesn't work when Korean is the default | |
222 | // locale | |
223 | ||
224 | Locale oldDefault = Locale::getDefault(); | |
225 | UErrorCode status = U_ZERO_ERROR; | |
226 | ||
227 | Locale::setDefault(Locale::getKorean(), status); | |
228 | ||
229 | if (U_FAILURE(status)) | |
230 | { | |
231 | errln("Could not set default locale to Locale::KOREAN"); | |
232 | return; | |
233 | } | |
234 | ||
235 | Collator *c = NULL; | |
236 | ||
237 | c = Collator::createInstance("en_US", status); | |
238 | ||
239 | if (c == NULL || U_FAILURE(status)) | |
240 | { | |
241 | errln("Could not create a Korean collator"); | |
242 | Locale::setDefault(oldDefault, status); | |
243 | delete c; | |
244 | return; | |
245 | } | |
246 | ||
247 | // Since the fix to this bug was to turn off decomposition for Korean collators, | |
248 | // ensure that's what we got | |
249 | if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF) | |
250 | { | |
251 | errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator"); | |
252 | } | |
253 | ||
254 | delete c; | |
255 | ||
256 | Locale::setDefault(oldDefault, status); | |
257 | } | |
258 | ||
259 | // @bug 4059820 | |
260 | // | |
261 | // RuleBasedCollator.getRules does not return the exact pattern as input | |
262 | // for expanding character sequences | |
263 | // | |
264 | void CollationRegressionTest::Test4059820(/* char* par */) | |
265 | { | |
266 | UErrorCode status = U_ZERO_ERROR; | |
267 | ||
268 | RuleBasedCollator *c = NULL; | |
269 | UnicodeString rules = "&9 < a < b , c/a < d < z"; | |
270 | ||
271 | c = new RuleBasedCollator(rules, status); | |
272 | ||
273 | if (c == NULL || U_FAILURE(status)) | |
274 | { | |
275 | errln("Failure building a collator."); | |
276 | delete c; | |
277 | return; | |
278 | } | |
279 | ||
280 | if ( c->getRules().indexOf("c/a") == -1) | |
281 | { | |
282 | errln("returned rules do not contain 'c/a'"); | |
283 | } | |
284 | ||
285 | delete c; | |
286 | } | |
287 | ||
288 | // @bug 4060154 | |
289 | // | |
290 | // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I" | |
291 | // | |
292 | void CollationRegressionTest::Test4060154(/* char* par */) | |
293 | { | |
294 | UErrorCode status = U_ZERO_ERROR; | |
295 | UnicodeString rules; | |
296 | ||
297 | rules += "&f < g, G < h, H < i, I < j, J"; | |
298 | rules += " & H < "; | |
299 | rules += (UChar)0x0131; | |
300 | rules += ", "; | |
301 | rules += (UChar)0x0130; | |
302 | rules += ", i, I"; | |
303 | ||
304 | RuleBasedCollator *c = NULL; | |
305 | ||
306 | c = new RuleBasedCollator(rules, status); | |
307 | ||
308 | if (c == NULL || U_FAILURE(status)) | |
309 | { | |
310 | errln("failure building collator."); | |
311 | delete c; | |
312 | return; | |
313 | } | |
314 | ||
315 | c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
316 | ||
317 | /* | |
318 | String[] tertiary = { | |
319 | "A", "<", "B", | |
320 | "H", "<", "\u0131", | |
321 | "H", "<", "I", | |
322 | "\u0131", "<", "\u0130", | |
323 | "\u0130", "<", "i", | |
324 | "\u0130", ">", "H", | |
325 | }; | |
326 | */ | |
327 | ||
328 | static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
329 | { | |
330 | {0x41, 0}, {0x3c, 0}, {0x42, 0}, | |
331 | {0x48, 0}, {0x3c, 0}, {0x0131, 0}, | |
332 | {0x48, 0}, {0x3c, 0}, {0x49, 0}, | |
333 | {0x0131, 0}, {0x3c, 0}, {0x0130, 0}, | |
334 | {0x0130, 0}, {0x3c, 0}, {0x69, 0}, | |
335 | {0x0130, 0}, {0x3e, 0}, {0x48, 0} | |
336 | }; | |
337 | ||
338 | c->setStrength(Collator::TERTIARY); | |
339 | compareArray(*c, tertiary, ARRAY_LENGTH(tertiary)); | |
340 | ||
341 | /* | |
342 | String[] secondary = { | |
343 | "H", "<", "I", | |
344 | "\u0131", "=", "\u0130", | |
345 | }; | |
346 | */ | |
347 | static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
348 | { | |
349 | {0x48, 0}, {0x3c, 0}, {0x49, 0}, | |
350 | {0x0131, 0}, {0x3d, 0}, {0x0130, 0} | |
351 | }; | |
352 | ||
353 | c->setStrength(Collator::PRIMARY); | |
354 | compareArray(*c, secondary, ARRAY_LENGTH(secondary)); | |
355 | ||
356 | delete c; | |
357 | } | |
358 | ||
359 | // @bug 4062418 | |
360 | // | |
361 | // Secondary/Tertiary comparison incorrect in French Secondary | |
362 | // | |
363 | void CollationRegressionTest::Test4062418(/* char* par */) | |
364 | { | |
365 | UErrorCode status = U_ZERO_ERROR; | |
366 | ||
367 | RuleBasedCollator *c = NULL; | |
368 | ||
369 | c = (RuleBasedCollator *) Collator::createInstance(Locale::getCanadaFrench(), status); | |
370 | ||
371 | if (c == NULL || U_FAILURE(status)) | |
372 | { | |
373 | errln("Failed to create collator for Locale::getCanadaFrench()"); | |
374 | delete c; | |
375 | return; | |
376 | } | |
377 | ||
378 | c->setStrength(Collator::SECONDARY); | |
379 | ||
380 | /* | |
381 | String[] tests = { | |
382 | "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater | |
383 | }; | |
384 | */ | |
385 | static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
386 | { | |
387 | {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0} | |
388 | }; | |
389 | ||
390 | compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
391 | ||
392 | delete c; | |
393 | } | |
394 | ||
395 | // @bug 4065540 | |
396 | // | |
397 | // Collator::compare() method broken if either string contains spaces | |
398 | // | |
399 | void CollationRegressionTest::Test4065540(/* char* par */) | |
400 | { | |
401 | if (en_us->compare("abcd e", "abcd f") == 0) | |
402 | { | |
403 | errln("'abcd e' == 'abcd f'"); | |
404 | } | |
405 | } | |
406 | ||
407 | // @bug 4066189 | |
408 | // | |
409 | // Unicode characters need to be recursively decomposed to get the | |
410 | // correct result. For example, | |
411 | // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300. | |
412 | // | |
413 | void CollationRegressionTest::Test4066189(/* char* par */) | |
414 | { | |
415 | static const UChar chars1[] = {0x1EB1, 0}; | |
416 | static const UChar chars2[] = {0x61, 0x0306, 0x0300, 0}; | |
417 | const UnicodeString test1(chars1); | |
418 | const UnicodeString test2(chars2); | |
419 | UErrorCode status = U_ZERO_ERROR; | |
420 | ||
421 | // NOTE: The java code used en_us to create the | |
422 | // CollationElementIterator's. I'm pretty sure that | |
423 | // was wrong, so I've change the code to use c1 and c2 | |
424 | RuleBasedCollator *c1 = (RuleBasedCollator *) en_us->clone(); | |
425 | c1->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
426 | CollationElementIterator *i1 = c1->createCollationElementIterator(test1); | |
427 | ||
428 | RuleBasedCollator *c2 = (RuleBasedCollator *) en_us->clone(); | |
429 | c2->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); | |
430 | CollationElementIterator *i2 = c2->createCollationElementIterator(test2); | |
431 | ||
432 | assertEqual(*i1, *i2); | |
433 | ||
434 | delete i2; | |
435 | delete c2; | |
436 | delete i1; | |
437 | delete c1; | |
438 | } | |
439 | ||
440 | // @bug 4066696 | |
441 | // | |
442 | // French secondary collation checking at the end of compare iteration fails | |
443 | // | |
444 | void CollationRegressionTest::Test4066696(/* char* par */) | |
445 | { | |
446 | UErrorCode status = U_ZERO_ERROR; | |
447 | RuleBasedCollator *c = NULL; | |
448 | ||
449 | c = (RuleBasedCollator *)Collator::createInstance(Locale::getCanadaFrench(), status); | |
450 | ||
451 | if (c == NULL || U_FAILURE(status)) | |
452 | { | |
453 | errln("Failure creating collator for Locale::getCanadaFrench()"); | |
454 | delete c; | |
455 | return; | |
456 | } | |
457 | ||
458 | c->setStrength(Collator::SECONDARY); | |
459 | ||
460 | /* | |
461 | String[] tests = { | |
462 | "\u00e0", "<", "\u01fa", // a-grave < A-ring-acute | |
463 | }; | |
464 | ||
465 | should be: | |
466 | ||
467 | String[] tests = { | |
468 | "\u00e0", ">", "\u01fa", // a-grave < A-ring-acute | |
469 | }; | |
470 | ||
471 | */ | |
472 | ||
473 | static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
474 | { | |
475 | {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0} | |
476 | }; | |
477 | ||
478 | compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
479 | ||
480 | delete c; | |
481 | } | |
482 | ||
483 | // @bug 4076676 | |
484 | // | |
485 | // Bad canonicalization of same-class combining characters | |
486 | // | |
487 | void CollationRegressionTest::Test4076676(/* char* par */) | |
488 | { | |
489 | // These combining characters are all in the same class, so they should not | |
490 | // be reordered, and they should compare as unequal. | |
491 | static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0}; | |
492 | static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0}; | |
493 | ||
494 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
495 | c->setStrength(Collator::TERTIARY); | |
496 | ||
497 | if (c->compare(s1,s2) == 0) | |
498 | { | |
499 | errln("Same-class combining chars were reordered"); | |
500 | } | |
501 | ||
502 | delete c; | |
503 | } | |
504 | ||
505 | // @bug 4079231 | |
506 | // | |
507 | // RuleBasedCollator::operator==(NULL) throws NullPointerException | |
508 | // | |
509 | void CollationRegressionTest::Test4079231(/* char* par */) | |
510 | { | |
511 | // I don't think there's any way to write this test | |
512 | // in C++. The following is equivalent to the Java, | |
513 | // but doesn't compile 'cause NULL can't be converted | |
514 | // to Collator& | |
515 | // | |
516 | // if (en_us->operator==(NULL)) | |
517 | // { | |
518 | // errln("en_us->operator==(NULL) returned TRUE"); | |
519 | // } | |
520 | ||
521 | /* | |
522 | try { | |
523 | if (en_us->equals(null)) { | |
524 | errln("en_us->equals(null) returned true"); | |
525 | } | |
526 | } | |
527 | catch (Exception e) { | |
528 | errln("en_us->equals(null) threw " + e.toString()); | |
529 | } | |
530 | */ | |
531 | } | |
532 | ||
533 | // @bug 4078588 | |
534 | // | |
535 | // RuleBasedCollator breaks on "< a < bb" rule | |
536 | // | |
537 | void CollationRegressionTest::Test4078588(/* char *par */) | |
538 | { | |
539 | UErrorCode status = U_ZERO_ERROR; | |
540 | RuleBasedCollator *rbc = new RuleBasedCollator("&9 < a < bb", status); | |
541 | ||
542 | if (rbc == NULL || U_FAILURE(status)) | |
543 | { | |
544 | errln("Failed to create RuleBasedCollator."); | |
545 | delete rbc; | |
546 | return; | |
547 | } | |
548 | ||
549 | Collator::EComparisonResult result = rbc->compare("a","bb"); | |
550 | ||
551 | if (result != Collator::LESS) | |
552 | { | |
553 | errln((UnicodeString)"Compare(a,bb) returned " + (int)result | |
554 | + (UnicodeString)"; expected -1"); | |
555 | } | |
556 | ||
557 | delete rbc; | |
558 | } | |
559 | ||
560 | // @bug 4081866 | |
561 | // | |
562 | // Combining characters in different classes not reordered properly. | |
563 | // | |
564 | void CollationRegressionTest::Test4081866(/* char* par */) | |
565 | { | |
566 | // These combining characters are all in different classes, | |
567 | // so they should be reordered and the strings should compare as equal. | |
568 | static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0}; | |
569 | static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0}; | |
570 | ||
571 | UErrorCode status = U_ZERO_ERROR; | |
572 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
573 | c->setStrength(Collator::TERTIARY); | |
574 | ||
575 | // Now that the default collators are set to NO_DECOMPOSITION | |
576 | // (as a result of fixing bug 4114077), we must set it explicitly | |
577 | // when we're testing reordering behavior. -- lwerner, 5/5/98 | |
578 | c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
579 | ||
580 | if (c->compare(s1,s2) != 0) | |
581 | { | |
582 | errln("Combining chars were not reordered"); | |
583 | } | |
584 | ||
585 | delete c; | |
586 | } | |
587 | ||
588 | // @bug 4087241 | |
589 | // | |
590 | // string comparison errors in Scandinavian collators | |
591 | // | |
592 | void CollationRegressionTest::Test4087241(/* char* par */) | |
593 | { | |
594 | UErrorCode status = U_ZERO_ERROR; | |
595 | Locale da_DK("da", "DK"); | |
596 | RuleBasedCollator *c = NULL; | |
597 | ||
598 | c = (RuleBasedCollator *) Collator::createInstance(da_DK, status); | |
599 | ||
600 | if (c == NULL || U_FAILURE(status)) | |
601 | { | |
602 | errln("Failed to create collator for da_DK locale"); | |
603 | delete c; | |
604 | return; | |
605 | } | |
606 | ||
607 | c->setStrength(Collator::SECONDARY); | |
608 | ||
609 | static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
610 | { | |
611 | {0x7a, 0}, {0x3c, 0}, {0x00E6, 0}, // z < ae | |
612 | {0x61, 0x0308, 0}, {0x3c, 0}, {0x61, 0x030A, 0}, // a-umlaut < a-ring | |
613 | {0x59, 0}, {0x3c, 0}, {0x75, 0x0308, 0}, // Y < u-umlaut | |
614 | }; | |
615 | ||
616 | compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
617 | ||
618 | delete c; | |
619 | } | |
620 | ||
621 | // @bug 4087243 | |
622 | // | |
623 | // CollationKey takes ignorable strings into account when it shouldn't | |
624 | // | |
625 | void CollationRegressionTest::Test4087243(/* char* par */) | |
626 | { | |
627 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
628 | c->setStrength(Collator::TERTIARY); | |
629 | ||
630 | static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
631 | { | |
632 | {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0} // 1 2 3 = 1 2 3 ctrl-A | |
633 | }; | |
634 | ||
635 | compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
636 | ||
637 | delete c; | |
638 | } | |
639 | ||
640 | // @bug 4092260 | |
641 | // | |
642 | // Mu/micro conflict | |
643 | // Micro symbol and greek lowercase letter Mu should sort identically | |
644 | // | |
645 | void CollationRegressionTest::Test4092260(/* char* par */) | |
646 | { | |
647 | UErrorCode status = U_ZERO_ERROR; | |
648 | Locale el("el", ""); | |
649 | Collator *c = NULL; | |
650 | ||
651 | c = Collator::createInstance(el, status); | |
652 | ||
653 | if (c == NULL || U_FAILURE(status)) | |
654 | { | |
655 | errln("Failed to create collator for el locale."); | |
656 | delete c; | |
657 | return; | |
658 | } | |
659 | ||
660 | // These now have tertiary differences in UCA | |
661 | c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status); | |
662 | ||
663 | static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
664 | { | |
665 | {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0} | |
666 | }; | |
667 | ||
668 | compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
669 | ||
670 | delete c; | |
671 | } | |
672 | ||
673 | // @bug 4095316 | |
674 | // | |
675 | void CollationRegressionTest::Test4095316(/* char* par */) | |
676 | { | |
677 | UErrorCode status = U_ZERO_ERROR; | |
678 | Locale el_GR("el", "GR"); | |
679 | Collator *c = Collator::createInstance(el_GR, status); | |
680 | ||
681 | if (c == NULL || U_FAILURE(status)) | |
682 | { | |
683 | errln("Failed to create collator for el_GR locale"); | |
684 | delete c; | |
685 | return; | |
686 | } | |
687 | // These now have tertiary differences in UCA | |
688 | //c->setStrength(Collator::TERTIARY); | |
689 | c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status); | |
690 | ||
691 | static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
692 | { | |
693 | {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0} | |
694 | }; | |
695 | ||
696 | compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
697 | ||
698 | delete c; | |
699 | } | |
700 | ||
701 | // @bug 4101940 | |
702 | // | |
703 | void CollationRegressionTest::Test4101940(/* char* par */) | |
704 | { | |
705 | UErrorCode status = U_ZERO_ERROR; | |
706 | RuleBasedCollator *c = NULL; | |
707 | UnicodeString rules = "&9 < a < b"; | |
708 | UnicodeString nothing = ""; | |
709 | ||
710 | c = new RuleBasedCollator(rules, status); | |
711 | ||
712 | if (c == NULL || U_FAILURE(status)) | |
713 | { | |
714 | errln("Failed to create RuleBasedCollator"); | |
715 | delete c; | |
716 | return; | |
717 | } | |
718 | ||
719 | CollationElementIterator *i = c->createCollationElementIterator(nothing); | |
720 | i->reset(); | |
721 | ||
722 | if (i->next(status) != CollationElementIterator::NULLORDER) | |
723 | { | |
724 | errln("next did not return NULLORDER"); | |
725 | } | |
726 | ||
727 | delete i; | |
728 | delete c; | |
729 | } | |
730 | ||
731 | // @bug 4103436 | |
732 | // | |
733 | // Collator::compare not handling spaces properly | |
734 | // | |
735 | void CollationRegressionTest::Test4103436(/* char* par */) | |
736 | { | |
737 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
738 | c->setStrength(Collator::TERTIARY); | |
739 | ||
740 | static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
741 | { | |
742 | {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}, | |
743 | {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0} | |
744 | }; | |
745 | ||
746 | compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
747 | ||
748 | delete c; | |
749 | } | |
750 | ||
751 | // @bug 4114076 | |
752 | // | |
753 | // Collation not Unicode conformant with Hangul syllables | |
754 | // | |
755 | void CollationRegressionTest::Test4114076(/* char* par */) | |
756 | { | |
757 | UErrorCode status = U_ZERO_ERROR; | |
758 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
759 | c->setStrength(Collator::TERTIARY); | |
760 | ||
761 | // | |
762 | // With Canonical decomposition, Hangul syllables should get decomposed | |
763 | // into Jamo, but Jamo characters should not be decomposed into | |
764 | // conjoining Jamo | |
765 | // | |
766 | static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
767 | { | |
768 | {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0} | |
769 | }; | |
770 | ||
771 | c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
772 | compareArray(*c, test1, ARRAY_LENGTH(test1)); | |
773 | ||
774 | // From UTR #15: | |
775 | // *In earlier versions of Unicode, jamo characters like ksf | |
776 | // had compatibility mappings to kf + sf. These mappings were | |
777 | // removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.) | |
778 | // That is, the following test is obsolete as of 2.1.9 | |
779 | ||
780 | //obsolete- // With Full decomposition, it should go all the way down to | |
781 | //obsolete- // conjoining Jamo characters. | |
782 | //obsolete- // | |
783 | //obsolete- static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
784 | //obsolete- { | |
785 | //obsolete- {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0} | |
786 | //obsolete- }; | |
787 | //obsolete- | |
788 | //obsolete- c->setDecomposition(Normalizer::DECOMP_COMPAT); | |
789 | //obsolete- compareArray(*c, test2, ARRAY_LENGTH(test2)); | |
790 | ||
791 | delete c; | |
792 | } | |
793 | ||
794 | ||
795 | // @bug 4124632 | |
796 | // | |
797 | // Collator::getCollationKey was hanging on certain character sequences | |
798 | // | |
799 | void CollationRegressionTest::Test4124632(/* char* par */) | |
800 | { | |
801 | UErrorCode status = U_ZERO_ERROR; | |
802 | Collator *coll = NULL; | |
803 | ||
804 | coll = Collator::createInstance(Locale::getJapan(), status); | |
805 | ||
806 | if (coll == NULL || U_FAILURE(status)) | |
807 | { | |
808 | errln("Failed to create collator for Locale::JAPAN"); | |
809 | delete coll; | |
810 | return; | |
811 | } | |
812 | ||
813 | static const UChar test[] = {0x41, 0x0308, 0x62, 0x63, 0}; | |
814 | CollationKey key; | |
815 | ||
816 | coll->getCollationKey(test, key, status); | |
817 | ||
818 | if (key.isBogus() || U_FAILURE(status)) | |
819 | { | |
820 | errln("CollationKey creation failed."); | |
821 | } | |
822 | ||
823 | delete coll; | |
824 | } | |
825 | ||
826 | // @bug 4132736 | |
827 | // | |
828 | // sort order of french words with multiple accents has errors | |
829 | // | |
830 | void CollationRegressionTest::Test4132736(/* char* par */) | |
831 | { | |
832 | UErrorCode status = U_ZERO_ERROR; | |
833 | ||
834 | Collator *c = NULL; | |
835 | ||
836 | c = Collator::createInstance(Locale::getCanadaFrench(), status); | |
837 | c->setStrength(Collator::TERTIARY); | |
838 | ||
839 | if (c == NULL || U_FAILURE(status)) | |
840 | { | |
841 | errln("Failed to create a collator for Locale::getCanadaFrench()"); | |
842 | delete c; | |
843 | return; | |
844 | } | |
845 | ||
846 | static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
847 | { | |
848 | {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0}, | |
849 | {0x65, 0x0300, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x0300, 0} | |
850 | }; | |
851 | ||
852 | compareArray(*c, test1, ARRAY_LENGTH(test1)); | |
853 | ||
854 | delete c; | |
855 | } | |
856 | ||
857 | // @bug 4133509 | |
858 | // | |
859 | // The sorting using java.text.CollationKey is not in the exact order | |
860 | // | |
861 | void CollationRegressionTest::Test4133509(/* char* par */) | |
862 | { | |
863 | static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
864 | { | |
865 | {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0}, | |
866 | {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0}, {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0}, | |
867 | {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0}, {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0} | |
868 | }; | |
869 | ||
870 | compareArray(*en_us, test1, ARRAY_LENGTH(test1)); | |
871 | } | |
872 | ||
873 | // @bug 4114077 | |
874 | // | |
875 | // Collation with decomposition off doesn't work for Europe | |
876 | // | |
877 | void CollationRegressionTest::Test4114077(/* char* par */) | |
878 | { | |
879 | // Ensure that we get the same results with decomposition off | |
880 | // as we do with it on.... | |
881 | ||
882 | UErrorCode status = U_ZERO_ERROR; | |
883 | RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
884 | c->setStrength(Collator::TERTIARY); | |
885 | ||
886 | static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
887 | { | |
888 | {0x00C0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}, // Should be equivalent | |
889 | {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0}, | |
890 | {0x0204, 0}, {0x3d, 0}, {0x45, 0x030F, 0}, | |
891 | {0x01fa, 0}, {0x3d, 0}, {0x41, 0x030a, 0x0301, 0}, // a-ring-acute -> a-ring, acute | |
892 | // -> a, ring, acute | |
893 | {0x41, 0x0300, 0x0316, 0}, {0x3c, 0}, {0x41, 0x0316, 0x0300, 0} // No reordering --> unequal | |
894 | }; | |
895 | ||
896 | c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); | |
897 | compareArray(*c, test1, ARRAY_LENGTH(test1)); | |
898 | ||
899 | static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
900 | { | |
901 | {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0} // Reordering --> equal | |
902 | }; | |
903 | ||
904 | c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
905 | compareArray(*c, test2, ARRAY_LENGTH(test2)); | |
906 | ||
907 | delete c; | |
908 | } | |
909 | ||
910 | // @bug 4141640 | |
911 | // | |
912 | // Support for Swedish gone in 1.1.6 (Can't create Swedish collator) | |
913 | // | |
914 | void CollationRegressionTest::Test4141640(/* char* par */) | |
915 | { | |
916 | // | |
917 | // Rather than just creating a Swedish collator, we might as well | |
918 | // try to instantiate one for every locale available on the system | |
919 | // in order to prevent this sort of bug from cropping up in the future | |
920 | // | |
921 | UErrorCode status = U_ZERO_ERROR; | |
922 | int32_t i, localeCount; | |
923 | const Locale *locales = Locale::getAvailableLocales(localeCount); | |
924 | ||
925 | for (i = 0; i < localeCount; i += 1) | |
926 | { | |
927 | Collator *c = NULL; | |
928 | ||
929 | status = U_ZERO_ERROR; | |
930 | c = Collator::createInstance(locales[i], status); | |
931 | ||
932 | if (c == NULL || U_FAILURE(status)) | |
933 | { | |
934 | UnicodeString msg, localeName; | |
935 | ||
936 | msg += "Could not create collator for locale "; | |
937 | msg += locales[i].getName(); | |
938 | ||
939 | errln(msg); | |
940 | } | |
941 | ||
942 | delete c; | |
943 | } | |
944 | } | |
945 | ||
946 | // @bug 4139572 | |
947 | // | |
948 | // getCollationKey throws exception for spanish text | |
949 | // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6 | |
950 | // | |
951 | void CollationRegressionTest::Test4139572(/* char* par */) | |
952 | { | |
953 | // | |
954 | // Code pasted straight from the bug report | |
955 | // (and then translated to C++ ;-) | |
956 | // | |
957 | // create spanish locale and collator | |
958 | UErrorCode status = U_ZERO_ERROR; | |
959 | Locale l("es", "es"); | |
960 | Collator *col = NULL; | |
961 | ||
962 | col = Collator::createInstance(l, status); | |
963 | ||
964 | if (col == NULL || U_FAILURE(status)) | |
965 | { | |
966 | errln("Failed to create a collator for es_es locale."); | |
967 | delete col; | |
968 | return; | |
969 | } | |
970 | ||
971 | CollationKey key; | |
972 | ||
973 | // this spanish phrase kills it! | |
974 | col->getCollationKey("Nombre De Objeto", key, status); | |
975 | ||
976 | if (key.isBogus() || U_FAILURE(status)) | |
977 | { | |
978 | errln("Error creating CollationKey for \"Nombre De Ojbeto\""); | |
979 | } | |
980 | ||
981 | delete col; | |
982 | } | |
983 | /* HSYS : RuleBasedCollator::compare() performance enhancements | |
984 | compare() does not create CollationElementIterator() anymore.*/ | |
985 | ||
986 | class My4146160Collator : public RuleBasedCollator | |
987 | { | |
988 | public: | |
989 | My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status); | |
990 | ~My4146160Collator(); | |
991 | ||
992 | CollationElementIterator *createCollationElementIterator(const UnicodeString &text) const; | |
993 | ||
994 | CollationElementIterator *createCollationElementIterator(const CharacterIterator &text) const; | |
995 | ||
996 | static int32_t count; | |
997 | }; | |
998 | ||
999 | int32_t My4146160Collator::count = 0; | |
1000 | ||
1001 | My4146160Collator::My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status) | |
1002 | : RuleBasedCollator(rbc.getRules(), status) | |
1003 | { | |
1004 | } | |
1005 | ||
1006 | My4146160Collator::~My4146160Collator() | |
1007 | { | |
1008 | } | |
1009 | ||
1010 | CollationElementIterator *My4146160Collator::createCollationElementIterator(const UnicodeString &text) const | |
1011 | { | |
1012 | count += 1; | |
1013 | return RuleBasedCollator::createCollationElementIterator(text); | |
1014 | } | |
1015 | ||
1016 | CollationElementIterator *My4146160Collator::createCollationElementIterator(const CharacterIterator &text) const | |
1017 | { | |
1018 | count += 1; | |
1019 | return RuleBasedCollator::createCollationElementIterator(text); | |
1020 | } | |
1021 | ||
1022 | // @bug 4146160 | |
1023 | // | |
1024 | // RuleBasedCollator doesn't use createCollationElementIterator internally | |
1025 | // | |
1026 | void CollationRegressionTest::Test4146160(/* char* par */) | |
1027 | { | |
1028 | #if 0 | |
1029 | // | |
1030 | // Use a custom collator class whose createCollationElementIterator | |
1031 | // methods increment a count.... | |
1032 | // | |
1033 | UErrorCode status = U_ZERO_ERROR; | |
1034 | CollationKey key; | |
1035 | ||
1036 | My4146160Collator::count = 0; | |
1037 | My4146160Collator *mc = NULL; | |
1038 | ||
1039 | mc = new My4146160Collator(*en_us, status); | |
1040 | ||
1041 | if (mc == NULL || U_FAILURE(status)) | |
1042 | { | |
1043 | errln("Failed to create a My4146160Collator."); | |
1044 | delete mc; | |
1045 | return; | |
1046 | } | |
1047 | ||
1048 | mc->getCollationKey("1", key, status); | |
1049 | ||
1050 | if (key.isBogus() || U_FAILURE(status)) | |
1051 | { | |
1052 | errln("Failure to get a CollationKey from a My4146160Collator."); | |
1053 | delete mc; | |
1054 | return; | |
1055 | } | |
1056 | ||
1057 | if (My4146160Collator::count < 1) | |
1058 | { | |
1059 | errln("My4146160Collator::createCollationElementIterator not called for getCollationKey"); | |
1060 | } | |
1061 | ||
1062 | My4146160Collator::count = 0; | |
1063 | mc->compare("1", "2"); | |
1064 | ||
1065 | if (My4146160Collator::count < 1) | |
1066 | { | |
1067 | errln("My4146160Collator::createtCollationElementIterator not called for compare"); | |
1068 | } | |
1069 | ||
1070 | delete mc; | |
1071 | #endif | |
1072 | } | |
1073 | ||
1074 | void CollationRegressionTest::Test4179216() { | |
1075 | // you can position a CollationElementIterator in the middle of | |
1076 | // a contracting character sequence, yielding a bogus collation | |
1077 | // element | |
1078 | IcuTestErrorCode errorCode(*this, "Test4179216"); | |
1079 | RuleBasedCollator coll(en_us->getRules() + " & C < ch , cH , Ch , CH < cat < crunchy", errorCode); | |
1080 | UnicodeString testText = "church church catcatcher runcrunchynchy"; | |
1081 | CollationElementIterator *iter = coll.createCollationElementIterator(testText); | |
1082 | ||
1083 | // test that the "ch" combination works properly | |
1084 | iter->setOffset(4, errorCode); | |
1085 | int32_t elt4 = CollationElementIterator::primaryOrder(iter->next(errorCode)); | |
1086 | ||
1087 | iter->reset(); | |
1088 | int32_t elt0 = CollationElementIterator::primaryOrder(iter->next(errorCode)); | |
1089 | ||
1090 | iter->setOffset(5, errorCode); | |
1091 | int32_t elt5 = CollationElementIterator::primaryOrder(iter->next(errorCode)); | |
1092 | ||
1093 | // Compares and prints only 16-bit primary weights. | |
1094 | if (elt4 != elt0 || elt5 != elt0) { | |
1095 | errln("The collation elements at positions 0 (0x%04x), " | |
1096 | "4 (0x%04x), and 5 (0x%04x) don't match.", | |
1097 | elt0, elt4, elt5); | |
1098 | } | |
1099 | ||
1100 | // test that the "cat" combination works properly | |
1101 | iter->setOffset(14, errorCode); | |
1102 | int32_t elt14 = CollationElementIterator::primaryOrder(iter->next(errorCode)); | |
1103 | ||
1104 | iter->setOffset(15, errorCode); | |
1105 | int32_t elt15 = CollationElementIterator::primaryOrder(iter->next(errorCode)); | |
1106 | ||
1107 | iter->setOffset(16, errorCode); | |
1108 | int32_t elt16 = CollationElementIterator::primaryOrder(iter->next(errorCode)); | |
1109 | ||
1110 | iter->setOffset(17, errorCode); | |
1111 | int32_t elt17 = CollationElementIterator::primaryOrder(iter->next(errorCode)); | |
1112 | ||
1113 | iter->setOffset(18, errorCode); | |
1114 | int32_t elt18 = CollationElementIterator::primaryOrder(iter->next(errorCode)); | |
1115 | ||
1116 | iter->setOffset(19, errorCode); | |
1117 | int32_t elt19 = CollationElementIterator::primaryOrder(iter->next(errorCode)); | |
1118 | ||
1119 | // Compares and prints only 16-bit primary weights. | |
1120 | if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17 | |
1121 | || elt14 != elt18 || elt14 != elt19) { | |
1122 | errln("\"cat\" elements don't match: elt14 = 0x%04x, " | |
1123 | "elt15 = 0x%04x, elt16 = 0x%04x, elt17 = 0x%04x, " | |
1124 | "elt18 = 0x%04x, elt19 = 0x%04x", | |
1125 | elt14, elt15, elt16, elt17, elt18, elt19); | |
1126 | } | |
1127 | ||
1128 | // now generate a complete list of the collation elements, | |
1129 | // first using next() and then using setOffset(), and | |
1130 | // make sure both interfaces return the same set of elements | |
1131 | iter->reset(); | |
1132 | ||
1133 | int32_t elt = iter->next(errorCode); | |
1134 | int32_t count = 0; | |
1135 | while (elt != CollationElementIterator::NULLORDER) { | |
1136 | ++count; | |
1137 | elt = iter->next(errorCode); | |
1138 | } | |
1139 | ||
1140 | LocalArray<UnicodeString> nextElements(new UnicodeString[count]); | |
1141 | LocalArray<UnicodeString> setOffsetElements(new UnicodeString[count]); | |
1142 | int32_t lastPos = 0; | |
1143 | ||
1144 | iter->reset(); | |
1145 | elt = iter->next(errorCode); | |
1146 | count = 0; | |
1147 | while (elt != CollationElementIterator::NULLORDER) { | |
1148 | nextElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset()); | |
1149 | lastPos = iter->getOffset(); | |
1150 | elt = iter->next(errorCode); | |
1151 | } | |
1152 | int32_t nextElementsLength = count; | |
1153 | count = 0; | |
1154 | for (int32_t i = 0; i < testText.length(); ) { | |
1155 | iter->setOffset(i, errorCode); | |
1156 | lastPos = iter->getOffset(); | |
1157 | elt = iter->next(errorCode); | |
1158 | setOffsetElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset()); | |
1159 | i = iter->getOffset(); | |
1160 | } | |
1161 | for (int32_t i = 0; i < nextElementsLength; i++) { | |
1162 | if (nextElements[i] == setOffsetElements[i]) { | |
1163 | logln(nextElements[i]); | |
1164 | } else { | |
1165 | errln(UnicodeString("Error: next() yielded ") + nextElements[i] + | |
1166 | ", but setOffset() yielded " + setOffsetElements[i]); | |
1167 | } | |
1168 | } | |
1169 | delete iter; | |
1170 | } | |
1171 | ||
1172 | // Ticket 7189 | |
1173 | // | |
1174 | // nextSortKeyPart incorrect for EO_S1 collation | |
1175 | static int32_t calcKeyIncremental(UCollator *coll, const UChar* text, int32_t len, uint8_t *keyBuf, int32_t /*keyBufLen*/, UErrorCode& status) { | |
1176 | UCharIterator uiter; | |
1177 | uint32_t state[2] = { 0, 0 }; | |
1178 | int32_t keyLen; | |
1179 | int32_t count = 8; | |
1180 | ||
1181 | uiter_setString(&uiter, text, len); | |
1182 | keyLen = 0; | |
1183 | while (TRUE) { | |
1184 | int32_t keyPartLen = ucol_nextSortKeyPart(coll, &uiter, state, &keyBuf[keyLen], count, &status); | |
1185 | if (U_FAILURE(status)) { | |
1186 | return -1; | |
1187 | } | |
1188 | if (keyPartLen == 0) { | |
1189 | break; | |
1190 | } | |
1191 | keyLen += keyPartLen; | |
1192 | } | |
1193 | return keyLen; | |
1194 | } | |
1195 | ||
1196 | void CollationRegressionTest::TestT7189() { | |
1197 | UErrorCode status = U_ZERO_ERROR; | |
1198 | UCollator *coll; | |
1199 | uint32_t i; | |
1200 | ||
1201 | static const UChar text1[][CollationRegressionTest::MAX_TOKEN_LEN] = { | |
1202 | // "Achter De Hoven" | |
1203 | { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 }, | |
1204 | // "ABC" | |
1205 | { 0x41, 0x42, 0x43, 0x00 }, | |
1206 | // "HELLO world!" | |
1207 | { 0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 } | |
1208 | }; | |
1209 | ||
1210 | static const UChar text2[][CollationRegressionTest::MAX_TOKEN_LEN] = { | |
1211 | // "Achter de Hoven" | |
1212 | { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 }, | |
1213 | // "abc" | |
1214 | { 0x61, 0x62, 0x63, 0x00 }, | |
1215 | // "hello world!" | |
1216 | { 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 } | |
1217 | }; | |
1218 | ||
1219 | // Open the collator | |
1220 | coll = ucol_openFromShortString("EO_S1", FALSE, NULL, &status); | |
1221 | if (U_FAILURE(status)) { | |
1222 | errln("Failed to create a collator for short string EO_S1"); | |
1223 | return; | |
1224 | } | |
1225 | ||
1226 | for (i = 0; i < sizeof(text1) / (CollationRegressionTest::MAX_TOKEN_LEN * sizeof(UChar)); i++) { | |
1227 | uint8_t key1[100], key2[100]; | |
1228 | int32_t len1, len2; | |
1229 | ||
1230 | len1 = calcKeyIncremental(coll, text1[i], -1, key1, sizeof(key1), status); | |
1231 | if (U_FAILURE(status)) { | |
1232 | errln(UnicodeString("Failed to get a partial collation key for ") + text1[i]); | |
1233 | break; | |
1234 | } | |
1235 | len2 = calcKeyIncremental(coll, text2[i], -1, key2, sizeof(key2), status); | |
1236 | if (U_FAILURE(status)) { | |
1237 | errln(UnicodeString("Failed to get a partial collation key for ") + text2[i]); | |
1238 | break; | |
1239 | } | |
1240 | ||
1241 | if (len1 == len2 && uprv_memcmp(key1, key2, len1) == 0) { | |
1242 | errln(UnicodeString("Failed: Identical key\n") + " text1: " + text1[i] + "\n" + " text2: " + text2[i] + "\n" + " key : " + TestUtility::hex(key1, len1)); | |
1243 | } else { | |
1244 | logln(UnicodeString("Keys produced -\n") + " text1: " + text1[i] + "\n" + " key1 : " + TestUtility::hex(key1, len1) + "\n" + " text2: " + text2[i] + "\n" + " key2 : " | |
1245 | + TestUtility::hex(key2, len2)); | |
1246 | } | |
1247 | } | |
1248 | ucol_close(coll); | |
1249 | } | |
1250 | ||
1251 | void CollationRegressionTest::TestCaseFirstCompression() { | |
1252 | RuleBasedCollator *col = (RuleBasedCollator *) en_us->clone(); | |
1253 | UErrorCode status = U_ZERO_ERROR; | |
1254 | ||
1255 | // default | |
1256 | caseFirstCompressionSub(col, "default"); | |
1257 | ||
1258 | // Upper first | |
1259 | col->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status); | |
1260 | if (U_FAILURE(status)) { | |
1261 | errln("Failed to set UCOL_UPPER_FIRST"); | |
1262 | return; | |
1263 | } | |
1264 | caseFirstCompressionSub(col, "upper first"); | |
1265 | ||
1266 | // Lower first | |
1267 | col->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status); | |
1268 | if (U_FAILURE(status)) { | |
1269 | errln("Failed to set UCOL_LOWER_FIRST"); | |
1270 | return; | |
1271 | } | |
1272 | caseFirstCompressionSub(col, "lower first"); | |
1273 | ||
1274 | delete col; | |
1275 | } | |
1276 | ||
1277 | void CollationRegressionTest::caseFirstCompressionSub(Collator *col, UnicodeString opt) { | |
1278 | const int32_t maxLength = 50; | |
1279 | ||
1280 | UChar str1[maxLength]; | |
1281 | UChar str2[maxLength]; | |
1282 | ||
1283 | CollationKey key1, key2; | |
1284 | ||
1285 | for (int32_t len = 1; len <= maxLength; len++) { | |
1286 | int32_t i = 0; | |
1287 | for (; i < len - 1; i++) { | |
1288 | str1[i] = str2[i] = (UChar)0x61; // 'a' | |
1289 | } | |
1290 | str1[i] = (UChar)0x41; // 'A' | |
1291 | str2[i] = (UChar)0x61; // 'a' | |
1292 | ||
1293 | UErrorCode status = U_ZERO_ERROR; | |
1294 | col->getCollationKey(str1, len, key1, status); | |
1295 | col->getCollationKey(str2, len, key2, status); | |
1296 | ||
1297 | UCollationResult cmpKey = key1.compareTo(key2, status); | |
1298 | UCollationResult cmpCol = col->compare(str1, len, str2, len, status); | |
1299 | ||
1300 | if (U_FAILURE(status)) { | |
1301 | errln("Error in caseFirstCompressionSub"); | |
1302 | } else if (cmpKey != cmpCol) { | |
1303 | errln((UnicodeString)"Inconsistent comparison(" + opt | |
1304 | + "): str1=" + UnicodeString(str1, len) + ", str2=" + UnicodeString(str2, len) | |
1305 | + ", cmpKey=" + cmpKey + ", cmpCol=" + cmpCol); | |
1306 | } | |
1307 | } | |
1308 | } | |
1309 | ||
1310 | void CollationRegressionTest::TestTrailingComment() { | |
1311 | // ICU ticket #8070: | |
1312 | // Check that the rule parser handles a comment without terminating end-of-line. | |
1313 | IcuTestErrorCode errorCode(*this, "TestTrailingComment"); | |
1314 | RuleBasedCollator coll(UNICODE_STRING_SIMPLE("&c<b#comment1\n<a#comment2"), errorCode); | |
1315 | UnicodeString a((UChar)0x61), b((UChar)0x62), c((UChar)0x63); | |
1316 | assertTrue("c<b", coll.compare(c, b) < 0); | |
1317 | assertTrue("b<a", coll.compare(b, a) < 0); | |
1318 | } | |
1319 | ||
1320 | void CollationRegressionTest::TestBeforeWithTooStrongAfter() { | |
1321 | // ICU ticket #9959: | |
1322 | // Forbid rules with a before-reset followed by a stronger relation. | |
1323 | IcuTestErrorCode errorCode(*this, "TestBeforeWithTooStrongAfter"); | |
1324 | RuleBasedCollator before2(UNICODE_STRING_SIMPLE("&[before 2]x<<q<p"), errorCode); | |
1325 | if(errorCode.isSuccess()) { | |
1326 | errln("should forbid before-2-reset followed by primary relation"); | |
1327 | } else { | |
1328 | errorCode.reset(); | |
1329 | } | |
1330 | RuleBasedCollator before3(UNICODE_STRING_SIMPLE("&[before 3]x<<<q<<s<p"), errorCode); | |
1331 | if(errorCode.isSuccess()) { | |
1332 | errln("should forbid before-3-reset followed by primary or secondary relation"); | |
1333 | } else { | |
1334 | errorCode.reset(); | |
1335 | } | |
1336 | } | |
1337 | ||
1338 | void CollationRegressionTest::compareArray(Collator &c, | |
1339 | const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN], | |
1340 | int32_t testCount) | |
1341 | { | |
1342 | int32_t i; | |
1343 | Collator::EComparisonResult expectedResult = Collator::EQUAL; | |
1344 | ||
1345 | for (i = 0; i < testCount; i += 3) | |
1346 | { | |
1347 | UnicodeString source(tests[i]); | |
1348 | UnicodeString comparison(tests[i + 1]); | |
1349 | UnicodeString target(tests[i + 2]); | |
1350 | ||
1351 | if (comparison == "<") | |
1352 | { | |
1353 | expectedResult = Collator::LESS; | |
1354 | } | |
1355 | else if (comparison == ">") | |
1356 | { | |
1357 | expectedResult = Collator::GREATER; | |
1358 | } | |
1359 | else if (comparison == "=") | |
1360 | { | |
1361 | expectedResult = Collator::EQUAL; | |
1362 | } | |
1363 | else | |
1364 | { | |
1365 | UnicodeString bogus1("Bogus comparison string \""); | |
1366 | UnicodeString bogus2("\""); | |
1367 | errln(bogus1 + comparison + bogus2); | |
1368 | } | |
1369 | ||
1370 | Collator::EComparisonResult compareResult = c.compare(source, target); | |
1371 | ||
1372 | CollationKey sourceKey, targetKey; | |
1373 | UErrorCode status = U_ZERO_ERROR; | |
1374 | ||
1375 | c.getCollationKey(source, sourceKey, status); | |
1376 | ||
1377 | if (U_FAILURE(status)) | |
1378 | { | |
1379 | errln("Couldn't get collationKey for source"); | |
1380 | continue; | |
1381 | } | |
1382 | ||
1383 | c.getCollationKey(target, targetKey, status); | |
1384 | ||
1385 | if (U_FAILURE(status)) | |
1386 | { | |
1387 | errln("Couldn't get collationKey for target"); | |
1388 | continue; | |
1389 | } | |
1390 | ||
1391 | Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey); | |
1392 | ||
1393 | reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult ); | |
1394 | ||
1395 | } | |
1396 | } | |
1397 | ||
1398 | void CollationRegressionTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2) | |
1399 | { | |
1400 | int32_t c1, c2, count = 0; | |
1401 | UErrorCode status = U_ZERO_ERROR; | |
1402 | ||
1403 | do | |
1404 | { | |
1405 | c1 = i1.next(status); | |
1406 | c2 = i2.next(status); | |
1407 | ||
1408 | if (c1 != c2) | |
1409 | { | |
1410 | UnicodeString msg, msg1(" "); | |
1411 | ||
1412 | msg += msg1 + count; | |
1413 | msg += ": strength(0x"; | |
1414 | appendHex(c1, 8, msg); | |
1415 | msg += ") != strength(0x"; | |
1416 | appendHex(c2, 8, msg); | |
1417 | msg += ")"; | |
1418 | ||
1419 | errln(msg); | |
1420 | break; | |
1421 | } | |
1422 | ||
1423 | count += 1; | |
1424 | } | |
1425 | while (c1 != CollationElementIterator::NULLORDER); | |
1426 | } | |
1427 | ||
1428 | void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /* par */) | |
1429 | { | |
1430 | if (exec) | |
1431 | { | |
1432 | logln("Collation Regression Tests: "); | |
1433 | } | |
1434 | ||
1435 | if(en_us == NULL) { | |
1436 | dataerrln("Class collator not instantiated"); | |
1437 | name = ""; | |
1438 | return; | |
1439 | } | |
1440 | TESTCASE_AUTO_BEGIN; | |
1441 | TESTCASE_AUTO(Test4048446); | |
1442 | TESTCASE_AUTO(Test4051866); | |
1443 | TESTCASE_AUTO(Test4053636); | |
1444 | TESTCASE_AUTO(Test4054238); | |
1445 | TESTCASE_AUTO(Test4054734); | |
1446 | TESTCASE_AUTO(Test4054736); | |
1447 | TESTCASE_AUTO(Test4058613); | |
1448 | TESTCASE_AUTO(Test4059820); | |
1449 | TESTCASE_AUTO(Test4060154); | |
1450 | TESTCASE_AUTO(Test4062418); | |
1451 | TESTCASE_AUTO(Test4065540); | |
1452 | TESTCASE_AUTO(Test4066189); | |
1453 | TESTCASE_AUTO(Test4066696); | |
1454 | TESTCASE_AUTO(Test4076676); | |
1455 | TESTCASE_AUTO(Test4078588); | |
1456 | TESTCASE_AUTO(Test4079231); | |
1457 | TESTCASE_AUTO(Test4081866); | |
1458 | TESTCASE_AUTO(Test4087241); | |
1459 | TESTCASE_AUTO(Test4087243); | |
1460 | TESTCASE_AUTO(Test4092260); | |
1461 | TESTCASE_AUTO(Test4095316); | |
1462 | TESTCASE_AUTO(Test4101940); | |
1463 | TESTCASE_AUTO(Test4103436); | |
1464 | TESTCASE_AUTO(Test4114076); | |
1465 | TESTCASE_AUTO(Test4114077); | |
1466 | TESTCASE_AUTO(Test4124632); | |
1467 | TESTCASE_AUTO(Test4132736); | |
1468 | TESTCASE_AUTO(Test4133509); | |
1469 | TESTCASE_AUTO(Test4139572); | |
1470 | TESTCASE_AUTO(Test4141640); | |
1471 | TESTCASE_AUTO(Test4146160); | |
1472 | TESTCASE_AUTO(Test4179216); | |
1473 | TESTCASE_AUTO(TestT7189); | |
1474 | TESTCASE_AUTO(TestCaseFirstCompression); | |
1475 | TESTCASE_AUTO(TestTrailingComment); | |
1476 | TESTCASE_AUTO(TestBeforeWithTooStrongAfter); | |
1477 | TESTCASE_AUTO_END; | |
1478 | } | |
1479 | ||
1480 | #endif /* #if !UCONFIG_NO_COLLATION */ |