]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/intltest/regcoll.cpp
ICU-531.48.tar.gz
[apple/icu.git] / icuSources / test / intltest / regcoll.cpp
CommitLineData
b75a7d8f 1/********************************************************************
729e4ab9 2 * COPYRIGHT:
57a6839d 3 * Copyright (c) 1997-2014, International Business Machines Corporation and
b75a7d8f
A
4 * others. All Rights Reserved.
5 ********************************************************************/
6
7#include "unicode/utypes.h"
8
9#if !UCONFIG_NO_COLLATION
10
11#include "unicode/coll.h"
57a6839d 12#include "unicode/localpointer.h"
b75a7d8f
A
13#include "unicode/tblcoll.h"
14#include "unicode/unistr.h"
15#include "unicode/sortkey.h"
16#include "regcoll.h"
17#include "sfwdchit.h"
729e4ab9
A
18#include "testutil.h"
19#include "cmemory.h"
b75a7d8f
A
20
// Element count of a true array (not a pointer), converted to int32_t.
#define ARRAY_LENGTH(array) ((int32_t)(sizeof(array) / sizeof((array)[0])))
22
23CollationRegressionTest::CollationRegressionTest()
24{
25 UErrorCode status = U_ZERO_ERROR;
26
27 en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status);
28 if(U_FAILURE(status)) {
29 delete en_us;
30 en_us = 0;
729e4ab9 31 errcheckln(status, "Collator creation failed with %s", u_errorName(status));
b75a7d8f
A
32 return;
33 }
34}
35
// Releases the en_us collator set up by the constructor (it is null when
// creation failed, which delete handles).
CollationRegressionTest::~CollationRegressionTest()
{
    delete en_us;
}
40
41
42 // @bug 4048446
43//
44// CollationElementIterator.reset() doesn't work
45//
46void CollationRegressionTest::Test4048446(/* char* par */)
47{
48 const UnicodeString test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
49 const UnicodeString test2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
50 CollationElementIterator *i1 = en_us->createCollationElementIterator(test1);
51 CollationElementIterator *i2 = en_us->createCollationElementIterator(test1);
52 UErrorCode status = U_ZERO_ERROR;
53
54 if (i1 == NULL|| i2 == NULL)
55 {
56 errln("Could not create CollationElementIterator's");
57 delete i1;
58 delete i2;
59 return;
60 }
61
62 while (i1->next(status) != CollationElementIterator::NULLORDER)
63 {
64 if (U_FAILURE(status))
65 {
66 errln("error calling next()");
67
68 delete i1;
69 delete i2;
70 return;
71 }
72 }
73
74 i1->reset();
75
76 assertEqual(*i1, *i2);
77
78 delete i1;
79 delete i2;
80}
81
82// @bug 4051866
83//
84// Collator -> rules -> Collator round-trip broken for expanding characters
85//
86void CollationRegressionTest::Test4051866(/* char* par */)
87{
b75a7d8f
A
88 UnicodeString rules;
89 UErrorCode status = U_ZERO_ERROR;
90
57a6839d 91 rules += "&n < o ";
b75a7d8f
A
92 rules += "& oe ,o";
93 rules += (UChar)0x3080;
94 rules += "& oe ,";
95 rules += (UChar)0x1530;
96 rules += " ,O";
97 rules += "& OE ,O";
98 rules += (UChar)0x3080;
99 rules += "& OE ,";
100 rules += (UChar)0x1520;
101 rules += "< p ,P";
102
103 // Build a collator containing expanding characters
57a6839d 104 LocalPointer<RuleBasedCollator> c1(new RuleBasedCollator(rules, status));
b75a7d8f
A
105
106 // Build another using the rules from the first
57a6839d
A
107 LocalPointer<RuleBasedCollator> c2(new RuleBasedCollator(c1->getRules(), status));
108 if (U_FAILURE(status)) {
109 errln("RuleBasedCollator(rule string) failed - %s", u_errorName(status));
110 return;
111 }
b75a7d8f
A
112
113 // Make sure they're the same
114 if (!(c1->getRules() == c2->getRules()))
115 {
116 errln("Rules are not equal");
117 }
b75a7d8f
A
118}
119
120// @bug 4053636
121//
122// Collator thinks "black-bird" == "black"
123//
124void CollationRegressionTest::Test4053636(/* char* par */)
125{
126 if (en_us->equals("black_bird", "black"))
127 {
128 errln("black-bird == black");
129 }
130}
131
132// @bug 4054238
133//
134// CollationElementIterator will not work correctly if the associated
135// Collator object's mode is changed
136//
137void CollationRegressionTest::Test4054238(/* char* par */)
138{
139 const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0};
140 const UnicodeString test3(chars3);
141 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
142
143 // NOTE: The Java code uses en_us to create the CollationElementIterators
144 // but I'm pretty sure that's wrong, so I've changed this to use c.
145 UErrorCode status = U_ZERO_ERROR;
146 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
147 CollationElementIterator *i1 = c->createCollationElementIterator(test3);
148 delete i1;
149 delete c;
150}
151
152// @bug 4054734
153//
154// Collator::IDENTICAL documented but not implemented
155//
156void CollationRegressionTest::Test4054734(/* char* par */)
157{
158 /*
159 Here's the original Java:
160
161 String[] decomp = {
162 "\u0001", "<", "\u0002",
163 "\u0001", "=", "\u0001",
164 "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise
165 "\u00C0", "=", "A\u0300" // Decomp should make these equal
166 };
729e4ab9 167
b75a7d8f
A
168 String[] nodecomp = {
169 "\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave
170 };
171 */
172
173 static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] =
174 {
175 {0x0001, 0}, {0x3c, 0}, {0x0002, 0},
176 {0x0001, 0}, {0x3d, 0}, {0x0001, 0},
177 {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0},
178 {0x00c0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}
179 };
180
729e4ab9 181
b75a7d8f
A
182 UErrorCode status = U_ZERO_ERROR;
183 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
184
185 c->setStrength(Collator::IDENTICAL);
186
187 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
188 compareArray(*c, decomp, ARRAY_LENGTH(decomp));
189
190 delete c;
191}
192
193// @bug 4054736
194//
195// Full Decomposition mode not implemented
196//
197void CollationRegressionTest::Test4054736(/* char* par */)
198{
199 UErrorCode status = U_ZERO_ERROR;
200 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
201
202 c->setStrength(Collator::SECONDARY);
203 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
204
205 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
206 {
207 {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC} // Alef-Lamed vs. Alef, Lamed
208 };
209
210 compareArray(*c, tests, ARRAY_LENGTH(tests));
211
212 delete c;
213}
214
215// @bug 4058613
216//
729e4ab9 217// Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean
b75a7d8f
A
218//
219void CollationRegressionTest::Test4058613(/* char* par */)
220{
221 // Creating a default collator doesn't work when Korean is the default
222 // locale
729e4ab9 223
b75a7d8f
A
224 Locale oldDefault = Locale::getDefault();
225 UErrorCode status = U_ZERO_ERROR;
729e4ab9 226
b75a7d8f
A
227 Locale::setDefault(Locale::getKorean(), status);
228
229 if (U_FAILURE(status))
230 {
231 errln("Could not set default locale to Locale::KOREAN");
232 return;
233 }
234
235 Collator *c = NULL;
729e4ab9 236
b75a7d8f
A
237 c = Collator::createInstance("en_US", status);
238
239 if (c == NULL || U_FAILURE(status))
240 {
241 errln("Could not create a Korean collator");
242 Locale::setDefault(oldDefault, status);
243 delete c;
244 return;
245 }
729e4ab9 246
b75a7d8f
A
247 // Since the fix to this bug was to turn off decomposition for Korean collators,
248 // ensure that's what we got
249 if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF)
250 {
251 errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator");
252 }
253
254 delete c;
255
256 Locale::setDefault(oldDefault, status);
257}
258
259// @bug 4059820
260//
261// RuleBasedCollator.getRules does not return the exact pattern as input
262// for expanding character sequences
263//
264void CollationRegressionTest::Test4059820(/* char* par */)
265{
266 UErrorCode status = U_ZERO_ERROR;
267
268 RuleBasedCollator *c = NULL;
57a6839d 269 UnicodeString rules = "&9 < a < b , c/a < d < z";
729e4ab9 270
b75a7d8f
A
271 c = new RuleBasedCollator(rules, status);
272
273 if (c == NULL || U_FAILURE(status))
274 {
275 errln("Failure building a collator.");
276 delete c;
277 return;
278 }
279
280 if ( c->getRules().indexOf("c/a") == -1)
281 {
282 errln("returned rules do not contain 'c/a'");
283 }
284
285 delete c;
286}
287
288// @bug 4060154
289//
290// MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
291//
292void CollationRegressionTest::Test4060154(/* char* par */)
293{
294 UErrorCode status = U_ZERO_ERROR;
295 UnicodeString rules;
296
57a6839d 297 rules += "&f < g, G < h, H < i, I < j, J";
b75a7d8f
A
298 rules += " & H < ";
299 rules += (UChar)0x0131;
300 rules += ", ";
301 rules += (UChar)0x0130;
302 rules += ", i, I";
303
304 RuleBasedCollator *c = NULL;
729e4ab9 305
b75a7d8f
A
306 c = new RuleBasedCollator(rules, status);
307
308 if (c == NULL || U_FAILURE(status))
309 {
310 errln("failure building collator.");
311 delete c;
312 return;
313 }
314
315 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
316
317 /*
318 String[] tertiary = {
319 "A", "<", "B",
320 "H", "<", "\u0131",
321 "H", "<", "I",
322 "\u0131", "<", "\u0130",
323 "\u0130", "<", "i",
324 "\u0130", ">", "H",
325 };
326*/
327
328 static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] =
329 {
330 {0x41, 0}, {0x3c, 0}, {0x42, 0},
331 {0x48, 0}, {0x3c, 0}, {0x0131, 0},
332 {0x48, 0}, {0x3c, 0}, {0x49, 0},
333 {0x0131, 0}, {0x3c, 0}, {0x0130, 0},
334 {0x0130, 0}, {0x3c, 0}, {0x69, 0},
335 {0x0130, 0}, {0x3e, 0}, {0x48, 0}
336 };
337
338 c->setStrength(Collator::TERTIARY);
339 compareArray(*c, tertiary, ARRAY_LENGTH(tertiary));
340
341 /*
342 String[] secondary = {
343 "H", "<", "I",
344 "\u0131", "=", "\u0130",
345 };
346*/
347 static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] =
348 {
349 {0x48, 0}, {0x3c, 0}, {0x49, 0},
350 {0x0131, 0}, {0x3d, 0}, {0x0130, 0}
351 };
352
353 c->setStrength(Collator::PRIMARY);
354 compareArray(*c, secondary, ARRAY_LENGTH(secondary));
355
356 delete c;
73c04bcf 357}
b75a7d8f
A
358
359// @bug 4062418
360//
361// Secondary/Tertiary comparison incorrect in French Secondary
362//
363void CollationRegressionTest::Test4062418(/* char* par */)
364{
365 UErrorCode status = U_ZERO_ERROR;
366
367 RuleBasedCollator *c = NULL;
729e4ab9
A
368
369 c = (RuleBasedCollator *) Collator::createInstance(Locale::getCanadaFrench(), status);
b75a7d8f
A
370
371 if (c == NULL || U_FAILURE(status))
372 {
729e4ab9 373 errln("Failed to create collator for Locale::getCanadaFrench()");
b75a7d8f
A
374 delete c;
375 return;
376 }
377
378 c->setStrength(Collator::SECONDARY);
379
380/*
381 String[] tests = {
382 "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater
383 };
384*/
385 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
386 {
387 {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0}
388 };
389
390 compareArray(*c, tests, ARRAY_LENGTH(tests));
391
392 delete c;
393}
394
395// @bug 4065540
396//
397// Collator::compare() method broken if either string contains spaces
398//
399void CollationRegressionTest::Test4065540(/* char* par */)
400{
401 if (en_us->compare("abcd e", "abcd f") == 0)
402 {
403 errln("'abcd e' == 'abcd f'");
404 }
405}
406
407// @bug 4066189
408//
409// Unicode characters need to be recursively decomposed to get the
410// correct result. For example,
411// u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
412//
413void CollationRegressionTest::Test4066189(/* char* par */)
414{
415 static const UChar chars1[] = {0x1EB1, 0};
416 static const UChar chars2[] = {0x61, 0x0306, 0x0300, 0};
417 const UnicodeString test1(chars1);
418 const UnicodeString test2(chars2);
419 UErrorCode status = U_ZERO_ERROR;
420
421 // NOTE: The java code used en_us to create the
422 // CollationElementIterator's. I'm pretty sure that
423 // was wrong, so I've change the code to use c1 and c2
424 RuleBasedCollator *c1 = (RuleBasedCollator *) en_us->clone();
425 c1->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
426 CollationElementIterator *i1 = c1->createCollationElementIterator(test1);
427
428 RuleBasedCollator *c2 = (RuleBasedCollator *) en_us->clone();
429 c2->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
430 CollationElementIterator *i2 = c2->createCollationElementIterator(test2);
431
432 assertEqual(*i1, *i2);
433
434 delete i2;
435 delete c2;
436 delete i1;
437 delete c1;
438}
439
440// @bug 4066696
441//
442// French secondary collation checking at the end of compare iteration fails
443//
444void CollationRegressionTest::Test4066696(/* char* par */)
445{
446 UErrorCode status = U_ZERO_ERROR;
447 RuleBasedCollator *c = NULL;
729e4ab9
A
448
449 c = (RuleBasedCollator *)Collator::createInstance(Locale::getCanadaFrench(), status);
b75a7d8f
A
450
451 if (c == NULL || U_FAILURE(status))
452 {
729e4ab9 453 errln("Failure creating collator for Locale::getCanadaFrench()");
b75a7d8f
A
454 delete c;
455 return;
456 }
457
458 c->setStrength(Collator::SECONDARY);
459
460/*
461 String[] tests = {
462 "\u00e0", "<", "\u01fa", // a-grave < A-ring-acute
463 };
464
465 should be:
466
467 String[] tests = {
468 "\u00e0", ">", "\u01fa", // a-grave < A-ring-acute
469 };
470
471*/
472
473 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
474 {
475 {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0}
476 };
477
478 compareArray(*c, tests, ARRAY_LENGTH(tests));
479
480 delete c;
481}
482
483// @bug 4076676
484//
485// Bad canonicalization of same-class combining characters
486//
487void CollationRegressionTest::Test4076676(/* char* par */)
488{
489 // These combining characters are all in the same class, so they should not
490 // be reordered, and they should compare as unequal.
491 static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0};
492 static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0};
493
494 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
495 c->setStrength(Collator::TERTIARY);
496
497 if (c->compare(s1,s2) == 0)
498 {
499 errln("Same-class combining chars were reordered");
500 }
501
502 delete c;
503}
504
// @bug 4079231
//
// RuleBasedCollator::operator==(NULL) throws NullPointerException
//
// Intentionally empty: the body holds only the commented-out C++ attempt and
// the original Java, kept for reference.
void CollationRegressionTest::Test4079231(/* char* par */)
{
    // I don't think there's any way to write this test
    // in C++. The following is equivalent to the Java,
    // but doesn't compile 'cause NULL can't be converted
    // to Collator&
    //
    // if (en_us->operator==(NULL))
    // {
    //     errln("en_us->operator==(NULL) returned TRUE");
    // }

 /*
   try {
        if (en_us->equals(null)) {
            errln("en_us->equals(null) returned true");
        }
    }
    catch (Exception e) {
        errln("en_us->equals(null) threw " + e.toString());
    }
*/
}
532
533// @bug 4078588
534//
535// RuleBasedCollator breaks on "< a < bb" rule
536//
537void CollationRegressionTest::Test4078588(/* char *par */)
538{
539 UErrorCode status = U_ZERO_ERROR;
57a6839d 540 RuleBasedCollator *rbc = new RuleBasedCollator("&9 < a < bb", status);
b75a7d8f
A
541
542 if (rbc == NULL || U_FAILURE(status))
543 {
544 errln("Failed to create RuleBasedCollator.");
545 delete rbc;
546 return;
547 }
548
549 Collator::EComparisonResult result = rbc->compare("a","bb");
550
551 if (result != Collator::LESS)
552 {
553 errln((UnicodeString)"Compare(a,bb) returned " + (int)result
554 + (UnicodeString)"; expected -1");
555 }
556
557 delete rbc;
558}
559
560// @bug 4081866
561//
562// Combining characters in different classes not reordered properly.
563//
564void CollationRegressionTest::Test4081866(/* char* par */)
565{
566 // These combining characters are all in different classes,
567 // so they should be reordered and the strings should compare as equal.
568 static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0};
569 static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0};
570
571 UErrorCode status = U_ZERO_ERROR;
572 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
573 c->setStrength(Collator::TERTIARY);
729e4ab9 574
b75a7d8f
A
575 // Now that the default collators are set to NO_DECOMPOSITION
576 // (as a result of fixing bug 4114077), we must set it explicitly
577 // when we're testing reordering behavior. -- lwerner, 5/5/98
578 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
579
580 if (c->compare(s1,s2) != 0)
581 {
582 errln("Combining chars were not reordered");
583 }
584
585 delete c;
586}
587
588// @bug 4087241
589//
590// string comparison errors in Scandinavian collators
591//
592void CollationRegressionTest::Test4087241(/* char* par */)
593{
594 UErrorCode status = U_ZERO_ERROR;
595 Locale da_DK("da", "DK");
596 RuleBasedCollator *c = NULL;
729e4ab9 597
b75a7d8f
A
598 c = (RuleBasedCollator *) Collator::createInstance(da_DK, status);
599
600 if (c == NULL || U_FAILURE(status))
601 {
602 errln("Failed to create collator for da_DK locale");
603 delete c;
604 return;
605 }
606
607 c->setStrength(Collator::SECONDARY);
608
609 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
610 {
611 {0x7a, 0}, {0x3c, 0}, {0x00E6, 0}, // z < ae
57a6839d 612 {0x61, 0x0308, 0}, {0x3c, 0}, {0x61, 0x030A, 0}, // a-umlaut < a-ring
b75a7d8f
A
613 {0x59, 0}, {0x3c, 0}, {0x75, 0x0308, 0}, // Y < u-umlaut
614 };
615
616 compareArray(*c, tests, ARRAY_LENGTH(tests));
617
618 delete c;
619}
620
621// @bug 4087243
622//
623// CollationKey takes ignorable strings into account when it shouldn't
624//
625void CollationRegressionTest::Test4087243(/* char* par */)
626{
627 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
628 c->setStrength(Collator::TERTIARY);
629
630 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
631 {
632 {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0} // 1 2 3 = 1 2 3 ctrl-A
633 };
634
635 compareArray(*c, tests, ARRAY_LENGTH(tests));
636
637 delete c;
638}
639
640// @bug 4092260
641//
642// Mu/micro conflict
643// Micro symbol and greek lowercase letter Mu should sort identically
644//
645void CollationRegressionTest::Test4092260(/* char* par */)
646{
647 UErrorCode status = U_ZERO_ERROR;
648 Locale el("el", "");
649 Collator *c = NULL;
729e4ab9 650
b75a7d8f
A
651 c = Collator::createInstance(el, status);
652
653 if (c == NULL || U_FAILURE(status))
654 {
655 errln("Failed to create collator for el locale.");
656 delete c;
657 return;
658 }
659
660 // These now have tertiary differences in UCA
661 c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
662
663 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
664 {
665 {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0}
666 };
667
668 compareArray(*c, tests, ARRAY_LENGTH(tests));
669
670 delete c;
671}
672
673// @bug 4095316
674//
675void CollationRegressionTest::Test4095316(/* char* par */)
676{
677 UErrorCode status = U_ZERO_ERROR;
678 Locale el_GR("el", "GR");
679 Collator *c = Collator::createInstance(el_GR, status);
680
681 if (c == NULL || U_FAILURE(status))
682 {
683 errln("Failed to create collator for el_GR locale");
684 delete c;
685 return;
686 }
687 // These now have tertiary differences in UCA
688 //c->setStrength(Collator::TERTIARY);
689 c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
690
691 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
692 {
693 {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0}
694 };
695
696 compareArray(*c, tests, ARRAY_LENGTH(tests));
697
698 delete c;
699}
700
701// @bug 4101940
702//
703void CollationRegressionTest::Test4101940(/* char* par */)
704{
705 UErrorCode status = U_ZERO_ERROR;
706 RuleBasedCollator *c = NULL;
57a6839d 707 UnicodeString rules = "&9 < a < b";
b75a7d8f 708 UnicodeString nothing = "";
729e4ab9 709
b75a7d8f
A
710 c = new RuleBasedCollator(rules, status);
711
712 if (c == NULL || U_FAILURE(status))
713 {
714 errln("Failed to create RuleBasedCollator");
715 delete c;
716 return;
717 }
718
719 CollationElementIterator *i = c->createCollationElementIterator(nothing);
720 i->reset();
721
722 if (i->next(status) != CollationElementIterator::NULLORDER)
723 {
724 errln("next did not return NULLORDER");
725 }
726
727 delete i;
728 delete c;
729}
730
731// @bug 4103436
732//
733// Collator::compare not handling spaces properly
734//
735void CollationRegressionTest::Test4103436(/* char* par */)
736{
737 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
738 c->setStrength(Collator::TERTIARY);
739
740 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
741 {
742 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0},
743 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}
744 };
745
746 compareArray(*c, tests, ARRAY_LENGTH(tests));
747
748 delete c;
749}
750
751// @bug 4114076
752//
753// Collation not Unicode conformant with Hangul syllables
754//
755void CollationRegressionTest::Test4114076(/* char* par */)
756{
757 UErrorCode status = U_ZERO_ERROR;
758 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
759 c->setStrength(Collator::TERTIARY);
760
761 //
762 // With Canonical decomposition, Hangul syllables should get decomposed
763 // into Jamo, but Jamo characters should not be decomposed into
764 // conjoining Jamo
765 //
766 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
767 {
768 {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0}
769 };
770
771 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
772 compareArray(*c, test1, ARRAY_LENGTH(test1));
773
774 // From UTR #15:
775 // *In earlier versions of Unicode, jamo characters like ksf
729e4ab9 776 // had compatibility mappings to kf + sf. These mappings were
b75a7d8f
A
777 // removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.)
778 // That is, the following test is obsolete as of 2.1.9
779
780//obsolete- // With Full decomposition, it should go all the way down to
781//obsolete- // conjoining Jamo characters.
782//obsolete- //
783//obsolete- static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
784//obsolete- {
785//obsolete- {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0}
786//obsolete- };
787//obsolete-
788//obsolete- c->setDecomposition(Normalizer::DECOMP_COMPAT);
789//obsolete- compareArray(*c, test2, ARRAY_LENGTH(test2));
790
791 delete c;
792}
793
794
795// @bug 4124632
796//
797// Collator::getCollationKey was hanging on certain character sequences
798//
799void CollationRegressionTest::Test4124632(/* char* par */)
800{
801 UErrorCode status = U_ZERO_ERROR;
802 Collator *coll = NULL;
729e4ab9 803
b75a7d8f 804 coll = Collator::createInstance(Locale::getJapan(), status);
729e4ab9 805
b75a7d8f
A
806 if (coll == NULL || U_FAILURE(status))
807 {
808 errln("Failed to create collator for Locale::JAPAN");
809 delete coll;
46f4442e 810 return;
b75a7d8f
A
811 }
812
813 static const UChar test[] = {0x41, 0x0308, 0x62, 0x63, 0};
814 CollationKey key;
729e4ab9 815
b75a7d8f
A
816 coll->getCollationKey(test, key, status);
817
818 if (key.isBogus() || U_FAILURE(status))
819 {
820 errln("CollationKey creation failed.");
821 }
822
823 delete coll;
824}
825
826// @bug 4132736
827//
828// sort order of french words with multiple accents has errors
829//
830void CollationRegressionTest::Test4132736(/* char* par */)
831{
832 UErrorCode status = U_ZERO_ERROR;
833
834 Collator *c = NULL;
729e4ab9
A
835
836 c = Collator::createInstance(Locale::getCanadaFrench(), status);
b75a7d8f 837 c->setStrength(Collator::TERTIARY);
729e4ab9 838
b75a7d8f
A
839 if (c == NULL || U_FAILURE(status))
840 {
729e4ab9 841 errln("Failed to create a collator for Locale::getCanadaFrench()");
b75a7d8f 842 delete c;
46f4442e 843 return;
b75a7d8f
A
844 }
845
846 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
847 {
848 {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0},
849 {0x65, 0x0300, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x0300, 0}
850 };
851
852 compareArray(*c, test1, ARRAY_LENGTH(test1));
853
854 delete c;
855}
856
857// @bug 4133509
858//
859// The sorting using java.text.CollationKey is not in the exact order
860//
861void CollationRegressionTest::Test4133509(/* char* par */)
862{
863 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
864 {
865 {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0},
866 {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0}, {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0},
867 {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0}, {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0}
868 };
869
870 compareArray(*en_us, test1, ARRAY_LENGTH(test1));
871}
872
873// @bug 4114077
874//
729e4ab9 875// Collation with decomposition off doesn't work for Europe
b75a7d8f
A
876//
877void CollationRegressionTest::Test4114077(/* char* par */)
878{
879 // Ensure that we get the same results with decomposition off
880 // as we do with it on....
729e4ab9 881
b75a7d8f
A
882 UErrorCode status = U_ZERO_ERROR;
883 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
884 c->setStrength(Collator::TERTIARY);
729e4ab9 885
b75a7d8f
A
886 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
887 {
888 {0x00C0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}, // Should be equivalent
889 {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0},
890 {0x0204, 0}, {0x3d, 0}, {0x45, 0x030F, 0},
891 {0x01fa, 0}, {0x3d, 0}, {0x41, 0x030a, 0x0301, 0}, // a-ring-acute -> a-ring, acute
892 // -> a, ring, acute
893 {0x41, 0x0300, 0x0316, 0}, {0x3c, 0}, {0x41, 0x0316, 0x0300, 0} // No reordering --> unequal
894 };
895
896 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
897 compareArray(*c, test1, ARRAY_LENGTH(test1));
898
899 static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
900 {
901 {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0} // Reordering --> equal
902 };
903
904 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
905 compareArray(*c, test2, ARRAY_LENGTH(test2));
906
907 delete c;
908}
909
910// @bug 4141640
911//
729e4ab9 912// Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
b75a7d8f
A
913//
914void CollationRegressionTest::Test4141640(/* char* par */)
915{
916 //
917 // Rather than just creating a Swedish collator, we might as well
918 // try to instantiate one for every locale available on the system
919 // in order to prevent this sort of bug from cropping up in the future
920 //
921 UErrorCode status = U_ZERO_ERROR;
922 int32_t i, localeCount;
923 const Locale *locales = Locale::getAvailableLocales(localeCount);
729e4ab9 924
b75a7d8f
A
925 for (i = 0; i < localeCount; i += 1)
926 {
927 Collator *c = NULL;
928
929 status = U_ZERO_ERROR;
930 c = Collator::createInstance(locales[i], status);
931
932 if (c == NULL || U_FAILURE(status))
933 {
934 UnicodeString msg, localeName;
935
936 msg += "Could not create collator for locale ";
937 msg += locales[i].getName();
938
939 errln(msg);
940 }
941
942 delete c;
943 }
944}
945
946// @bug 4139572
947//
729e4ab9 948// getCollationKey throws exception for spanish text
b75a7d8f
A
949// Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
950//
951void CollationRegressionTest::Test4139572(/* char* par */)
952{
953 //
954 // Code pasted straight from the bug report
955 // (and then translated to C++ ;-)
956 //
957 // create spanish locale and collator
958 UErrorCode status = U_ZERO_ERROR;
959 Locale l("es", "es");
960 Collator *col = NULL;
729e4ab9 961
b75a7d8f
A
962 col = Collator::createInstance(l, status);
963
964 if (col == NULL || U_FAILURE(status))
965 {
966 errln("Failed to create a collator for es_es locale.");
967 delete col;
968 return;
969 }
970
971 CollationKey key;
972
973 // this spanish phrase kills it!
974 col->getCollationKey("Nombre De Objeto", key, status);
975
976 if (key.isBogus() || U_FAILURE(status))
977 {
978 errln("Error creating CollationKey for \"Nombre De Ojbeto\"");
979 }
980
981 delete col;
982}
983/* HSYS : RuleBasedCollator::compare() performance enhancements
984 compare() does not create CollationElementIterator() anymore.*/
729e4ab9 985
b75a7d8f
A
986class My4146160Collator : public RuleBasedCollator
987{
988public:
989 My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status);
990 ~My4146160Collator();
991
992 CollationElementIterator *createCollationElementIterator(const UnicodeString &text) const;
993
994 CollationElementIterator *createCollationElementIterator(const CharacterIterator &text) const;
729e4ab9 995
b75a7d8f
A
996 static int32_t count;
997};
998
999int32_t My4146160Collator::count = 0;
1000
1001My4146160Collator::My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status)
1002 : RuleBasedCollator(rbc.getRules(), status)
1003{
1004}
1005
1006My4146160Collator::~My4146160Collator()
1007{
1008}
1009
1010CollationElementIterator *My4146160Collator::createCollationElementIterator(const UnicodeString &text) const
1011{
1012 count += 1;
1013 return RuleBasedCollator::createCollationElementIterator(text);
1014}
1015
1016CollationElementIterator *My4146160Collator::createCollationElementIterator(const CharacterIterator &text) const
1017{
1018 count += 1;
1019 return RuleBasedCollator::createCollationElementIterator(text);
1020}
1021
// @bug 4146160
//
// RuleBasedCollator doesn't use createCollationElementIterator internally
//
// The entire body is compiled out with #if 0, so this test currently does
// nothing. The disabled code counts createCollationElementIterator() calls
// made through a My4146160Collator during getCollationKey() and compare().
void CollationRegressionTest::Test4146160(/* char* par */)
{
#if 0
    //
    // Use a custom collator class whose createCollationElementIterator
    // methods increment a count....
    //
    UErrorCode status = U_ZERO_ERROR;
    CollationKey key;

    My4146160Collator::count = 0;
    My4146160Collator *mc = NULL;

    mc = new My4146160Collator(*en_us, status);

    if (mc == NULL || U_FAILURE(status))
    {
        errln("Failed to create a My4146160Collator.");
        delete mc;
        return;
    }

    mc->getCollationKey("1", key, status);

    if (key.isBogus() || U_FAILURE(status))
    {
        errln("Failure to get a CollationKey from a My4146160Collator.");
        delete mc;
        return;
    }

    if (My4146160Collator::count < 1)
    {
        errln("My4146160Collator::createCollationElementIterator not called for getCollationKey");
    }

    // Reset the counter before exercising compare().
    My4146160Collator::count = 0;
    mc->compare("1", "2");

    if (My4146160Collator::count < 1)
    {
        errln("My4146160Collator::createtCollationElementIterator not called for compare");
    }

    delete mc;
#endif
}
729e4ab9 1073
57a6839d
A
1074void CollationRegressionTest::Test4179216() {
1075 // you can position a CollationElementIterator in the middle of
1076 // a contracting character sequence, yielding a bogus collation
1077 // element
1078 IcuTestErrorCode errorCode(*this, "Test4179216");
1079 RuleBasedCollator coll(en_us->getRules() + " & C < ch , cH , Ch , CH < cat < crunchy", errorCode);
1080 UnicodeString testText = "church church catcatcher runcrunchynchy";
1081 CollationElementIterator *iter = coll.createCollationElementIterator(testText);
1082
1083 // test that the "ch" combination works properly
1084 iter->setOffset(4, errorCode);
1085 int32_t elt4 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1086
1087 iter->reset();
1088 int32_t elt0 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1089
1090 iter->setOffset(5, errorCode);
1091 int32_t elt5 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1092
1093 // Compares and prints only 16-bit primary weights.
1094 if (elt4 != elt0 || elt5 != elt0) {
1095 errln("The collation elements at positions 0 (0x%04x), "
1096 "4 (0x%04x), and 5 (0x%04x) don't match.",
1097 elt0, elt4, elt5);
1098 }
1099
1100 // test that the "cat" combination works properly
1101 iter->setOffset(14, errorCode);
1102 int32_t elt14 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1103
1104 iter->setOffset(15, errorCode);
1105 int32_t elt15 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1106
1107 iter->setOffset(16, errorCode);
1108 int32_t elt16 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1109
1110 iter->setOffset(17, errorCode);
1111 int32_t elt17 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1112
1113 iter->setOffset(18, errorCode);
1114 int32_t elt18 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1115
1116 iter->setOffset(19, errorCode);
1117 int32_t elt19 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1118
1119 // Compares and prints only 16-bit primary weights.
1120 if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17
1121 || elt14 != elt18 || elt14 != elt19) {
1122 errln("\"cat\" elements don't match: elt14 = 0x%04x, "
1123 "elt15 = 0x%04x, elt16 = 0x%04x, elt17 = 0x%04x, "
1124 "elt18 = 0x%04x, elt19 = 0x%04x",
1125 elt14, elt15, elt16, elt17, elt18, elt19);
1126 }
1127
1128 // now generate a complete list of the collation elements,
1129 // first using next() and then using setOffset(), and
1130 // make sure both interfaces return the same set of elements
1131 iter->reset();
1132
1133 int32_t elt = iter->next(errorCode);
1134 int32_t count = 0;
1135 while (elt != CollationElementIterator::NULLORDER) {
1136 ++count;
1137 elt = iter->next(errorCode);
1138 }
1139
1140 LocalArray<UnicodeString> nextElements(new UnicodeString[count]);
1141 LocalArray<UnicodeString> setOffsetElements(new UnicodeString[count]);
1142 int32_t lastPos = 0;
1143
1144 iter->reset();
1145 elt = iter->next(errorCode);
1146 count = 0;
1147 while (elt != CollationElementIterator::NULLORDER) {
1148 nextElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset());
1149 lastPos = iter->getOffset();
1150 elt = iter->next(errorCode);
1151 }
1152 int32_t nextElementsLength = count;
1153 count = 0;
1154 for (int32_t i = 0; i < testText.length(); ) {
1155 iter->setOffset(i, errorCode);
1156 lastPos = iter->getOffset();
1157 elt = iter->next(errorCode);
1158 setOffsetElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset());
1159 i = iter->getOffset();
1160 }
1161 for (int32_t i = 0; i < nextElementsLength; i++) {
1162 if (nextElements[i] == setOffsetElements[i]) {
1163 logln(nextElements[i]);
1164 } else {
1165 errln(UnicodeString("Error: next() yielded ") + nextElements[i] +
1166 ", but setOffset() yielded " + setOffsetElements[i]);
1167 }
1168 }
1169 delete iter;
1170}
1171
729e4ab9
A
1172// Ticket 7189
1173//
1174// nextSortKeyPart incorrect for EO_S1 collation
1175static int32_t calcKeyIncremental(UCollator *coll, const UChar* text, int32_t len, uint8_t *keyBuf, int32_t /*keyBufLen*/, UErrorCode& status) {
1176 UCharIterator uiter;
1177 uint32_t state[2] = { 0, 0 };
1178 int32_t keyLen;
1179 int32_t count = 8;
1180
1181 uiter_setString(&uiter, text, len);
1182 keyLen = 0;
1183 while (TRUE) {
1184 int32_t keyPartLen = ucol_nextSortKeyPart(coll, &uiter, state, &keyBuf[keyLen], count, &status);
1185 if (U_FAILURE(status)) {
1186 return -1;
1187 }
1188 if (keyPartLen == 0) {
1189 break;
1190 }
1191 keyLen += keyPartLen;
1192 }
1193 return keyLen;
1194}
1195
1196void CollationRegressionTest::TestT7189() {
1197 UErrorCode status = U_ZERO_ERROR;
1198 UCollator *coll;
1199 uint32_t i;
1200
1201 static const UChar text1[][CollationRegressionTest::MAX_TOKEN_LEN] = {
1202 // "Achter De Hoven"
1203 { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
1204 // "ABC"
1205 { 0x41, 0x42, 0x43, 0x00 },
1206 // "HELLO world!"
1207 { 0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
1208 };
1209
1210 static const UChar text2[][CollationRegressionTest::MAX_TOKEN_LEN] = {
1211 // "Achter de Hoven"
1212 { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
1213 // "abc"
1214 { 0x61, 0x62, 0x63, 0x00 },
1215 // "hello world!"
1216 { 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
1217 };
1218
1219 // Open the collator
1220 coll = ucol_openFromShortString("EO_S1", FALSE, NULL, &status);
1221 if (U_FAILURE(status)) {
1222 errln("Failed to create a collator for short string EO_S1");
1223 return;
1224 }
1225
1226 for (i = 0; i < sizeof(text1) / (CollationRegressionTest::MAX_TOKEN_LEN * sizeof(UChar)); i++) {
1227 uint8_t key1[100], key2[100];
1228 int32_t len1, len2;
1229
1230 len1 = calcKeyIncremental(coll, text1[i], -1, key1, sizeof(key1), status);
1231 if (U_FAILURE(status)) {
1232 errln(UnicodeString("Failed to get a partial collation key for ") + text1[i]);
1233 break;
1234 }
1235 len2 = calcKeyIncremental(coll, text2[i], -1, key2, sizeof(key2), status);
1236 if (U_FAILURE(status)) {
1237 errln(UnicodeString("Failed to get a partial collation key for ") + text2[i]);
1238 break;
1239 }
1240
1241 if (len1 == len2 && uprv_memcmp(key1, key2, len1) == 0) {
1242 errln(UnicodeString("Failed: Identical key\n") + " text1: " + text1[i] + "\n" + " text2: " + text2[i] + "\n" + " key : " + TestUtility::hex(key1, len1));
1243 } else {
1244 logln(UnicodeString("Keys produced -\n") + " text1: " + text1[i] + "\n" + " key1 : " + TestUtility::hex(key1, len1) + "\n" + " text2: " + text2[i] + "\n" + " key2 : "
1245 + TestUtility::hex(key2, len2));
1246 }
1247 }
1248 ucol_close(coll);
1249}
1250
4388f060
A
1251void CollationRegressionTest::TestCaseFirstCompression() {
1252 RuleBasedCollator *col = (RuleBasedCollator *) en_us->clone();
1253 UErrorCode status = U_ZERO_ERROR;
1254
1255 // default
1256 caseFirstCompressionSub(col, "default");
1257
1258 // Upper first
1259 col->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
1260 if (U_FAILURE(status)) {
1261 errln("Failed to set UCOL_UPPER_FIRST");
1262 return;
1263 }
1264 caseFirstCompressionSub(col, "upper first");
1265
1266 // Lower first
1267 col->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
1268 if (U_FAILURE(status)) {
1269 errln("Failed to set UCOL_LOWER_FIRST");
1270 return;
1271 }
1272 caseFirstCompressionSub(col, "lower first");
1273
1274 delete col;
1275}
1276
1277void CollationRegressionTest::caseFirstCompressionSub(Collator *col, UnicodeString opt) {
1278 const int32_t maxLength = 50;
1279
1280 UChar str1[maxLength];
1281 UChar str2[maxLength];
1282
1283 CollationKey key1, key2;
1284
1285 for (int32_t len = 1; len <= maxLength; len++) {
1286 int32_t i = 0;
1287 for (; i < len - 1; i++) {
1288 str1[i] = str2[i] = (UChar)0x61; // 'a'
1289 }
1290 str1[i] = (UChar)0x41; // 'A'
1291 str2[i] = (UChar)0x61; // 'a'
1292
1293 UErrorCode status = U_ZERO_ERROR;
1294 col->getCollationKey(str1, len, key1, status);
1295 col->getCollationKey(str2, len, key2, status);
1296
1297 UCollationResult cmpKey = key1.compareTo(key2, status);
1298 UCollationResult cmpCol = col->compare(str1, len, str2, len, status);
1299
1300 if (U_FAILURE(status)) {
1301 errln("Error in caseFirstCompressionSub");
1302 } else if (cmpKey != cmpCol) {
1303 errln((UnicodeString)"Inconsistent comparison(" + opt
1304 + "): str1=" + UnicodeString(str1, len) + ", str2=" + UnicodeString(str2, len)
1305 + ", cmpKey=" + cmpKey + ", cmpCol=" + cmpCol);
1306 }
1307 }
1308}
1309
57a6839d
A
1310void CollationRegressionTest::TestTrailingComment() {
1311 // ICU ticket #8070:
1312 // Check that the rule parser handles a comment without terminating end-of-line.
1313 IcuTestErrorCode errorCode(*this, "TestTrailingComment");
1314 RuleBasedCollator coll(UNICODE_STRING_SIMPLE("&c<b#comment1\n<a#comment2"), errorCode);
1315 UnicodeString a((UChar)0x61), b((UChar)0x62), c((UChar)0x63);
1316 assertTrue("c<b", coll.compare(c, b) < 0);
1317 assertTrue("b<a", coll.compare(b, a) < 0);
1318}
4388f060 1319
57a6839d
A
1320void CollationRegressionTest::TestBeforeWithTooStrongAfter() {
1321 // ICU ticket #9959:
1322 // Forbid rules with a before-reset followed by a stronger relation.
1323 IcuTestErrorCode errorCode(*this, "TestBeforeWithTooStrongAfter");
1324 RuleBasedCollator before2(UNICODE_STRING_SIMPLE("&[before 2]x<<q<p"), errorCode);
1325 if(errorCode.isSuccess()) {
1326 errln("should forbid before-2-reset followed by primary relation");
1327 } else {
1328 errorCode.reset();
1329 }
1330 RuleBasedCollator before3(UNICODE_STRING_SIMPLE("&[before 3]x<<<q<<s<p"), errorCode);
1331 if(errorCode.isSuccess()) {
1332 errln("should forbid before-3-reset followed by primary or secondary relation");
1333 } else {
1334 errorCode.reset();
1335 }
1336}
4388f060 1337
b75a7d8f
A
1338void CollationRegressionTest::compareArray(Collator &c,
1339 const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN],
1340 int32_t testCount)
1341{
1342 int32_t i;
1343 Collator::EComparisonResult expectedResult = Collator::EQUAL;
1344
1345 for (i = 0; i < testCount; i += 3)
1346 {
1347 UnicodeString source(tests[i]);
1348 UnicodeString comparison(tests[i + 1]);
1349 UnicodeString target(tests[i + 2]);
1350
1351 if (comparison == "<")
1352 {
1353 expectedResult = Collator::LESS;
1354 }
1355 else if (comparison == ">")
1356 {
1357 expectedResult = Collator::GREATER;
1358 }
1359 else if (comparison == "=")
1360 {
1361 expectedResult = Collator::EQUAL;
1362 }
1363 else
1364 {
1365 UnicodeString bogus1("Bogus comparison string \"");
1366 UnicodeString bogus2("\"");
1367 errln(bogus1 + comparison + bogus2);
1368 }
1369
1370 Collator::EComparisonResult compareResult = c.compare(source, target);
1371
1372 CollationKey sourceKey, targetKey;
1373 UErrorCode status = U_ZERO_ERROR;
1374
1375 c.getCollationKey(source, sourceKey, status);
1376
1377 if (U_FAILURE(status))
1378 {
1379 errln("Couldn't get collationKey for source");
1380 continue;
1381 }
1382
1383 c.getCollationKey(target, targetKey, status);
1384
1385 if (U_FAILURE(status))
1386 {
1387 errln("Couldn't get collationKey for target");
1388 continue;
1389 }
1390
1391 Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey);
1392
1393 reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult );
1394
1395 }
1396}
1397
1398void CollationRegressionTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2)
1399{
1400 int32_t c1, c2, count = 0;
1401 UErrorCode status = U_ZERO_ERROR;
1402
1403 do
1404 {
1405 c1 = i1.next(status);
1406 c2 = i2.next(status);
1407
1408 if (c1 != c2)
1409 {
1410 UnicodeString msg, msg1(" ");
729e4ab9 1411
b75a7d8f
A
1412 msg += msg1 + count;
1413 msg += ": strength(0x";
1414 appendHex(c1, 8, msg);
1415 msg += ") != strength(0x";
1416 appendHex(c2, 8, msg);
1417 msg += ")";
1418
1419 errln(msg);
1420 break;
1421 }
1422
1423 count += 1;
1424 }
1425 while (c1 != CollationElementIterator::NULLORDER);
1426}
1427
1428void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /* par */)
1429{
1430 if (exec)
1431 {
1432 logln("Collation Regression Tests: ");
1433 }
1434
57a6839d
A
1435 if(en_us == NULL) {
1436 dataerrln("Class collator not instantiated");
1437 name = "";
1438 return;
b75a7d8f 1439 }
57a6839d
A
1440 TESTCASE_AUTO_BEGIN;
1441 TESTCASE_AUTO(Test4048446);
1442 TESTCASE_AUTO(Test4051866);
1443 TESTCASE_AUTO(Test4053636);
1444 TESTCASE_AUTO(Test4054238);
1445 TESTCASE_AUTO(Test4054734);
1446 TESTCASE_AUTO(Test4054736);
1447 TESTCASE_AUTO(Test4058613);
1448 TESTCASE_AUTO(Test4059820);
1449 TESTCASE_AUTO(Test4060154);
1450 TESTCASE_AUTO(Test4062418);
1451 TESTCASE_AUTO(Test4065540);
1452 TESTCASE_AUTO(Test4066189);
1453 TESTCASE_AUTO(Test4066696);
1454 TESTCASE_AUTO(Test4076676);
1455 TESTCASE_AUTO(Test4078588);
1456 TESTCASE_AUTO(Test4079231);
1457 TESTCASE_AUTO(Test4081866);
1458 TESTCASE_AUTO(Test4087241);
1459 TESTCASE_AUTO(Test4087243);
1460 TESTCASE_AUTO(Test4092260);
1461 TESTCASE_AUTO(Test4095316);
1462 TESTCASE_AUTO(Test4101940);
1463 TESTCASE_AUTO(Test4103436);
1464 TESTCASE_AUTO(Test4114076);
1465 TESTCASE_AUTO(Test4114077);
1466 TESTCASE_AUTO(Test4124632);
1467 TESTCASE_AUTO(Test4132736);
1468 TESTCASE_AUTO(Test4133509);
1469 TESTCASE_AUTO(Test4139572);
1470 TESTCASE_AUTO(Test4141640);
1471 TESTCASE_AUTO(Test4146160);
1472 TESTCASE_AUTO(Test4179216);
1473 TESTCASE_AUTO(TestT7189);
1474 TESTCASE_AUTO(TestCaseFirstCompression);
1475 TESTCASE_AUTO(TestTrailingComment);
1476 TESTCASE_AUTO(TestBeforeWithTooStrongAfter);
1477 TESTCASE_AUTO_END;
b75a7d8f
A
1478}
1479
1480#endif /* #if !UCONFIG_NO_COLLATION */