1 /********************************************************************
3 * Copyright (c) 1997-2011, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
7 #include "unicode/utypes.h"
9 #if !UCONFIG_NO_COLLATION
11 #include "unicode/coll.h"
12 #include "unicode/tblcoll.h"
13 #include "unicode/unistr.h"
14 #include "unicode/sortkey.h"
20 #define ARRAY_LENGTH(array) ((int32_t)(sizeof array / sizeof array[0]))
// Constructor: creates the shared en_us collator for the US locale through
// Collator::createInstance(). When status reports a failure, the problem is
// reported through errcheckln() together with the ICU error name.
22 CollationRegressionTest::CollationRegressionTest()
24 UErrorCode status
= U_ZERO_ERROR
;
26 en_us
= (RuleBasedCollator
*)Collator::createInstance(Locale::getUS(), status
);
27 if(U_FAILURE(status
)) {
30 errcheckln(status
, "Collator creation failed with %s", u_errorName(status
));
// Destructor of the test fixture.
// NOTE(review): presumably releases the en_us collator created by the
// constructor; confirm against the full body.
35 CollationRegressionTest::~CollationRegressionTest()
43 // CollationElementIterator.reset() doesn't work
// Creates two element iterators from en_us over the same text (test1),
// walks i1 to the end with next(), and then hands both iterators to
// assertEqual(). Iterator creation failures and next() errors are reported
// with errln(). test2 is declared but is not passed to either iterator.
45 void CollationRegressionTest::Test4048446(/* char* par */)
47 const UnicodeString test1
= "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
48 const UnicodeString test2
= "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
49 CollationElementIterator
*i1
= en_us
->createCollationElementIterator(test1
);
50 CollationElementIterator
*i2
= en_us
->createCollationElementIterator(test1
);
51 UErrorCode status
= U_ZERO_ERROR
;
53 if (i1
== NULL
|| i2
== NULL
)
55 errln("Could not create CollationElementIterator's");
61 while (i1
->next(status
) != CollationElementIterator::NULLORDER
)
63 if (U_FAILURE(status
))
65 errln("error calling next()");
75 assertEqual(*i1
, *i2
);
83 // Collator -> rules -> Collator round-trip broken for expanding characters
// Round-trip check for collation rules: builds collator c1 from a rule
// string that includes the code units 0x3080, 0x1530 and 0x1520, builds c2
// from c1->getRules(), and reports an error when the two collators do not
// return equal rule strings.
85 void CollationRegressionTest::Test4051866(/* char* par */)
88 RuleBasedCollator c1 = new RuleBasedCollator("< o "
97 UErrorCode status
= U_ZERO_ERROR
;
101 rules
+= (UChar
)0x3080;
103 rules
+= (UChar
)0x1530;
106 rules
+= (UChar
)0x3080;
108 rules
+= (UChar
)0x1520;
111 // Build a collator containing expanding characters
112 RuleBasedCollator
*c1
= new RuleBasedCollator(rules
, status
);
114 // Build another using the rules from the first
115 RuleBasedCollator
*c2
= new RuleBasedCollator(c1
->getRules(), status
);
117 // Make sure they're the same
118 if (!(c1
->getRules() == c2
->getRules()))
120 errln("Rules are not equal");
129 // Collator thinks "black-bird" == "black"
// Reports an error when the en_us collator considers the strings
// black_bird and black to be equal.
131 void CollationRegressionTest::Test4053636(/* char* par */)
133 if (en_us
->equals("black_bird", "black"))
135 errln("black-bird == black");
141 // CollationElementIterator will not work correctly if the associated
142 // Collator object's mode is changed
// Clones en_us, switches normalization on for the clone, and creates an
// element iterator from the clone over a text containing U+00FC, U+00F6 and
// U+00DF.
// NOTE(review): what is verified on the iterator afterwards cannot be told
// from these lines; confirm against the full body.
144 void CollationRegressionTest::Test4054238(/* char* par */)
146 const UChar chars3
[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0};
147 const UnicodeString
test3(chars3
);
148 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
150 // NOTE: The Java code uses en_us to create the CollationElementIterators
151 // but I'm pretty sure that's wrong, so I've changed this to use c.
152 UErrorCode status
= U_ZERO_ERROR
;
153 c
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
154 CollationElementIterator
*i1
= c
->createCollationElementIterator(test3
);
161 // Collator::IDENTICAL documented but not implemented
// Clones en_us, sets the clone to IDENTICAL strength with normalization on,
// and runs the decomp table through compareArray(). Each table row is a
// (source, operator, target) triple where the operator is 0x3c, 0x3d or
// 0x3e, i.e. the characters for less-than, equals and greater-than.
163 void CollationRegressionTest::Test4054734(/* char* par */)
166 Here's the original Java:
169 "\u0001", "<", "\u0002",
170 "\u0001", "=", "\u0001",
171 "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise
172 "\u00C0", "=", "A\u0300" // Decomp should make these equal
175 String[] nodecomp = {
176 "\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave
180 static const UChar decomp
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
182 {0x0001, 0}, {0x3c, 0}, {0x0002, 0},
183 {0x0001, 0}, {0x3d, 0}, {0x0001, 0},
184 {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0},
185 {0x00c0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}
189 UErrorCode status
= U_ZERO_ERROR
;
190 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
192 c
->setStrength(Collator::IDENTICAL
);
194 c
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
195 compareArray(*c
, decomp
, ARRAY_LENGTH(decomp
));
202 // Full Decomposition mode not implemented
// Clones en_us, sets SECONDARY strength with normalization on, and uses
// compareArray() to check that U+FB4F compares equal to the sequence
// U+05D0 U+05DC.
204 void CollationRegressionTest::Test4054736(/* char* par */)
206 UErrorCode status
= U_ZERO_ERROR
;
207 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
209 c
->setStrength(Collator::SECONDARY
);
210 c
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
212 static const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
214 {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC} // Alef-Lamed vs. Alef, Lamed
217 compareArray(*c
, tests
, ARRAY_LENGTH(tests
));
224 // Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean
// Saves the current default locale, switches the default to Korean, and
// creates a collator for en_US while that default is active. The test then
// checks that the collator reports UCOL_OFF for UCOL_NORMALIZATION_MODE.
// The saved default locale is restored on the collator-creation failure
// path and again at the end.
226 void CollationRegressionTest::Test4058613(/* char* par */)
228 // Creating a default collator doesn't work when Korean is the default
231 Locale oldDefault
= Locale::getDefault();
232 UErrorCode status
= U_ZERO_ERROR
;
234 Locale::setDefault(Locale::getKorean(), status
);
236 if (U_FAILURE(status
))
238 errln("Could not set default locale to Locale::KOREAN");
244 c
= Collator::createInstance("en_US", status
);
246 if (c
== NULL
|| U_FAILURE(status
))
248 errln("Could not create a Korean collator");
249 Locale::setDefault(oldDefault
, status
);
254 // Since the fix to this bug was to turn off decomposition for Korean collators,
255 // ensure that's what we got
256 if (c
->getAttribute(UCOL_NORMALIZATION_MODE
, status
) != UCOL_OFF
)
258 errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator");
263 Locale::setDefault(oldDefault
, status
);
268 // RuleBasedCollator.getRules does not return the exact pattern as input
269 // for expanding character sequences
// Builds a RuleBasedCollator from a rule string that contains the expansion
// c/a and reports an error when the text returned by getRules() no longer
// contains that substring. Construction failures are reported with errln().
271 void CollationRegressionTest::Test4059820(/* char* par */)
273 UErrorCode status
= U_ZERO_ERROR
;
275 RuleBasedCollator
*c
= NULL
;
276 UnicodeString rules
= "< a < b , c/a < d < z";
278 c
= new RuleBasedCollator(rules
, status
);
280 if (c
== NULL
|| U_FAILURE(status
))
282 errln("Failure building a collator.");
287 if ( c
->getRules().indexOf("c/a") == -1)
289 errln("returned rules do not contain 'c/a'");
297 // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
// Builds a RuleBasedCollator from rules that include U+0131 and U+0130,
// turns normalization on, and runs two tables through compareArray():
// the tertiary table at TERTIARY strength, and the table named secondary
// after the strength has been set to PRIMARY.
299 void CollationRegressionTest::Test4060154(/* char* par */)
301 UErrorCode status
= U_ZERO_ERROR
;
304 rules
+= "< g, G < h, H < i, I < j, J";
306 rules
+= (UChar
)0x0131;
308 rules
+= (UChar
)0x0130;
311 RuleBasedCollator
*c
= NULL
;
313 c
= new RuleBasedCollator(rules
, status
);
315 if (c
== NULL
|| U_FAILURE(status
))
317 errln("failure building collator.");
322 c
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
325 String[] tertiary = {
329 "\u0131", "<", "\u0130",
335 static const UChar tertiary
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
337 {0x41, 0}, {0x3c, 0}, {0x42, 0},
338 {0x48, 0}, {0x3c, 0}, {0x0131, 0},
339 {0x48, 0}, {0x3c, 0}, {0x49, 0},
340 {0x0131, 0}, {0x3c, 0}, {0x0130, 0},
341 {0x0130, 0}, {0x3c, 0}, {0x69, 0},
342 {0x0130, 0}, {0x3e, 0}, {0x48, 0}
345 c
->setStrength(Collator::TERTIARY
);
346 compareArray(*c
, tertiary
, ARRAY_LENGTH(tertiary
));
349 String[] secondary = {
351 "\u0131", "=", "\u0130",
354 static const UChar secondary
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
356 {0x48, 0}, {0x3c, 0}, {0x49, 0},
357 {0x0131, 0}, {0x3d, 0}, {0x0130, 0}
360 c
->setStrength(Collator::PRIMARY
);
361 compareArray(*c
, secondary
, ARRAY_LENGTH(secondary
));
368 // Secondary/Tertiary comparison incorrect in French Secondary
// Creates a collator for the Canada French locale, sets SECONDARY strength,
// and uses compareArray() to check that p U+00EA c h e sorts before
// p U+00E9 c h U+00E9. Collator creation failures are reported with errln().
370 void CollationRegressionTest::Test4062418(/* char* par */)
372 UErrorCode status
= U_ZERO_ERROR
;
374 RuleBasedCollator
*c
= NULL
;
376 c
= (RuleBasedCollator
*) Collator::createInstance(Locale::getCanadaFrench(), status
);
378 if (c
== NULL
|| U_FAILURE(status
))
380 errln("Failed to create collator for Locale::getCanadaFrench()");
385 c
->setStrength(Collator::SECONDARY
);
389 "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater
392 static const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
394 {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0}
397 compareArray(*c
, tests
, ARRAY_LENGTH(tests
));
404 // Collator::compare() method broken if either string contains spaces
// Reports an error when en_us compares two strings that differ only in the
// letter after an embedded space as equal.
406 void CollationRegressionTest::Test4065540(/* char* par */)
408 if (en_us
->compare("abcd e", "abcd f") == 0)
410 errln("'abcd e' == 'abcd f'");
416 // Unicode characters need to be recursively decomposed to get the
417 // correct result. For example,
418 // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
// Compares collation elements of a precomposed character against its fully
// decomposed form: clone c1 has normalization on and iterates over U+1EB1,
// clone c2 has normalization off and iterates over a U+0306 U+0300. The two
// iterators are then passed to assertEqual().
420 void CollationRegressionTest::Test4066189(/* char* par */)
422 static const UChar chars1
[] = {0x1EB1, 0};
423 static const UChar chars2
[] = {0x61, 0x0306, 0x0300, 0};
424 const UnicodeString
test1(chars1
);
425 const UnicodeString
test2(chars2
);
426 UErrorCode status
= U_ZERO_ERROR
;
428 // NOTE: The java code used en_us to create the
429 // CollationElementIterator's. I'm pretty sure that
430 // was wrong, so I've changed the code to use c1 and c2
431 RuleBasedCollator
*c1
= (RuleBasedCollator
*) en_us
->clone();
432 c1
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
433 CollationElementIterator
*i1
= c1
->createCollationElementIterator(test1
);
435 RuleBasedCollator
*c2
= (RuleBasedCollator
*) en_us
->clone();
436 c2
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_OFF
, status
);
437 CollationElementIterator
*i2
= c2
->createCollationElementIterator(test2
);
439 assertEqual(*i1
, *i2
);
449 // French secondary collation checking at the end of compare iteration fails
// Creates a collator for the Canada French locale, sets SECONDARY strength,
// and uses compareArray() to check that U+00E0 sorts after U+01FA (the
// active table uses the 0x3e, greater-than, operator).
451 void CollationRegressionTest::Test4066696(/* char* par */)
453 UErrorCode status
= U_ZERO_ERROR
;
454 RuleBasedCollator
*c
= NULL
;
456 c
= (RuleBasedCollator
*)Collator::createInstance(Locale::getCanadaFrench(), status
);
458 if (c
== NULL
|| U_FAILURE(status
))
460 errln("Failure creating collator for Locale::getCanadaFrench()");
465 c
->setStrength(Collator::SECONDARY
);
469 "\u00e0", "<", "\u01fa", // a-grave < A-ring-acute
475 "\u00e0", ">", "\u01fa", // a-grave < A-ring-acute
480 static const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
482 {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0}
485 compareArray(*c
, tests
, ARRAY_LENGTH(tests
));
492 // Bad canonicalization of same-class combining characters
// Clones en_us at TERTIARY strength and reports an error when two strings
// that carry the same three combining marks (U+0300, U+0301, U+0302) in a
// different order after the letter A compare as equal.
494 void CollationRegressionTest::Test4076676(/* char* par */)
496 // These combining characters are all in the same class, so they should not
497 // be reordered, and they should compare as unequal.
498 static const UChar s1
[] = {0x41, 0x0301, 0x0302, 0x0300, 0};
499 static const UChar s2
[] = {0x41, 0x0302, 0x0300, 0x0301, 0};
501 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
502 c
->setStrength(Collator::TERTIARY
);
504 if (c
->compare(s1
,s2
) == 0)
506 errln("Same-class combining chars were reordered");
514 // RuleBasedCollator::operator==(NULL) throws NullPointerException
// Placeholder for the Java test that passed null to equals(). According to
// the comments below, an equivalent call does not compile in C++.
// NOTE(review): the remaining lines look like the original Java kept for
// reference rather than active code; confirm against the full body.
516 void CollationRegressionTest::Test4079231(/* char* par */)
518 // I don't think there's any way to write this test
519 // in C++. The following is equivalent to the Java,
520 // but doesn't compile 'cause NULL can't be converted
523 // if (en_us->operator==(NULL))
525 // errln("en_us->operator==(NULL) returned TRUE");
530 if (en_us->equals(null)) {
531 errln("en_us->equals(null) returned true");
534 catch (Exception e) {
535 errln("en_us->equals(null) threw " + e.toString());
542 // RuleBasedCollator breaks on "< a < bb" rule
// Builds a RuleBasedCollator from the rule string with a two-letter entry
// (bb) and reports an error unless compare() orders a before bb
// (Collator::LESS). Construction failures are reported with errln().
544 void CollationRegressionTest::Test4078588(/* char *par */)
546 UErrorCode status
= U_ZERO_ERROR
;
547 RuleBasedCollator
*rbc
= new RuleBasedCollator((UnicodeString
)"< a < bb", status
);
549 if (rbc
== NULL
|| U_FAILURE(status
))
551 errln("Failed to create RuleBasedCollator.");
556 Collator::EComparisonResult result
= rbc
->compare("a","bb");
558 if (result
!= Collator::LESS
)
560 errln((UnicodeString
)"Compare(a,bb) returned " + (int)result
561 + (UnicodeString
)"; expected -1");
569 // Combining characters in different classes not reordered properly.
// Clones en_us at TERTIARY strength with normalization switched on and
// reports an error unless two strings carrying the same four combining
// marks in a different order after the letter A compare as equal.
571 void CollationRegressionTest::Test4081866(/* char* par */)
573 // These combining characters are all in different classes,
574 // so they should be reordered and the strings should compare as equal.
575 static const UChar s1
[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0};
576 static const UChar s2
[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0};
578 UErrorCode status
= U_ZERO_ERROR
;
579 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
580 c
->setStrength(Collator::TERTIARY
);
582 // Now that the default collators are set to NO_DECOMPOSITION
583 // (as a result of fixing bug 4114077), we must set it explicitly
584 // when we're testing reordering behavior. -- lwerner, 5/5/98
585 c
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
587 if (c
->compare(s1
,s2
) != 0)
589 errln("Combining chars were not reordered");
597 // string comparison errors in Scandinavian collators
// Creates a collator for the da_DK locale, sets SECONDARY strength, and
// runs three less-than expectations through compareArray(). Collator
// creation failures are reported with errln().
599 void CollationRegressionTest::Test4087241(/* char* par */)
601 UErrorCode status
= U_ZERO_ERROR
;
602 Locale
da_DK("da", "DK");
603 RuleBasedCollator
*c
= NULL
;
605 c
= (RuleBasedCollator
*) Collator::createInstance(da_DK
, status
);
607 if (c
== NULL
|| U_FAILURE(status
))
609 errln("Failed to create collator for da_DK locale");
614 c
->setStrength(Collator::SECONDARY
);
616 static const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
618 {0x7a, 0}, {0x3c, 0}, {0x00E6, 0}, // z < ae
619 {0x61, 0x0308, 0}, {0x3c, 0}, {0x61, 0x030A, 0}, // a-umlaut < a-ring
620 {0x59, 0}, {0x3c, 0}, {0x75, 0x0308, 0}, // Y < u-umlaut
623 compareArray(*c
, tests
, ARRAY_LENGTH(tests
));
630 // CollationKey takes ignorable strings into account when it shouldn't
// Clones en_us at TERTIARY strength and uses compareArray() to check that a
// trailing U+0001 does not change the result: 123 must compare equal to 123
// followed by U+0001.
632 void CollationRegressionTest::Test4087243(/* char* par */)
634 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
635 c
->setStrength(Collator::TERTIARY
);
637 static const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
639 {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0} // 1 2 3 = 1 2 3 ctrl-A
642 compareArray(*c
, tests
, ARRAY_LENGTH(tests
));
650 // Micro symbol and greek lowercase letter Mu should sort identically
// Creates a collator for the locale object el, sets UCOL_STRENGTH to
// UCOL_SECONDARY, and uses compareArray() to check that U+00B5 compares
// equal to U+03BC. Collator creation failures are reported with errln().
652 void CollationRegressionTest::Test4092260(/* char* par */)
654 UErrorCode status
= U_ZERO_ERROR
;
658 c
= Collator::createInstance(el
, status
);
660 if (c
== NULL
|| U_FAILURE(status
))
662 errln("Failed to create collator for el locale.");
667 // These now have tertiary differences in UCA
668 c
->setAttribute(UCOL_STRENGTH
, UCOL_SECONDARY
, status
);
670 static const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
672 {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0}
675 compareArray(*c
, tests
, ARRAY_LENGTH(tests
));
// Creates a collator for the el_GR locale, sets UCOL_STRENGTH to
// UCOL_SECONDARY, and uses compareArray() to check that U+03D4 compares
// equal to U+03AB. Collator creation failures are reported with errln().
682 void CollationRegressionTest::Test4095316(/* char* par */)
684 UErrorCode status
= U_ZERO_ERROR
;
685 Locale
el_GR("el", "GR");
686 Collator
*c
= Collator::createInstance(el_GR
, status
);
688 if (c
== NULL
|| U_FAILURE(status
))
690 errln("Failed to create collator for el_GR locale");
694 // These now have tertiary differences in UCA
695 //c->setStrength(Collator::TERTIARY);
696 c
->setAttribute(UCOL_STRENGTH
, UCOL_SECONDARY
, status
);
698 static const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
700 {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0}
703 compareArray(*c
, tests
, ARRAY_LENGTH(tests
));
// Builds a small RuleBasedCollator, creates an element iterator over an
// empty string, and reports an error unless the first call to next()
// returns CollationElementIterator::NULLORDER.
710 void CollationRegressionTest::Test4101940(/* char* par */)
712 UErrorCode status
= U_ZERO_ERROR
;
713 RuleBasedCollator
*c
= NULL
;
714 UnicodeString rules
= "< a < b";
715 UnicodeString nothing
= "";
717 c
= new RuleBasedCollator(rules
, status
);
719 if (c
== NULL
|| U_FAILURE(status
))
721 errln("Failed to create RuleBasedCollator");
726 CollationElementIterator
*i
= c
->createCollationElementIterator(nothing
);
729 if (i
->next(status
) != CollationElementIterator::NULLORDER
)
731 errln("next did not return NULLORDER");
740 // Collator::compare not handling spaces properly
// Clones en_us at TERTIARY strength and uses compareArray() to check that
// the word file sorts before both the two-word string (file, space, access)
// and the joined string (fileaccess).
742 void CollationRegressionTest::Test4103436(/* char* par */)
744 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
745 c
->setStrength(Collator::TERTIARY
);
747 static const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
749 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0},
750 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}
753 compareArray(*c
, tests
, ARRAY_LENGTH(tests
));
760 // Collation not Unicode conformant with Hangul syllables
// Clones en_us at TERTIARY strength, switches normalization on, and uses
// compareArray() to check that the Hangul syllable U+D4DB compares equal to
// the sequence U+1111 U+1171 U+11B6. A second, compatibility-decomposition
// check is kept below only as commented-out (obsolete) code.
762 void CollationRegressionTest::Test4114076(/* char* par */)
764 UErrorCode status
= U_ZERO_ERROR
;
765 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
766 c
->setStrength(Collator::TERTIARY
);
769 // With Canonical decomposition, Hangul syllables should get decomposed
770 // into Jamo, but Jamo characters should not be decomposed into
773 static const UChar test1
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
775 {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0}
778 c
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
779 compareArray(*c
, test1
, ARRAY_LENGTH(test1
));
782 // *In earlier versions of Unicode, jamo characters like ksf
783 // had compatibility mappings to kf + sf. These mappings were
784 // removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.)
785 // That is, the following test is obsolete as of 2.1.9
787 //obsolete- // With Full decomposition, it should go all the way down to
788 //obsolete- // conjoining Jamo characters.
790 //obsolete- static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
792 //obsolete- {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0}
795 //obsolete- c->setDecomposition(Normalizer::DECOMP_COMPAT);
796 //obsolete- compareArray(*c, test2, ARRAY_LENGTH(test2));
804 // Collator::getCollationKey was hanging on certain character sequences
// Creates a collator for Locale::getJapan() and requests a collation key
// for the text A U+0308 b c. An error is reported when the collator cannot
// be created, when the key is bogus, or when status signals a failure.
806 void CollationRegressionTest::Test4124632(/* char* par */)
808 UErrorCode status
= U_ZERO_ERROR
;
809 Collator
*coll
= NULL
;
811 coll
= Collator::createInstance(Locale::getJapan(), status
);
813 if (coll
== NULL
|| U_FAILURE(status
))
815 errln("Failed to create collator for Locale::JAPAN");
820 static const UChar test
[] = {0x41, 0x0308, 0x62, 0x63, 0};
823 coll
->getCollationKey(test
, key
, status
);
825 if (key
.isBogus() || U_FAILURE(status
))
827 errln("CollationKey creation failed.");
835 // sort order of french words with multiple accents has errors
837 void CollationRegressionTest::Test4132736(/* char* par */)
839 UErrorCode status
= U_ZERO_ERROR
;
843 c
= Collator::createInstance(Locale::getCanadaFrench(), status
);
844 c
->setStrength(Collator::TERTIARY
);
846 if (c
== NULL
|| U_FAILURE(status
))
848 errln("Failed to create a collator for Locale::getCanadaFrench()");
853 static const UChar test1
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
855 {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0},
856 {0x65, 0x0300, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x0300, 0}
859 compareArray(*c
, test1
, ARRAY_LENGTH(test1
));
866 // The sorting using java.text.CollationKey is not in the exact order
// Uses compareArray() with the shared en_us collator to check that three
// words (Exception, Graphics, String) each sort before a longer word that
// starts with them.
868 void CollationRegressionTest::Test4133509(/* char* par */)
870 static const UChar test1
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
872 {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0},
873 {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0}, {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0},
874 {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0}, {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0}
877 compareArray(*en_us
, test1
, ARRAY_LENGTH(test1
));
882 // Collation with decomposition off doesn't work for Europe
// Clones en_us at TERTIARY strength and runs two tables through
// compareArray(): test1 with normalization switched off, then test2 with
// normalization switched on. The last row of test1 and the single row of
// test2 use the same pair of strings (A with U+0300 and U+0316 in swapped
// order); it is expected to be unequal without normalization and equal
// with it.
884 void CollationRegressionTest::Test4114077(/* char* par */)
886 // Ensure that we get the same results with decomposition off
887 // as we do with it on....
889 UErrorCode status
= U_ZERO_ERROR
;
890 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
891 c
->setStrength(Collator::TERTIARY
);
893 static const UChar test1
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
895 {0x00C0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}, // Should be equivalent
896 {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0},
897 {0x0204, 0}, {0x3d, 0}, {0x45, 0x030F, 0},
898 {0x01fa, 0}, {0x3d, 0}, {0x41, 0x030a, 0x0301, 0}, // a-ring-acute -> a-ring, acute
900 {0x41, 0x0300, 0x0316, 0}, {0x3c, 0}, {0x41, 0x0316, 0x0300, 0} // No reordering --> unequal
903 c
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_OFF
, status
);
904 compareArray(*c
, test1
, ARRAY_LENGTH(test1
));
906 static const UChar test2
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
908 {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0} // Reordering --> equal
911 c
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
912 compareArray(*c
, test2
, ARRAY_LENGTH(test2
));
919 // Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
// Iterates over every locale returned by Locale::getAvailableLocales() and
// tries to create a collator for each one, resetting status before every
// attempt. On failure a message naming the locale is assembled.
921 void CollationRegressionTest::Test4141640(/* char* par */)
924 // Rather than just creating a Swedish collator, we might as well
925 // try to instantiate one for every locale available on the system
926 // in order to prevent this sort of bug from cropping up in the future
928 UErrorCode status
= U_ZERO_ERROR
;
929 int32_t i
, localeCount
;
930 const Locale
*locales
= Locale::getAvailableLocales(localeCount
);
932 for (i
= 0; i
< localeCount
; i
+= 1)
936 status
= U_ZERO_ERROR
;
937 c
= Collator::createInstance(locales
[i
], status
);
939 if (c
== NULL
|| U_FAILURE(status
))
941 UnicodeString msg
, localeName
;
943 msg
+= "Could not create collator for locale ";
944 msg
+= locales
[i
].getName();
955 // getCollationKey throws exception for spanish text
956 // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
// Creates a collator for the locale built from language es and country es,
// and requests a collation key for a Spanish phrase. An error is reported
// when the collator cannot be created, when the key is bogus, or when
// status signals a failure.
958 void CollationRegressionTest::Test4139572(/* char* par */)
961 // Code pasted straight from the bug report
962 // (and then translated to C++ ;-)
964 // create spanish locale and collator
965 UErrorCode status
= U_ZERO_ERROR
;
966 Locale
l("es", "es");
967 Collator
*col
= NULL
;
969 col
= Collator::createInstance(l
, status
);
971 if (col
== NULL
|| U_FAILURE(status
))
973 errln("Failed to create a collator for es_es locale.");
980 // this spanish phrase kills it!
981 col
->getCollationKey("Nombre De Objeto", key
, status
);
983 if (key
.isBogus() || U_FAILURE(status
))
985 errln("Error creating CollationKey for \"Nombre De Ojbeto\"");
990 /* HSYS : RuleBasedCollator::compare() performance enhancements
991 compare() does not create CollationElementIterator() anymore.*/
// Helper subclass of RuleBasedCollator used by Test4146160. It is built
// from an existing RuleBasedCollator, redeclares both
// createCollationElementIterator() overloads, and carries a static counter
// named count that is defined with the initial value 0.
993 class My4146160Collator
: public RuleBasedCollator
996 My4146160Collator(RuleBasedCollator
&rbc
, UErrorCode
&status
);
997 ~My4146160Collator();
999 CollationElementIterator
*createCollationElementIterator(const UnicodeString
&text
) const;
1001 CollationElementIterator
*createCollationElementIterator(const CharacterIterator
&text
) const;
1003 static int32_t count
;
1006 int32_t My4146160Collator::count
= 0;
// Constructs the helper collator by passing the rule string of rbc, plus
// the caller's status, to the RuleBasedCollator base-class constructor.
1008 My4146160Collator::My4146160Collator(RuleBasedCollator
&rbc
, UErrorCode
&status
)
1009 : RuleBasedCollator(rbc
.getRules(), status
)
// Destructor of the helper collator.
1013 My4146160Collator::~My4146160Collator()
// UnicodeString overload: returns the iterator produced by the
// RuleBasedCollator implementation for the same text.
// NOTE(review): the comments in Test4146160 say this method also increments
// My4146160Collator::count; confirm against the full body.
1017 CollationElementIterator
*My4146160Collator::createCollationElementIterator(const UnicodeString
&text
) const
1020 return RuleBasedCollator::createCollationElementIterator(text
);
// CharacterIterator overload: returns the iterator produced by the
// RuleBasedCollator implementation for the same text.
// NOTE(review): the comments in Test4146160 say this method also increments
// My4146160Collator::count; confirm against the full body.
1023 CollationElementIterator
*My4146160Collator::createCollationElementIterator(const CharacterIterator
&text
) const
1026 return RuleBasedCollator::createCollationElementIterator(text
);
1031 // RuleBasedCollator doesn't use createCollationElementIterator internally
// Builds a My4146160Collator from en_us and checks the static counter
// My4146160Collator::count twice: once after getCollationKey() and once
// after compare(). The counter is reset to 0 before each operation and an
// error is reported when it is still below 1 afterwards.
1033 void CollationRegressionTest::Test4146160(/* char* par */)
1037 // Use a custom collator class whose createCollationElementIterator
1038 // methods increment a count....
1040 UErrorCode status
= U_ZERO_ERROR
;
1043 My4146160Collator::count
= 0;
1044 My4146160Collator
*mc
= NULL
;
1046 mc
= new My4146160Collator(*en_us
, status
);
1048 if (mc
== NULL
|| U_FAILURE(status
))
1050 errln("Failed to create a My4146160Collator.");
1055 mc
->getCollationKey("1", key
, status
);
1057 if (key
.isBogus() || U_FAILURE(status
))
1059 errln("Failure to get a CollationKey from a My4146160Collator.");
1064 if (My4146160Collator::count
< 1)
1066 errln("My4146160Collator::createCollationElementIterator not called for getCollationKey");
1069 My4146160Collator::count
= 0;
1070 mc
->compare("1", "2");
1072 if (My4146160Collator::count
< 1)
1074 errln("My4146160Collator::createtCollationElementIterator not called for compare");
1083 // nextSortKeyPart incorrect for EO_S1 collation
// File-local helper that builds a sort key piecewise with
// ucol_nextSortKeyPart().
//   coll    collator used to produce the key parts
//   text    input text, wrapped in a UCharIterator via uiter_setString()
//   len     length handed to uiter_setString() (callers in this file pass -1)
//   keyBuf  output buffer; each part is written at offset keyLen
//   status  receives any ICU error
// The size argument (keyBufLen) is unnamed and therefore not used.
// Every part length returned by ucol_nextSortKeyPart() is added to keyLen.
// NOTE(review): presumably loops until a part of length 0 is returned and
// then returns keyLen; confirm against the full body.
1084 static int32_t calcKeyIncremental(UCollator
*coll
, const UChar
* text
, int32_t len
, uint8_t *keyBuf
, int32_t /*keyBufLen*/, UErrorCode
& status
) {
1085 UCharIterator uiter
;
1086 uint32_t state
[2] = { 0, 0 };
1090 uiter_setString(&uiter
, text
, len
);
1093 int32_t keyPartLen
= ucol_nextSortKeyPart(coll
, &uiter
, state
, &keyBuf
[keyLen
], count
, &status
);
1094 if (U_FAILURE(status
)) {
1097 if (keyPartLen
== 0) {
1100 keyLen
+= keyPartLen
;
// Opens a collator from the short definition string EO_S1 and, for every
// row i of text1/text2 (the rows differ only in letter case), builds an
// incremental sort key for each text with calcKeyIncremental(). An error is
// reported when key generation fails or when both keys have the same length
// and identical bytes; the keys are also written to the log.
1105 void CollationRegressionTest::TestT7189() {
1106 UErrorCode status
= U_ZERO_ERROR
;
1110 static const UChar text1
[][CollationRegressionTest::MAX_TOKEN_LEN
] = {
1111 // "Achter De Hoven"
1112 { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
1114 { 0x41, 0x42, 0x43, 0x00 },
1116 { 0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
1119 static const UChar text2
[][CollationRegressionTest::MAX_TOKEN_LEN
] = {
1120 // "Achter de Hoven"
1121 { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
1123 { 0x61, 0x62, 0x63, 0x00 },
1125 { 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
1128 // Open the collator
1129 coll
= ucol_openFromShortString("EO_S1", FALSE
, NULL
, &status
);
1130 if (U_FAILURE(status
)) {
1131 errln("Failed to create a collator for short string EO_S1");
1135 for (i
= 0; i
< sizeof(text1
) / (CollationRegressionTest::MAX_TOKEN_LEN
* sizeof(UChar
)); i
++) {
1136 uint8_t key1
[100], key2
[100];
1139 len1
= calcKeyIncremental(coll
, text1
[i
], -1, key1
, sizeof(key1
), status
);
1140 if (U_FAILURE(status
)) {
1141 errln(UnicodeString("Failed to get a partial collation key for ") + text1
[i
]);
1144 len2
= calcKeyIncremental(coll
, text2
[i
], -1, key2
, sizeof(key2
), status
);
1145 if (U_FAILURE(status
)) {
1146 errln(UnicodeString("Failed to get a partial collation key for ") + text2
[i
]);
1150 if (len1
== len2
&& uprv_memcmp(key1
, key2
, len1
) == 0) {
1151 errln(UnicodeString("Failed: Identical key\n") + " text1: " + text1
[i
] + "\n" + " text2: " + text2
[i
] + "\n" + " key : " + TestUtility::hex(key1
, len1
));
1153 logln(UnicodeString("Keys produced -\n") + " text1: " + text1
[i
] + "\n" + " key1 : " + TestUtility::hex(key1
, len1
) + "\n" + " text2: " + text2
[i
] + "\n" + " key2 : "
1154 + TestUtility::hex(key2
, len2
));
// Clones en_us and runs caseFirstCompressionSub() three times: with the
// clone as created, after setting UCOL_CASE_FIRST to UCOL_UPPER_FIRST, and
// after setting it to UCOL_LOWER_FIRST. A failure to set the attribute is
// reported with errln().
1160 void CollationRegressionTest::TestCaseFirstCompression() {
1161 RuleBasedCollator
*col
= (RuleBasedCollator
*) en_us
->clone();
1162 UErrorCode status
= U_ZERO_ERROR
;
1165 caseFirstCompressionSub(col
, "default");
1168 col
->setAttribute(UCOL_CASE_FIRST
, UCOL_UPPER_FIRST
, status
);
1169 if (U_FAILURE(status
)) {
1170 errln("Failed to set UCOL_UPPER_FIRST");
1173 caseFirstCompressionSub(col
, "upper first");
1176 col
->setAttribute(UCOL_CASE_FIRST
, UCOL_LOWER_FIRST
, status
);
1177 if (U_FAILURE(status
)) {
1178 errln("Failed to set UCOL_LOWER_FIRST");
1181 caseFirstCompressionSub(col
, "lower first");
// Consistency check between collation keys and direct comparison.
//   col  collator under test
//   opt  label included in the error message
// For every length from 1 to maxLength (50) two strings are built: both are
// filled with the letter a up to the last position, where str1 gets an
// upper-case A and str2 a lower-case a. The result of comparing the two
// collation keys must match the result of col->compare() on the strings;
// any ICU failure or mismatch is reported with errln().
1186 void CollationRegressionTest::caseFirstCompressionSub(Collator
*col
, UnicodeString opt
) {
1187 const int32_t maxLength
= 50;
1189 UChar str1
[maxLength
];
1190 UChar str2
[maxLength
];
1192 CollationKey key1
, key2
;
1194 for (int32_t len
= 1; len
<= maxLength
; len
++) {
1196 for (; i
< len
- 1; i
++) {
1197 str1
[i
] = str2
[i
] = (UChar
)0x61; // 'a'
1199 str1
[i
] = (UChar
)0x41; // 'A'
1200 str2
[i
] = (UChar
)0x61; // 'a'
1202 UErrorCode status
= U_ZERO_ERROR
;
1203 col
->getCollationKey(str1
, len
, key1
, status
);
1204 col
->getCollationKey(str2
, len
, key2
, status
);
1206 UCollationResult cmpKey
= key1
.compareTo(key2
, status
);
1207 UCollationResult cmpCol
= col
->compare(str1
, len
, str2
, len
, status
);
1209 if (U_FAILURE(status
)) {
1210 errln("Error in caseFirstCompressionSub");
1211 } else if (cmpKey
!= cmpCol
) {
1212 errln((UnicodeString
)"Inconsistent comparison(" + opt
1213 + "): str1=" + UnicodeString(str1
, len
) + ", str2=" + UnicodeString(str2
, len
)
1214 + ", cmpKey=" + cmpKey
+ ", cmpCol=" + cmpCol
);
// Table-driven comparison helper shared by the tests in this file.
//   c      collator under test
//   tests  flat table read three rows at a time: source string, operator
//          string, target string
// The loop bound testCount is the number of rows in the table.
// The operator string selects the expected result: the less-than sign gives
// Collator::LESS, the greater-than sign gives Collator::GREATER and the
// equals sign gives Collator::EQUAL; anything else is reported as a bogus
// comparison string. Each pair is compared directly with c.compare() and
// through collation keys, and both results are passed to reportCResult()
// together with the expected result.
1221 void CollationRegressionTest::compareArray(Collator
&c
,
1222 const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
],
1226 Collator::EComparisonResult expectedResult
= Collator::EQUAL
;
1228 for (i
= 0; i
< testCount
; i
+= 3)
1230 UnicodeString
source(tests
[i
]);
1231 UnicodeString
comparison(tests
[i
+ 1]);
1232 UnicodeString
target(tests
[i
+ 2]);
1234 if (comparison
== "<")
1236 expectedResult
= Collator::LESS
;
1238 else if (comparison
== ">")
1240 expectedResult
= Collator::GREATER
;
1242 else if (comparison
== "=")
1244 expectedResult
= Collator::EQUAL
;
1248 UnicodeString
bogus1("Bogus comparison string \"");
1249 UnicodeString
bogus2("\"");
1250 errln(bogus1
+ comparison
+ bogus2
);
1253 Collator::EComparisonResult compareResult
= c
.compare(source
, target
);
1255 CollationKey sourceKey
, targetKey
;
1256 UErrorCode status
= U_ZERO_ERROR
;
1258 c
.getCollationKey(source
, sourceKey
, status
);
1260 if (U_FAILURE(status
))
1262 errln("Couldn't get collationKey for source");
1266 c
.getCollationKey(target
, targetKey
, status
);
1268 if (U_FAILURE(status
))
1270 errln("Couldn't get collationKey for target");
1274 Collator::EComparisonResult keyResult
= sourceKey
.compareTo(targetKey
);
1276 reportCResult( source
, target
, sourceKey
, targetKey
, compareResult
, keyResult
, compareResult
, expectedResult
);
// Steps two collation element iterators in lockstep, reading one element
// from each per round with next(), until i1 returns NULLORDER. A diagnostic
// message containing both element values in hexadecimal is assembled with
// appendHex().
// NOTE(review): presumably the message is only built and reported when the
// two elements differ; confirm against the full body.
1281 void CollationRegressionTest::assertEqual(CollationElementIterator
&i1
, CollationElementIterator
&i2
)
1283 int32_t c1
, c2
, count
= 0;
1284 UErrorCode status
= U_ZERO_ERROR
;
1288 c1
= i1
.next(status
);
1289 c2
= i2
.next(status
);
1293 UnicodeString msg
, msg1(" ");
1295 msg
+= msg1
+ count
;
1296 msg
+= ": strength(0x";
1297 appendHex(c1
, 8, msg
);
1298 msg
+= ") != strength(0x";
1299 appendHex(c2
, 8, msg
);
1308 while (c1
!= CollationElementIterator::NULLORDER
);
// Test dispatcher.
//   index  selects the test case (0 through 32)
//   exec   when true the selected test is run; the name is set either way
//   name   receives the test name, or an empty string for an unknown index
// The fourth parameter is unnamed and not used.
// NOTE(review): the dataerrln() call at the end presumably covers the case
// where the en_us collator could not be created; confirm against the full
// body.
1311 void CollationRegressionTest::runIndexedTest(int32_t index
, UBool exec
, const char* &name
, char* /* par */)
1315 logln("Collation Regression Tests: ");
1321 case 0: name
= "Test4048446"; if (exec
) Test4048446(/* par */); break;
1322 case 1: name
= "Test4051866"; if (exec
) Test4051866(/* par */); break;
1323 case 2: name
= "Test4053636"; if (exec
) Test4053636(/* par */); break;
1324 case 3: name
= "Test4054238"; if (exec
) Test4054238(/* par */); break;
1325 case 4: name
= "Test4054734"; if (exec
) Test4054734(/* par */); break;
1326 case 5: name
= "Test4054736"; if (exec
) Test4054736(/* par */); break;
1327 case 6: name
= "Test4058613"; if (exec
) Test4058613(/* par */); break;
1328 case 7: name
= "Test4059820"; if (exec
) Test4059820(/* par */); break;
1329 case 8: name
= "Test4060154"; if (exec
) Test4060154(/* par */); break;
1330 case 9: name
= "Test4062418"; if (exec
) Test4062418(/* par */); break;
1331 case 10: name
= "Test4065540"; if (exec
) Test4065540(/* par */); break;
1332 case 11: name
= "Test4066189"; if (exec
) Test4066189(/* par */); break;
1333 case 12: name
= "Test4066696"; if (exec
) Test4066696(/* par */); break;
1334 case 13: name
= "Test4076676"; if (exec
) Test4076676(/* par */); break;
1335 case 14: name
= "Test4078588"; if (exec
) Test4078588(/* par */); break;
1336 case 15: name
= "Test4079231"; if (exec
) Test4079231(/* par */); break;
1337 case 16: name
= "Test4081866"; if (exec
) Test4081866(/* par */); break;
1338 case 17: name
= "Test4087241"; if (exec
) Test4087241(/* par */); break;
1339 case 18: name
= "Test4087243"; if (exec
) Test4087243(/* par */); break;
1340 case 19: name
= "Test4092260"; if (exec
) Test4092260(/* par */); break;
1341 case 20: name
= "Test4095316"; if (exec
) Test4095316(/* par */); break;
1342 case 21: name
= "Test4101940"; if (exec
) Test4101940(/* par */); break;
1343 case 22: name
= "Test4103436"; if (exec
) Test4103436(/* par */); break;
1344 case 23: name
= "Test4114076"; if (exec
) Test4114076(/* par */); break;
1345 case 24: name
= "Test4114077"; if (exec
) Test4114077(/* par */); break;
1346 case 25: name
= "Test4124632"; if (exec
) Test4124632(/* par */); break;
1347 case 26: name
= "Test4132736"; if (exec
) Test4132736(/* par */); break;
1348 case 27: name
= "Test4133509"; if (exec
) Test4133509(/* par */); break;
1349 case 28: name
= "Test4139572"; if (exec
) Test4139572(/* par */); break;
1350 case 29: name
= "Test4141640"; if (exec
) Test4141640(/* par */); break;
1351 case 30: name
= "Test4146160"; if (exec
) Test4146160(/* par */); break;
1352 case 31: name
= "TestT7189"; if (exec
) TestT7189(); break;
1353 case 32: name
= "TestCaseFirstCompression"; if (exec
) TestCaseFirstCompression(); break;
1354 default: name
= ""; break;
1357 dataerrln("Class collator not instantiated");
1362 #endif /* #if !UCONFIG_NO_COLLATION */