1 /********************************************************************
3 * Copyright (c) 1997-2014, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
7 #include "unicode/utypes.h"
9 #if !UCONFIG_NO_COLLATION
11 #include "unicode/coll.h"
12 #include "unicode/localpointer.h"
13 #include "unicode/tblcoll.h"
14 #include "unicode/unistr.h"
15 #include "unicode/sortkey.h"
21 #define ARRAY_LENGTH(array) ((int32_t)(sizeof array / sizeof array[0]))
23 CollationRegressionTest::CollationRegressionTest()
25 UErrorCode status
= U_ZERO_ERROR
;
27 en_us
= (RuleBasedCollator
*)Collator::createInstance(Locale::getUS(), status
);
28 if(U_FAILURE(status
)) {
31 errcheckln(status
, "Collator creation failed with %s", u_errorName(status
));
36 CollationRegressionTest::~CollationRegressionTest()
44 // CollationElementIterator.reset() doesn't work
46 void CollationRegressionTest::Test4048446(/* char* par */)
48 const UnicodeString test1
= "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
49 const UnicodeString test2
= "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
50 CollationElementIterator
*i1
= en_us
->createCollationElementIterator(test1
);
51 CollationElementIterator
*i2
= en_us
->createCollationElementIterator(test1
);
52 UErrorCode status
= U_ZERO_ERROR
;
54 if (i1
== NULL
|| i2
== NULL
)
56 errln("Could not create CollationElementIterator's");
62 while (i1
->next(status
) != CollationElementIterator::NULLORDER
)
64 if (U_FAILURE(status
))
66 errln("error calling next()");
76 assertEqual(*i1
, *i2
);
84 // Collator -> rules -> Collator round-trip broken for expanding characters
//
// Builds a RuleBasedCollator (c1) from the string accumulated in `rules`,
// builds a second one (c2) from c1->getRules(), and reports an error when the
// two collators do not return equal rule strings.
//
// NOTE(review): in this view only the four (UChar) appends to `rules` are
// visible; the declaration of `rules` and any literal rule fragments appended
// between them are not shown here -- confirm against the full file.
86 void CollationRegressionTest::Test4051866(/* char* par */)
89 UErrorCode status
= U_ZERO_ERROR
;
93 rules
+= (UChar
)0x3080;
95 rules
+= (UChar
)0x1530;
98 rules
+= (UChar
)0x3080;
100 rules
+= (UChar
)0x1520;
103 // Build a collator containing expanding characters
104 LocalPointer
<RuleBasedCollator
> c1(new RuleBasedCollator(rules
, status
));
106 // Build another using the rules from the first
// Note: c1 is dereferenced here before `status` has been examined; the
// single status check below covers both constructions.
107 LocalPointer
<RuleBasedCollator
> c2(new RuleBasedCollator(c1
->getRules(), status
));
108 if (U_FAILURE(status
)) {
109 errln("RuleBasedCollator(rule string) failed - %s", u_errorName(status
));
113 // Make sure they're the same
114 if (!(c1
->getRules() == c2
->getRules()))
116 errln("Rules are not equal");
122 // Collator thinks "black-bird" == "black"
124 void CollationRegressionTest::Test4053636(/* char* par */)
126 if (en_us
->equals("black_bird", "black"))
128 errln("black-bird == black");
134 // CollationElementIterator will not work correctly if the associated
135 // Collator object's mode is changed
137 void CollationRegressionTest::Test4054238(/* char* par */)
139 const UChar chars3
[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0};
140 const UnicodeString
test3(chars3
);
141 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
143 // NOTE: The Java code uses en_us to create the CollationElementIterators
144 // but I'm pretty sure that's wrong, so I've changed this to use c.
145 UErrorCode status
= U_ZERO_ERROR
;
146 c
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
147 CollationElementIterator
*i1
= c
->createCollationElementIterator(test3
);
154 // Collator::IDENTICAL documented but not implemented
156 void CollationRegressionTest::Test4054734(/* char* par */)
159 Here's the original Java:
162 "\u0001", "<", "\u0002",
163 "\u0001", "=", "\u0001",
164 "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise
165 "\u00C0", "=", "A\u0300" // Decomp should make these equal
168 String[] nodecomp = {
169 "\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave
173 static const UChar decomp
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
175 {0x0001, 0}, {0x3c, 0}, {0x0002, 0},
176 {0x0001, 0}, {0x3d, 0}, {0x0001, 0},
177 {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0},
178 {0x00c0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}
182 UErrorCode status
= U_ZERO_ERROR
;
183 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
185 c
->setStrength(Collator::IDENTICAL
);
187 c
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
188 compareArray(*c
, decomp
, ARRAY_LENGTH(decomp
));
195 // Full Decomposition mode not implemented
197 void CollationRegressionTest::Test4054736(/* char* par */)
199 UErrorCode status
= U_ZERO_ERROR
;
200 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
202 c
->setStrength(Collator::SECONDARY
);
203 c
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
205 static const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
207 {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC} // Alef-Lamed vs. Alef, Lamed
210 compareArray(*c
, tests
, ARRAY_LENGTH(tests
));
217 // Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean
219 void CollationRegressionTest::Test4058613(/* char* par */)
221 // Creating a default collator doesn't work when Korean is the default
224 Locale oldDefault
= Locale::getDefault();
225 UErrorCode status
= U_ZERO_ERROR
;
227 Locale::setDefault(Locale::getKorean(), status
);
229 if (U_FAILURE(status
))
231 errln("Could not set default locale to Locale::KOREAN");
237 c
= Collator::createInstance("en_US", status
);
239 if (c
== NULL
|| U_FAILURE(status
))
241 errln("Could not create a Korean collator");
242 Locale::setDefault(oldDefault
, status
);
247 // Since the fix to this bug was to turn off decomposition for Korean collators,
248 // ensure that's what we got
249 if (c
->getAttribute(UCOL_NORMALIZATION_MODE
, status
) != UCOL_OFF
)
251 errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator");
256 Locale::setDefault(oldDefault
, status
);
261 // RuleBasedCollator.getRules does not return the exact pattern as input
262 // for expanding character sequences
264 void CollationRegressionTest::Test4059820(/* char* par */)
266 UErrorCode status
= U_ZERO_ERROR
;
268 RuleBasedCollator
*c
= NULL
;
269 UnicodeString rules
= "&9 < a < b , c/a < d < z";
271 c
= new RuleBasedCollator(rules
, status
);
273 if (c
== NULL
|| U_FAILURE(status
))
275 errln("Failure building a collator.");
280 if ( c
->getRules().indexOf("c/a") == -1)
282 errln("returned rules do not contain 'c/a'");
290 // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
292 void CollationRegressionTest::Test4060154(/* char* par */)
294 UErrorCode status
= U_ZERO_ERROR
;
297 rules
+= "&f < g, G < h, H < i, I < j, J";
299 rules
+= (UChar
)0x0131;
301 rules
+= (UChar
)0x0130;
304 RuleBasedCollator
*c
= NULL
;
306 c
= new RuleBasedCollator(rules
, status
);
308 if (c
== NULL
|| U_FAILURE(status
))
310 errln("failure building collator.");
315 c
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
318 String[] tertiary = {
322 "\u0131", "<", "\u0130",
328 static const UChar tertiary
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
330 {0x41, 0}, {0x3c, 0}, {0x42, 0},
331 {0x48, 0}, {0x3c, 0}, {0x0131, 0},
332 {0x48, 0}, {0x3c, 0}, {0x49, 0},
333 {0x0131, 0}, {0x3c, 0}, {0x0130, 0},
334 {0x0130, 0}, {0x3c, 0}, {0x69, 0},
335 {0x0130, 0}, {0x3e, 0}, {0x48, 0}
338 c
->setStrength(Collator::TERTIARY
);
339 compareArray(*c
, tertiary
, ARRAY_LENGTH(tertiary
));
342 String[] secondary = {
344 "\u0131", "=", "\u0130",
347 static const UChar secondary
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
349 {0x48, 0}, {0x3c, 0}, {0x49, 0},
350 {0x0131, 0}, {0x3d, 0}, {0x0130, 0}
353 c
->setStrength(Collator::PRIMARY
);
354 compareArray(*c
, secondary
, ARRAY_LENGTH(secondary
));
361 // Secondary/Tertiary comparison incorrect in French Secondary
363 void CollationRegressionTest::Test4062418(/* char* par */)
365 UErrorCode status
= U_ZERO_ERROR
;
367 RuleBasedCollator
*c
= NULL
;
369 c
= (RuleBasedCollator
*) Collator::createInstance(Locale::getCanadaFrench(), status
);
371 if (c
== NULL
|| U_FAILURE(status
))
373 errln("Failed to create collator for Locale::getCanadaFrench()");
378 c
->setStrength(Collator::SECONDARY
);
382 "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater
385 static const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
387 {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0}
390 compareArray(*c
, tests
, ARRAY_LENGTH(tests
));
397 // Collator::compare() method broken if either string contains spaces
399 void CollationRegressionTest::Test4065540(/* char* par */)
401 if (en_us
->compare("abcd e", "abcd f") == 0)
403 errln("'abcd e' == 'abcd f'");
409 // Unicode characters need to be recursively decomposed to get the
410 // correct result. For example,
411 // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
413 void CollationRegressionTest::Test4066189(/* char* par */)
415 static const UChar chars1
[] = {0x1EB1, 0};
416 static const UChar chars2
[] = {0x61, 0x0306, 0x0300, 0};
417 const UnicodeString
test1(chars1
);
418 const UnicodeString
test2(chars2
);
419 UErrorCode status
= U_ZERO_ERROR
;
421 // NOTE: The java code used en_us to create the
422 // CollationElementIterator's. I'm pretty sure that
423 // was wrong, so I've change the code to use c1 and c2
424 RuleBasedCollator
*c1
= (RuleBasedCollator
*) en_us
->clone();
425 c1
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
426 CollationElementIterator
*i1
= c1
->createCollationElementIterator(test1
);
428 RuleBasedCollator
*c2
= (RuleBasedCollator
*) en_us
->clone();
429 c2
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_OFF
, status
);
430 CollationElementIterator
*i2
= c2
->createCollationElementIterator(test2
);
432 assertEqual(*i1
, *i2
);
442 // French secondary collation checking at the end of compare iteration fails
444 void CollationRegressionTest::Test4066696(/* char* par */)
446 UErrorCode status
= U_ZERO_ERROR
;
447 RuleBasedCollator
*c
= NULL
;
449 c
= (RuleBasedCollator
*)Collator::createInstance(Locale::getCanadaFrench(), status
);
451 if (c
== NULL
|| U_FAILURE(status
))
453 errln("Failure creating collator for Locale::getCanadaFrench()");
458 c
->setStrength(Collator::SECONDARY
);
462 "\u00e0", "<", "\u01fa", // a-grave < A-ring-acute
468 "\u00e0", ">", "\u01fa", // a-grave < A-ring-acute
473 static const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
475 {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0}
478 compareArray(*c
, tests
, ARRAY_LENGTH(tests
));
485 // Bad canonicalization of same-class combining characters
487 void CollationRegressionTest::Test4076676(/* char* par */)
489 // These combining characters are all in the same class, so they should not
490 // be reordered, and they should compare as unequal.
491 static const UChar s1
[] = {0x41, 0x0301, 0x0302, 0x0300, 0};
492 static const UChar s2
[] = {0x41, 0x0302, 0x0300, 0x0301, 0};
494 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
495 c
->setStrength(Collator::TERTIARY
);
497 if (c
->compare(s1
,s2
) == 0)
499 errln("Same-class combining chars were reordered");
507 // RuleBasedCollator::operator==(NULL) throws NullPointerException
509 void CollationRegressionTest::Test4079231(/* char* par */)
511 // I don't think there's any way to write this test
512 // in C++. The following is equivalent to the Java,
513 // but doesn't compile 'cause NULL can't be converted
516 // if (en_us->operator==(NULL))
518 // errln("en_us->operator==(NULL) returned TRUE");
523 if (en_us->equals(null)) {
524 errln("en_us->equals(null) returned true");
527 catch (Exception e) {
528 errln("en_us->equals(null) threw " + e.toString());
535 // RuleBasedCollator breaks on "< a < bb" rule
537 void CollationRegressionTest::Test4078588(/* char *par */)
539 UErrorCode status
= U_ZERO_ERROR
;
540 RuleBasedCollator
*rbc
= new RuleBasedCollator("&9 < a < bb", status
);
542 if (rbc
== NULL
|| U_FAILURE(status
))
544 errln("Failed to create RuleBasedCollator.");
549 Collator::EComparisonResult result
= rbc
->compare("a","bb");
551 if (result
!= Collator::LESS
)
553 errln((UnicodeString
)"Compare(a,bb) returned " + (int)result
554 + (UnicodeString
)"; expected -1");
562 // Combining characters in different classes not reordered properly.
564 void CollationRegressionTest::Test4081866(/* char* par */)
566 // These combining characters are all in different classes,
567 // so they should be reordered and the strings should compare as equal.
568 static const UChar s1
[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0};
569 static const UChar s2
[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0};
571 UErrorCode status
= U_ZERO_ERROR
;
572 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
573 c
->setStrength(Collator::TERTIARY
);
575 // Now that the default collators are set to NO_DECOMPOSITION
576 // (as a result of fixing bug 4114077), we must set it explicitly
577 // when we're testing reordering behavior. -- lwerner, 5/5/98
578 c
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
580 if (c
->compare(s1
,s2
) != 0)
582 errln("Combining chars were not reordered");
590 // string comparison errors in Scandinavian collators
592 void CollationRegressionTest::Test4087241(/* char* par */)
594 UErrorCode status
= U_ZERO_ERROR
;
595 Locale
da_DK("da", "DK");
596 RuleBasedCollator
*c
= NULL
;
598 c
= (RuleBasedCollator
*) Collator::createInstance(da_DK
, status
);
600 if (c
== NULL
|| U_FAILURE(status
))
602 errln("Failed to create collator for da_DK locale");
607 c
->setStrength(Collator::SECONDARY
);
609 static const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
611 {0x7a, 0}, {0x3c, 0}, {0x00E6, 0}, // z < ae
612 {0x61, 0x0308, 0}, {0x3c, 0}, {0x61, 0x030A, 0}, // a-umlaut < a-ring
613 {0x59, 0}, {0x3c, 0}, {0x75, 0x0308, 0}, // Y < u-umlaut
616 compareArray(*c
, tests
, ARRAY_LENGTH(tests
));
623 // CollationKey takes ignorable strings into account when it shouldn't
625 void CollationRegressionTest::Test4087243(/* char* par */)
627 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
628 c
->setStrength(Collator::TERTIARY
);
630 static const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
632 {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0} // 1 2 3 = 1 2 3 ctrl-A
635 compareArray(*c
, tests
, ARRAY_LENGTH(tests
));
643 // Micro symbol and greek lowercase letter Mu should sort identically
645 void CollationRegressionTest::Test4092260(/* char* par */)
647 UErrorCode status
= U_ZERO_ERROR
;
651 c
= Collator::createInstance(el
, status
);
653 if (c
== NULL
|| U_FAILURE(status
))
655 errln("Failed to create collator for el locale.");
660 // These now have tertiary differences in UCA
661 c
->setAttribute(UCOL_STRENGTH
, UCOL_SECONDARY
, status
);
663 static const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
665 {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0}
668 compareArray(*c
, tests
, ARRAY_LENGTH(tests
));
675 void CollationRegressionTest::Test4095316(/* char* par */)
677 UErrorCode status
= U_ZERO_ERROR
;
678 Locale
el_GR("el", "GR");
679 Collator
*c
= Collator::createInstance(el_GR
, status
);
681 if (c
== NULL
|| U_FAILURE(status
))
683 errln("Failed to create collator for el_GR locale");
687 // These now have tertiary differences in UCA
688 //c->setStrength(Collator::TERTIARY);
689 c
->setAttribute(UCOL_STRENGTH
, UCOL_SECONDARY
, status
);
691 static const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
693 {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0}
696 compareArray(*c
, tests
, ARRAY_LENGTH(tests
));
703 void CollationRegressionTest::Test4101940(/* char* par */)
705 UErrorCode status
= U_ZERO_ERROR
;
706 RuleBasedCollator
*c
= NULL
;
707 UnicodeString rules
= "&9 < a < b";
708 UnicodeString nothing
= "";
710 c
= new RuleBasedCollator(rules
, status
);
712 if (c
== NULL
|| U_FAILURE(status
))
714 errln("Failed to create RuleBasedCollator");
719 CollationElementIterator
*i
= c
->createCollationElementIterator(nothing
);
722 if (i
->next(status
) != CollationElementIterator::NULLORDER
)
724 errln("next did not return NULLORDER");
733 // Collator::compare not handling spaces properly
735 void CollationRegressionTest::Test4103436(/* char* par */)
737 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
738 c
->setStrength(Collator::TERTIARY
);
740 static const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
742 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0},
743 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}
746 compareArray(*c
, tests
, ARRAY_LENGTH(tests
));
753 // Collation not Unicode conformant with Hangul syllables
755 void CollationRegressionTest::Test4114076(/* char* par */)
757 UErrorCode status
= U_ZERO_ERROR
;
758 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
759 c
->setStrength(Collator::TERTIARY
);
762 // With Canonical decomposition, Hangul syllables should get decomposed
763 // into Jamo, but Jamo characters should not be decomposed into
766 static const UChar test1
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
768 {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0}
771 c
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
772 compareArray(*c
, test1
, ARRAY_LENGTH(test1
));
775 // *In earlier versions of Unicode, jamo characters like ksf
776 // had compatibility mappings to kf + sf. These mappings were
777 // removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.)
778 // That is, the following test is obsolete as of 2.1.9
780 //obsolete- // With Full decomposition, it should go all the way down to
781 //obsolete- // conjoining Jamo characters.
783 //obsolete- static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
785 //obsolete- {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0}
788 //obsolete- c->setDecomposition(Normalizer::DECOMP_COMPAT);
789 //obsolete- compareArray(*c, test2, ARRAY_LENGTH(test2));
797 // Collator::getCollationKey was hanging on certain character sequences
799 void CollationRegressionTest::Test4124632(/* char* par */)
801 UErrorCode status
= U_ZERO_ERROR
;
802 Collator
*coll
= NULL
;
804 coll
= Collator::createInstance(Locale::getJapan(), status
);
806 if (coll
== NULL
|| U_FAILURE(status
))
808 errln("Failed to create collator for Locale::JAPAN");
813 static const UChar test
[] = {0x41, 0x0308, 0x62, 0x63, 0};
816 coll
->getCollationKey(test
, key
, status
);
818 if (key
.isBogus() || U_FAILURE(status
))
820 errln("CollationKey creation failed.");
828 // sort order of french words with multiple accents has errors
830 void CollationRegressionTest::Test4132736(/* char* par */)
832 UErrorCode status
= U_ZERO_ERROR
;
836 c
= Collator::createInstance(Locale::getCanadaFrench(), status
);
837 c
->setStrength(Collator::TERTIARY
);
839 if (c
== NULL
|| U_FAILURE(status
))
841 errln("Failed to create a collator for Locale::getCanadaFrench()");
846 static const UChar test1
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
848 {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0},
849 {0x65, 0x0300, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x0300, 0}
852 compareArray(*c
, test1
, ARRAY_LENGTH(test1
));
859 // The sorting using java.text.CollationKey is not in the exact order
861 void CollationRegressionTest::Test4133509(/* char* par */)
863 static const UChar test1
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
865 {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0},
866 {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0}, {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0},
867 {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0}, {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0}
870 compareArray(*en_us
, test1
, ARRAY_LENGTH(test1
));
875 // Collation with decomposition off doesn't work for Europe
877 void CollationRegressionTest::Test4114077(/* char* par */)
879 // Ensure that we get the same results with decomposition off
880 // as we do with it on....
882 UErrorCode status
= U_ZERO_ERROR
;
883 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
884 c
->setStrength(Collator::TERTIARY
);
886 static const UChar test1
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
888 {0x00C0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}, // Should be equivalent
889 {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0},
890 {0x0204, 0}, {0x3d, 0}, {0x45, 0x030F, 0},
891 {0x01fa, 0}, {0x3d, 0}, {0x41, 0x030a, 0x0301, 0}, // a-ring-acute -> a-ring, acute
893 {0x41, 0x0300, 0x0316, 0}, {0x3c, 0}, {0x41, 0x0316, 0x0300, 0} // No reordering --> unequal
896 c
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_OFF
, status
);
897 compareArray(*c
, test1
, ARRAY_LENGTH(test1
));
899 static const UChar test2
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
901 {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0} // Reordering --> equal
904 c
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
905 compareArray(*c
, test2
, ARRAY_LENGTH(test2
));
912 // Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
914 void CollationRegressionTest::Test4141640(/* char* par */)
917 // Rather than just creating a Swedish collator, we might as well
918 // try to instantiate one for every locale available on the system
919 // in order to prevent this sort of bug from cropping up in the future
921 UErrorCode status
= U_ZERO_ERROR
;
922 int32_t i
, localeCount
;
923 const Locale
*locales
= Locale::getAvailableLocales(localeCount
);
925 for (i
= 0; i
< localeCount
; i
+= 1)
929 status
= U_ZERO_ERROR
;
930 c
= Collator::createInstance(locales
[i
], status
);
932 if (c
== NULL
|| U_FAILURE(status
))
934 UnicodeString msg
, localeName
;
936 msg
+= "Could not create collator for locale ";
937 msg
+= locales
[i
].getName();
948 // getCollationKey throws exception for spanish text
949 // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
951 void CollationRegressionTest::Test4139572(/* char* par */)
954 // Code pasted straight from the bug report
955 // (and then translated to C++ ;-)
957 // create spanish locale and collator
958 UErrorCode status
= U_ZERO_ERROR
;
959 Locale
l("es", "es");
960 Collator
*col
= NULL
;
962 col
= Collator::createInstance(l
, status
);
964 if (col
== NULL
|| U_FAILURE(status
))
966 errln("Failed to create a collator for es_es locale.");
973 // this spanish phrase kills it!
974 col
->getCollationKey("Nombre De Objeto", key
, status
);
976 if (key
.isBogus() || U_FAILURE(status
))
978 errln("Error creating CollationKey for \"Nombre De Ojbeto\"");
983 /* HSYS : RuleBasedCollator::compare() performance enhancements
984 compare() does not create CollationElementIterator() anymore.*/
986 class My4146160Collator
: public RuleBasedCollator
989 My4146160Collator(RuleBasedCollator
&rbc
, UErrorCode
&status
);
990 ~My4146160Collator();
992 CollationElementIterator
*createCollationElementIterator(const UnicodeString
&text
) const;
994 CollationElementIterator
*createCollationElementIterator(const CharacterIterator
&text
) const;
996 static int32_t count
;
999 int32_t My4146160Collator::count
= 0;
1001 My4146160Collator::My4146160Collator(RuleBasedCollator
&rbc
, UErrorCode
&status
)
1002 : RuleBasedCollator(rbc
.getRules(), status
)
1006 My4146160Collator::~My4146160Collator()
1010 CollationElementIterator
*My4146160Collator::createCollationElementIterator(const UnicodeString
&text
) const
1013 return RuleBasedCollator::createCollationElementIterator(text
);
1016 CollationElementIterator
*My4146160Collator::createCollationElementIterator(const CharacterIterator
&text
) const
1019 return RuleBasedCollator::createCollationElementIterator(text
);
1024 // RuleBasedCollator doesn't use createCollationElementIterator internally
//
// Creates a My4146160Collator from en_us, resets My4146160Collator::count,
// and reports an error when count is still below 1 after getCollationKey()
// and again after compare().
//
// NOTE(review): the HSYS remark above the My4146160Collator class says
// compare() no longer creates a CollationElementIterator, so the count checks
// below may not be expected to hold any more -- confirm whether this test is
// meant to be active.
// NOTE(review): `key` is used below but its declaration is not visible in
// this view, and nothing here releases `mc`.
1026 void CollationRegressionTest::Test4146160(/* char* par */)
1030 // Use a custom collator class whose createCollationElementIterator
1031 // methods increment a count....
1033 UErrorCode status
= U_ZERO_ERROR
;
1036 My4146160Collator::count
= 0;
1037 My4146160Collator
*mc
= NULL
;
1039 mc
= new My4146160Collator(*en_us
, status
);
1041 if (mc
== NULL
|| U_FAILURE(status
))
1043 errln("Failed to create a My4146160Collator.");
// First probe: does getCollationKey() go through the counted overrides?
1048 mc
->getCollationKey("1", key
, status
);
1050 if (key
.isBogus() || U_FAILURE(status
))
1052 errln("Failure to get a CollationKey from a My4146160Collator.");
1057 if (My4146160Collator::count
< 1)
1059 errln("My4146160Collator::createCollationElementIterator not called for getCollationKey");
// Second probe: reset the counter and repeat the check for compare().
1062 My4146160Collator::count
= 0;
1063 mc
->compare("1", "2");
1065 if (My4146160Collator::count
< 1)
1067 errln("My4146160Collator::createtCollationElementIterator not called for compare");
1074 void CollationRegressionTest::Test4179216() {
1075 // you can position a CollationElementIterator in the middle of
1076 // a contracting character sequence, yielding a bogus collation
1078 IcuTestErrorCode
errorCode(*this, "Test4179216");
1079 RuleBasedCollator
coll(en_us
->getRules() + " & C < ch , cH , Ch , CH < cat < crunchy", errorCode
);
1080 UnicodeString testText
= "church church catcatcher runcrunchynchy";
1081 CollationElementIterator
*iter
= coll
.createCollationElementIterator(testText
);
1083 // test that the "ch" combination works properly
1084 iter
->setOffset(4, errorCode
);
1085 int32_t elt4
= CollationElementIterator::primaryOrder(iter
->next(errorCode
));
1088 int32_t elt0
= CollationElementIterator::primaryOrder(iter
->next(errorCode
));
1090 iter
->setOffset(5, errorCode
);
1091 int32_t elt5
= CollationElementIterator::primaryOrder(iter
->next(errorCode
));
1093 // Compares and prints only 16-bit primary weights.
1094 if (elt4
!= elt0
|| elt5
!= elt0
) {
1095 errln("The collation elements at positions 0 (0x%04x), "
1096 "4 (0x%04x), and 5 (0x%04x) don't match.",
1100 // test that the "cat" combination works properly
1101 iter
->setOffset(14, errorCode
);
1102 int32_t elt14
= CollationElementIterator::primaryOrder(iter
->next(errorCode
));
1104 iter
->setOffset(15, errorCode
);
1105 int32_t elt15
= CollationElementIterator::primaryOrder(iter
->next(errorCode
));
1107 iter
->setOffset(16, errorCode
);
1108 int32_t elt16
= CollationElementIterator::primaryOrder(iter
->next(errorCode
));
1110 iter
->setOffset(17, errorCode
);
1111 int32_t elt17
= CollationElementIterator::primaryOrder(iter
->next(errorCode
));
1113 iter
->setOffset(18, errorCode
);
1114 int32_t elt18
= CollationElementIterator::primaryOrder(iter
->next(errorCode
));
1116 iter
->setOffset(19, errorCode
);
1117 int32_t elt19
= CollationElementIterator::primaryOrder(iter
->next(errorCode
));
1119 // Compares and prints only 16-bit primary weights.
1120 if (elt14
!= elt15
|| elt14
!= elt16
|| elt14
!= elt17
1121 || elt14
!= elt18
|| elt14
!= elt19
) {
1122 errln("\"cat\" elements don't match: elt14 = 0x%04x, "
1123 "elt15 = 0x%04x, elt16 = 0x%04x, elt17 = 0x%04x, "
1124 "elt18 = 0x%04x, elt19 = 0x%04x",
1125 elt14
, elt15
, elt16
, elt17
, elt18
, elt19
);
1128 // now generate a complete list of the collation elements,
1129 // first using next() and then using setOffset(), and
1130 // make sure both interfaces return the same set of elements
1133 int32_t elt
= iter
->next(errorCode
);
1135 while (elt
!= CollationElementIterator::NULLORDER
) {
1137 elt
= iter
->next(errorCode
);
1140 LocalArray
<UnicodeString
> nextElements(new UnicodeString
[count
]);
1141 LocalArray
<UnicodeString
> setOffsetElements(new UnicodeString
[count
]);
1142 int32_t lastPos
= 0;
1145 elt
= iter
->next(errorCode
);
1147 while (elt
!= CollationElementIterator::NULLORDER
) {
1148 nextElements
[count
++] = testText
.tempSubStringBetween(lastPos
, iter
->getOffset());
1149 lastPos
= iter
->getOffset();
1150 elt
= iter
->next(errorCode
);
1152 int32_t nextElementsLength
= count
;
1154 for (int32_t i
= 0; i
< testText
.length(); ) {
1155 iter
->setOffset(i
, errorCode
);
1156 lastPos
= iter
->getOffset();
1157 elt
= iter
->next(errorCode
);
1158 setOffsetElements
[count
++] = testText
.tempSubStringBetween(lastPos
, iter
->getOffset());
1159 i
= iter
->getOffset();
1161 for (int32_t i
= 0; i
< nextElementsLength
; i
++) {
1162 if (nextElements
[i
] == setOffsetElements
[i
]) {
1163 logln(nextElements
[i
]);
1165 errln(UnicodeString("Error: next() yielded ") + nextElements
[i
] +
1166 ", but setOffset() yielded " + setOffsetElements
[i
]);
1174 // nextSortKeyPart incorrect for EO_S1 collation
1175 static int32_t calcKeyIncremental(UCollator
*coll
, const UChar
* text
, int32_t len
, uint8_t *keyBuf
, int32_t /*keyBufLen*/, UErrorCode
& status
) {
1176 UCharIterator uiter
;
1177 uint32_t state
[2] = { 0, 0 };
1181 uiter_setString(&uiter
, text
, len
);
1184 int32_t keyPartLen
= ucol_nextSortKeyPart(coll
, &uiter
, state
, &keyBuf
[keyLen
], count
, &status
);
1185 if (U_FAILURE(status
)) {
1188 if (keyPartLen
== 0) {
1191 keyLen
+= keyPartLen
;
1196 void CollationRegressionTest::TestT7189() {
1197 UErrorCode status
= U_ZERO_ERROR
;
1201 static const UChar text1
[][CollationRegressionTest::MAX_TOKEN_LEN
] = {
1202 // "Achter De Hoven"
1203 { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
1205 { 0x41, 0x42, 0x43, 0x00 },
1207 { 0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
1210 static const UChar text2
[][CollationRegressionTest::MAX_TOKEN_LEN
] = {
1211 // "Achter de Hoven"
1212 { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
1214 { 0x61, 0x62, 0x63, 0x00 },
1216 { 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
1219 // Open the collator
1220 coll
= ucol_openFromShortString("EO_S1", FALSE
, NULL
, &status
);
1221 if (U_FAILURE(status
)) {
1222 errln("Failed to create a collator for short string EO_S1");
1226 for (i
= 0; i
< sizeof(text1
) / (CollationRegressionTest::MAX_TOKEN_LEN
* sizeof(UChar
)); i
++) {
1227 uint8_t key1
[100], key2
[100];
1230 len1
= calcKeyIncremental(coll
, text1
[i
], -1, key1
, sizeof(key1
), status
);
1231 if (U_FAILURE(status
)) {
1232 errln(UnicodeString("Failed to get a partial collation key for ") + text1
[i
]);
1235 len2
= calcKeyIncremental(coll
, text2
[i
], -1, key2
, sizeof(key2
), status
);
1236 if (U_FAILURE(status
)) {
1237 errln(UnicodeString("Failed to get a partial collation key for ") + text2
[i
]);
1241 if (len1
== len2
&& uprv_memcmp(key1
, key2
, len1
) == 0) {
1242 errln(UnicodeString("Failed: Identical key\n") + " text1: " + text1
[i
] + "\n" + " text2: " + text2
[i
] + "\n" + " key : " + TestUtility::hex(key1
, len1
));
1244 logln(UnicodeString("Keys produced -\n") + " text1: " + text1
[i
] + "\n" + " key1 : " + TestUtility::hex(key1
, len1
) + "\n" + " text2: " + text2
[i
] + "\n" + " key2 : "
1245 + TestUtility::hex(key2
, len2
));
1251 void CollationRegressionTest::TestCaseFirstCompression() {
1252 RuleBasedCollator
*col
= (RuleBasedCollator
*) en_us
->clone();
1253 UErrorCode status
= U_ZERO_ERROR
;
1256 caseFirstCompressionSub(col
, "default");
1259 col
->setAttribute(UCOL_CASE_FIRST
, UCOL_UPPER_FIRST
, status
);
1260 if (U_FAILURE(status
)) {
1261 errln("Failed to set UCOL_UPPER_FIRST");
1264 caseFirstCompressionSub(col
, "upper first");
1267 col
->setAttribute(UCOL_CASE_FIRST
, UCOL_LOWER_FIRST
, status
);
1268 if (U_FAILURE(status
)) {
1269 errln("Failed to set UCOL_LOWER_FIRST");
1272 caseFirstCompressionSub(col
, "lower first");
// caseFirstCompressionSub: for every length len from 1 to maxLength (50),
// fills str1 and str2 with 'a' at the indexes below len - 1, then stores 'A'
// in str1 and 'a' in str2 at the index where that fill stopped. It then checks
// that comparing the two collation keys (CollationKey::compareTo) yields the
// same UCollationResult as comparing the strings with col->compare().
//   col - collator used for both the key generation and the direct comparison
//   opt - label inserted into the "Inconsistent comparison(...)" message
// NOTE(review): the declaration and initial value of i are not visible in this
// excerpt; the description presumes i starts at 0 for each len -- confirm.
1277 void CollationRegressionTest::caseFirstCompressionSub(Collator
*col
, UnicodeString opt
) {
1278 const int32_t maxLength
= 50;
1280 UChar str1
[maxLength
];
1281 UChar str2
[maxLength
];
1283 CollationKey key1
, key2
;
// len is passed below as the length of both strings, so the buffers are never
// read past index len - 1.
1285 for (int32_t len
= 1; len
<= maxLength
; len
++) {
// Shared prefix: identical content in both buffers.
1287 for (; i
< len
- 1; i
++) {
1288 str1
[i
] = str2
[i
] = (UChar
)0x61; // 'a'
// At the index where the fill loop stopped, str1 gets 'A' and str2 gets 'a'.
1290 str1
[i
] = (UChar
)0x41; // 'A'
1291 str2
[i
] = (UChar
)0x61; // 'a'
// A fresh status per length; the two key computations and the two comparisons
// below all share it and it is inspected only once afterwards.
1293 UErrorCode status
= U_ZERO_ERROR
;
1294 col
->getCollationKey(str1
, len
, key1
, status
);
1295 col
->getCollationKey(str2
, len
, key2
, status
);
// cmpKey: result from the keys; cmpCol: result from the collator on the strings.
1297 UCollationResult cmpKey
= key1
.compareTo(key2
, status
);
1298 UCollationResult cmpCol
= col
->compare(str1
, len
, str2
, len
, status
);
// A failure in status takes precedence; otherwise the two results must match.
1300 if (U_FAILURE(status
)) {
1301 errln("Error in caseFirstCompressionSub");
1302 } else if (cmpKey
!= cmpCol
) {
1303 errln((UnicodeString
)"Inconsistent comparison(" + opt
1304 + "): str1=" + UnicodeString(str1
, len
) + ", str2=" + UnicodeString(str2
, len
)
1305 + ", cmpKey=" + cmpKey
+ ", cmpCol=" + cmpCol
);
// TestTrailingComment: builds a RuleBasedCollator from the rule string
// "&c<b#comment1\n<a#comment2" -- the second '#' comment runs to the end of
// the string with no newline after it -- and asserts that the collator orders
// "c" before "b" and "b" before "a".
1310 void CollationRegressionTest::TestTrailingComment() {
1311 // ICU ticket #8070:
1312 // Check that the rule parser handles a comment without terminating end-of-line.
// Error code object constructed from this test instance and the test name;
// it is handed to the collator constructor on the next statement.
1313 IcuTestErrorCode
errorCode(*this, "TestTrailingComment");
1314 RuleBasedCollator
coll(UNICODE_STRING_SIMPLE("&c<b#comment1\n<a#comment2"), errorCode
);
// One-character strings built from the code units 0x61, 0x62, 0x63 ('a', 'b', 'c').
1315 UnicodeString
a((UChar
)0x61), b((UChar
)0x62), c((UChar
)0x63);
// Both relations from the rule string must hold: compare() has to return a
// negative value for (c, b) and for (b, a).
1316 assertTrue("c<b", coll
.compare(c
, b
) < 0);
1317 assertTrue("b<a", coll
.compare(b
, a
) < 0);
// TestBeforeWithTooStrongAfter: constructs two RuleBasedCollators from rule
// strings in which a [before N] reset is followed by relations of differing
// strength, and calls errln() whenever errorCode still reports success after a
// construction (i.e. the rule string was accepted).
// NOTE(review): no reset of errorCode between the two constructions is visible
// in this excerpt (its embedded numbering skips from 1326 to 1330); confirm
// that the second check cannot be satisfied by the first failure alone.
1320 void CollationRegressionTest::TestBeforeWithTooStrongAfter() {
1321 // ICU ticket #9959:
1322 // Forbid rules with a before-reset followed by a stronger relation.
1323 IcuTestErrorCode
errorCode(*this, "TestBeforeWithTooStrongAfter");
// Case 1: [before 2] reset, then "<<q" followed by "<p".
1324 RuleBasedCollator
before2(UNICODE_STRING_SIMPLE("&[before 2]x<<q<p"), errorCode
);
1325 if(errorCode
.isSuccess()) {
1326 errln("should forbid before-2-reset followed by primary relation");
// Case 2: [before 3] reset, then "<<<q", "<<s" and "<p".
1330 RuleBasedCollator
before3(UNICODE_STRING_SIMPLE("&[before 3]x<<<q<<s<p"), errorCode
);
1331 if(errorCode
.isSuccess()) {
1332 errln("should forbid before-3-reset followed by primary or secondary relation");
// compareArray: walks the tests table three rows at a time. Rows i, i + 1 and
// i + 2 are read as (source string, comparison operator, target string). The
// operator "<", ">" or "=" selects the expected result (Collator::LESS,
// Collator::GREATER or Collator::EQUAL); any other operator text is reported
// via errln() as a bogus comparison string. For each triple the function
// compares source and target with c.compare(), obtains a CollationKey for each
// string, compares the keys, and hands all results to reportCResult().
//   c     - collator used for the direct comparison and for the collation keys
//   tests - table whose rows each hold MAX_TOKEN_LEN UChars
// NOTE(review): part of the signature (the parameter that declares testCount)
// and the declaration of i are not visible in this excerpt.
1338 void CollationRegressionTest::compareArray(Collator
&c
,
1339 const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
],
// Initialised to EQUAL before the loop starts.
1343 Collator::EComparisonResult expectedResult
= Collator::EQUAL
;
// Step of 3: one (source, operator, target) triple per iteration.
1345 for (i
= 0; i
< testCount
; i
+= 3)
1347 UnicodeString
source(tests
[i
]);
1348 UnicodeString
comparison(tests
[i
+ 1]);
1349 UnicodeString
target(tests
[i
+ 2]);
// Translate the operator text into the expected comparison result.
1351 if (comparison
== "<")
1353 expectedResult
= Collator::LESS
;
1355 else if (comparison
== ">")
1357 expectedResult
= Collator::GREATER
;
1359 else if (comparison
== "=")
1361 expectedResult
= Collator::EQUAL
;
// Unrecognised operator: report it, quoted, through errln().
1365 UnicodeString
bogus1("Bogus comparison string \"");
1366 UnicodeString
bogus2("\"");
1367 errln(bogus1
+ comparison
+ bogus2
);
// Result of comparing the two strings directly with the collator.
1370 Collator::EComparisonResult compareResult
= c
.compare(source
, target
);
// Collation keys for both strings; status is checked after each call.
1372 CollationKey sourceKey
, targetKey
;
1373 UErrorCode status
= U_ZERO_ERROR
;
1375 c
.getCollationKey(source
, sourceKey
, status
);
1377 if (U_FAILURE(status
))
1379 errln("Couldn't get collationKey for source");
1383 c
.getCollationKey(target
, targetKey
, status
);
1385 if (U_FAILURE(status
))
1387 errln("Couldn't get collationKey for target");
// Result of comparing the two keys with each other.
1391 Collator::EComparisonResult keyResult
= sourceKey
.compareTo(targetKey
);
// compareResult is passed twice (5th and 7th argument).
// NOTE(review): presumably the 7th parameter of reportCResult is a separate
// result that this helper does not compute on its own -- confirm against the
// declaration of reportCResult.
1393 reportCResult( source
, target
, sourceKey
, targetKey
, compareResult
, keyResult
, compareResult
, expectedResult
);
// assertEqual: advances the two iterators in lock step, calling next() once on
// each per pass, and repeats until i1.next() has returned
// CollationElementIterator::NULLORDER. The visible code also assembles a
// message of the form "<count>: strength(0x<c1>) != strength(0x<c2>" from the
// two values just read.
//   i1, i2 - iterators to walk; both are advanced by this function
// NOTE(review): the loop head, the condition guarding the message, the way the
// message is reported and any update of count are not visible in this excerpt;
// presumably the message is emitted when c1 and c2 differ -- confirm.
1398 void CollationRegressionTest::assertEqual(CollationElementIterator
&i1
, CollationElementIterator
&i2
)
// c1/c2 hold the latest value read from i1/i2; count is written into the message.
1400 int32_t c1
, c2
, count
= 0;
1401 UErrorCode status
= U_ZERO_ERROR
;
// Read one value from each iterator; both calls share the same status.
1405 c1
= i1
.next(status
);
1406 c2
= i2
.next(status
);
// Build the message text: msg1 followed by count, then the two values
// appended by appendHex() with a width argument of 8.
1410 UnicodeString msg
, msg1(" ");
1412 msg
+= msg1
+ count
;
1413 msg
+= ": strength(0x";
1414 appendHex(c1
, 8, msg
);
1415 msg
+= ") != strength(0x";
1416 appendHex(c2
, 8, msg
);
// Only c1 controls termination; c2 is not tested in this condition.
1425 while (c1
!= CollationElementIterator::NULLORDER
);
// runIndexedTest: dispatcher for this test class. The visible code logs the
// banner "Collation Regression Tests: ", reports the data error
// "Class collator not instantiated", and lists every test method of the class
// between TESTCASE_AUTO_BEGIN and the TESTCASE_AUTO entries.
//   index, exec, name - not referenced directly in the visible lines;
//                       presumably consumed by the TESTCASE_AUTO macros -- confirm
//   (unnamed char*)   - unused, its name is commented out in the signature
// NOTE(review): the conditions guarding the logln() and dataerrln() calls, and
// the end of the TESTCASE_AUTO list, are not visible in this excerpt.
1428 void CollationRegressionTest::runIndexedTest(int32_t index
, UBool exec
, const char* &name
, char* /* par */)
// Banner for the suite.
1432 logln("Collation Regression Tests: ");
// Reported through dataerrln() rather than errln().
1436 dataerrln("Class collator not instantiated");
// One TESTCASE_AUTO entry per test method, in this order.
1440 TESTCASE_AUTO_BEGIN
;
1441 TESTCASE_AUTO(Test4048446
);
1442 TESTCASE_AUTO(Test4051866
);
1443 TESTCASE_AUTO(Test4053636
);
1444 TESTCASE_AUTO(Test4054238
);
1445 TESTCASE_AUTO(Test4054734
);
1446 TESTCASE_AUTO(Test4054736
);
1447 TESTCASE_AUTO(Test4058613
);
1448 TESTCASE_AUTO(Test4059820
);
1449 TESTCASE_AUTO(Test4060154
);
1450 TESTCASE_AUTO(Test4062418
);
1451 TESTCASE_AUTO(Test4065540
);
1452 TESTCASE_AUTO(Test4066189
);
1453 TESTCASE_AUTO(Test4066696
);
1454 TESTCASE_AUTO(Test4076676
);
1455 TESTCASE_AUTO(Test4078588
);
1456 TESTCASE_AUTO(Test4079231
);
1457 TESTCASE_AUTO(Test4081866
);
1458 TESTCASE_AUTO(Test4087241
);
1459 TESTCASE_AUTO(Test4087243
);
1460 TESTCASE_AUTO(Test4092260
);
1461 TESTCASE_AUTO(Test4095316
);
1462 TESTCASE_AUTO(Test4101940
);
1463 TESTCASE_AUTO(Test4103436
);
1464 TESTCASE_AUTO(Test4114076
);
1465 TESTCASE_AUTO(Test4114077
);
1466 TESTCASE_AUTO(Test4124632
);
1467 TESTCASE_AUTO(Test4132736
);
1468 TESTCASE_AUTO(Test4133509
);
1469 TESTCASE_AUTO(Test4139572
);
1470 TESTCASE_AUTO(Test4141640
);
1471 TESTCASE_AUTO(Test4146160
);
1472 TESTCASE_AUTO(Test4179216
);
1473 TESTCASE_AUTO(TestT7189
);
1474 TESTCASE_AUTO(TestCaseFirstCompression
);
1475 TESTCASE_AUTO(TestTrailingComment
);
1476 TESTCASE_AUTO(TestBeforeWithTooStrongAfter
);
1480 #endif /* #if !UCONFIG_NO_COLLATION */