1 /********************************************************************
3 * Copyright (c) 1997-2003, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
7 #include "unicode/utypes.h"
9 #if !UCONFIG_NO_COLLATION
11 #include "unicode/coll.h"
12 #include "unicode/tblcoll.h"
13 #include "unicode/unistr.h"
14 #include "unicode/sortkey.h"
/* Number of elements in a true array (not a pointer), as an int32_t. */
#define ARRAY_LENGTH(array) ((int32_t)(sizeof(array) / sizeof((array)[0])))
20 CollationRegressionTest::CollationRegressionTest()
22 UErrorCode status
= U_ZERO_ERROR
;
24 en_us
= (RuleBasedCollator
*)Collator::createInstance(Locale::getUS(), status
);
25 if(U_FAILURE(status
)) {
28 errln("Collator creation failed with %s", u_errorName(status
));
33 CollationRegressionTest::~CollationRegressionTest()
41 // CollationElementIterator.reset() doesn't work
43 void CollationRegressionTest::Test4048446(/* char* par */)
45 const UnicodeString test1
= "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
46 const UnicodeString test2
= "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
47 CollationElementIterator
*i1
= en_us
->createCollationElementIterator(test1
);
48 CollationElementIterator
*i2
= en_us
->createCollationElementIterator(test1
);
49 UErrorCode status
= U_ZERO_ERROR
;
51 if (i1
== NULL
|| i2
== NULL
)
53 errln("Could not create CollationElementIterator's");
59 while (i1
->next(status
) != CollationElementIterator::NULLORDER
)
61 if (U_FAILURE(status
))
63 errln("error calling next()");
73 assertEqual(*i1
, *i2
);
81 // Collator -> rules -> Collator round-trip broken for expanding characters
83 void CollationRegressionTest::Test4051866(/* char* par */)
86 RuleBasedCollator c1 = new RuleBasedCollator("< o "
95 UErrorCode status
= U_ZERO_ERROR
;
99 rules
+= (UChar
)0x3080;
101 rules
+= (UChar
)0x1530;
104 rules
+= (UChar
)0x3080;
106 rules
+= (UChar
)0x1520;
109 // Build a collator containing expanding characters
110 RuleBasedCollator
*c1
= new RuleBasedCollator(rules
, status
);
112 // Build another using the rules from the first
113 RuleBasedCollator
*c2
= new RuleBasedCollator(c1
->getRules(), status
);
115 // Make sure they're the same
116 if (!(c1
->getRules() == c2
->getRules()))
118 errln("Rules are not equal");
127 // Collator thinks "black-bird" == "black"
129 void CollationRegressionTest::Test4053636(/* char* par */)
131 if (en_us
->equals("black_bird", "black"))
133 errln("black-bird == black");
139 // CollationElementIterator will not work correctly if the associated
140 // Collator object's mode is changed
142 void CollationRegressionTest::Test4054238(/* char* par */)
144 const UChar chars3
[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0};
145 const UnicodeString
test3(chars3
);
146 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
148 // NOTE: The Java code uses en_us to create the CollationElementIterators
149 // but I'm pretty sure that's wrong, so I've changed this to use c.
150 UErrorCode status
= U_ZERO_ERROR
;
151 c
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
152 CollationElementIterator
*i1
= c
->createCollationElementIterator(test3
);
159 // Collator::IDENTICAL documented but not implemented
161 void CollationRegressionTest::Test4054734(/* char* par */)
164 Here's the original Java:
167 "\u0001", "<", "\u0002",
168 "\u0001", "=", "\u0001",
169 "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise
170 "\u00C0", "=", "A\u0300" // Decomp should make these equal
173 String[] nodecomp = {
174 "\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave
178 static const UChar decomp
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
180 {0x0001, 0}, {0x3c, 0}, {0x0002, 0},
181 {0x0001, 0}, {0x3d, 0}, {0x0001, 0},
182 {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0},
183 {0x00c0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}
187 UErrorCode status
= U_ZERO_ERROR
;
188 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
190 c
->setStrength(Collator::IDENTICAL
);
192 c
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
193 compareArray(*c
, decomp
, ARRAY_LENGTH(decomp
));
200 // Full Decomposition mode not implemented
202 void CollationRegressionTest::Test4054736(/* char* par */)
204 UErrorCode status
= U_ZERO_ERROR
;
205 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
207 c
->setStrength(Collator::SECONDARY
);
208 c
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
210 static const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
212 {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC} // Alef-Lamed vs. Alef, Lamed
215 compareArray(*c
, tests
, ARRAY_LENGTH(tests
));
222 // Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean
224 void CollationRegressionTest::Test4058613(/* char* par */)
226 // Creating a default collator doesn't work when Korean is the default
229 Locale oldDefault
= Locale::getDefault();
230 UErrorCode status
= U_ZERO_ERROR
;
232 Locale::setDefault(Locale::getKorean(), status
);
234 if (U_FAILURE(status
))
236 errln("Could not set default locale to Locale::KOREAN");
242 c
= Collator::createInstance("en_US", status
);
244 if (c
== NULL
|| U_FAILURE(status
))
246 errln("Could not create a Korean collator");
247 Locale::setDefault(oldDefault
, status
);
252 // Since the fix to this bug was to turn off decomposition for Korean collators,
253 // ensure that's what we got
254 if (c
->getAttribute(UCOL_NORMALIZATION_MODE
, status
) != UCOL_OFF
)
256 errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator");
261 Locale::setDefault(oldDefault
, status
);
266 // RuleBasedCollator.getRules does not return the exact pattern as input
267 // for expanding character sequences
269 void CollationRegressionTest::Test4059820(/* char* par */)
271 UErrorCode status
= U_ZERO_ERROR
;
273 RuleBasedCollator
*c
= NULL
;
274 UnicodeString rules
= "< a < b , c/a < d < z";
276 c
= new RuleBasedCollator(rules
, status
);
278 if (c
== NULL
|| U_FAILURE(status
))
280 errln("Failure building a collator.");
285 if ( c
->getRules().indexOf("c/a") == -1)
287 errln("returned rules do not contain 'c/a'");
295 // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
297 void CollationRegressionTest::Test4060154(/* char* par */)
299 UErrorCode status
= U_ZERO_ERROR
;
302 rules
+= "< g, G < h, H < i, I < j, J";
304 rules
+= (UChar
)0x0131;
306 rules
+= (UChar
)0x0130;
309 RuleBasedCollator
*c
= NULL
;
311 c
= new RuleBasedCollator(rules
, status
);
313 if (c
== NULL
|| U_FAILURE(status
))
315 errln("failure building collator.");
320 c
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
323 String[] tertiary = {
327 "\u0131", "<", "\u0130",
333 static const UChar tertiary
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
335 {0x41, 0}, {0x3c, 0}, {0x42, 0},
336 {0x48, 0}, {0x3c, 0}, {0x0131, 0},
337 {0x48, 0}, {0x3c, 0}, {0x49, 0},
338 {0x0131, 0}, {0x3c, 0}, {0x0130, 0},
339 {0x0130, 0}, {0x3c, 0}, {0x69, 0},
340 {0x0130, 0}, {0x3e, 0}, {0x48, 0}
343 c
->setStrength(Collator::TERTIARY
);
344 compareArray(*c
, tertiary
, ARRAY_LENGTH(tertiary
));
347 String[] secondary = {
349 "\u0131", "=", "\u0130",
352 static const UChar secondary
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
354 {0x48, 0}, {0x3c, 0}, {0x49, 0},
355 {0x0131, 0}, {0x3d, 0}, {0x0130, 0}
358 c
->setStrength(Collator::PRIMARY
);
359 compareArray(*c
, secondary
, ARRAY_LENGTH(secondary
));
366 // Secondary/Tertiary comparison incorrect in French Secondary
368 void CollationRegressionTest::Test4062418(/* char* par */)
370 UErrorCode status
= U_ZERO_ERROR
;
372 RuleBasedCollator
*c
= NULL
;
374 c
= (RuleBasedCollator
*) Collator::createInstance(Locale::getFrance(), status
);
376 if (c
== NULL
|| U_FAILURE(status
))
378 errln("Failed to create collator for Locale::getFrance()");
383 c
->setStrength(Collator::SECONDARY
);
387 "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater
390 static const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
392 {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0}
395 compareArray(*c
, tests
, ARRAY_LENGTH(tests
));
402 // Collator::compare() method broken if either string contains spaces
404 void CollationRegressionTest::Test4065540(/* char* par */)
406 if (en_us
->compare("abcd e", "abcd f") == 0)
408 errln("'abcd e' == 'abcd f'");
414 // Unicode characters need to be recursively decomposed to get the
415 // correct result. For example,
416 // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
418 void CollationRegressionTest::Test4066189(/* char* par */)
420 static const UChar chars1
[] = {0x1EB1, 0};
421 static const UChar chars2
[] = {0x61, 0x0306, 0x0300, 0};
422 const UnicodeString
test1(chars1
);
423 const UnicodeString
test2(chars2
);
424 UErrorCode status
= U_ZERO_ERROR
;
426 // NOTE: The java code used en_us to create the
427 // CollationElementIterator's. I'm pretty sure that
428 // was wrong, so I've change the code to use c1 and c2
429 RuleBasedCollator
*c1
= (RuleBasedCollator
*) en_us
->clone();
430 c1
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
431 CollationElementIterator
*i1
= c1
->createCollationElementIterator(test1
);
433 RuleBasedCollator
*c2
= (RuleBasedCollator
*) en_us
->clone();
434 c2
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_OFF
, status
);
435 CollationElementIterator
*i2
= c2
->createCollationElementIterator(test2
);
437 assertEqual(*i1
, *i2
);
447 // French secondary collation checking at the end of compare iteration fails
449 void CollationRegressionTest::Test4066696(/* char* par */)
451 UErrorCode status
= U_ZERO_ERROR
;
452 RuleBasedCollator
*c
= NULL
;
454 c
= (RuleBasedCollator
*)Collator::createInstance(Locale::getFrance(), status
);
456 if (c
== NULL
|| U_FAILURE(status
))
458 errln("Failure creating collator for Locale::getFrance()");
463 c
->setStrength(Collator::SECONDARY
);
467 "\u00e0", "<", "\u01fa", // a-grave < A-ring-acute
473 "\u00e0", ">", "\u01fa", // a-grave < A-ring-acute
478 static const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
480 {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0}
483 compareArray(*c
, tests
, ARRAY_LENGTH(tests
));
490 // Bad canonicalization of same-class combining characters
492 void CollationRegressionTest::Test4076676(/* char* par */)
494 // These combining characters are all in the same class, so they should not
495 // be reordered, and they should compare as unequal.
496 static const UChar s1
[] = {0x41, 0x0301, 0x0302, 0x0300, 0};
497 static const UChar s2
[] = {0x41, 0x0302, 0x0300, 0x0301, 0};
499 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
500 c
->setStrength(Collator::TERTIARY
);
502 if (c
->compare(s1
,s2
) == 0)
504 errln("Same-class combining chars were reordered");
512 // RuleBasedCollator::operator==(NULL) throws NullPointerException
514 void CollationRegressionTest::Test4079231(/* char* par */)
516 // I don't think there's any way to write this test
517 // in C++. The following is equivalent to the Java,
518 // but doesn't compile 'cause NULL can't be converted
521 // if (en_us->operator==(NULL))
523 // errln("en_us->operator==(NULL) returned TRUE");
528 if (en_us->equals(null)) {
529 errln("en_us->equals(null) returned true");
532 catch (Exception e) {
533 errln("en_us->equals(null) threw " + e.toString());
540 // RuleBasedCollator breaks on "< a < bb" rule
542 void CollationRegressionTest::Test4078588(/* char *par */)
544 UErrorCode status
= U_ZERO_ERROR
;
545 RuleBasedCollator
*rbc
= new RuleBasedCollator((UnicodeString
)"< a < bb", status
);
547 if (rbc
== NULL
|| U_FAILURE(status
))
549 errln("Failed to create RuleBasedCollator.");
554 Collator::EComparisonResult result
= rbc
->compare("a","bb");
556 if (result
!= Collator::LESS
)
558 errln((UnicodeString
)"Compare(a,bb) returned " + (int)result
559 + (UnicodeString
)"; expected -1");
567 // Combining characters in different classes not reordered properly.
569 void CollationRegressionTest::Test4081866(/* char* par */)
571 // These combining characters are all in different classes,
572 // so they should be reordered and the strings should compare as equal.
573 static const UChar s1
[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0};
574 static const UChar s2
[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0};
576 UErrorCode status
= U_ZERO_ERROR
;
577 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
578 c
->setStrength(Collator::TERTIARY
);
580 // Now that the default collators are set to NO_DECOMPOSITION
581 // (as a result of fixing bug 4114077), we must set it explicitly
582 // when we're testing reordering behavior. -- lwerner, 5/5/98
583 c
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
585 if (c
->compare(s1
,s2
) != 0)
587 errln("Combining chars were not reordered");
595 // string comparison errors in Scandinavian collators
597 void CollationRegressionTest::Test4087241(/* char* par */)
599 UErrorCode status
= U_ZERO_ERROR
;
600 Locale
da_DK("da", "DK");
601 RuleBasedCollator
*c
= NULL
;
603 c
= (RuleBasedCollator
*) Collator::createInstance(da_DK
, status
);
605 if (c
== NULL
|| U_FAILURE(status
))
607 errln("Failed to create collator for da_DK locale");
612 c
->setStrength(Collator::SECONDARY
);
614 static const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
616 {0x7a, 0}, {0x3c, 0}, {0x00E6, 0}, // z < ae
617 {0x61, 0x0308, 0}, {0x3c, 0}, {0x61, 0x030A, 0}, // a-unlaut < a-ring
618 {0x59, 0}, {0x3c, 0}, {0x75, 0x0308, 0}, // Y < u-umlaut
621 compareArray(*c
, tests
, ARRAY_LENGTH(tests
));
628 // CollationKey takes ignorable strings into account when it shouldn't
630 void CollationRegressionTest::Test4087243(/* char* par */)
632 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
633 c
->setStrength(Collator::TERTIARY
);
635 static const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
637 {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0} // 1 2 3 = 1 2 3 ctrl-A
640 compareArray(*c
, tests
, ARRAY_LENGTH(tests
));
648 // Micro symbol and greek lowercase letter Mu should sort identically
650 void CollationRegressionTest::Test4092260(/* char* par */)
652 UErrorCode status
= U_ZERO_ERROR
;
656 c
= Collator::createInstance(el
, status
);
658 if (c
== NULL
|| U_FAILURE(status
))
660 errln("Failed to create collator for el locale.");
665 // These now have tertiary differences in UCA
666 c
->setAttribute(UCOL_STRENGTH
, UCOL_SECONDARY
, status
);
668 static const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
670 {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0}
673 compareArray(*c
, tests
, ARRAY_LENGTH(tests
));
680 void CollationRegressionTest::Test4095316(/* char* par */)
682 UErrorCode status
= U_ZERO_ERROR
;
683 Locale
el_GR("el", "GR");
684 Collator
*c
= Collator::createInstance(el_GR
, status
);
686 if (c
== NULL
|| U_FAILURE(status
))
688 errln("Failed to create collator for el_GR locale");
692 // These now have tertiary differences in UCA
693 //c->setStrength(Collator::TERTIARY);
694 c
->setAttribute(UCOL_STRENGTH
, UCOL_SECONDARY
, status
);
696 static const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
698 {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0}
701 compareArray(*c
, tests
, ARRAY_LENGTH(tests
));
708 void CollationRegressionTest::Test4101940(/* char* par */)
710 UErrorCode status
= U_ZERO_ERROR
;
711 RuleBasedCollator
*c
= NULL
;
712 UnicodeString rules
= "< a < b";
713 UnicodeString nothing
= "";
715 c
= new RuleBasedCollator(rules
, status
);
717 if (c
== NULL
|| U_FAILURE(status
))
719 errln("Failed to create RuleBasedCollator");
724 CollationElementIterator
*i
= c
->createCollationElementIterator(nothing
);
727 if (i
->next(status
) != CollationElementIterator::NULLORDER
)
729 errln("next did not return NULLORDER");
738 // Collator::compare not handling spaces properly
740 void CollationRegressionTest::Test4103436(/* char* par */)
742 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
743 c
->setStrength(Collator::TERTIARY
);
745 static const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
747 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0},
748 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}
751 compareArray(*c
, tests
, ARRAY_LENGTH(tests
));
758 // Collation not Unicode conformant with Hangul syllables
760 void CollationRegressionTest::Test4114076(/* char* par */)
762 UErrorCode status
= U_ZERO_ERROR
;
763 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
764 c
->setStrength(Collator::TERTIARY
);
767 // With Canonical decomposition, Hangul syllables should get decomposed
768 // into Jamo, but Jamo characters should not be decomposed into
771 static const UChar test1
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
773 {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0}
776 c
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
777 compareArray(*c
, test1
, ARRAY_LENGTH(test1
));
780 // *In earlier versions of Unicode, jamo characters like ksf
781 // had compatibility mappings to kf + sf. These mappings were
782 // removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.)
783 // That is, the following test is obsolete as of 2.1.9
785 //obsolete- // With Full decomposition, it should go all the way down to
786 //obsolete- // conjoining Jamo characters.
788 //obsolete- static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
790 //obsolete- {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0}
793 //obsolete- c->setDecomposition(Normalizer::DECOMP_COMPAT);
794 //obsolete- compareArray(*c, test2, ARRAY_LENGTH(test2));
802 // Collator::getCollationKey was hanging on certain character sequences
804 void CollationRegressionTest::Test4124632(/* char* par */)
806 UErrorCode status
= U_ZERO_ERROR
;
807 Collator
*coll
= NULL
;
809 coll
= Collator::createInstance(Locale::getJapan(), status
);
811 if (coll
== NULL
|| U_FAILURE(status
))
813 errln("Failed to create collator for Locale::JAPAN");
818 static const UChar test
[] = {0x41, 0x0308, 0x62, 0x63, 0};
821 coll
->getCollationKey(test
, key
, status
);
823 if (key
.isBogus() || U_FAILURE(status
))
825 errln("CollationKey creation failed.");
833 // sort order of french words with multiple accents has errors
835 void CollationRegressionTest::Test4132736(/* char* par */)
837 UErrorCode status
= U_ZERO_ERROR
;
841 c
= Collator::createInstance(Locale::getFrance(), status
);
842 c
->setStrength(Collator::TERTIARY
);
844 if (c
== NULL
|| U_FAILURE(status
))
846 errln("Failed to create a collator for Locale::getFrance()");
851 static const UChar test1
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
853 {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0},
854 {0x65, 0x0300, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x0300, 0}
857 compareArray(*c
, test1
, ARRAY_LENGTH(test1
));
864 // The sorting using java.text.CollationKey is not in the exact order
866 void CollationRegressionTest::Test4133509(/* char* par */)
868 static const UChar test1
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
870 {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0},
871 {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0}, {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0},
872 {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0}, {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0}
875 compareArray(*en_us
, test1
, ARRAY_LENGTH(test1
));
880 // Collation with decomposition off doesn't work for Europe
882 void CollationRegressionTest::Test4114077(/* char* par */)
884 // Ensure that we get the same results with decomposition off
885 // as we do with it on....
887 UErrorCode status
= U_ZERO_ERROR
;
888 RuleBasedCollator
*c
= (RuleBasedCollator
*) en_us
->clone();
889 c
->setStrength(Collator::TERTIARY
);
891 static const UChar test1
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
893 {0x00C0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}, // Should be equivalent
894 {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0},
895 {0x0204, 0}, {0x3d, 0}, {0x45, 0x030F, 0},
896 {0x01fa, 0}, {0x3d, 0}, {0x41, 0x030a, 0x0301, 0}, // a-ring-acute -> a-ring, acute
898 {0x41, 0x0300, 0x0316, 0}, {0x3c, 0}, {0x41, 0x0316, 0x0300, 0} // No reordering --> unequal
901 c
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_OFF
, status
);
902 compareArray(*c
, test1
, ARRAY_LENGTH(test1
));
904 static const UChar test2
[][CollationRegressionTest::MAX_TOKEN_LEN
] =
906 {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0} // Reordering --> equal
909 c
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
910 compareArray(*c
, test2
, ARRAY_LENGTH(test2
));
917 // Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
919 void CollationRegressionTest::Test4141640(/* char* par */)
922 // Rather than just creating a Swedish collator, we might as well
923 // try to instantiate one for every locale available on the system
924 // in order to prevent this sort of bug from cropping up in the future
926 UErrorCode status
= U_ZERO_ERROR
;
927 int32_t i
, localeCount
;
928 const Locale
*locales
= Locale::getAvailableLocales(localeCount
);
930 for (i
= 0; i
< localeCount
; i
+= 1)
934 status
= U_ZERO_ERROR
;
935 c
= Collator::createInstance(locales
[i
], status
);
937 if (c
== NULL
|| U_FAILURE(status
))
939 UnicodeString msg
, localeName
;
941 msg
+= "Could not create collator for locale ";
942 msg
+= locales
[i
].getName();
953 // getCollationKey throws exception for spanish text
954 // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
956 void CollationRegressionTest::Test4139572(/* char* par */)
959 // Code pasted straight from the bug report
960 // (and then translated to C++ ;-)
962 // create spanish locale and collator
963 UErrorCode status
= U_ZERO_ERROR
;
964 Locale
l("es", "es");
965 Collator
*col
= NULL
;
967 col
= Collator::createInstance(l
, status
);
969 if (col
== NULL
|| U_FAILURE(status
))
971 errln("Failed to create a collator for es_es locale.");
978 // this spanish phrase kills it!
979 col
->getCollationKey("Nombre De Objeto", key
, status
);
981 if (key
.isBogus() || U_FAILURE(status
))
983 errln("Error creating CollationKey for \"Nombre De Ojbeto\"");
988 /* HSYS : RuleBasedCollator::compare() performance enhancements
989 compare() does not create CollationElementIterator() anymore.*/
991 class My4146160Collator
: public RuleBasedCollator
994 My4146160Collator(RuleBasedCollator
&rbc
, UErrorCode
&status
);
995 ~My4146160Collator();
997 CollationElementIterator
*createCollationElementIterator(const UnicodeString
&text
) const;
999 CollationElementIterator
*createCollationElementIterator(const CharacterIterator
&text
) const;
1001 static int32_t count
;
1004 int32_t My4146160Collator::count
= 0;
1006 My4146160Collator::My4146160Collator(RuleBasedCollator
&rbc
, UErrorCode
&status
)
1007 : RuleBasedCollator(rbc
.getRules(), status
)
1011 My4146160Collator::~My4146160Collator()
1015 CollationElementIterator
*My4146160Collator::createCollationElementIterator(const UnicodeString
&text
) const
1018 return RuleBasedCollator::createCollationElementIterator(text
);
1021 CollationElementIterator
*My4146160Collator::createCollationElementIterator(const CharacterIterator
&text
) const
1024 return RuleBasedCollator::createCollationElementIterator(text
);
1029 // RuleBasedCollator doesn't use createCollationElementIterator internally
1031 void CollationRegressionTest::Test4146160(/* char* par */)
1035 // Use a custom collator class whose createCollationElementIterator
1036 // methods increment a count....
1038 UErrorCode status
= U_ZERO_ERROR
;
1041 My4146160Collator::count
= 0;
1042 My4146160Collator
*mc
= NULL
;
1044 mc
= new My4146160Collator(*en_us
, status
);
1046 if (mc
== NULL
|| U_FAILURE(status
))
1048 errln("Failed to create a My4146160Collator.");
1053 mc
->getCollationKey("1", key
, status
);
1055 if (key
.isBogus() || U_FAILURE(status
))
1057 errln("Failure to get a CollationKey from a My4146160Collator.");
1062 if (My4146160Collator::count
< 1)
1064 errln("My4146160Collator::createCollationElementIterator not called for getCollationKey");
1067 My4146160Collator::count
= 0;
1068 mc
->compare("1", "2");
1070 if (My4146160Collator::count
< 1)
1072 errln("My4146160Collator::createtCollationElementIterator not called for compare");
1078 void CollationRegressionTest::compareArray(Collator
&c
,
1079 const UChar tests
[][CollationRegressionTest::MAX_TOKEN_LEN
],
1083 Collator::EComparisonResult expectedResult
= Collator::EQUAL
;
1085 for (i
= 0; i
< testCount
; i
+= 3)
1087 UnicodeString
source(tests
[i
]);
1088 UnicodeString
comparison(tests
[i
+ 1]);
1089 UnicodeString
target(tests
[i
+ 2]);
1091 if (comparison
== "<")
1093 expectedResult
= Collator::LESS
;
1095 else if (comparison
== ">")
1097 expectedResult
= Collator::GREATER
;
1099 else if (comparison
== "=")
1101 expectedResult
= Collator::EQUAL
;
1105 UnicodeString
bogus1("Bogus comparison string \"");
1106 UnicodeString
bogus2("\"");
1107 errln(bogus1
+ comparison
+ bogus2
);
1110 Collator::EComparisonResult compareResult
= c
.compare(source
, target
);
1112 CollationKey sourceKey
, targetKey
;
1113 UErrorCode status
= U_ZERO_ERROR
;
1115 c
.getCollationKey(source
, sourceKey
, status
);
1117 if (U_FAILURE(status
))
1119 errln("Couldn't get collationKey for source");
1123 c
.getCollationKey(target
, targetKey
, status
);
1125 if (U_FAILURE(status
))
1127 errln("Couldn't get collationKey for target");
1131 Collator::EComparisonResult keyResult
= sourceKey
.compareTo(targetKey
);
1133 reportCResult( source
, target
, sourceKey
, targetKey
, compareResult
, keyResult
, compareResult
, expectedResult
);
1138 void CollationRegressionTest::assertEqual(CollationElementIterator
&i1
, CollationElementIterator
&i2
)
1140 int32_t c1
, c2
, count
= 0;
1141 UErrorCode status
= U_ZERO_ERROR
;
1145 c1
= i1
.next(status
);
1146 c2
= i2
.next(status
);
1150 UnicodeString msg
, msg1(" ");
1152 msg
+= msg1
+ count
;
1153 msg
+= ": strength(0x";
1154 appendHex(c1
, 8, msg
);
1155 msg
+= ") != strength(0x";
1156 appendHex(c2
, 8, msg
);
1165 while (c1
!= CollationElementIterator::NULLORDER
);
1168 void CollationRegressionTest::runIndexedTest(int32_t index
, UBool exec
, const char* &name
, char* /* par */)
1172 logln("Collation Regression Tests: ");
1178 case 0: name
= "Test4048446"; if (exec
) Test4048446(/* par */); break;
1179 case 1: name
= "Test4051866"; if (exec
) Test4051866(/* par */); break;
1180 case 2: name
= "Test4053636"; if (exec
) Test4053636(/* par */); break;
1181 case 3: name
= "Test4054238"; if (exec
) Test4054238(/* par */); break;
1182 case 4: name
= "Test4054734"; if (exec
) Test4054734(/* par */); break;
1183 case 5: name
= "Test4054736"; if (exec
) Test4054736(/* par */); break;
1184 case 6: name
= "Test4058613"; if (exec
) Test4058613(/* par */); break;
1185 case 7: name
= "Test4059820"; if (exec
) Test4059820(/* par */); break;
1186 case 8: name
= "Test4060154"; if (exec
) Test4060154(/* par */); break;
1187 case 9: name
= "Test4062418"; if (exec
) Test4062418(/* par */); break;
1188 case 10: name
= "Test4065540"; if (exec
) Test4065540(/* par */); break;
1189 case 11: name
= "Test4066189"; if (exec
) Test4066189(/* par */); break;
1190 case 12: name
= "Test4066696"; if (exec
) Test4066696(/* par */); break;
1191 case 13: name
= "Test4076676"; if (exec
) Test4076676(/* par */); break;
1192 case 14: name
= "Test4078588"; if (exec
) Test4078588(/* par */); break;
1193 case 15: name
= "Test4079231"; if (exec
) Test4079231(/* par */); break;
1194 case 16: name
= "Test4081866"; if (exec
) Test4081866(/* par */); break;
1195 case 17: name
= "Test4087241"; if (exec
) Test4087241(/* par */); break;
1196 case 18: name
= "Test4087243"; if (exec
) Test4087243(/* par */); break;
1197 case 19: name
= "Test4092260"; if (exec
) Test4092260(/* par */); break;
1198 case 20: name
= "Test4095316"; if (exec
) Test4095316(/* par */); break;
1199 case 21: name
= "Test4101940"; if (exec
) Test4101940(/* par */); break;
1200 case 22: name
= "Test4103436"; if (exec
) Test4103436(/* par */); break;
1201 case 23: name
= "Test4114076"; if (exec
) Test4114076(/* par */); break;
1202 case 24: name
= "Test4114077"; if (exec
) Test4114077(/* par */); break;
1203 case 25: name
= "Test4124632"; if (exec
) Test4124632(/* par */); break;
1204 case 26: name
= "Test4132736"; if (exec
) Test4132736(/* par */); break;
1205 case 27: name
= "Test4133509"; if (exec
) Test4133509(/* par */); break;
1206 case 28: name
= "Test4139572"; if (exec
) Test4139572(/* par */); break;
1207 case 29: name
= "Test4141640"; if (exec
) Test4141640(/* par */); break;
1208 case 30: name
= "Test4146160"; if (exec
) Test4146160(/* par */); break;
1209 default: name
= ""; break;
1212 errln("Class collator not instantiated");
1217 #endif /* #if !UCONFIG_NO_COLLATION */