1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 //===============================================================================
14 // Created by: Helena Shih
16 // Modification History:
18 // Date Name Description
19 // 2/5/97 aliu Added streamIn and streamOut methods. Added
20 // constructor which reads RuleBasedCollator object from
21 // a binary file. Added writeToFile method which streams
22 // RuleBasedCollator out to a binary file. The streamIn
23 // and streamOut methods use istream and ostream objects
25 // 6/30/97 helena Added tests for CollationElementIterator::setText, getOffset
26 // setOffset and DecompositionIterator::getOffset, setOffset.
27 // DecompositionIterator is made public so add class scope
29 // 02/10/98 damiba Added test for compare(UnicodeString&, UnicodeString&, int32_t)
30 //===============================================================================
32 #include "unicode/utypes.h"
34 #if !UCONFIG_NO_COLLATION
36 #include "unicode/localpointer.h"
37 #include "unicode/coll.h"
38 #include "unicode/tblcoll.h"
39 #include "unicode/coleitr.h"
40 #include "unicode/sortkey.h"
42 #include "unicode/chariter.h"
43 #include "unicode/schriter.h"
44 #include "unicode/strenum.h"
45 #include "unicode/ustring.h"
46 #include "unicode/ucol.h"
// doAssert: test helper that reports a failure through errln(), prefixing
// `message` with "ERROR : ".
//   condition - the checked expression, passed by the test as a UBool
//   message   - text describing the failed expectation
// NOTE(review): no test of `condition` is visible in this excerpt; the errln()
// call is presumably guarded by one on a line not shown - confirm against the
// full file.
53 CollationAPITest::doAssert(UBool condition
, const char *message
)
56 errln(UnicodeString("ERROR : ") + message
);
60 // Collator Class Properties
61 // ctor, dtor, createInstance, compare, getStrength/setStrength
62 // getDecomposition/setDecomposition, getDisplayName
// TestProperty: exercises basic Collator properties.
// Visible steps, in order:
//   1. create an English collator and check getVersion() against a minimum
//      expected version,
//   2. check several compare() overloads (plain, with UErrorCode, UTF-8,
//      UCharIterator, and length-limited),
//   3. check getStrength()/setStrength(),
//   4. check getDisplayName() for the US locale in German and English,
//   5. check that the da_DK rules are non-empty and contain "aa",
//   6. repeat the strength checks on a French collator,
//   7. create a collator for the unsupported locale ab_CD and compare it with
//      an fr_CA collator and its clone.
// NOTE(review): several declarations (e.g. `col`, `name`, `junk`), braces and
// early returns are not visible in this excerpt.
64 CollationAPITest::TestProperty(/* char* par */)
66 UErrorCode success
= U_ZERO_ERROR
;
69 * Expected version of the English collator.
70 * Currently, the major/minor version numbers change when the builder code
72 * number 2 is from the tailoring data version and
73 * number 3 is the UCA version.
74 * This changes with every UCA version change, and the expected value
75 * needs to be adjusted.
76 * Same in cintltst/capitst.c.
78 UVersionInfo currVersionArray
= {0x31, 0xC0, 0x05, 0x2A}; // from ICU 4.4/UCA 5.2
79 UVersionInfo versionArray
;
81 logln("The property tests begin : ");
82 logln("Test ctors : ");
83 col
= Collator::createInstance(Locale::getEnglish(), success
);
84 if (U_FAILURE(success
)){
85 errcheckln(success
, "English Collator creation failed. - %s", u_errorName(success
));
89 col
->getVersion(versionArray
);
90 // Check for a version greater than some value rather than equality
91 // so that we need not update the expected version each time.
92 if (uprv_memcmp(versionArray
, currVersionArray
, 4)<0) {
93 errln("Testing Collator::getVersion() - unexpected result: %02x.%02x.%02x.%02x",
94 versionArray
[0], versionArray
[1], versionArray
[2], versionArray
[3]);
96 logln("Collator::getVersion() result: %02x.%02x.%02x.%02x",
97 versionArray
[0], versionArray
[1], versionArray
[2], versionArray
[3]);
100 doAssert((col
->compare("ab", "abc") == Collator::LESS
), "ab < abc comparison failed");
101 doAssert((col
->compare("ab", "AB") == Collator::LESS
), "ab < AB comparison failed");
102 doAssert((col
->compare("blackbird", "black-bird") == Collator::GREATER
), "black-bird > blackbird comparison failed");
103 doAssert((col
->compare("black bird", "black-bird") == Collator::LESS
), "black bird > black-bird comparison failed");
104 doAssert((col
->compare("Hello", "hello") == Collator::GREATER
), "Hello > hello comparison failed");
105 doAssert((col
->compare("","",success
) == UCOL_EQUAL
), "Comparison between empty strings failed");
// UTF-8 API: the byte strings below encode "ab" + U+00E4 and "ab" + U+00DF.
107 doAssert((col
->compareUTF8("\x61\x62\xc3\xa4", "\x61\x62\xc3\x9f", success
) == UCOL_LESS
), "ab a-umlaut < ab sharp-s UTF-8 comparison failed");
108 success
= U_ZERO_ERROR
;
// Same pair of strings again, this time compared through UCharIterator
// wrappers set up with uiter_setReplaceable().
110 UnicodeString abau
=UNICODE_STRING_SIMPLE("\\x61\\x62\\xe4").unescape();
111 UnicodeString abss
=UNICODE_STRING_SIMPLE("\\x61\\x62\\xdf").unescape();
112 UCharIterator abauIter
, abssIter
;
113 uiter_setReplaceable(&abauIter
, &abau
);
114 uiter_setReplaceable(&abssIter
, &abss
);
115 doAssert((col
->compare(abauIter
, abssIter
, success
) == UCOL_LESS
), "ab a-umlaut < ab sharp-s UCharIterator comparison failed");
116 success
= U_ZERO_ERROR
;
119 /*start of update [Bertrand A. D. 02/10/98]*/
120 doAssert((col
->compare("ab", "abc", 2) == Collator::EQUAL
), "ab = abc with length 2 comparison failed");
121 doAssert((col
->compare("ab", "AB", 2) == Collator::LESS
), "ab < AB with length 2 comparison failed");
122 doAssert((col
->compare("ab", "Aa", 1) == Collator::LESS
), "ab < Aa with length 1 comparison failed");
123 doAssert((col
->compare("ab", "Aa", 2) == Collator::GREATER
), "ab > Aa with length 2 comparison failed");
124 doAssert((col
->compare("black-bird", "blackbird", 5) == Collator::EQUAL
), "black-bird = blackbird with length of 5 comparison failed");
125 doAssert((col
->compare("black bird", "black-bird", 10) == Collator::LESS
), "black bird < black-bird with length 10 comparison failed");
126 doAssert((col
->compare("Hello", "hello", 5) == Collator::GREATER
), "Hello > hello with length 5 comparison failed");
127 /*end of update [Bertrand A. D. 02/10/98]*/
130 logln("Test ctors ends.");
// The English collator is expected to start at TERTIARY strength.
131 logln("testing Collator::getStrength() method ...");
132 doAssert((col
->getStrength() == Collator::TERTIARY
), "collation object has the wrong strength");
133 doAssert((col
->getStrength() != Collator::PRIMARY
), "collation object's strength is primary difference");
136 logln("testing Collator::setStrength() method ...");
137 col
->setStrength(Collator::SECONDARY
);
138 doAssert((col
->getStrength() != Collator::TERTIARY
), "collation object's strength is secondary difference");
139 doAssert((col
->getStrength() != Collator::PRIMARY
), "collation object's strength is primary difference");
140 doAssert((col
->getStrength() == Collator::SECONDARY
), "collation object has the wrong strength");
144 logln("Get display name for the US English collation in German : ");
145 logln(Collator::getDisplayName(Locale::getUS(), Locale::getGerman(), name
));
146 doAssert((name
== UnicodeString("Englisch (Vereinigte Staaten)")), "getDisplayName failed");
148 logln("Get display name for the US English collation in English : ");
149 logln(Collator::getDisplayName(Locale::getUS(), Locale::getEnglish(), name
));
150 doAssert((name
== UnicodeString("English (United States)")), "getDisplayName failed");
152 // weiv : this test is bogus if we're running on any machine that has different default locale than English.
153 // Therefore, it is banned!
// NOTE(review): the remark above calls this check banned, yet the statements
// below appear live in this excerpt; a disabling guard may sit on lines that
// are not shown - confirm against the full file.
154 logln("Get display name for the US English in default locale language : ");
155 logln(Collator::getDisplayName(Locale::US
, name
));
156 doAssert((name
== UnicodeString("English (United States)")), "getDisplayName failed if this is an English machine");
// NOTE(review): the createInstance("da_DK", ...) call below is cut off in this
// excerpt; its remaining arguments are not shown.
159 RuleBasedCollator
*rcol
= (RuleBasedCollator
*)Collator::createInstance("da_DK",
161 if (U_FAILURE(success
)) {
162 errcheckln(success
, "Collator::createInstance(\"da_DK\") failed - %s", u_errorName(success
));
165 const UnicodeString
&daRules
= rcol
->getRules();
166 if(daRules
.isEmpty()) {
167 dataerrln("missing da_DK tailoring rule string");
169 doAssert(daRules
.indexOf("aa") >= 0, "da_DK rules do not contain 'aa'");
// `col` is reused for a French collator from here on.
173 col
= Collator::createInstance(Locale::getFrench(), success
);
174 if (U_FAILURE(success
))
176 errln("Creating French collation failed.");
180 col
->setStrength(Collator::PRIMARY
);
181 logln("testing Collator::getStrength() method again ...");
182 doAssert((col
->getStrength() != Collator::TERTIARY
), "collation object has the wrong strength");
183 doAssert((col
->getStrength() == Collator::PRIMARY
), "collation object's strength is not primary difference");
185 logln("testing French Collator::setStrength() method ...");
186 col
->setStrength(Collator::TERTIARY
);
187 doAssert((col
->getStrength() == Collator::TERTIARY
), "collation object's strength is not tertiary difference");
188 doAssert((col
->getStrength() != Collator::PRIMARY
), "collation object's strength is primary difference");
189 doAssert((col
->getStrength() != Collator::SECONDARY
), "collation object's strength is secondary difference");
// Creating a collator for the unsupported locale ab_CD must still succeed;
// the assertion further down expects its rule string to be empty.
192 logln("Create junk collation: ");
193 Locale
abcd("ab", "CD", "");
194 success
= U_ZERO_ERROR
;
196 junk
= Collator::createInstance(abcd
, success
);
198 if (U_FAILURE(success
))
200 errln("Junk collation creation failed, should at least return default.");
204 doAssert(((RuleBasedCollator
*)junk
)->getRules().isEmpty(),
205 "The root collation should be returned for an unsupported language.");
206 Collator
*frCol
= Collator::createInstance(Locale::getCanadaFrench(), success
);
207 if (U_FAILURE(success
))
209 errln("Creating fr_CA collator failed.");
214 // If the default locale isn't French, the French and non-French collators
215 // should be different
216 if (frCol
->getLocale(ULOC_ACTUAL_LOCALE
, success
) != Locale::getCanadaFrench()) {
217 doAssert((*frCol
!= *junk
), "The junk is the same as the fr_CA collator.");
// A clone must compare equal to the collator it was cloned from.
219 Collator
*aFrCol
= frCol
->clone();
220 doAssert((*frCol
== *aFrCol
), "The cloning of a fr_CA collator failed.");
221 logln("Collator property test ended.");
// TestKeywordValues: asks an English collator for the "collation" keyword
// values of the English locale and asserts that the enumeration is non-empty
// and that `hasStandard` is set after scanning it for "standard".
// NOTE(review): the declaration of `kw`, the statement that sets `hasStandard`
// and the early returns after the two error checks are not visible in this
// excerpt.
228 void CollationAPITest::TestKeywordValues() {
229 IcuTestErrorCode
errorCode(*this, "TestKeywordValues");
230 LocalPointer
<Collator
> col(Collator::createInstance(Locale::getEnglish(), errorCode
));
231 if (errorCode
.errIfFailureAndReset("English Collator creation failed")) {
235 LocalPointer
<StringEnumeration
> kwEnum(
236 col
->getKeywordValuesForLocale("collation", Locale::getEnglish(), TRUE
, errorCode
));
237 if (errorCode
.errIfFailureAndReset("Get Keyword Values for English Collator failed")) {
240 assertTrue("expect at least one collation tailoring for English", kwEnum
->count(errorCode
) > 0);
// Walk every keyword value until next() returns NULL, looking for "standard".
242 UBool hasStandard
= FALSE
;
243 while ((kw
= kwEnum
->next(NULL
, errorCode
)) != NULL
) {
244 if (strcmp(kw
, "standard") == 0) {
248 assertTrue("expect at least the 'standard' collation tailoring for English", hasStandard
);
// TestRuleBasedColl: builds RuleBasedCollator objects from two rule strings,
// obtains collators for the locale aa_AA and for the default locale, and
// checks that getRules() differs between the three and round-trips through a
// collator rebuilt from rule2. Finally calls cloneRuleData() and frees the
// result with uprv_free().
// NOTE(review): braces, early returns, cleanup and the declaration of
// `length4` are not visible in this excerpt.
252 CollationAPITest::TestRuleBasedColl()
254 RuleBasedCollator
*col1
, *col2
, *col3
, *col4
;
255 UErrorCode status
= U_ZERO_ERROR
;
257 UnicodeString
ruleset1("&9 < a, A < b, B < c, C; ch, cH, Ch, CH < d, D, e, E");
258 UnicodeString
ruleset2("&9 < a, A < b, B < c, C < d, D, e, E");
260 col1
= new RuleBasedCollator(ruleset1
, status
);
261 if (U_FAILURE(status
)) {
262 errcheckln(status
, "RuleBased Collator creation failed. - %s", u_errorName(status
));
266 logln("PASS: RuleBased Collator creation passed\n");
269 status
= U_ZERO_ERROR
;
270 col2
= new RuleBasedCollator(ruleset2
, status
);
271 if (U_FAILURE(status
)) {
272 errln("RuleBased Collator creation failed.\n");
276 logln("PASS: RuleBased Collator creation passed\n");
279 status
= U_ZERO_ERROR
;
280 Locale
locale("aa", "AA");
281 col3
= (RuleBasedCollator
*)Collator::createInstance(locale
, status
);
282 if (U_FAILURE(status
)) {
// NOTE(review): the format string below contains %s but no argument is passed
// on this line.
283 errln("Fallback Collator creation failed.: %s\n");
287 logln("PASS: Fallback Collator creation passed\n");
// NOTE(review): col3 is assigned a second time below; whether the first
// instance is released beforehand is not visible in this excerpt.
291 status
= U_ZERO_ERROR
;
292 col3
= (RuleBasedCollator
*)Collator::createInstance(status
);
293 if (U_FAILURE(status
)) {
// NOTE(review): same as above - %s in the format string without an argument.
294 errln("Default Collator creation failed.: %s\n");
298 logln("PASS: Default Collator creation passed\n");
// The three collators are expected to expose pairwise different rule strings.
301 UnicodeString rule1
= col1
->getRules();
302 UnicodeString rule2
= col2
->getRules();
303 UnicodeString rule3
= col3
->getRules();
305 doAssert(rule1
!= rule2
, "Default collator getRules failed");
306 doAssert(rule2
!= rule3
, "Default collator getRules failed");
307 doAssert(rule1
!= rule3
, "Default collator getRules failed");
// A collator rebuilt from rule2 must report exactly rule2 again.
309 col4
= new RuleBasedCollator(rule2
, status
);
310 if (U_FAILURE(status
)) {
311 errln("RuleBased Collator creation failed.\n");
315 UnicodeString rule4
= col4
->getRules();
316 doAssert(rule2
== rule4
, "Default collator getRules failed");
318 uint8_t *clonedrule4
= col4
->cloneRuleData(length4
, status
);
319 if (U_FAILURE(status
)) {
320 errln("Cloned rule data failed.\n");
324 // free(clonedrule4); BAD API!!!!
325 uprv_free(clonedrule4
);
// TestRules: fetches the rules of the English collator twice, first with
// UCOL_TAILORING_ONLY (expected to be empty) and then with UCOL_FULL_RULES.
// NOTE(review): the declaration of `rules`, braces and cleanup are not
// visible in this excerpt.
335 CollationAPITest::TestRules()
337 RuleBasedCollator
*coll
;
338 UErrorCode status
= U_ZERO_ERROR
;
341 coll
= (RuleBasedCollator
*)Collator::createInstance(Locale::getEnglish(), status
);
342 if (U_FAILURE(status
)) {
343 errcheckln(status
, "English Collator creation failed. - %s", u_errorName(status
));
347 logln("PASS: RuleBased Collator creation passed\n");
350 coll
->getRules(UCOL_TAILORING_ONLY
, rules
);
351 if (rules
.length() != 0x00) {
352 errln("English tailored rules failed - length is 0x%x expected 0x%x", rules
.length(), 0x00);
355 coll
->getRules(UCOL_FULL_RULES
, rules
);
// NOTE(review): a string length is presumably never negative, so the check
// below looks like it can never report a failure - confirm the intent.
356 if (rules
.length() < 0) {
357 errln("English full rules failed");
// TestDecomposition: checks the UCOL_NORMALIZATION_MODE attribute of three
// locale collators. vi_VN and el_GR are expected to report UCOL_ON, en_US is
// expected to report UCOL_OFF.
// NOTE(review): braces, the early return after the creation check and any
// cleanup are not visible in this excerpt.
363 CollationAPITest::TestDecomposition() {
364 UErrorCode status
= U_ZERO_ERROR
;
// All three collators share one status; a single failure check follows.
365 Collator
*en_US
= Collator::createInstance("en_US", status
),
366 *el_GR
= Collator::createInstance("el_GR", status
),
367 *vi_VN
= Collator::createInstance("vi_VN", status
);
369 if (U_FAILURE(status
)) {
370 errcheckln(status
, "ERROR: collation creation failed. - %s", u_errorName(status
));
374 /* there is no reason to have canonical decomposition in en_US OR default locale */
375 if (vi_VN
->getAttribute(UCOL_NORMALIZATION_MODE
, status
) != UCOL_ON
)
377 errln("ERROR: vi_VN collation did not have canonical decomposition for normalization!\n");
380 if (el_GR
->getAttribute(UCOL_NORMALIZATION_MODE
, status
) != UCOL_ON
)
382 errln("ERROR: el_GR collation did not have canonical decomposition for normalization!\n");
385 if (en_US
->getAttribute(UCOL_NORMALIZATION_MODE
, status
) != UCOL_OFF
)
387 errln("ERROR: en_US collation had canonical decomposition for normalization!\n");
// TestSafeClone: creates en_US, ko and ja_JP collators, clones each one with
// safeClone(), then gives clone and original different strengths (TERTIARY
// vs. PRIMARY) and checks that "abCda"/"abcda" compare as greater on the
// clone and as equal on the original.
// NOTE(review): the declarations of `col` and `index`, braces, the null check
// on the clone and the deletion of the clone are not visible in this excerpt.
396 CollationAPITest::TestSafeClone() {
397 static const int CLONETEST_COLLATOR_COUNT
= 3;
398 Collator
*someCollators
[CLONETEST_COLLATOR_COUNT
];
400 UErrorCode err
= U_ZERO_ERROR
;
403 UnicodeString
test1("abCda");
404 UnicodeString
test2("abcda");
406 /* one default collator & two complex ones */
407 someCollators
[0] = Collator::createInstance("en_US", err
);
408 someCollators
[1] = Collator::createInstance("ko", err
);
409 someCollators
[2] = Collator::createInstance("ja_JP", err
);
// NOTE(review): the report-and-delete sequence below is presumably guarded by
// a failure check on `err` that is not visible here.
411 errcheckln(err
, "Couldn't instantiate collators. Error: %s", u_errorName(err
));
412 delete someCollators
[0];
413 delete someCollators
[1];
414 delete someCollators
[2];
418 /* change orig & clone & make sure they are independent */
420 for (index
= 0; index
< CLONETEST_COLLATOR_COUNT
; index
++)
422 col
= someCollators
[index
]->safeClone();
424 errln("SafeClone of collator should not return null\n");
// Different strengths on clone and original; the case level is switched off
// on both.
427 col
->setStrength(Collator::TERTIARY
);
428 someCollators
[index
]->setStrength(Collator::PRIMARY
);
429 col
->setAttribute(UCOL_CASE_LEVEL
, UCOL_OFF
, err
);
430 someCollators
[index
]->setAttribute(UCOL_CASE_LEVEL
, UCOL_OFF
, err
);
432 doAssert(col
->greater(test1
, test2
), "Result should be \"abCda\" >>> \"abcda\" ");
433 doAssert(someCollators
[index
]->equals(test1
, test2
), "Result should be \"abcda\" == \"abCda\"");
435 delete someCollators
[index
];
// TestHashCode: checks hashCode() on collators and on collation keys.
//   - an English and a Danish collator are expected to hash differently,
//   - two English collators are expected to hash identically,
//   - keys for "Abcda" and "abcda" are expected to hash differently, while
//     two keys for "abcda" are expected to hash identically.
// NOTE(review): the declarations of col1/col2/col3, braces, early returns and
// cleanup are not visible in this excerpt.
440 CollationAPITest::TestHashCode(/* char* par */)
442 logln("hashCode tests begin.");
443 UErrorCode success
= U_ZERO_ERROR
;
445 col1
= Collator::createInstance(Locale::getEnglish(), success
);
446 if (U_FAILURE(success
))
448 errcheckln(success
, "Default collation creation failed. - %s", u_errorName(success
));
453 Locale
dk("da", "DK", "");
454 col2
= Collator::createInstance(dk
, success
);
455 if (U_FAILURE(success
))
457 errln("Danish collation creation failed.");
462 col3
= Collator::createInstance(Locale::getEnglish(), success
);
463 if (U_FAILURE(success
))
465 errln("2nd default collation creation failed.");
469 logln("Collator::hashCode() testing ...");
471 doAssert(col1
->hashCode() != col2
->hashCode(), "Hash test1 result incorrect" );
472 doAssert(!(col1
->hashCode() == col2
->hashCode()), "Hash test2 result incorrect" );
473 doAssert(col1
->hashCode() == col3
->hashCode(), "Hash result not equal" );
475 logln("hashCode tests end.");
// Second part: hash codes of CollationKey objects produced by col3.
479 UnicodeString
test1("Abcda");
480 UnicodeString
test2("abcda");
482 CollationKey sortk1
, sortk2
, sortk3
;
483 UErrorCode status
= U_ZERO_ERROR
;
485 col3
->getCollationKey(test1
, sortk1
, status
);
486 col3
->getCollationKey(test2
, sortk2
, status
);
487 col3
->getCollationKey(test2
, sortk3
, status
);
489 doAssert(sortk1
.hashCode() != sortk2
.hashCode(), "Hash test1 result incorrect");
490 doAssert(sortk2
.hashCode() == sortk3
.hashCode(), "Hash result not equal" );
495 //----------------------------------------------------------------------------
496 // CollationKey -- Tests the CollationKey methods
// TestCollationKey: exercises CollationKey with an English collator at
// TERTIARY strength.
// Visible steps, in order:
//   1. default-constructed key, key for the empty string and key for a string
//      of completely ignorable characters,
//   2. bogus key returned when the error code already holds a failure,
//   3. compareTo() (with and without UErrorCode), operator==/!= and
//      hashCode() on keys for "Abcda" and "abcda",
//   4. keys rebuilt from the byte arrays returned by getByteArray(),
//   5. one more key comparison at SECONDARY strength.
// NOTE(review): the declarations of `col` and `length`, braces, several
// assignments and cleanup are not visible in this excerpt.
499 CollationAPITest::TestCollationKey(/* char* par */)
501 logln("testing CollationKey begins...");
503 UErrorCode success
=U_ZERO_ERROR
;
504 col
= Collator::createInstance(Locale::getEnglish(), success
);
505 if (U_FAILURE(success
))
507 errcheckln(success
, "Default collation creation failed. - %s", u_errorName(success
));
510 col
->setStrength(Collator::TERTIARY
);
512 CollationKey sortk1
, sortk2
;
513 UnicodeString
test1("Abcda"), test2("abcda");
514 UErrorCode key1Status
= U_ZERO_ERROR
, key2Status
= U_ZERO_ERROR
;
516 logln("Testing weird arguments");
517 // No string vs. empty string vs. completely-ignorable string:
518 // See ICU ticket #10495.
519 CollationKey sortkNone
;
521 sortkNone
.getByteArray(length
);
522 doAssert(!sortkNone
.isBogus() && length
== 0,
523 "Default-constructed collation key should be empty");
524 CollationKey sortkEmpty
;
525 col
->getCollationKey(NULL
, 0, sortkEmpty
, key1Status
);
526 // key gets reset here
// The key for the empty string is expected to be the three bytes 01 01 00.
527 const uint8_t* byteArrayEmpty
= sortkEmpty
.getByteArray(length
);
528 doAssert(sortkEmpty
.isBogus() == FALSE
&& length
== 3 &&
529 byteArrayEmpty
[0] == 1 && byteArrayEmpty
[1] == 1 && byteArrayEmpty
[2] == 0,
530 "Empty string should return a collation key with empty levels");
531 doAssert(sortkNone
.compareTo(sortkEmpty
) == Collator::LESS
,
532 "Expected no collation key < collation key for empty string");
533 doAssert(sortkEmpty
.compareTo(sortkNone
) == Collator::GREATER
,
534 "Expected collation key for empty string > no collation key");
536 CollationKey sortkIgnorable
;
537 // Most control codes and CGJ are completely ignorable.
538 // A string with only completely ignorables must compare equal to an empty string.
539 col
->getCollationKey(UnicodeString((UChar
)1).append((UChar
)0x34f), sortkIgnorable
, key1Status
);
540 sortkIgnorable
.getByteArray(length
);
541 doAssert(!sortkIgnorable
.isBogus() && length
== 3,
542 "Completely ignorable string should return a collation key with empty levels");
543 doAssert(sortkIgnorable
.compareTo(sortkEmpty
) == Collator::EQUAL
,
544 "Completely ignorable string should compare equal to empty string");
546 // bogus key returned here
547 key1Status
= U_ILLEGAL_ARGUMENT_ERROR
;
548 col
->getCollationKey(NULL
, 0, sortk1
, key1Status
);
549 doAssert(sortk1
.isBogus() && (sortk1
.getByteArray(length
), length
) == 0,
550 "Error code should return bogus collation key");
552 key1Status
= U_ZERO_ERROR
;
553 logln("Use tertiary comparison level testing ....");
555 col
->getCollationKey(test1
, sortk1
, key1Status
);
556 if (U_FAILURE(key1Status
)) {
557 errln("getCollationKey(Abcda) failed - %s", u_errorName(key1Status
));
// sortk2 is filled in as a side effect of the getCollationKey() call that
// supplies the argument to compareTo().
560 doAssert((sortk1
.compareTo(col
->getCollationKey(test2
, sortk2
, key2Status
)))
561 == Collator::GREATER
,
562 "Result should be \"Abcda\" >>> \"abcda\"");
// sortk3 is copy-constructed from sortk2.
// NOTE(review): the assertions below expect sortkNew to equal sortk1, so an
// assignment to sortkNew presumably happens on a line not shown here.
564 CollationKey
sortk3(sortk2
), sortkNew
;
567 doAssert((sortk1
!= sortk2
), "The sort keys should be different");
568 doAssert((sortk1
.hashCode() != sortk2
.hashCode()), "sort key hashCode() failed");
569 doAssert((sortk2
== sortk3
), "The sort keys should be the same");
570 doAssert((sortk1
== sortkNew
), "The sort keys assignment failed");
571 doAssert((sortk1
.hashCode() == sortkNew
.hashCode()), "sort key hashCode() failed");
572 doAssert((sortkNew
!= sortk3
), "The sort keys should be different");
573 doAssert(sortk1
.compareTo(sortk3
) == Collator::GREATER
, "Result should be \"Abcda\" >>> \"abcda\"");
574 doAssert(sortk2
.compareTo(sortk3
) == Collator::EQUAL
, "Result should be \"abcda\" == \"abcda\"");
575 doAssert(sortkEmpty
.compareTo(sortk1
) == Collator::LESS
, "Result should be (empty key) <<< \"Abcda\"");
576 doAssert(sortk1
.compareTo(sortkEmpty
) == Collator::GREATER
, "Result should be \"Abcda\" >>> (empty key)");
577 doAssert(sortkEmpty
.compareTo(sortkEmpty
) == Collator::EQUAL
, "Result should be (empty key) == (empty key)");
// Same comparisons again through the compareTo() overload that takes a
// UErrorCode and yields UCOL_* results.
578 doAssert(sortk1
.compareTo(sortk3
, success
) == UCOL_GREATER
, "Result should be \"Abcda\" >>> \"abcda\"");
579 doAssert(sortk2
.compareTo(sortk3
, success
) == UCOL_EQUAL
, "Result should be \"abcda\" == \"abcda\"");
580 doAssert(sortkEmpty
.compareTo(sortk1
, success
) == UCOL_LESS
, "Result should be (empty key) <<< \"Abcda\"");
581 doAssert(sortk1
.compareTo(sortkEmpty
, success
) == UCOL_GREATER
, "Result should be \"Abcda\" >>> (empty key)");
582 doAssert(sortkEmpty
.compareTo(sortkEmpty
, success
) == UCOL_EQUAL
, "Result should be (empty key) == (empty key)");
// Rebuild keys from the raw byte arrays and check that they match the keys
// the bytes were taken from.
584 int32_t cnt1
, cnt2
, cnt3
, cnt4
;
586 const uint8_t* byteArray1
= sortk1
.getByteArray(cnt1
);
587 const uint8_t* byteArray2
= sortk2
.getByteArray(cnt2
);
589 const uint8_t* byteArray3
= 0;
590 byteArray3
= sortk1
.getByteArray(cnt3
);
592 const uint8_t* byteArray4
= 0;
593 byteArray4
= sortk2
.getByteArray(cnt4
);
595 CollationKey
sortk4(byteArray1
, cnt1
), sortk5(byteArray2
, cnt2
);
596 CollationKey
sortk6(byteArray3
, cnt3
), sortk7(byteArray4
, cnt4
);
598 doAssert(sortk1
.compareTo(sortk4
) == Collator::EQUAL
, "CollationKey::toByteArray(sortk1) Failed.");
599 doAssert(sortk2
.compareTo(sortk5
) == Collator::EQUAL
, "CollationKey::toByteArray(sortk2) Failed.");
600 doAssert(sortk4
.compareTo(sortk5
) == Collator::GREATER
, "sortk4 >>> sortk5 Failed");
601 doAssert(sortk1
.compareTo(sortk6
) == Collator::EQUAL
, "CollationKey::getByteArray(sortk1) Failed.");
602 doAssert(sortk2
.compareTo(sortk7
) == Collator::EQUAL
, "CollationKey::getByteArray(sortk2) Failed.");
603 doAssert(sortk6
.compareTo(sortk7
) == Collator::GREATER
, "sortk6 >>> sortk7 Failed");
605 logln("Equality tests : ");
606 doAssert(sortk1
== sortk4
, "sortk1 == sortk4 Failed.");
607 doAssert(sortk2
== sortk5
, "sortk2 == sortk5 Failed.");
608 doAssert(sortk1
!= sortk5
, "sortk1 != sortk5 Failed.");
609 doAssert(sortk1
== sortk6
, "sortk1 == sortk6 Failed.");
610 doAssert(sortk2
== sortk7
, "sortk2 == sortk7 Failed.");
611 doAssert(sortk1
!= sortk7
, "sortk1 != sortk7 Failed.");
// NOTE(review): sortk3 was asserted equal to sortk2 earlier; the two checks
// below imply that sortk3 is reassigned on lines not shown in this excerpt.
617 doAssert(sortk1
== sortk3
, "sortk1 = sortk3 assignment Failed.");
618 doAssert(sortk2
!= sortk3
, "sortk2 != sortk3 Failed.");
619 logln("testing sortkey ends...");
// NOTE(review): the expected result of the SECONDARY-strength comparison is
// on a line not shown; the message indicates that equality is expected.
621 col
->setStrength(Collator::SECONDARY
);
622 doAssert(col
->getCollationKey(test1
, sortk1
, key1Status
).compareTo(
623 col
->getCollationKey(test2
, sortk2
, key2Status
))
625 "Result should be \"Abcda\" == \"abcda\"");
629 //----------------------------------------------------------------------------
630 // Tests the CollationElementIterator class.
631 // ctor, RuleBasedCollator::createCollationElementIterator(), operator==, operator!=
// TestElemIter: exercises CollationElementIterator objects created from an
// English collator.
// Visible steps, in order:
//   1. create iterators over testString1 (from a UnicodeString and from a
//      CharacterIterator) and over testString2,
//   2. check getOffset()/setOffset() and operator==/!=,
//   3. step the iterators with next() and compare primary, secondary and
//      tertiary orders of the returned collation elements,
//   4. reset() all iterators and repeat the stepping checks,
//   5. set `success` to a failure code and check how createInstance(),
//      previous() and setText() react to it.
// NOTE(review): the first logln() text says "sortkey" although this function
// tests iterators. The declaration of `col`, braces, early returns and
// cleanup are not visible in this excerpt.
634 CollationAPITest::TestElemIter(/* char* par */)
636 logln("testing sortkey begins...");
638 UErrorCode success
= U_ZERO_ERROR
;
639 col
= Collator::createInstance(Locale::getEnglish(), success
);
640 if (U_FAILURE(success
))
642 errcheckln(success
, "Default collation creation failed. - %s", u_errorName(success
));
646 UnicodeString
testString1("XFILE What subset of all possible test cases has the highest probability of detecting the most errors?");
647 UnicodeString
testString2("Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?");
648 logln("Constructors and comparison testing....");
649 CollationElementIterator
*iterator1
= ((RuleBasedCollator
*)col
)->createCollationElementIterator(testString1
);
// coliter iterates the same text as iterator1, supplied via a
// CharacterIterator.
651 CharacterIterator
*chariter
=new StringCharacterIterator(testString1
);
652 CollationElementIterator
*coliter
=((RuleBasedCollator
*)col
)->createCollationElementIterator(*chariter
);
655 CollationElementIterator
*iterator2
= ((RuleBasedCollator
*)col
)->createCollationElementIterator(testString1
);
656 CollationElementIterator
*iterator3
= ((RuleBasedCollator
*)col
)->createCollationElementIterator(testString2
);
// NOTE(review): the condition guarding the getOffset error below is not
// visible in this excerpt.
658 int32_t offset
= iterator1
->getOffset();
660 errln("Error in getOffset for collation element iterator\n");
663 iterator1
->setOffset(6, success
);
664 if (U_FAILURE(success
)) {
665 errln("Error in setOffset for collation element iterator\n");
// Rewind iterator1 so that it matches the untouched iterator2 again.
668 iterator1
->setOffset(0, success
);
669 int32_t order1
, order2
, order3
;
670 doAssert((*iterator1
== *iterator2
), "The two iterators should be the same");
671 doAssert((*iterator1
!= *iterator3
), "The two iterators should be different");
673 doAssert((*coliter
== *iterator1
), "The two iterators should be the same");
674 doAssert((*coliter
== *iterator2
), "The two iterators should be the same");
675 doAssert((*coliter
!= *iterator3
), "The two iterators should be different");
677 order1
= iterator1
->next(success
);
678 if (U_FAILURE(success
))
680 errln("Somehow ran out of memory stepping through the iterator.");
684 doAssert((*iterator1
!= *iterator2
), "The first iterator advance failed");
// order2 temporarily holds an offset here, not a collation element; it is
// overwritten by next() right after the assertion.
685 order2
= iterator2
->getOffset();
686 doAssert((order1
!= order2
), "The order result should not be the same");
687 order2
= iterator2
->next(success
);
688 if (U_FAILURE(success
))
690 errln("Somehow ran out of memory stepping through the iterator.");
694 doAssert((*iterator1
== *iterator2
), "The second iterator advance failed");
695 doAssert((order1
== order2
), "The order result should be the same");
696 order3
= iterator3
->next(success
);
697 if (U_FAILURE(success
))
699 errln("Somehow ran out of memory stepping through the iterator.");
// First elements of both test strings ('X' in each): all three levels match.
703 doAssert((CollationElementIterator::primaryOrder(order1
) ==
704 CollationElementIterator::primaryOrder(order3
)), "The primary orders should be the same");
705 doAssert((CollationElementIterator::secondaryOrder(order1
) ==
706 CollationElementIterator::secondaryOrder(order3
)), "The secondary orders should be the same");
707 doAssert((CollationElementIterator::tertiaryOrder(order1
) ==
708 CollationElementIterator::tertiaryOrder(order3
)), "The tertiary orders should be the same");
710 order1
= iterator1
->next(success
); order3
= iterator3
->next(success
);
711 if (U_FAILURE(success
))
713 errln("Somehow ran out of memory stepping through the iterator.");
// Second elements ('F' vs. 'f'): same primary order, different tertiary order.
717 doAssert((CollationElementIterator::primaryOrder(order1
) ==
718 CollationElementIterator::primaryOrder(order3
)), "The primary orders should be identical");
719 doAssert((CollationElementIterator::tertiaryOrder(order1
) !=
720 CollationElementIterator::tertiaryOrder(order3
)), "The tertiary orders should be different");
722 order1
= iterator1
->next(success
);
723 order3
= iterator3
->next(success
);
724 /* NO! Secondary orders of two CEs are not related, especially in the case of '_' vs 'I' */
// NOTE(review): the remark above rejects the secondary-order check that
// follows; confirm whether that check is disabled in the full file.
726 doAssert((CollationElementIterator::secondaryOrder(order1) !=
727 CollationElementIterator::secondaryOrder(order3)), "The secondary orders should not be the same");
729 doAssert((order1
!= CollationElementIterator::NULLORDER
), "Unexpected end of iterator reached");
// Second pass: the same checks after reset() on all three iterators.
731 iterator1
->reset(); iterator2
->reset(); iterator3
->reset();
732 order1
= iterator1
->next(success
);
733 if (U_FAILURE(success
))
735 errln("Somehow ran out of memory stepping through the iterator.");
739 doAssert((*iterator1
!= *iterator2
), "The first iterator advance failed");
741 order2
= iterator2
->next(success
);
742 if (U_FAILURE(success
))
744 errln("Somehow ran out of memory stepping through the iterator.");
748 doAssert((*iterator1
== *iterator2
), "The second iterator advance failed");
749 doAssert((order1
== order2
), "The order result should be the same");
751 order3
= iterator3
->next(success
);
752 if (U_FAILURE(success
))
754 errln("Somehow ran out of memory stepping through the iterator.");
758 doAssert((CollationElementIterator::primaryOrder(order1
) ==
759 CollationElementIterator::primaryOrder(order3
)), "The primary orders should be the same");
760 doAssert((CollationElementIterator::secondaryOrder(order1
) ==
761 CollationElementIterator::secondaryOrder(order3
)), "The secondary orders should be the same");
762 doAssert((CollationElementIterator::tertiaryOrder(order1
) ==
763 CollationElementIterator::tertiaryOrder(order3
)), "The tertiary orders should be the same");
765 order1
= iterator1
->next(success
); order2
= iterator2
->next(success
); order3
= iterator3
->next(success
);
766 if (U_FAILURE(success
))
768 errln("Somehow ran out of memory stepping through the iterator.");
772 doAssert((CollationElementIterator::primaryOrder(order1
) ==
773 CollationElementIterator::primaryOrder(order3
)), "The primary orders should be identical");
774 doAssert((CollationElementIterator::tertiaryOrder(order1
) !=
775 CollationElementIterator::tertiaryOrder(order3
)), "The tertiary orders should be different");
777 order1
= iterator1
->next(success
); order3
= iterator3
->next(success
);
778 if (U_FAILURE(success
))
780 errln("Somehow ran out of memory stepping through the iterator.");
784 /* NO! Secondary orders of two CEs are not related, especially in the case of '_' vs 'I' */
// NOTE(review): as in the first pass, confirm whether the secondary-order
// check that follows is disabled in the full file.
786 doAssert((CollationElementIterator::secondaryOrder(order1) !=
787 CollationElementIterator::secondaryOrder(order3)), "The secondary orders should not be the same");
789 doAssert((order1
!= CollationElementIterator::NULLORDER
), "Unexpected end of iterator reached");
790 doAssert((*iterator2
!= *iterator3
), "The iterators should be different");
// From here on `success` deliberately holds a failure code. The checks below
// expect createInstance() to return no object, previous() to yield NULLORDER
// and both setText() calls to leave `success` in a failure state.
794 success
=U_UNSUPPORTED_ERROR
;
795 Collator
*colerror
=NULL
;
796 colerror
=Collator::createInstance(Locale::getEnglish(), success
);
797 if (colerror
!= 0 || success
== U_ZERO_ERROR
){
798 errln("Error: createInstance(UErrorCode != U_ZERO_ERROR) should just return and not create an instance\n");
800 int32_t position
=coliter
->previous(success
);
801 if(position
!= CollationElementIterator::NULLORDER
){
802 errln((UnicodeString
)"Expected NULLORDER got" + position
);
805 coliter
->setText(*chariter
, success
);
806 if(!U_FAILURE(success
)){
807 errln("Expeceted error");
809 iterator1
->setText((UnicodeString
)"hello there", success
);
810 if(!U_FAILURE(success
)){
811 errln("Expeceted error");
823 logln("testing CollationElementIterator ends...");
826 // Test RuleBasedCollator ctor, dtor, operator==, operator!=, clone, copy, and getRules
// TestOperators: checks operator==, operator!=, clone() and getRules() on
// RuleBasedCollator objects built from two rule strings and on an English
// collator, then builds further collators through the constructor overloads
// that take a strength and/or a normalization setting.
// NOTE(review): braces, early returns, at least one assignment between the
// collators and all cleanup are not visible in this excerpt.
828 CollationAPITest::TestOperators(/* char* par */)
830 UErrorCode success
= U_ZERO_ERROR
;
831 UnicodeString
ruleset1("&9 < a, A < b, B < c, C; ch, cH, Ch, CH < d, D, e, E");
832 UnicodeString
ruleset2("&9 < a, A < b, B < c, C < d, D, e, E");
833 RuleBasedCollator
*col1
= new RuleBasedCollator(ruleset1
, success
);
834 if (U_FAILURE(success
)) {
835 errcheckln(success
, "RuleBasedCollator creation failed. - %s", u_errorName(success
));
838 success
= U_ZERO_ERROR
;
839 RuleBasedCollator
*col2
= new RuleBasedCollator(ruleset2
, success
);
840 if (U_FAILURE(success
)) {
841 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set.");
844 logln("The operator tests begin : ");
845 logln("testing operator==, operator!=, clone methods ...");
846 doAssert((*col1
!= *col2
), "The two different table collations compared equal");
// NOTE(review): the next assertion expects equality right after the
// inequality check above, so an assignment between col1 and col2 presumably
// happens on a line not shown here.
848 doAssert((*col1
== *col2
), "Collator objects not equal after assignment (operator=)");
850 success
= U_ZERO_ERROR
;
851 Collator
*col3
= Collator::createInstance(Locale::getEnglish(), success
);
852 if (U_FAILURE(success
)) {
853 errln("Default collation creation failed.");
856 doAssert((*col1
!= *col3
), "The two different table collations compared equal");
// Clones must equal their originals and differ from each other's originals.
857 Collator
* col4
= col1
->clone();
858 Collator
* col5
= col3
->clone();
859 doAssert((*col1
== *col4
), "Cloned collation objects not equal");
860 doAssert((*col3
!= *col4
), "Two different table collations compared equal");
861 doAssert((*col3
== *col5
), "Cloned collation objects not equal");
862 doAssert((*col4
!= *col5
), "Two cloned collations compared equal");
// Rebuild a collator from the English collator's rule string and compare the
// rule strings of both.
864 const UnicodeString
& defRules
= ((RuleBasedCollator
*)col3
)->getRules();
865 RuleBasedCollator
* col6
= new RuleBasedCollator(defRules
, success
);
866 if (U_FAILURE(success
)) {
867 errln("Creating default collation with rules failed.");
870 doAssert((((RuleBasedCollator
*)col3
)->getRules() == col6
->getRules()), "Default collator getRules failed");
872 success
= U_ZERO_ERROR
;
873 RuleBasedCollator
*col7
= new RuleBasedCollator(ruleset2
, Collator::TERTIARY
, success
);
874 if (U_FAILURE(success
)) {
875 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with tertiary strength.");
878 success
= U_ZERO_ERROR
;
879 RuleBasedCollator
*col8
= new RuleBasedCollator(ruleset2
, UCOL_OFF
, success
);
880 if (U_FAILURE(success
)) {
// NOTE(review): the message below mentions Normalizer::NO_OP while the
// constructor above is passed UCOL_OFF.
881 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with Normalizer::NO_OP.");
884 success
= U_ZERO_ERROR
;
885 RuleBasedCollator
*col9
= new RuleBasedCollator(ruleset2
, Collator::PRIMARY
, UCOL_ON
, success
);
886 if (U_FAILURE(success
)) {
// NOTE(review): the message below says "tertiary strength and
// Normalizer::NO_OP" while the constructor above is passed Collator::PRIMARY
// and UCOL_ON.
887 errln("The RuleBasedCollator constructor failed when building with the 2nd rule set with tertiary strength and Normalizer::NO_OP.");
890 // doAssert((*col7 == *col8), "The two equal table collations compared different");
891 doAssert((*col7
!= *col9
), "The two different table collations compared equal");
892 doAssert((*col8
!= *col9
), "The two different table collations compared equal");
894 logln("operator tests ended.");
906 // test clone and copy
// Clones the English collator (col2) and overwrites a rule-built RuleBasedCollator (col3)
// with it through operator=. Both copies must compare equal to the source, must still
// order "a" before "b", and must report the same rule string that was captured from the
// original collator.
// NOTE(review): collator-creation failures in this test are reported with logln(), whereas
// neighbouring tests use errln()/errcheckln() -- confirm that this is intentional.
908 CollationAPITest::TestDuplicate(/* char* par */)
910 UErrorCode status
= U_ZERO_ERROR
;
911 Collator
*col1
= Collator::createInstance(Locale::getEnglish(), status
);
912 if (U_FAILURE(status
)) {
913 logln("Default collator creation failed.");
916 Collator
*col2
= col1
->clone();
917 doAssert((*col1
== *col2
), "Cloned object is not equal to the orginal");
918 UnicodeString
ruleset("&9 < a, A < b, B < c, C < d, D, e, E");
919 RuleBasedCollator
*col3
= new RuleBasedCollator(ruleset
, status
);
920 if (U_FAILURE(status
)) {
921 logln("Collation tailoring failed.");
924 doAssert((*col1
!= *col3
), "Cloned object is equal to some dummy");
// Copy-assign the English collator over the tailored one; they must then compare equal.
925 *col3
= *((RuleBasedCollator
*)col1
);
926 doAssert((*col1
== *col3
), "Copied object is not equal to the orginal");
928 UCollationResult res
;
929 UnicodeString
first((UChar
)0x0061);
930 UnicodeString
second((UChar
)0x0062);
// Keep an independent copy of the original's rules for the comparisons further down.
931 UnicodeString
copiedEnglishRules(((RuleBasedCollator
*)col1
)->getRules());
// NOTE(review): numbering skips 932-934; the deletion that the next comment refers to is
// presumably on those missing lines -- confirm.
935 // Try using the cloned collators after deleting the original data
936 res
= col2
->compare(first
, second
, status
);
937 if(res
!= UCOL_LESS
) {
938 errln("a should be less then b after tailoring");
940 if (((RuleBasedCollator
*)col2
)->getRules() != copiedEnglishRules
) {
941 errln(UnicodeString("English rule difference. ")
942 + copiedEnglishRules
+ UnicodeString("\ngetRules=") + ((RuleBasedCollator
*)col2
)->getRules());
944 res
= col3
->compare(first
, second
, status
);
945 if(res
!= UCOL_LESS
) {
946 errln("a should be less then b after tailoring");
948 if (col3
->getRules() != copiedEnglishRules
) {
949 errln(UnicodeString("English rule difference. ")
950 + copiedEnglishRules
+ UnicodeString("\ngetRules=") + col3
->getRules());
// Compares "Abcda" with "abcda" using the English collator.
// First equals()/greater()/greaterOrEqual() are checked at the collator's initial strength
// (logged as tertiary), then at SECONDARY and at PRIMARY. After that every compare()
// overload used here (UnicodeString pair, UChar* + length pair, UnicodeString pair with a
// length limit; each with and without a UErrorCode) is checked at PRIMARY strength, where
// EQUAL is expected, and again after switching to UCOL_TERTIARY, where GREATER is expected.
// NOTE(review): the declaration of `col` is not visible in this listing (numbering skips
// 961) -- confirm its type and ownership against upstream.
958 CollationAPITest::TestCompare(/* char* par */)
960 logln("The compare tests begin : ");
962 UErrorCode success
= U_ZERO_ERROR
;
963 col
= Collator::createInstance(Locale::getEnglish(), success
);
964 if (U_FAILURE(success
)) {
965 errcheckln(success
, "Default collation creation failed. - %s", u_errorName(success
));
968 UnicodeString
test1("Abcda"), test2("abcda");
969 logln("Use tertiary comparison level testing ....");
971 doAssert((!col
->equals(test1
, test2
) ), "Result should be \"Abcda\" != \"abcda\"");
972 doAssert((col
->greater(test1
, test2
) ), "Result should be \"Abcda\" >>> \"abcda\"");
973 doAssert((col
->greaterOrEqual(test1
, test2
) ), "Result should be \"Abcda\" >>> \"abcda\"");
// From SECONDARY strength downward the two strings are expected to compare equal.
975 col
->setStrength(Collator::SECONDARY
);
976 logln("Use secondary comparison level testing ....");
978 doAssert((col
->equals(test1
, test2
) ), "Result should be \"Abcda\" == \"abcda\"");
979 doAssert((!col
->greater(test1
, test2
) ), "Result should be \"Abcda\" == \"abcda\"");
980 doAssert((col
->greaterOrEqual(test1
, test2
) ), "Result should be \"Abcda\" == \"abcda\"");
982 col
->setStrength(Collator::PRIMARY
);
983 logln("Use primary comparison level testing ....");
985 doAssert((col
->equals(test1
, test2
) ), "Result should be \"Abcda\" == \"abcda\"");
986 doAssert((!col
->greater(test1
, test2
) ), "Result should be \"Abcda\" == \"abcda\"");
987 doAssert((col
->greaterOrEqual(test1
, test2
) ), "Result should be \"Abcda\" == \"abcda\"");
989 // Test different APIs
990 const UChar
* t1
= test1
.getBuffer();
991 int32_t t1Len
= test1
.length();
992 const UChar
* t2
= test2
.getBuffer();
993 int32_t t2Len
= test2
.length();
// Strength is still PRIMARY here, so every overload must report equality.
995 doAssert((col
->compare(test1
, test2
) == Collator::EQUAL
), "Problem");
996 doAssert((col
->compare(test1
, test2
, success
) == UCOL_EQUAL
), "Problem");
997 doAssert((col
->compare(t1
, t1Len
, t2
, t2Len
) == Collator::EQUAL
), "Problem");
998 doAssert((col
->compare(t1
, t1Len
, t2
, t2Len
, success
) == UCOL_EQUAL
), "Problem");
999 doAssert((col
->compare(test1
, test2
, t1Len
) == Collator::EQUAL
), "Problem");
1000 doAssert((col
->compare(test1
, test2
, t1Len
, success
) == UCOL_EQUAL
), "Problem");
// Same overloads again at tertiary strength: "Abcda" must now sort after "abcda".
1002 col
->setAttribute(UCOL_STRENGTH
, UCOL_TERTIARY
, success
);
1003 doAssert((col
->compare(test1
, test2
) == Collator::GREATER
), "Problem");
1004 doAssert((col
->compare(test1
, test2
, success
) == UCOL_GREATER
), "Problem");
1005 doAssert((col
->compare(t1
, t1Len
, t2
, t2Len
) == Collator::GREATER
), "Problem");
1006 doAssert((col
->compare(t1
, t1Len
, t2
, t2Len
, success
) == UCOL_GREATER
), "Problem");
1007 doAssert((col
->compare(test1
, test2
, t1Len
) == Collator::GREATER
), "Problem");
1008 doAssert((col
->compare(test1
, test2
, t1Len
, success
) == UCOL_GREATER
), "Problem");
1012 logln("The compare tests end.");
// Enumerates the available collator locales three ways and cross-checks the counts:
//   1. Collator::getAvailableLocales(count1), which returns a Locale array;
//   2. Collator::getAvailableLocales(), a StringEnumeration walked with snext();
//   3. a clone() of that enumeration, reset and walked with next().
// count1 is compared with count2 after each enumeration walk and with localeEnum->count().
// NOTE(review): the statements that initialise and increment count2 are not visible in
// this listing (numbering skips e.g. 1041-1042, 1049, 1051-1052, 1062, 1064-1065) --
// confirm against upstream before relying on the comparisons below.
1017 CollationAPITest::TestGetAll(/* char* par */)
1019 int32_t count1
, count2
;
1020 UErrorCode status
= U_ZERO_ERROR
;
1022 logln("Trying Collator::getAvailableLocales(int&)");
1024 const Locale
* list
= Collator::getAvailableLocales(count1
);
1025 for (int32_t i
= 0; i
< count1
; ++i
) {
1026 UnicodeString dispName
;
1027 logln(UnicodeString("Locale name: ")
1028 + UnicodeString(list
[i
].getName())
1029 + UnicodeString(" , the display name is : ")
1030 + UnicodeString(list
[i
].getDisplayName(dispName
)));
// An empty or missing list is reported through dataerrln().
1033 if (count1
== 0 || list
== NULL
) {
1034 dataerrln("getAvailableLocales(int&) returned an empty list");
1037 logln("Trying Collator::getAvailableLocales()");
1038 StringEnumeration
* localeEnum
= Collator::getAvailableLocales();
1039 const UnicodeString
* locStr
;
1040 const char *locCStr
;
1043 if (localeEnum
== NULL
) {
1044 dataerrln("getAvailableLocales() returned NULL");
1048 while ((locStr
= localeEnum
->snext(status
)) != NULL
)
1050 logln(UnicodeString("Locale name is: ") + *locStr
);
1053 if (count1
!= count2
) {
1054 errln("getAvailableLocales(int&) returned %d and getAvailableLocales() returned %d", count1
, count2
);
1057 logln("Trying Collator::getAvailableLocales() clone");
// The clone is reset before iterating and is read through the char* accessor next().
1059 StringEnumeration
* localeEnum2
= localeEnum
->clone();
1060 localeEnum2
->reset(status
);
1061 while ((locCStr
= localeEnum2
->next(NULL
, status
)) != NULL
)
1063 logln(UnicodeString("Locale name is: ") + UnicodeString(locCStr
));
1066 if (count1
!= count2
) {
1067 errln("getAvailableLocales(3rd time) returned %d and getAvailableLocales(2nd time) returned %d", count1
, count2
);
1069 if (localeEnum
->count(status
) != count1
) {
1070 errln("localeEnum->count() returned %d and getAvailableLocales() returned %d", localeEnum
->count(status
), count1
);
// Checks CollationKey / sort-key generation with the English collator for the strings
// "Abcda" (test1), "abcda" (test2) and "abcda" (test3).
// Phase 1 (strength UCOL_IDENTICAL): key1 > key2, key2 < key1, key2 == key3, and the bytes
// returned by getSortKey() -- via the NUL-terminated UChar*, explicit-length (5) and
// UnicodeString call forms -- must match the bytes held by the CollationKeys.
// Phase 2 (strength SECONDARY): all three keys compare EQUAL, the new key2 shares its
// first keylength-1 bytes with the copy saved in phase 1, and the getSortKey() call forms
// are checked against the CollationKeys again.
// NOTE(review): the declarations of key1/key2/key3 are not visible in this listing
// (numbering skips 1108-1109, 1111-1112, 1114-1115) -- confirm against upstream.
1076 void CollationAPITest::TestSortKey()
1078 UErrorCode status
= U_ZERO_ERROR
;
// NOTE(review): the four lines of bare prose below look like the body of a block comment
// whose opening and closing delimiters sat on the skipped lines 1079 and 1084 -- confirm.
1080 this is supposed to open default date format, but later on it treats
1081 it like it is "en_US"
1082 - very bad if you try to run the tests on machine where default
1083 locale is NOT "en_US"
1085 Collator
*col
= Collator::createInstance(Locale::getEnglish(), status
);
1086 if (U_FAILURE(status
)) {
1087 errcheckln(status
, "ERROR: Default collation creation failed.: %s\n", u_errorName(status
));
1091 if (col
->getStrength() != Collator::TERTIARY
)
1093 errln("ERROR: default collation did not have UCOL_DEFAULT_STRENGTH !\n");
1096 /* Need to use identical strength */
1097 col
->setAttribute(UCOL_STRENGTH
, UCOL_IDENTICAL
, status
);
// test1 = "Abcda"; test2 and test3 are both "abcda" (NUL-terminated UTF-16).
1099 UChar test1
[6] = {0x41, 0x62, 0x63, 0x64, 0x61, 0},
1100 test2
[6] = {0x61, 0x62, 0x63, 0x64, 0x61, 0},
1101 test3
[6] = {0x61, 0x62, 0x63, 0x64, 0x61, 0};
1103 uint8_t sortkey1
[64];
1104 uint8_t sortkey2
[64];
1105 uint8_t sortkey3
[64];
// NOTE(review): the log text says "tertiary" although the strength was just set to
// UCOL_IDENTICAL above.
1107 logln("Use tertiary comparison level testing ....\n");
1110 col
->getCollationKey(test1
, u_strlen(test1
), key1
, status
);
1113 col
->getCollationKey(test2
, u_strlen(test2
), key2
, status
);
1116 col
->getCollationKey(test3
, u_strlen(test3
), key3
, status
);
1118 doAssert(key1
.compareTo(key2
) == Collator::GREATER
,
1119 "Result should be \"Abcda\" > \"abcda\"");
1120 doAssert(key2
.compareTo(key1
) == Collator::LESS
,
1121 "Result should be \"abcda\" < \"Abcda\"");
1122 doAssert(key2
.compareTo(key3
) == Collator::EQUAL
,
1123 "Result should be \"abcda\" == \"abcda\"");
1125 // Clone the key2 sortkey for later.
// The copy is needed because key2 is regenerated at SECONDARY strength further down.
1126 int32_t keylength
= 0;
1127 const uint8_t *key2primary_alias
= key2
.getByteArray(keylength
);
1128 LocalArray
<uint8_t> key2primary(new uint8_t[keylength
]);
1129 memcpy(key2primary
.getAlias(), key2primary_alias
, keylength
);
// getSortKey() on the NUL-terminated UChar arrays (no explicit length argument).
1131 col
->getSortKey(test1
, sortkey1
, 64);
1132 col
->getSortKey(test2
, sortkey2
, 64);
1133 col
->getSortKey(test3
, sortkey3
, 64);
1135 const uint8_t *tempkey
= key1
.getByteArray(keylength
);
1136 doAssert(memcmp(tempkey
, sortkey1
, keylength
) == 0,
1137 "Test1 string should have the same collation key and sort key");
1138 tempkey
= key2
.getByteArray(keylength
);
1139 doAssert(memcmp(tempkey
, sortkey2
, keylength
) == 0,
1140 "Test2 string should have the same collation key and sort key");
1141 tempkey
= key3
.getByteArray(keylength
);
1142 doAssert(memcmp(tempkey
, sortkey3
, keylength
) == 0,
1143 "Test3 string should have the same collation key and sort key");
// getSortKey() with an explicit source length of 5.
1145 col
->getSortKey(test1
, 5, sortkey1
, 64);
1146 col
->getSortKey(test2
, 5, sortkey2
, 64);
1147 col
->getSortKey(test3
, 5, sortkey3
, 64);
1149 tempkey
= key1
.getByteArray(keylength
);
1150 doAssert(memcmp(tempkey
, sortkey1
, keylength
) == 0,
1151 "Test1 string should have the same collation key and sort key");
1152 tempkey
= key2
.getByteArray(keylength
);
1153 doAssert(memcmp(tempkey
, sortkey2
, keylength
) == 0,
1154 "Test2 string should have the same collation key and sort key");
1155 tempkey
= key3
.getByteArray(keylength
);
1156 doAssert(memcmp(tempkey
, sortkey3
, keylength
) == 0,
1157 "Test3 string should have the same collation key and sort key");
// getSortKey() on UnicodeString copies of the same text.
1159 UnicodeString
strtest1(test1
);
1160 col
->getSortKey(strtest1
, sortkey1
, 64);
1161 UnicodeString
strtest2(test2
);
1162 col
->getSortKey(strtest2
, sortkey2
, 64);
1163 UnicodeString
strtest3(test3
);
1164 col
->getSortKey(strtest3
, sortkey3
, 64);
1166 tempkey
= key1
.getByteArray(keylength
);
1167 doAssert(memcmp(tempkey
, sortkey1
, keylength
) == 0,
1168 "Test1 string should have the same collation key and sort key");
1169 tempkey
= key2
.getByteArray(keylength
);
1170 doAssert(memcmp(tempkey
, sortkey2
, keylength
) == 0,
1171 "Test2 string should have the same collation key and sort key");
1172 tempkey
= key3
.getByteArray(keylength
);
1173 doAssert(memcmp(tempkey
, sortkey3
, keylength
) == 0,
1174 "Test3 string should have the same collation key and sort key");
// Phase 2: repeat at SECONDARY strength, where the case difference no longer separates
// test1 from test2.
1176 logln("Use secondary comparision level testing ...\n");
1177 col
->setStrength(Collator::SECONDARY
);
1179 col
->getCollationKey(test1
, u_strlen(test1
), key1
, status
);
1180 col
->getCollationKey(test2
, u_strlen(test2
), key2
, status
);
1181 col
->getCollationKey(test3
, u_strlen(test3
), key3
, status
);
1183 doAssert(key1
.compareTo(key2
) == Collator::EQUAL
,
1184 "Result should be \"Abcda\" == \"abcda\"");
1185 doAssert(key2
.compareTo(key3
) == Collator::EQUAL
,
1186 "Result should be \"abcda\" == \"abcda\"");
// The saved copy (named key2primary, although it was taken while the strength was
// UCOL_IDENTICAL) must agree with the secondary-strength key2 on everything except the
// final byte of the secondary-strength key.
1188 tempkey
= key2
.getByteArray(keylength
);
1189 doAssert(memcmp(tempkey
, key2primary
.getAlias(), keylength
- 1) == 0,
1190 "Binary format for 'abcda' sortkey different for secondary strength!");
1192 col
->getSortKey(test1
, sortkey1
, 64);
1193 col
->getSortKey(test2
, sortkey2
, 64);
1194 col
->getSortKey(test3
, sortkey3
, 64);
1196 tempkey
= key1
.getByteArray(keylength
);
1197 doAssert(memcmp(tempkey
, sortkey1
, keylength
) == 0,
1198 "Test1 string should have the same collation key and sort key");
1199 tempkey
= key2
.getByteArray(keylength
);
1200 doAssert(memcmp(tempkey
, sortkey2
, keylength
) == 0,
1201 "Test2 string should have the same collation key and sort key");
1202 tempkey
= key3
.getByteArray(keylength
);
1203 doAssert(memcmp(tempkey
, sortkey3
, keylength
) == 0,
1204 "Test3 string should have the same collation key and sort key");
1206 col
->getSortKey(test1
, 5, sortkey1
, 64);
1207 col
->getSortKey(test2
, 5, sortkey2
, 64);
1208 col
->getSortKey(test3
, 5, sortkey3
, 64);
1210 tempkey
= key1
.getByteArray(keylength
);
1211 doAssert(memcmp(tempkey
, sortkey1
, keylength
) == 0,
1212 "Test1 string should have the same collation key and sort key");
1213 tempkey
= key2
.getByteArray(keylength
);
1214 doAssert(memcmp(tempkey
, sortkey2
, keylength
) == 0,
1215 "Test2 string should have the same collation key and sort key");
1216 tempkey
= key3
.getByteArray(keylength
);
1217 doAssert(memcmp(tempkey
, sortkey3
, keylength
) == 0,
1218 "Test3 string should have the same collation key and sort key");
1220 col
->getSortKey(strtest1
, sortkey1
, 64);
1221 col
->getSortKey(strtest2
, sortkey2
, 64);
1222 col
->getSortKey(strtest3
, sortkey3
, 64);
1224 tempkey
= key1
.getByteArray(keylength
);
1225 doAssert(memcmp(tempkey
, sortkey1
, keylength
) == 0,
1226 "Test1 string should have the same collation key and sort key");
1227 tempkey
= key2
.getByteArray(keylength
);
1228 doAssert(memcmp(tempkey
, sortkey2
, keylength
) == 0,
1229 "Test2 string should have the same collation key and sort key");
1230 tempkey
= key3
.getByteArray(keylength
);
1231 doAssert(memcmp(tempkey
, sortkey3
, keylength
) == 0,
1232 "Test3 string should have the same collation key and sort key");
1234 logln("testing sortkey ends...");
// Exercises sort-key generation when the destination buffer is too small.
// Part 1: for every capacity below the full key length, getSortKey() must still return
// the full length, must write the correct prefix, and must leave the bytes at and after
// `capacity` untouched (they are pre-filled with the sentinel value 2).
// Part 2: for strings made of a growing run of 'a' followed by the two test characters,
// the bytes from getCollationKey() must equal those from getSortKey() into a large buffer.
1238 void CollationAPITest::TestSortKeyOverflow() {
1239 IcuTestErrorCode
errorCode(*this, "TestSortKeyOverflow()");
1240 LocalPointer
<Collator
> col(Collator::createInstance(Locale::getEnglish(), errorCode
));
1241 if (errorCode
.errDataIfFailureAndReset("Collator::createInstance(English) failed")) {
1244 col
->setAttribute(UCOL_STRENGTH
, UCOL_PRIMARY
, errorCode
);
1245 UChar i_and_phi
[] = { 0x438, 0x3c6 }; // Cyrillic small i & Greek small phi.
1246 // The sort key should be 6 bytes:
1247 // 2 bytes for the Cyrillic i, 1 byte for the primary-compression terminator,
1248 // 2 bytes for the Greek phi, and 1 byte for the NUL terminator.
// Reference key, generated with ample capacity.
1249 uint8_t sortKey
[12];
1250 int32_t length
= col
->getSortKey(i_and_phi
, 2, sortKey
, UPRV_LENGTHOF(sortKey
));
1251 uint8_t sortKey2
[12];
1252 for (int32_t capacity
= 0; capacity
< length
; ++capacity
) {
// Pre-fill with the sentinel 2 so that any write past `capacity` can be detected.
1253 uprv_memset(sortKey2
, 2, UPRV_LENGTHOF(sortKey2
));
1254 int32_t length2
= col
->getSortKey(i_and_phi
, 2, sortKey2
, capacity
);
1255 if (length2
!= length
|| 0 != uprv_memcmp(sortKey
, sortKey2
, capacity
)) {
1256 errln("getSortKey(i_and_phi, capacity=%d) failed to write proper prefix", capacity
);
1257 } else if (sortKey2
[capacity
] != 2 || sortKey2
[capacity
+ 1] != 2) {
1258 errln("getSortKey(i_and_phi, capacity=%d) wrote beyond capacity", capacity
);
1262 // Now try to break getCollationKey().
1263 // Internally, it always starts with a large stack buffer.
1264 // Since we cannot control the initial capacity, we throw an increasing number
1265 // of characters at it, with the problematic part at the end.
1266 const int32_t longCapacity
= 2000;
1267 // Each 'a' in the prefix should result in one primary sort key byte.
1268 // For i_and_phi we expect 6 bytes, then the NUL terminator.
1269 const int32_t maxPrefixLength
= longCapacity
- 6 - 1;
1270 LocalArray
<uint8_t> longSortKey(new uint8_t[longCapacity
]);
1271 UnicodeString
s(FALSE
, i_and_phi
, 2);
1272 for (int32_t prefixLength
= 0; prefixLength
< maxPrefixLength
; ++prefixLength
) {
// Both APIs must produce the same length and the same bytes for the current string.
1273 length
= col
->getSortKey(s
, longSortKey
.getAlias(), longCapacity
);
1274 CollationKey collKey
;
1275 col
->getCollationKey(s
, collKey
, errorCode
);
1276 int32_t collKeyLength
;
1277 const uint8_t *collSortKey
= collKey
.getByteArray(collKeyLength
);
1278 if (collKeyLength
!= length
|| 0 != uprv_memcmp(longSortKey
.getAlias(), collSortKey
, length
)) {
1279 errln("getCollationKey(prefix[%d]+i_and_phi) failed to write proper sort key", prefixLength
);
1282 // Insert an 'a' to match ++prefixLength.
1283 s
.insert(prefixLength
, (UChar
)0x61);
// Checks getMaxExpansion() on a RuleBasedCollator built from the tailoring
// "&a < ab < c/aba < d < z < ch". Collation elements are obtained by setting text on a
// CollationElementIterator and calling previous(); the reported maximum expansion for
// that element is then compared with an expected value (1, 2, 3, "not more than 3", or 6
// for the Jamo rule at the end).
// NOTE(review): large parts of this function are not visible in this listing. The
// declarations of ch, order, count and size, the statements that change ch and str between
// the individual checks, and the trailing arguments of several errln() calls fall on
// skipped line numbers (e.g. 1290, 1306-1311, 1327-1330, 1332-1335, 1342-1348) -- confirm
// against upstream before editing.
1287 void CollationAPITest::TestMaxExpansion()
1289 UErrorCode status
= U_ZERO_ERROR
;
1291 UChar32 unassigned
= 0xEFFFD;
1292 uint32_t sorder
= 0;
1293 uint32_t temporder
= 0;
1295 UnicodeString
rule("&a < ab < c/aba < d < z < ch");
1296 RuleBasedCollator
coll(rule
, status
);
1297 if(U_FAILURE(status
)) {
1298 errcheckln(status
, "Collator creation failed with error %s", u_errorName(status
));
1301 UnicodeString
str(ch
);
1302 CollationElementIterator
*iter
=
1303 coll
.createCollationElementIterator(str
);
// Sweep over code units below 0xFFFF: the reported maximum must not be smaller than
// `count`.
1305 while (ch
< 0xFFFF && U_SUCCESS(status
)) {
1312 str
.setCharAt(0, ch
);
1313 iter
->setText(str
, status
);
1314 order
= iter
->previous(status
);
1316 /* thai management */
1318 order
= iter
->previous(status
);
1320 while (U_SUCCESS(status
) && iter
->previous(status
) != CollationElementIterator::NULLORDER
) {
1324 size
= coll
.getMaxExpansion(order
);
1325 if (U_FAILURE(status
) || size
< count
) {
1326 errln("Failure at codepoint U+%04X, maximum expansion count %d < %d",
1331 /* testing for exact max expansion */
1336 str
.setCharAt(0, ch
);
1337 iter
->setText(str
, status
);
1338 order
= iter
->previous(status
);
1339 size
= coll
.getMaxExpansion(order
);
1340 if (U_FAILURE(status
) || size
!= 1) {
1341 errln("Failure at codepoint U+%04X, maximum expansion count %d < %d",
1349 iter
->setText(str
, status
);
1350 temporder
= iter
->previous(status
);
1351 size
= coll
.getMaxExpansion(temporder
);
1352 if (U_FAILURE(status
) || size
!= 3) {
1353 errln("Failure at codepoint U+%04X, CE %08x, maximum expansion count %d != %d",
1354 ch
, temporder
, size
, 3);
1359 iter
->setText(str
, status
);
1360 temporder
= iter
->previous(status
);
1361 size
= coll
.getMaxExpansion(temporder
);
1362 if (U_FAILURE(status
) || size
!= 1) {
1363 errln("Failure at codepoint U+%04X, CE %08x, maximum expansion count %d != %d",
1364 ch
, temporder
, size
, 1);
// Supplementary code point (0xEFFFD, held in `unassigned`): expected maximum is 2.
1367 str
.setTo(unassigned
);
1368 iter
->setText(str
, status
);
1369 sorder
= iter
->previous(status
);
1370 size
= coll
.getMaxExpansion(sorder
);
1371 if (U_FAILURE(status
) || size
!= 2) {
1372 errln("Failure at supplementary codepoints, maximum expansion count %d < %d",
1379 iter
->setText(str
, status
);
1380 temporder
= iter
->previous(status
);
1381 size
= coll
.getMaxExpansion(temporder
);
1382 if (U_FAILURE(status
) || size
> 3) {
1383 errln("Failure at codepoint U+%04X, maximum expansion count %d > %d",
1389 /* testing special jamo &a<\u1160 */
// NOTE(review): the escaped rule below decodes to "&q<\u1165/qqqq", which differs from the
// rule quoted in the comment above -- confirm which one is intended.
1390 rule
= CharsToUnicodeString("\\u0026\\u0071\\u003c\\u1165\\u002f\\u0071\\u0071\\u0071\\u0071");
1392 RuleBasedCollator
jamocoll(rule
, status
);
// Here the maximum is queried through the iterator rather than through the collator.
1393 iter
= jamocoll
.createCollationElementIterator(str
);
1394 temporder
= iter
->previous(status
);
1395 size
= iter
->getMaxExpansion(temporder
);
1396 if (U_FAILURE(status
) || size
!= 6) {
1397 errln("Failure at codepoint U+%04X, maximum expansion count %d > %d",
// Creates an "en_US" collator and, for Canada-French and for Simplified Chinese, compares
// the string produced by the collator's getDisplayName(locale, result) with the string
// produced by Locale::getDisplayName(name) for the same locale. Any difference is an error.
// NOTE(review): the declaration of `name` is not visible in this listing (numbering skips
// 1412). The failure messages mention en_US and zh_SG, which do not match the locales
// actually queried (Canada-French, Simplified Chinese) -- confirm the intended wording.
1404 void CollationAPITest::TestDisplayName()
1406 UErrorCode error
= U_ZERO_ERROR
;
1407 Collator
*coll
= Collator::createInstance("en_US", error
);
1408 if (U_FAILURE(error
)) {
1409 errcheckln(error
, "Failure creating english collator - %s", u_errorName(error
));
1413 UnicodeString result
;
1414 coll
->getDisplayName(Locale::getCanadaFrench(), result
);
1415 Locale::getCanadaFrench().getDisplayName(name
);
// compare() returns non-zero when the two strings differ.
1416 if (result
.compare(name
)) {
1417 errln("Failure getting the correct name for locale en_US");
1420 coll
->getDisplayName(Locale::getSimplifiedChinese(), result
);
1421 Locale::getSimplifiedChinese().getDisplayName(name
);
1422 if (result
.compare(name
)) {
1423 errln("Failure getting the correct name for locale zh_SG");
// Round-trips every collation attribute on the default collator: each value is written
// with setAttribute() and read back with getAttribute(), and a mismatch is reported.
// Covered here: UCOL_FRENCH_COLLATION (off/on), UCOL_ALTERNATE_HANDLING
// (shifted/non-ignorable), UCOL_CASE_FIRST (lower/upper first), UCOL_CASE_LEVEL (on/off),
// UCOL_NORMALIZATION_MODE (on/off) and UCOL_STRENGTH (primary through identical).
// NOTE(review): every `if` condition below ends with `||`; its second operand sits on a
// line that is not visible in this listing (1440, 1446, 1452, ...). It is presumably a
// check of `error`, but confirm against upstream.
1428 void CollationAPITest::TestAttribute()
1430 UErrorCode error
= U_ZERO_ERROR
;
1431 Collator
*coll
= Collator::createInstance(error
);
1433 if (U_FAILURE(error
)) {
1434 errcheckln(error
, "Creation of default collator failed - %s", u_errorName(error
));
1438 coll
->setAttribute(UCOL_FRENCH_COLLATION
, UCOL_OFF
, error
);
1439 if (coll
->getAttribute(UCOL_FRENCH_COLLATION
, error
) != UCOL_OFF
||
1441 errln("Setting and retrieving of the french collation failed");
1444 coll
->setAttribute(UCOL_FRENCH_COLLATION
, UCOL_ON
, error
);
1445 if (coll
->getAttribute(UCOL_FRENCH_COLLATION
, error
) != UCOL_ON
||
1447 errln("Setting and retrieving of the french collation failed");
1450 coll
->setAttribute(UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, error
);
1451 if (coll
->getAttribute(UCOL_ALTERNATE_HANDLING
, error
) != UCOL_SHIFTED
||
1453 errln("Setting and retrieving of the alternate handling failed");
1456 coll
->setAttribute(UCOL_ALTERNATE_HANDLING
, UCOL_NON_IGNORABLE
, error
);
1457 if (coll
->getAttribute(UCOL_ALTERNATE_HANDLING
, error
) != UCOL_NON_IGNORABLE
||
1459 errln("Setting and retrieving of the alternate handling failed");
1462 coll
->setAttribute(UCOL_CASE_FIRST
, UCOL_LOWER_FIRST
, error
);
1463 if (coll
->getAttribute(UCOL_CASE_FIRST
, error
) != UCOL_LOWER_FIRST
||
1465 errln("Setting and retrieving of the case first attribute failed");
1468 coll
->setAttribute(UCOL_CASE_FIRST
, UCOL_UPPER_FIRST
, error
);
1469 if (coll
->getAttribute(UCOL_CASE_FIRST
, error
) != UCOL_UPPER_FIRST
||
1471 errln("Setting and retrieving of the case first attribute failed");
1474 coll
->setAttribute(UCOL_CASE_LEVEL
, UCOL_ON
, error
);
1475 if (coll
->getAttribute(UCOL_CASE_LEVEL
, error
) != UCOL_ON
||
1477 errln("Setting and retrieving of the case level attribute failed");
1480 coll
->setAttribute(UCOL_CASE_LEVEL
, UCOL_OFF
, error
);
1481 if (coll
->getAttribute(UCOL_CASE_LEVEL
, error
) != UCOL_OFF
||
1483 errln("Setting and retrieving of the case level attribute failed");
1486 coll
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, error
);
1487 if (coll
->getAttribute(UCOL_NORMALIZATION_MODE
, error
) != UCOL_ON
||
1489 errln("Setting and retrieving of the normalization on/off attribute failed");
1492 coll
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_OFF
, error
);
1493 if (coll
->getAttribute(UCOL_NORMALIZATION_MODE
, error
) != UCOL_OFF
||
1495 errln("Setting and retrieving of the normalization on/off attribute failed");
// Strength is stepped through every level from PRIMARY up to IDENTICAL.
1498 coll
->setAttribute(UCOL_STRENGTH
, UCOL_PRIMARY
, error
);
1499 if (coll
->getAttribute(UCOL_STRENGTH
, error
) != UCOL_PRIMARY
||
1501 errln("Setting and retrieving of the collation strength failed");
1504 coll
->setAttribute(UCOL_STRENGTH
, UCOL_SECONDARY
, error
);
1505 if (coll
->getAttribute(UCOL_STRENGTH
, error
) != UCOL_SECONDARY
||
1507 errln("Setting and retrieving of the collation strength failed");
1510 coll
->setAttribute(UCOL_STRENGTH
, UCOL_TERTIARY
, error
);
1511 if (coll
->getAttribute(UCOL_STRENGTH
, error
) != UCOL_TERTIARY
||
1513 errln("Setting and retrieving of the collation strength failed");
1516 coll
->setAttribute(UCOL_STRENGTH
, UCOL_QUATERNARY
, error
);
1517 if (coll
->getAttribute(UCOL_STRENGTH
, error
) != UCOL_QUATERNARY
||
1519 errln("Setting and retrieving of the collation strength failed");
1522 coll
->setAttribute(UCOL_STRENGTH
, UCOL_IDENTICAL
, error
);
1523 if (coll
->getAttribute(UCOL_STRENGTH
, error
) != UCOL_IDENTICAL
||
1525 errln("Setting and retrieving of the collation strength failed");
// Exercises the setVariableTop()/getVariableTop() API on the root collator:
//   - setVariableTop(vt, 1, status) is expected to fail with U_ILLEGAL_ARGUMENT_ERROR;
//   - setting it from the dollar sign must succeed and match getVariableTop();
//   - setting it from the Euro sign must give the same value as the dollar sign;
//   - with UCOL_ALTERNATE_HANDLING = UCOL_SHIFTED, "$" and the Euro sign compare equal to
//     the empty string while "$" still sorts before "0";
//   - after restoring the old value, setting from UnicodeString(vt, 1) must reproduce the
//     value obtained from the UChar* call form.
// NOTE(review): the statement that stores a letter into vt[0] before the first
// setVariableTop() call is not visible in this listing (numbering skips 1549-1550) --
// confirm against upstream.
1531 void CollationAPITest::TestVariableTopSetting() {
1532 UErrorCode status
= U_ZERO_ERROR
;
1534 UChar vt
[256] = { 0 };
1536 // Use the root collator, not the default collator.
1537 // This test fails with en_US_POSIX which tailors the dollar sign after 'A'.
1538 Collator
*coll
= Collator::createInstance(Locale::getRoot(), status
);
1539 if(U_FAILURE(status
)) {
1541 errcheckln(status
, "Collator creation failed with error %s", u_errorName(status
));
// Remember the initial value so that it can be restored near the end of the test.
1545 uint32_t oldVarTop
= coll
->getVariableTop(status
);
1547 // ICU 53+: The character must be in a supported reordering group,
1548 // and the variable top is pinned to the end of that group.
1551 (void)coll
->setVariableTop(vt
, 1, status
);
1552 if(status
!= U_ILLEGAL_ARGUMENT_ERROR
) {
1553 errln("setVariableTop(letter) did not detect illegal argument - %s", u_errorName(status
));
1556 status
= U_ZERO_ERROR
;
1557 vt
[0] = 0x24; // dollar sign (currency symbol)
1558 uint32_t newVarTop
= coll
->setVariableTop(vt
, 1, status
);
1559 if(U_FAILURE(status
)) {
1560 errln("setVariableTop(dollar sign) failed: %s", u_errorName(status
));
1563 if(newVarTop
!= coll
->getVariableTop(status
)) {
1564 errln("setVariableTop(dollar sign) != following getVariableTop()");
1567 UnicodeString
dollar((UChar
)0x24);
1568 UnicodeString
euro((UChar
)0x20AC);
1569 uint32_t newVarTop2
= coll
->setVariableTop(euro
, status
);
1570 assertEquals("setVariableTop(Euro sign) == following getVariableTop()",
1571 (int64_t)newVarTop2
, (int64_t)coll
->getVariableTop(status
));
1572 assertEquals("setVariableTop(Euro sign) == setVariableTop(dollar sign) (should pin to top of currency group)",
1573 (int64_t)newVarTop2
, (int64_t)newVarTop
);
// With shifted alternate handling, both currency symbols are expected to compare equal to
// the empty string, while the digit zero is not affected.
1575 coll
->setAttribute(UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, status
);
1576 assertEquals("empty==dollar", (int32_t)UCOL_EQUAL
, (int32_t)coll
->compare(UnicodeString(), dollar
));
1577 assertEquals("empty==euro", (int32_t)UCOL_EQUAL
, (int32_t)coll
->compare(UnicodeString(), euro
));
1578 assertEquals("dollar<zero", (int32_t)UCOL_LESS
, (int32_t)coll
->compare(dollar
, UnicodeString((UChar
)0x30)));
// Restore the saved value, then set it again from a UnicodeString built from vt.
1580 coll
->setVariableTop(oldVarTop
, status
);
1582 uint32_t newerVarTop
= coll
->setVariableTop(UnicodeString(vt
, 1), status
);
1584 if(newVarTop
!= newerVarTop
) {
1585 errln("Didn't set vartop properly from UnicodeString!\n");
// Exercises setMaxVariable()/getMaxVariable() on the root collator:
//   - UCOL_REORDER_CODE_OTHERS is expected to be rejected with U_ILLEGAL_ARGUMENT_ERROR;
//   - UCOL_REORDER_CODE_CURRENCY must be accepted and reported back by getMaxVariable();
//   - with UCOL_ALTERNATE_HANDLING = UCOL_SHIFTED, "$" and the Euro sign then compare equal
//     to the empty string while "$" still sorts before "0".
1592 void CollationAPITest::TestMaxVariable() {
1593 UErrorCode errorCode
= U_ZERO_ERROR
;
1594 LocalPointer
<Collator
> coll(Collator::createInstance(Locale::getRoot(), errorCode
));
1595 if(U_FAILURE(errorCode
)) {
1596 errcheckln(errorCode
, "Collator creation failed with error %s", u_errorName(errorCode
));
1600 (void)coll
->setMaxVariable(UCOL_REORDER_CODE_OTHERS
, errorCode
);
1601 if(errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
) {
1602 errln("setMaxVariable(others) did not detect illegal argument - %s", u_errorName(errorCode
));
// Clear the expected error before making the valid call.
1605 errorCode
= U_ZERO_ERROR
;
1606 (void)coll
->setMaxVariable(UCOL_REORDER_CODE_CURRENCY
, errorCode
);
1608 if(UCOL_REORDER_CODE_CURRENCY
!= coll
->getMaxVariable()) {
1609 errln("setMaxVariable(currency) != following getMaxVariable()");
1612 coll
->setAttribute(UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, errorCode
);
1613 assertEquals("empty==dollar", (int32_t)UCOL_EQUAL
, (int32_t)coll
->compare(UnicodeString(), UnicodeString((UChar
)0x24)));
1614 assertEquals("empty==euro", (int32_t)UCOL_EQUAL
, (int32_t)coll
->compare(UnicodeString(), UnicodeString((UChar
)0x20AC)));
1615 assertEquals("dollar<zero", (int32_t)UCOL_LESS
, (int32_t)coll
->compare(UnicodeString((UChar
)0x24), UnicodeString((UChar
)0x30)));
1618 void CollationAPITest::TestGetLocale() {
1619 UErrorCode status
= U_ZERO_ERROR
;
1620 const char *rules
= "&a<x<y<z";
1621 UChar rlz
[256] = {0};
1623 Collator
*coll
= Collator::createInstance("root", status
);
1624 if(U_FAILURE(status
)) {
1625 dataerrln("Failed to open collator for \"root\" with %s", u_errorName(status
));
1628 Locale locale
= coll
->getLocale(ULOC_ACTUAL_LOCALE
, status
);
1629 if(locale
!= Locale::getRoot()) {
1630 errln("Collator::createInstance(\"root\").getLocale(actual) != Locale::getRoot(); "
1631 "getLocale().getName() = \"%s\"",
1636 coll
= Collator::createInstance("", status
);
1637 if(U_FAILURE(status
)) {
1638 dataerrln("Failed to open collator for \"\" with %s", u_errorName(status
));
1641 locale
= coll
->getLocale(ULOC_ACTUAL_LOCALE
, status
);
1642 if(locale
!= Locale::getRoot()) {
1643 errln("Collator::createInstance(\"\").getLocale(actual) != Locale::getRoot(); "
1644 "getLocale().getName() = \"%s\"",
1651 static const struct {
1652 const char* requestedLocale
;
1653 const char* validLocale
;
1654 const char* actualLocale
;
1656 // Note: Locale::getRoot().getName() == "" not "root".
1657 { "de_DE", "de", "" },
1658 { "sr_RS", "sr_Cyrl_RS", "sr" },
1659 { "en_US_CALIFORNIA", "en_US", "" },
1660 { "fr_FR_NONEXISTANT", "fr", "" },
1661 // pinyin is the default, therefore suppressed.
1662 { "zh_CN", "zh_Hans_CN", "zh" },
1663 // zh_Hant has default=stroke but the data is in zh.
1664 { "zh_TW", "zh_Hant_TW", "zh@collation=stroke" },
1665 { "zh_TW@collation=pinyin", "zh_Hant_TW@collation=pinyin", "zh" },
1666 { "zh_CN@collation=stroke", "zh_Hans_CN@collation=stroke", "zh@collation=stroke" },
1667 // yue/yue_Hant aliased to zh_Hant, yue_Hans aliased to zh_Hans.
1668 // requested, valid, actual
1669 { "yue", "zh_Hant", "zh@collation=stroke" },
1670 { "yue_HK", "zh_Hant", "zh@collation=stroke" },
1671 { "yue_Hant", "zh_Hant", "zh@collation=stroke" },
1672 { "yue_Hant_HK", "zh_Hant", "zh@collation=stroke" },
1673 { "yue@collation=pinyin", "zh_Hant@collation=pinyin", "zh" },
1674 { "yue_HK@collation=pinyin", "zh_Hant@collation=pinyin", "zh" },
1675 { "yue_CN", "zh_Hans", "zh" },
1676 { "yue_Hans", "zh_Hans", "zh" },
1677 { "yue_Hans_CN", "zh_Hans", "zh" },
1678 { "yue_Hans@collation=stroke", "zh_Hans@collation=stroke", "zh@collation=stroke" },
1679 { "yue_CN@collation=stroke", "zh_Hans@collation=stroke", "zh@collation=stroke" }
1682 u_unescape(rules
, rlz
, 256);
1684 /* test opening collators for different locales */
1685 for(i
= 0; i
<UPRV_LENGTHOF(testStruct
); i
++) {
1686 status
= U_ZERO_ERROR
;
1687 coll
= Collator::createInstance(testStruct
[i
].requestedLocale
, status
);
1688 if(U_FAILURE(status
)) {
1689 errln("Failed to open collator for %s with %s", testStruct
[i
].requestedLocale
, u_errorName(status
));
1693 // The requested locale may be the same as the valid locale,
1694 // or may not be supported at all. See ticket #10477.
1695 locale
= coll
->getLocale(ULOC_REQUESTED_LOCALE
, status
);
1696 if(U_SUCCESS(status
) &&
1697 locale
!= testStruct
[i
].requestedLocale
&& locale
!= testStruct
[i
].validLocale
) {
1698 errln("[Coll %s]: Error in requested locale, expected %s or %s, got %s",
1699 testStruct
[i
].requestedLocale
,
1700 testStruct
[i
].requestedLocale
, testStruct
[i
].validLocale
, locale
.getName());
1702 status
= U_ZERO_ERROR
;
1703 locale
= coll
->getLocale(ULOC_VALID_LOCALE
, status
);
1704 if(locale
!= testStruct
[i
].validLocale
) {
1705 errln("[Coll %s]: Error in valid locale, expected %s, got %s",
1706 testStruct
[i
].requestedLocale
, testStruct
[i
].validLocale
, locale
.getName());
1708 locale
= coll
->getLocale(ULOC_ACTUAL_LOCALE
, status
);
1709 if(locale
!= testStruct
[i
].actualLocale
) {
1710 errln("[Coll %s]: Error in actual locale, expected %s, got %s",
1711 testStruct
[i
].requestedLocale
, testStruct
[i
].actualLocale
, locale
.getName());
1713 // If we open a collator for the actual locale, we should get an equivalent one again.
1714 LocalPointer
<Collator
> coll2(Collator::createInstance(locale
, status
));
1715 if(U_FAILURE(status
)) {
1716 errln("Failed to open collator for actual locale \"%s\" with %s",
1717 locale
.getName(), u_errorName(status
));
1719 Locale actual2
= coll2
->getLocale(ULOC_ACTUAL_LOCALE
, status
);
1720 if(actual2
!= locale
) {
1721 errln("[Coll actual \"%s\"]: Error in actual locale, got different one: \"%s\"",
1722 locale
.getName(), actual2
.getName());
1724 if(*coll2
!= *coll
) {
1725 errln("[Coll actual \"%s\"]: Got different collator than before", locale
.getName());
1731 /* completely non-existent locale for collator should get a root collator */
1733 LocalPointer
<Collator
> coll(Collator::createInstance("blahaha", status
));
1734 if(U_FAILURE(status
)) {
1735 errln("Failed to open collator with %s", u_errorName(status
));
1738 Locale valid
= coll
->getLocale(ULOC_VALID_LOCALE
, status
);
1739 const char *name
= valid
.getName();
1740 if(*name
!= 0 && strcmp(name
, "root") != 0) {
1741 errln("Valid locale for nonexisting-locale collator is \"%s\" not root", name
);
1743 Locale actual
= coll
->getLocale(ULOC_ACTUAL_LOCALE
, status
);
1744 name
= actual
.getName();
1745 if(*name
!= 0 && strcmp(name
, "root") != 0) {
1746 errln("Actual locale for nonexisting-locale collator is \"%s\" not root", name
);
1752 /* collator instantiated from rules should have all three locales NULL */
1753 coll
= new RuleBasedCollator(rlz
, status
);
1754 locale
= coll
->getLocale(ULOC_REQUESTED_LOCALE
, status
);
1755 if(U_SUCCESS(status
) && !locale
.isBogus()) {
1756 errln("For collator instantiated from rules, requested locale %s is not bogus", locale
.getName());
1758 status
= U_ZERO_ERROR
;
1759 locale
= coll
->getLocale(ULOC_VALID_LOCALE
, status
);
1760 if(!locale
.isBogus()) {
1761 errln("For collator instantiated from rules, valid locale %s is not bogus", locale
.getName());
1763 locale
= coll
->getLocale(ULOC_ACTUAL_LOCALE
, status
);
1764 if(!locale
.isBogus()) {
1765 errln("For collator instantiated from rules, actual locale %s is not bogus", locale
.getName());
// Escaped source text of one test entry; TestBounds passes it to u_unescape()
// before computing the entry's sort key, and prints it in failure messages.
1771 const char *original
;
1778 static int U_CALLCONV
1779 compare_teststruct(const void *string1
, const void *string2
) {
1780 return(strcmp((const char *)((struct teststruct
*)string1
)->key
, (const char *)((struct teststruct
*)string2
)->key
));
// Exercises sort-key bounds with a Collator opened for Locale("sh").
// Part 1: computes a sort key for every entry of tests[], sorts the entries by key,
//         then for each pair i < j checks that the lower bound of key i and the
//         upper bound of key j (Collator::getBound) bracket every key in [i, j].
// Part 2: for each string in test[], derives UCOL_BOUND_LOWER / UCOL_BOUND_UPPER_LONG
//         bounds with ucol_getBound() and compares them against the sort keys of the
//         strings that follow it in the array.
// Violations are reported through errln().
1785 void CollationAPITest::TestBounds(void) {
1786 UErrorCode status
= U_ZERO_ERROR
;
1788 Collator
*coll
= Collator::createInstance(Locale("sh"), status
);
1789 if(U_FAILURE(status
)) {
1791 errcheckln(status
, "Collator creation failed with %s", u_errorName(status
));
1795 uint8_t sortkey
[512], lower
[512], upper
[512];
// Escaped strings used by the ucol_getBound() checks in part 2.
1798 static const char * const test
[] = {
1802 "j\\u00F6hn sm\\u00EFth",
1803 "J\\u00F6hn Sm\\u00EFth",
1804 "J\\u00D6HN SM\\u00CFTH",
// Escaped names for part 1; each entry's key starts zeroed and is filled in below.
1809 struct teststruct tests
[] = {
1810 {"\\u010CAKI MIHALJ", {0}},
1811 {"\\u010CAKI MIHALJ", {0}},
1812 {"\\u010CAKI PIRO\\u0160KA", {0}},
1813 {"\\u010CABAI ANDRIJA", {0}},
1814 {"\\u010CABAI LAJO\\u0160", {0}},
1815 {"\\u010CABAI MARIJA", {0}},
1816 {"\\u010CABAI STEVAN", {0}},
1817 {"\\u010CABAI STEVAN", {0}},
1818 {"\\u010CABARKAPA BRANKO", {0}},
1819 {"\\u010CABARKAPA MILENKO", {0}},
1820 {"\\u010CABARKAPA MIROSLAV", {0}},
1821 {"\\u010CABARKAPA SIMO", {0}},
1822 {"\\u010CABARKAPA STANKO", {0}},
1823 {"\\u010CABARKAPA TAMARA", {0}},
1824 {"\\u010CABARKAPA TOMA\\u0160", {0}},
1825 {"\\u010CABDARI\\u0106 NIKOLA", {0}},
1826 {"\\u010CABDARI\\u0106 ZORICA", {0}},
1827 {"\\u010CABI NANDOR", {0}},
1828 {"\\u010CABOVI\\u0106 MILAN", {0}},
1829 {"\\u010CABRADI AGNEZIJA", {0}},
1830 {"\\u010CABRADI IVAN", {0}},
1831 {"\\u010CABRADI JELENA", {0}},
1832 {"\\u010CABRADI LJUBICA", {0}},
1833 {"\\u010CABRADI STEVAN", {0}},
1834 {"\\u010CABRDA MARTIN", {0}},
1835 {"\\u010CABRILO BOGDAN", {0}},
1836 {"\\u010CABRILO BRANISLAV", {0}},
1837 {"\\u010CABRILO LAZAR", {0}},
1838 {"\\u010CABRILO LJUBICA", {0}},
1839 {"\\u010CABRILO SPASOJA", {0}},
1840 {"\\u010CADE\\u0160 ZDENKA", {0}},
1841 {"\\u010CADESKI BLAGOJE", {0}},
1842 {"\\u010CADOVSKI VLADIMIR", {0}},
1843 {"\\u010CAGLJEVI\\u0106 TOMA", {0}},
1844 {"\\u010CAGOROVI\\u0106 VLADIMIR", {0}},
1845 {"\\u010CAJA VANKA", {0}},
1846 {"\\u010CAJI\\u0106 BOGOLJUB", {0}},
1847 {"\\u010CAJI\\u0106 BORISLAV", {0}},
1848 {"\\u010CAJI\\u0106 RADOSLAV", {0}},
1849 {"\\u010CAK\\u0160IRAN MILADIN", {0}},
1850 {"\\u010CAKAN EUGEN", {0}},
1851 {"\\u010CAKAN EVGENIJE", {0}},
1852 {"\\u010CAKAN IVAN", {0}},
1853 {"\\u010CAKAN JULIJAN", {0}},
1854 {"\\u010CAKAN MIHAJLO", {0}},
1855 {"\\u010CAKAN STEVAN", {0}},
1856 {"\\u010CAKAN VLADIMIR", {0}},
1857 {"\\u010CAKAN VLADIMIR", {0}},
1858 {"\\u010CAKAN VLADIMIR", {0}},
1859 {"\\u010CAKARA ANA", {0}},
1860 {"\\u010CAKAREVI\\u0106 MOMIR", {0}},
1861 {"\\u010CAKAREVI\\u0106 NEDELJKO", {0}},
1862 {"\\u010CAKI \\u0160ANDOR", {0}},
1863 {"\\u010CAKI AMALIJA", {0}},
1864 {"\\u010CAKI ANDRA\\u0160", {0}},
1865 {"\\u010CAKI LADISLAV", {0}},
1866 {"\\u010CAKI LAJO\\u0160", {0}},
1867 {"\\u010CAKI LASLO", {0}}
1872 int32_t i
= 0, j
= 0, k
= 0, buffSize
= 0, skSize
= 0, lowerSize
= 0, upperSize
= 0;
1873 int32_t arraySize
= UPRV_LENGTHOF(tests
);
1875 (void)lowerSize
; // Suppress unused variable warnings.
// Part 1, step 1: unescape each name and store its sort key in the entry itself.
// NOTE(review): 512 is passed as the capacity of tests[i].key; confirm that the
// key member is at least that large.
1878 for(i
= 0; i
<arraySize
; i
++) {
1879 buffSize
= u_unescape(tests
[i
].original
, buffer
, 512);
1880 skSize
= coll
->getSortKey(buffer
, buffSize
, tests
[i
].key
, 512);
// Step 2: order the entries by their sort-key bytes.
1883 qsort(tests
, arraySize
, sizeof(struct teststruct
), compare_teststruct
);
// Step 3: for every pair i < j, every key k in [i, j] must satisfy
// lower(i) <= key(k) < upper(j) under strcmp().
1885 for(i
= 0; i
< arraySize
-1; i
++) {
1886 for(j
= i
+1; j
< arraySize
; j
++) {
1887 lowerSize
= coll
->getBound(tests
[i
].key
, -1, UCOL_BOUND_LOWER
, 1, lower
, 512, status
);
1888 upperSize
= coll
->getBound(tests
[j
].key
, -1, UCOL_BOUND_UPPER
, 1, upper
, 512, status
);
1889 for(k
= i
; k
<= j
; k
++) {
1890 if(strcmp((const char *)lower
, (const char *)tests
[k
].key
) > 0) {
// NOTE(review): the message labels the printed index "j", but the value passed is k.
1891 errln("Problem with lower! j = %i (%s vs %s)", k
, tests
[k
].original
, tests
[i
].original
);
1893 if(strcmp((const char *)upper
, (const char *)tests
[k
].key
) <= 0) {
// NOTE(review): same labelling mismatch as above ("j" in the text, k in the arguments).
1894 errln("Problem with upper! j = %i (%s vs %s)", k
, tests
[k
].original
, tests
[j
].original
);
// Part 2: bounds derived from test[i] are compared with the sort key of each later test[j].
1901 for(i
= 0; i
<UPRV_LENGTHOF(test
); i
++) {
1902 buffSize
= u_unescape(test
[i
], buffer
, 512);
1903 skSize
= coll
->getSortKey(buffer
, buffSize
, sortkey
, 512);
1904 lowerSize
= ucol_getBound(sortkey
, skSize
, UCOL_BOUND_LOWER
, 1, lower
, 512, &status
);
1905 upperSize
= ucol_getBound(sortkey
, skSize
, UCOL_BOUND_UPPER_LONG
, 1, upper
, 512, &status
);
1906 for(j
= i
+1; j
<UPRV_LENGTHOF(test
); j
++) {
// sortkey is reused here: it now holds the key of test[j], not test[i].
1907 buffSize
= u_unescape(test
[j
], buffer
, 512);
1908 skSize
= coll
->getSortKey(buffer
, buffSize
, sortkey
, 512);
1909 if(strcmp((const char *)lower
, (const char *)sortkey
) > 0) {
1910 errln("Problem with lower! i = %i, j = %i (%s vs %s)", i
, j
, test
[i
], test
[j
]);
1912 if(strcmp((const char *)upper
, (const char *)sortkey
) <= 0) {
1913 errln("Problem with upper! i = %i, j = %i (%s vs %s)", i
, j
, test
[i
], test
[j
]);
// Checks RuleBasedCollator::getTailoredSet().
// For each setTest entry a collator is built from the unescaped rule string; the
// tailored set must have at least `testsize` elements and must contain every
// (unescaped) string listed in `tests`. Failures are reported via errln(), and a
// collator that cannot be built is reported via errcheckln().
1921 void CollationAPITest::TestGetTailoredSet()
// Up to 20 escaped strings expected to be in the tailored set for one rule.
1925 const char *tests
[20];
// Each row: rule string, expected members, number of expected members.
1928 { "&a < \\u212b", { "\\u212b", "A\\u030a", "\\u00c5" }, 3},
1929 { "& S < \\u0161 <<< \\u0160", { "\\u0161", "s\\u030C", "\\u0160", "S\\u030C" }, 4}
1932 int32_t i
= 0, j
= 0;
1933 UErrorCode status
= U_ZERO_ERROR
;
1936 UnicodeSet
*set
= NULL
;
1938 for(i
= 0; i
< UPRV_LENGTHOF(setTest
); i
++) {
1939 buff
= UnicodeString(setTest
[i
].rules
, -1, US_INV
).unescape();
1940 RuleBasedCollator
coll(buff
, status
);
1941 if(U_SUCCESS(status
)) {
1942 set
= coll
.getTailoredSet(status
);
// Only a lower bound on the size is enforced; extra members are accepted.
1943 if(set
->size() < setTest
[i
].testsize
) {
1944 errln("Tailored set size smaller (%d) than expected (%d)", set
->size(), setTest
[i
].testsize
);
1946 for(j
= 0; j
< setTest
[i
].testsize
; j
++) {
1947 buff
= UnicodeString(setTest
[i
].tests
[j
], -1, US_INV
).unescape();
1948 if(!set
->contains(buff
)) {
1949 errln("Tailored set doesn't contain %s... It should", setTest
[i
].tests
[j
]);
1954 errcheckln(status
, "Couldn't open collator with rules %s - %s", setTest
[i
].rules
, u_errorName(status
));
// Checks the static and dynamic class IDs of RuleBasedCollator, CollationKey and
// CollationElementIterator. For each one, the first byte behind the returned
// class-ID pointer is read into `id`; the failure messages state that byte should be 0.
1959 void CollationAPITest::TestUClassID()
1961 char id
= *((char *)RuleBasedCollator::getStaticClassID());
1963 errln("Static class id for RuleBasedCollator should be 0");
1965 UErrorCode status
= U_ZERO_ERROR
;
// NOTE(review): the default collator is assumed to be a RuleBasedCollator here (unchecked cast).
1966 RuleBasedCollator
*coll
1967 = (RuleBasedCollator
*)Collator::createInstance(status
);
1968 if(U_FAILURE(status
)) {
1970 errcheckln(status
, "Collator creation failed with %s", u_errorName(status
));
1973 id
= *((char *)coll
->getDynamicClassID());
1975 errln("Dynamic class id for RuleBasedCollator should be 0");
1977 id
= *((char *)CollationKey::getStaticClassID());
1979 errln("Static class id for CollationKey should be 0");
1981 CollationKey
*key
= new CollationKey();
1982 id
= *((char *)key
->getDynamicClassID());
1984 errln("Dynamic class id for CollationKey should be 0");
1986 id
= *((char *)CollationElementIterator::getStaticClassID());
1988 errln("Static class id for CollationElementIterator should be 0");
1990 UnicodeString
str("testing");
1991 CollationElementIterator
*iter
= coll
->createCollationElementIterator(str
);
1992 id
= *((char *)iter
->getDynamicClassID());
1994 errln("Dynamic class id for CollationElementIterator should be 0");
// Collator subclass used by CollationAPITest::TestSubclass to exercise the
// Collator base-class interface. It overrides the virtual Collator API declared
// below; several of the overrides are documented at their definitions as
// "api not used" stubs that exist only so the class compiles.
2001 class TestCollator
: public Collator
2004 virtual Collator
* clone(void) const;
// Keep the base-class compare() overloads visible alongside the overrides below.
2006 using Collator::compare
;
2008 virtual UCollationResult
compare(const UnicodeString
& source
,
2009 const UnicodeString
& target
,
2010 UErrorCode
& status
) const;
2011 virtual UCollationResult
compare(const UnicodeString
& source
,
2012 const UnicodeString
& target
,
2014 UErrorCode
& status
) const;
2015 virtual UCollationResult
compare(const UChar
* source
,
2016 int32_t sourceLength
,
2017 const UChar
* target
,
2018 int32_t targetLength
,
2019 UErrorCode
& status
) const;
2020 virtual CollationKey
& getCollationKey(const UnicodeString
& source
,
2022 UErrorCode
& status
) const;
2023 virtual CollationKey
& getCollationKey(const UChar
*source
,
2024 int32_t sourceLength
,
2026 UErrorCode
& status
) const;
2027 virtual int32_t hashCode(void) const;
2028 virtual Locale
getLocale(ULocDataLocaleType type
, UErrorCode
& status
) const;
2029 virtual ECollationStrength
getStrength(void) const;
2030 virtual void setStrength(ECollationStrength newStrength
);
2031 virtual UClassID
getDynamicClassID(void) const;
2032 virtual void getVersion(UVersionInfo info
) const;
2033 virtual void setAttribute(UColAttribute attr
, UColAttributeValue value
,
2034 UErrorCode
&status
);
2035 virtual UColAttributeValue
getAttribute(UColAttribute attr
,
2036 UErrorCode
&status
) const;
2037 virtual uint32_t setVariableTop(const UChar
*varTop
, int32_t len
,
2038 UErrorCode
&status
);
2039 virtual uint32_t setVariableTop(const UnicodeString
&varTop
,
2040 UErrorCode
&status
);
2041 virtual void setVariableTop(uint32_t varTop
, UErrorCode
&status
);
2042 virtual uint32_t getVariableTop(UErrorCode
&status
) const;
2043 virtual int32_t getSortKey(const UnicodeString
& source
,
2045 int32_t resultLength
) const;
2046 virtual int32_t getSortKey(const UChar
*source
, int32_t sourceLength
,
2047 uint8_t*result
, int32_t resultLength
) const;
2048 virtual UnicodeSet
*getTailoredSet(UErrorCode
&status
) const;
2049 virtual UBool
operator==(const Collator
& other
) const;
2050 // Collator::operator!= calls !Collator::operator== which works for all subclasses.
2051 virtual void setLocales(const Locale
& requestedLocale
, const Locale
& validLocale
, const Locale
& actualLocale
);
// Both constructors only forward to the corresponding Collator constructor.
2052 TestCollator() : Collator() {};
2053 TestCollator(UCollationStrength collationStrength
,
2054 UNormalizationMode decompositionMode
) : Collator(collationStrength
, decompositionMode
) {};
2057 inline UBool
TestCollator::operator==(const Collator
& other
) const {
2058 // TestCollator has no fields, so we test for identity.
2059 return this == &other
;
2061 // Normally, subclasses should do something like the following:
2062 // if (this == &other) { return TRUE; }
2063 // if (!Collator::operator==(other)) { return FALSE; } // not the same class
2065 // const TestCollator &o = (const TestCollator&)other;
2066 // (compare this vs. o's subclass fields)
2069 Collator
* TestCollator::clone() const
2071 return new TestCollator();
// Compares two strings for TestCollator: when status indicates success, the result
// of UnicodeString::compare(target) on source is converted to a UCollationResult.
2074 UCollationResult
TestCollator::compare(const UnicodeString
& source
,
2075 const UnicodeString
& target
,
2076 UErrorCode
& status
) const
2078 if(U_SUCCESS(status
)) {
2079 return UCollationResult(source
.compare(target
));
// Length-limited comparison for TestCollator: when status indicates success, the
// result of source.compare(0, length, target) is converted to a UCollationResult.
2085 UCollationResult
TestCollator::compare(const UnicodeString
& source
,
2086 const UnicodeString
& target
,
2088 UErrorCode
& status
) const
2090 if(U_SUCCESS(status
)) {
2091 return UCollationResult(source
.compare(0, length
, target
));
2097 UCollationResult
TestCollator::compare(const UChar
* source
,
2098 int32_t sourceLength
,
2099 const UChar
* target
,
2100 int32_t targetLength
,
2101 UErrorCode
& status
) const
2103 UnicodeString
s(source
, sourceLength
);
2104 UnicodeString
t(target
, targetLength
);
2105 return compare(s
, t
, status
);
// Builds a collation key for TestCollator from the raw text: source is extracted
// into the local buffer `temp`, and the extracted bytes are wrapped in a
// temporary CollationKey.
2108 CollationKey
& TestCollator::getCollationKey(const UnicodeString
& source
,
2110 UErrorCode
& status
) const
// `length` goes in as the buffer capacity and comes back as the value returned by extract().
2114 length
= source
.extract(temp
, length
, NULL
, status
);
2116 CollationKey
tempkey((uint8_t*)temp
, length
);
// Buffer-based variant: wraps the UChar buffer in a UnicodeString and delegates to
// the UnicodeString overload of getCollationKey().
2121 CollationKey
& TestCollator::getCollationKey(const UChar
*source
,
2122 int32_t sourceLength
,
2124 UErrorCode
& status
) const
2126 // stack allocation used since collationkey does not keep the unicodestring
2127 UnicodeString
str(source
, sourceLength
);
2128 return getCollationKey(str
, key
, status
);
// Sort key for TestCollator: the text of source is extracted as chars directly
// into the caller's result buffer, with resultLength as the capacity. A local
// UErrorCode is used because this API has no status parameter.
2131 int32_t TestCollator::getSortKey(const UnicodeString
& source
, uint8_t* result
,
2132 int32_t resultLength
) const
2134 UErrorCode status
= U_ZERO_ERROR
;
2135 int32_t length
= source
.extract((char *)result
, resultLength
, NULL
,
2141 int32_t TestCollator::getSortKey(const UChar
*source
, int32_t sourceLength
,
2142 uint8_t*result
, int32_t resultLength
) const
2144 UnicodeString
str(source
, sourceLength
);
2145 return getSortKey(str
, result
, resultLength
);
// Hash code for TestCollator. TestSubclass reports an error if two instances
// return different values.
2148 int32_t TestCollator::hashCode() const
// Stub override of Collator::getLocale(); it branches on U_FAILURE(status).
2153 Locale
TestCollator::getLocale(ULocDataLocaleType type
, UErrorCode
& status
) const
2155 // api not used, this is to make the compiler happy
2156 if (U_FAILURE(status
)) {
// Override of Collator::getStrength() for TestCollator.
2162 Collator::ECollationStrength
TestCollator::getStrength() const
// Stub override of Collator::setStrength() for TestCollator.
2167 void TestCollator::setStrength(Collator::ECollationStrength newStrength
)
2169 // api not used, this is to make the compiler happy
// Override of getDynamicClassID() for TestCollator.
2173 UClassID
TestCollator::getDynamicClassID(void) const
2178 void TestCollator::getVersion(UVersionInfo info
) const
2180 // api not used, this is to make the compiler happy
2181 memset(info
, 0, U_MAX_VERSION_LENGTH
);
2184 void TestCollator::setAttribute(UColAttribute
/*attr*/, UColAttributeValue
/*value*/,
2185 UErrorCode
& /*status*/)
// Stub override of Collator::getAttribute(). It special-cases a failed status or
// attr == UCOL_ATTRIBUTE_COUNT, and otherwise returns UCOL_DEFAULT.
2189 UColAttributeValue
TestCollator::getAttribute(UColAttribute attr
,
2190 UErrorCode
&status
) const
2192 // api not used, this is to make the compiler happy
2193 if (U_FAILURE(status
) || attr
== UCOL_ATTRIBUTE_COUNT
) {
2196 return UCOL_DEFAULT
;
// Stub override of setVariableTop(const UChar*, int32_t, ...). On a successful
// status it flags U_ILLEGAL_ARGUMENT_ERROR when varTop is null or len < -1.
2199 uint32_t TestCollator::setVariableTop(const UChar
*varTop
, int32_t len
,
2202 // api not used, this is to make the compiler happy
2203 if (U_SUCCESS(status
) && (varTop
== 0 || len
< -1)) {
2204 status
= U_ILLEGAL_ARGUMENT_ERROR
;
// Stub override of setVariableTop(const UnicodeString&, ...). On a successful
// status it flags U_ILLEGAL_ARGUMENT_ERROR when varTop is empty.
2209 uint32_t TestCollator::setVariableTop(const UnicodeString
&varTop
,
2212 // api not used, this is to make the compiler happy
2213 if (U_SUCCESS(status
) && varTop
.length() == 0) {
2214 status
= U_ILLEGAL_ARGUMENT_ERROR
;
2219 void TestCollator::setVariableTop(uint32_t varTop
, UErrorCode
&status
)
2221 // api not used, this is to make the compiler happy
2222 if (U_SUCCESS(status
) && varTop
== 0) {
2223 status
= U_ILLEGAL_ARGUMENT_ERROR
;
// Stub override of getVariableTop(). It branches on U_SUCCESS(status); the final
// return yields 0xFFFFFFFF.
2227 uint32_t TestCollator::getVariableTop(UErrorCode
&status
) const
2230 // api not used, this is to make the compiler happy
2231 if (U_SUCCESS(status
)) {
2234 return (uint32_t)(0xFFFFFFFFu
);
2237 UnicodeSet
* TestCollator::getTailoredSet(UErrorCode
&status
) const
2239 return Collator::getTailoredSet(status
);
2242 void TestCollator::setLocales(const Locale
& requestedLocale
, const Locale
& validLocale
, const Locale
& actualLocale
)
2244 Collator::setLocales(requestedLocale
, validLocale
, actualLocale
);
// Exercises the TestCollator subclass through the Collator interface:
// instance inequality, hashCode(), compare() overloads, getCollationKey(),
// getTailoredSet(), setLocales() and getDisplayName().
2248 void CollationAPITest::TestSubclass()
2252 doAssert(col1
!= col2
, "2 instances of TestCollator should be different");
2253 if (col1
.hashCode() != col2
.hashCode()) {
2254 errln("Every TestCollator has the same hashcode");
2256 UnicodeString
abc("abc", 3);
2257 UnicodeString
bcd("bcd", 3);
// TestCollator::compare is expected to agree with plain UnicodeString::compare.
2258 if (col1
.compare(abc
, bcd
) != abc
.compare(bcd
)) {
2259 errln("TestCollator compare should be the same as the default "
2260 "string comparison");
2263 UErrorCode status
= U_ZERO_ERROR
;
2264 col1
.getCollationKey(abc
, key
, status
);
// The key bytes are converted back to a string and must equal the input text.
2266 const char* bytes
= (const char *)key
.getByteArray(length
);
2267 UnicodeString
keyarray(bytes
, length
, NULL
, status
);
2268 if (abc
!= keyarray
) {
2269 errln("TestCollator collationkey API is returning wrong values");
// The tailored set must equal the full range 0..0x10FFFF (checked as mutual containment).
2272 UnicodeSet
expectedset(0, 0x10FFFF);
2273 UnicodeSet
*defaultset
= col1
.getTailoredSet(status
);
2274 if (!defaultset
->containsAll(expectedset
)
2275 || !expectedset
.containsAll(*defaultset
)) {
2276 errln("Error: expected default tailoring to be 0 to 0x10ffff");
2280 // use base class implementation
2281 Locale loc1
= Locale::getGermany();
2282 Locale loc2
= Locale::getFrance();
2283 col1
.setLocales(loc1
, loc2
, loc2
); // default implementation has no effect
2285 UnicodeString displayName
;
2286 col1
.getDisplayName(loc1
, loc2
, displayName
); // de_DE collator in fr_FR locale
2288 TestCollator
col3(UCOL_TERTIARY
, UNORM_NONE
);
2289 UnicodeString
a("a");
2290 UnicodeString
b("b");
// The status-less compare() overloads must match UnicodeString::compare as well.
2291 Collator::EComparisonResult result
= Collator::EComparisonResult(a
.compare(b
));
2292 if(col1
.compare(a
, b
) != result
) {
2293 errln("Collator doesn't give default result");
2295 if(col1
.compare(a
, b
, 1) != result
) {
2296 errln("Collator doesn't give default result");
2298 if(col1
.compare(a
.getBuffer(), a
.length(), b
.getBuffer(), b
.length()) != result
) {
2299 errln("Collator doesn't give default result");
// Builds a RuleBasedCollator from a rule that tailors U+0000 after "a", then checks
// that the string "a" compares UCOL_LESS than a string holding the single code unit 0.
2303 void CollationAPITest::TestNULLCharTailoring()
2305 UErrorCode status
= U_ZERO_ERROR
;
2306 UChar buf
[256] = {0};
2307 int32_t len
= u_unescape("&a < '\\u0000'", buf
, 256);
2308 UnicodeString
first((UChar
)0x0061);
2309 UnicodeString
second((UChar
)0);
// The explicit length keeps the embedded U+0000 in the rule string.
2310 RuleBasedCollator
*coll
= new RuleBasedCollator(UnicodeString(buf
, len
), status
);
2311 if(U_FAILURE(status
)) {
2313 errcheckln(status
, "Failed to open collator - %s", u_errorName(status
));
2316 UCollationResult res
= coll
->compare(first
, second
, status
);
2317 if(res
!= UCOL_LESS
) {
2318 errln("a should be less then NULL after tailoring");
// Checks that a cloned collator does not share mutable state with its source.
// c0 and c1 are default collators at tertiary strength; c1's UCOL_CASE_FIRST
// attribute is changed, c1 is cloned into c2, and c2's UCOL_CASE_FIRST is changed
// in turn. The state of each collator is logged with dump() along the way.
2323 void CollationAPITest::TestClone() {
2325 UErrorCode status
= U_ZERO_ERROR
;
2326 RuleBasedCollator
* c0
= (RuleBasedCollator
*)Collator::createInstance(status
);
2328 if (U_FAILURE(status
)) {
2329 errcheckln(status
, "Collator::CreateInstance(status) failed with %s", u_errorName(status
));
2333 c0
->setStrength(Collator::TERTIARY
);
2334 dump("c0", c0
, status
);
// NOTE(review): c1 is dereferenced without a separate status check after createInstance().
2337 RuleBasedCollator
* c1
= (RuleBasedCollator
*)Collator::createInstance(status
);
2338 c1
->setStrength(Collator::TERTIARY
);
2339 UColAttributeValue val
= c1
->getAttribute(UCOL_CASE_FIRST
, status
);
2340 if(val
== UCOL_LOWER_FIRST
){
2341 c1
->setAttribute(UCOL_CASE_FIRST
, UCOL_UPPER_FIRST
, status
);
2343 c1
->setAttribute(UCOL_CASE_FIRST
, UCOL_LOWER_FIRST
, status
);
2345 dump("c0", c0
, status
);
2346 dump("c1", c1
, status
);
// Clone c1, then change the clone's case-first setting.
2349 RuleBasedCollator
* c2
= (RuleBasedCollator
*)c1
->clone();
2350 val
= c2
->getAttribute(UCOL_CASE_FIRST
, status
);
2351 if(val
== UCOL_LOWER_FIRST
){
2352 c2
->setAttribute(UCOL_CASE_FIRST
, UCOL_UPPER_FIRST
, status
);
2354 c2
->setAttribute(UCOL_CASE_FIRST
, UCOL_LOWER_FIRST
, status
);
2356 if(U_FAILURE(status
)){
2357 errln("set and get attributes of collator failed. %s\n", u_errorName(status
));
2360 dump("c0", c0
, status
);
2361 dump("c1", c1
, status
);
2362 dump("c2", c2
, status
);
2364 errln("The cloned objects refer to same data");
// Round-trips a collator through its binary form.
// A de@collation=phonebook RuleBasedCollator is set to primary strength and
// serialized with cloneBinary(). Collators rebuilt from those bytes (once with the
// explicit length, once with length -1) must keep primary strength, still compare
// u-umlaut equal to "ue", and compare equal to the source collator. The collator
// built with the explicit length must also re-serialize to identical bytes.
2371 void CollationAPITest::TestCloneBinary() {
2372 IcuTestErrorCode
errorCode(*this, "TestCloneBinary");
2373 LocalPointer
<Collator
> root(Collator::createInstance(Locale::getRoot(), errorCode
));
2374 LocalPointer
<Collator
> coll(Collator::createInstance("de@collation=phonebook", errorCode
));
2375 if(errorCode
.errDataIfFailureAndReset("Collator::createInstance(de@collation=phonebook)")) {
// The binary constructor below takes a RuleBasedCollator base, so both collators are downcast.
2378 RuleBasedCollator
*rbRoot
= dynamic_cast<RuleBasedCollator
*>(root
.getAlias());
2379 RuleBasedCollator
*rbc
= dynamic_cast<RuleBasedCollator
*>(coll
.getAlias());
2380 if(rbRoot
== NULL
|| rbc
== NULL
) {
2381 infoln("root or de@collation=phonebook is not a RuleBasedCollator");
2384 rbc
->setAttribute(UCOL_STRENGTH
, UCOL_PRIMARY
, errorCode
);
2385 UnicodeString
uUmlaut((UChar
)0xfc);
2386 UnicodeString ue
= UNICODE_STRING_SIMPLE("ue");
2387 assertEquals("rbc/primary: u-umlaut==ue", (int32_t)UCOL_EQUAL
, rbc
->compare(uUmlaut
, ue
, errorCode
));
2389 int32_t binLength
= rbc
->cloneBinary(bin
, UPRV_LENGTHOF(bin
), errorCode
);
2390 if(errorCode
.errDataIfFailureAndReset("rbc->cloneBinary()")) {
2393 logln("rbc->cloneBinary() -> %d bytes", (int)binLength
);
// Rebuild from the binary image using its exact length.
2395 RuleBasedCollator
rbc2(bin
, binLength
, rbRoot
, errorCode
);
2396 if(errorCode
.errDataIfFailureAndReset("RuleBasedCollator(rbc binary)")) {
2399 assertEquals("rbc2.strength==primary", (int32_t)UCOL_PRIMARY
, rbc2
.getAttribute(UCOL_STRENGTH
, errorCode
));
2400 assertEquals("rbc2: u-umlaut==ue", (int32_t)UCOL_EQUAL
, rbc2
.compare(uUmlaut
, ue
, errorCode
));
2401 assertTrue("rbc==rbc2", *rbc
== rbc2
);
2402 uint8_t bin2
[25000];
2403 int32_t bin2Length
= rbc2
.cloneBinary(bin2
, UPRV_LENGTHOF(bin2
), errorCode
);
2404 assertEquals("len(rbc binary)==len(rbc2 binary)", binLength
, bin2Length
);
2405 assertTrue("rbc binary==rbc2 binary", binLength
== bin2Length
&& memcmp(bin
, bin2
, binLength
) == 0);
// Rebuild again, this time passing -1 as the length.
2407 RuleBasedCollator
rbc3(bin
, -1, rbRoot
, errorCode
);
2408 if(errorCode
.errDataIfFailureAndReset("RuleBasedCollator(rbc binary, length<0)")) {
2411 assertEquals("rbc3.strength==primary", (int32_t)UCOL_PRIMARY
, rbc3
.getAttribute(UCOL_STRENGTH
, errorCode
));
2412 assertEquals("rbc3: u-umlaut==ue", (int32_t)UCOL_EQUAL
, rbc3
.compare(uUmlaut
, ue
, errorCode
));
2413 assertTrue("rbc==rbc3", *rbc
== rbc3
);
// Compares the digit strings "40" and "72" through UCharIterators with numeric
// collation switched on and a "[reorder Hang Hani]" tailoring; the expected
// result is UCOL_LESS.
2416 void CollationAPITest::TestIterNumeric() {
2417 // Regression test for ticket #9915.
2418 // The collation code sometimes masked the continuation marker away
2419 // but later tested the result for isContinuation().
2420 // This test case failed because the third bytes of the computed numeric-collation primaries
2421 // were permutated with the script reordering table.
2422 // It should have been possible to reproduce this with the root collator
2423 // and characters with appropriate 3-byte primary weights.
2424 // The effectiveness of this test depends completely on the collation elements
2425 // and on the implementation code.
2426 IcuTestErrorCode
errorCode(*this, "TestIterNumeric");
2427 RuleBasedCollator
coll(UnicodeString("[reorder Hang Hani]"), errorCode
);
2428 if(errorCode
.errDataIfFailureAndReset("RuleBasedCollator constructor")) {
2431 coll
.setAttribute(UCOL_NUMERIC_COLLATION
, UCOL_ON
, errorCode
);
2432 UCharIterator iter40
, iter72
;
// "\x34\x30" is "40" and "\x37\x32" is "72", given as explicit byte values.
2433 uiter_setUTF8(&iter40
, "\x34\x30", 2);
2434 uiter_setUTF8(&iter72
, "\x37\x32", 2);
2435 UCollationResult result
= coll
.compare(iter40
, iter72
, errorCode
);
2436 assertEquals("40<72", (int32_t)UCOL_LESS
, (int32_t)result
);
// Verifies that Collator::createInstance() rejects malformed or unsupported locale IDs:
// a bogus Locale and "it-u-ks-xyz" must yield U_ILLEGAL_ARGUMENT_ERROR, while
// "it@colHiraganaQuaternary=true" and "it-u-vt-u24" must yield U_UNSUPPORTED_ERROR.
2439 void CollationAPITest::TestBadKeywords() {
2440 // Test locale IDs with errors.
2441 // Valid locale IDs are tested via data-driven tests.
2442 UErrorCode errorCode
= U_ZERO_ERROR
;
2443 Locale
bogusLocale(Locale::getRoot());
2444 bogusLocale
.setToBogus();
2445 LocalPointer
<Collator
> coll(Collator::createInstance(bogusLocale
, errorCode
));
2446 if(errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2447 errln("Collator::createInstance(bogus locale) did not fail as expected - %s",
2448 u_errorName(errorCode
));
2452 const char *localeID
= "it-u-ks-xyz";
2453 errorCode
= U_ZERO_ERROR
;
2454 coll
.adoptInstead(Collator::createInstance(localeID
, errorCode
));
2455 if(errorCode
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2456 dataerrln("Collator::createInstance(%s) did not fail as expected - %s",
2457 localeID
, u_errorName(errorCode
));
2460 // Unsupported attributes.
2461 localeID
= "it@colHiraganaQuaternary=true";
2462 errorCode
= U_ZERO_ERROR
;
2463 coll
.adoptInstead(Collator::createInstance(localeID
, errorCode
));
2464 if(errorCode
!= U_UNSUPPORTED_ERROR
) {
// U_FILE_ACCESS_ERROR is reported through dataerrln() rather than errln().
2465 if (errorCode
== U_FILE_ACCESS_ERROR
) {
2466 dataerrln("Collator::createInstance(it@colHiraganaQuaternary=true) : %s", u_errorName(errorCode
));
2468 errln("Collator::createInstance(%s) did not fail as expected - %s",
2469 localeID
, u_errorName(errorCode
));
2473 localeID
= "it-u-vt-u24";
2474 errorCode
= U_ZERO_ERROR
;
2475 coll
.adoptInstead(Collator::createInstance(localeID
, errorCode
));
2476 if(errorCode
!= U_UNSUPPORTED_ERROR
) {
// U_ILLEGAL_ARGUMENT_ERROR and U_FILE_ACCESS_ERROR are reported through dataerrln() rather than errln().
2477 if (errorCode
== U_ILLEGAL_ARGUMENT_ERROR
|| errorCode
== U_FILE_ACCESS_ERROR
) {
2478 dataerrln("Collator::createInstance(it-u-vt-u24) : %s", u_errorName(errorCode
));
2480 errln("Collator::createInstance(%s) did not fail as expected - %s",
2481 localeID
, u_errorName(errorCode
));
// Tailors the whole range U+4E00..U+9FFF at two reset points.
// First, after WHITE SMILING FACE, where the primary gap is too small: building the
// collator must fail with U_BUFFER_OVERFLOW_ERROR.
// Second, before the currency-first-primary contraction, where the tailoring is
// expected to succeed and U+4E00 must sort before "$".
2486 void CollationAPITest::TestGapTooSmall() {
2487 IcuTestErrorCode
errorCode(*this, "TestGapTooSmall");
2488 // Try to tailor >20k characters into a too-small primary gap between symbols
2489 // that have 3-byte primary weights.
2490 // In FractionalUCA.txt:
2491 // 263A; [0C BA D0, 05, 05] # Zyyy So [084A.0020.0002] * WHITE SMILING FACE
2492 // 263B; [0C BA D7, 05, 05] # Zyyy So [084B.0020.0002] * BLACK SMILING FACE
2494 RuleBasedCollator(u
"&☺<*\u4E00-\u9FFF", errorCode
);
2495 if(errorCode
.isSuccess()) {
2496 errln("no exception for primary-gap overflow");
2497 } else if(errorCode
.get() == U_BUFFER_OVERFLOW_ERROR
) {
2498 // This is the expected error.
2499 // assertTrue("exception message mentions 'gap'", e.getMessage().contains("gap"));
2501 errln("unexpected error for primary-gap overflow: %s", errorCode
.errorName());
2506 // CLDR 32/ICU 60 FractionalUCA.txt makes room at the end of the symbols range
2507 // for several 2-byte primaries, or a large number of 3-byters.
2508 // The reset point is primary-before what should be
2509 // the special currency-first-primary contraction,
2510 // which is hopefully fairly stable, but not guaranteed stable.
2511 // In FractionalUCA.txt:
2512 // FDD1 20AC; [0D 70 02, 05, 05] # CURRENCY first primary
2514 RuleBasedCollator
coll(u
"&[before 1]\uFDD1€<*\u4E00-\u9FFF", errorCode
);
2515 assertTrue("tailored Han before currency", coll
.compare(u
"\u4E00", u
"$", errorCode
) < 0);
2516 errorCode
.errIfFailureAndReset(
2517 "unexpected exception for tailoring many characters at the end of symbols");
// Logging helper used by TestClone: writes one logln() line made of msg, the result
// of comparing "One" with "one" using collator c, c's strength (after " s: ") and
// c's UCOL_CASE_FIRST attribute (after " u: ").
2521 void CollationAPITest::dump(UnicodeString msg
, RuleBasedCollator
* c
, UErrorCode
& status
) {
2522 const char* bigone
= "One";
2523 const char* littleone
= "one";
2525 logln(msg
+ " " + c
->compare(bigone
, littleone
) +
2526 " s: " + c
->getStrength() +
2527 " u: " + c
->getAttribute(UCOL_CASE_FIRST
, status
));
// Test-suite dispatcher for CollationAPITest: logs the suite name when exec is set
// and lists every test method through the TESTCASE_AUTO macros.
2529 void CollationAPITest::runIndexedTest( int32_t index
, UBool exec
, const char* &name
, char* /*par */)
2531 if (exec
) logln("TestSuite CollationAPITest: ");
2532 TESTCASE_AUTO_BEGIN
;
2533 TESTCASE_AUTO(TestProperty
);
2534 TESTCASE_AUTO(TestKeywordValues
);
2535 TESTCASE_AUTO(TestOperators
);
2536 TESTCASE_AUTO(TestDuplicate
);
2537 TESTCASE_AUTO(TestCompare
);
2538 TESTCASE_AUTO(TestHashCode
);
2539 TESTCASE_AUTO(TestCollationKey
);
2540 TESTCASE_AUTO(TestElemIter
);
2541 TESTCASE_AUTO(TestGetAll
);
2542 TESTCASE_AUTO(TestRuleBasedColl
);
2543 TESTCASE_AUTO(TestDecomposition
);
2544 TESTCASE_AUTO(TestSafeClone
);
2545 TESTCASE_AUTO(TestSortKey
);
2546 TESTCASE_AUTO(TestSortKeyOverflow
);
2547 TESTCASE_AUTO(TestMaxExpansion
);
2548 TESTCASE_AUTO(TestDisplayName
);
2549 TESTCASE_AUTO(TestAttribute
);
2550 TESTCASE_AUTO(TestVariableTopSetting
);
2551 TESTCASE_AUTO(TestMaxVariable
);
2552 TESTCASE_AUTO(TestRules
);
2553 TESTCASE_AUTO(TestGetLocale
);
2554 TESTCASE_AUTO(TestBounds
);
2555 TESTCASE_AUTO(TestGetTailoredSet
);
2556 TESTCASE_AUTO(TestUClassID
);
2557 TESTCASE_AUTO(TestSubclass
);
2558 TESTCASE_AUTO(TestNULLCharTailoring
);
2559 TESTCASE_AUTO(TestClone
);
2560 TESTCASE_AUTO(TestCloneBinary
);
2561 TESTCASE_AUTO(TestIterNumeric
);
2562 TESTCASE_AUTO(TestBadKeywords
);
2563 TESTCASE_AUTO(TestGapTooSmall
);
2567 #endif /* #if !UCONFIG_NO_COLLATION */