1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * Copyright (c) 1997-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 ********************************************************************/
7 /*****************************************************************************
11 * Modification History:
13 * Madhu Katragadda Ported for C API
14 * Brian Rower Added TestOpenVsOpenRules
15 ******************************************************************************
16 *//* C API TEST For COLLATOR */
18 #include "unicode/utypes.h"
20 #if !UCONFIG_NO_COLLATION
25 #include "unicode/uloc.h"
26 #include "unicode/ulocdata.h"
27 #include "unicode/ustring.h"
28 #include "unicode/ures.h"
29 #include "unicode/ucoleitr.h"
38 static void TestAttribute(void);
39 static void TestDefault(void);
40 static void TestDefaultKeyword(void);
41 static void TestBengaliSortKey(void);
44 static char* U_EXPORT2
ucol_sortKeyToString(const UCollator
*coll
, const uint8_t *sortkey
, char *buffer
, uint32_t len
) {
45 uint32_t position
= 0;
48 if (position
+ 1 < len
)
49 position
+= sprintf(buffer
+ position
, "[");
50 while ((b
= *sortkey
++) != 0) {
51 if (b
== 1 && position
+ 5 < len
) {
52 position
+= sprintf(buffer
+ position
, "%02X . ", b
);
53 } else if (b
!= 1 && position
+ 3 < len
) {
54 position
+= sprintf(buffer
+ position
, "%02X ", b
);
57 if (position
+ 3 < len
)
58 position
+= sprintf(buffer
+ position
, "%02X]", b
);
62 void addCollAPITest(TestNode
** root
)
64 /* WEIVTODO: return tests here */
65 addTest(root
, &TestProperty
, "tscoll/capitst/TestProperty");
66 addTest(root
, &TestRuleBasedColl
, "tscoll/capitst/TestRuleBasedColl");
67 addTest(root
, &TestCompare
, "tscoll/capitst/TestCompare");
68 addTest(root
, &TestSortKey
, "tscoll/capitst/TestSortKey");
69 addTest(root
, &TestHashCode
, "tscoll/capitst/TestHashCode");
70 addTest(root
, &TestElemIter
, "tscoll/capitst/TestElemIter");
71 addTest(root
, &TestGetAll
, "tscoll/capitst/TestGetAll");
72 /*addTest(root, &TestGetDefaultRules, "tscoll/capitst/TestGetDefaultRules");*/
73 addTest(root
, &TestDecomposition
, "tscoll/capitst/TestDecomposition");
74 addTest(root
, &TestSafeClone
, "tscoll/capitst/TestSafeClone");
75 addTest(root
, &TestCloneBinary
, "tscoll/capitst/TestCloneBinary");
76 addTest(root
, &TestGetSetAttr
, "tscoll/capitst/TestGetSetAttr");
77 addTest(root
, &TestBounds
, "tscoll/capitst/TestBounds");
78 addTest(root
, &TestGetLocale
, "tscoll/capitst/TestGetLocale");
79 addTest(root
, &TestSortKeyBufferOverrun
, "tscoll/capitst/TestSortKeyBufferOverrun");
80 addTest(root
, &TestAttribute
, "tscoll/capitst/TestAttribute");
81 addTest(root
, &TestGetTailoredSet
, "tscoll/capitst/TestGetTailoredSet");
82 addTest(root
, &TestMergeSortKeys
, "tscoll/capitst/TestMergeSortKeys");
83 addTest(root
, &TestShortString
, "tscoll/capitst/TestShortString");
84 addTest(root
, &TestGetContractionsAndUnsafes
, "tscoll/capitst/TestGetContractionsAndUnsafes");
85 addTest(root
, &TestOpenBinary
, "tscoll/capitst/TestOpenBinary");
86 addTest(root
, &TestDefault
, "tscoll/capitst/TestDefault");
87 addTest(root
, &TestDefaultKeyword
, "tscoll/capitst/TestDefaultKeyword");
88 addTest(root
, &TestOpenVsOpenRules
, "tscoll/capitst/TestOpenVsOpenRules");
89 addTest(root
, &TestBengaliSortKey
, "tscoll/capitst/TestBengaliSortKey");
90 addTest(root
, &TestGetKeywordValuesForLocale
, "tscoll/capitst/TestGetKeywordValuesForLocale");
91 addTest(root
, &TestStrcollNull
, "tscoll/capitst/TestStrcollNull");
94 void TestGetSetAttr(void) {
95 UErrorCode status
= U_ZERO_ERROR
;
96 UCollator
*coll
= ucol_open(NULL
, &status
);
99 UColAttributeValue val
[5];
101 UColAttributeValue nonValue
;
103 {UCOL_FRENCH_COLLATION
, {UCOL_ON
, UCOL_OFF
}, 2, UCOL_SHIFTED
},
104 {UCOL_ALTERNATE_HANDLING
, {UCOL_NON_IGNORABLE
, UCOL_SHIFTED
}, 2, UCOL_OFF
},/* attribute for handling variable elements*/
105 {UCOL_CASE_FIRST
, {UCOL_OFF
, UCOL_LOWER_FIRST
, UCOL_UPPER_FIRST
}, 3, UCOL_SHIFTED
},/* who goes first, lower case or uppercase */
106 {UCOL_CASE_LEVEL
, {UCOL_ON
, UCOL_OFF
}, 2, UCOL_SHIFTED
},/* do we have an extra case level */
107 {UCOL_NORMALIZATION_MODE
, {UCOL_ON
, UCOL_OFF
}, 2, UCOL_SHIFTED
},/* attribute for normalization */
108 {UCOL_DECOMPOSITION_MODE
, {UCOL_ON
, UCOL_OFF
}, 2, UCOL_SHIFTED
},
109 {UCOL_STRENGTH
, {UCOL_PRIMARY
, UCOL_SECONDARY
, UCOL_TERTIARY
, UCOL_QUATERNARY
, UCOL_IDENTICAL
}, 5, UCOL_SHIFTED
},/* attribute for strength */
110 {UCOL_HIRAGANA_QUATERNARY_MODE
, {UCOL_ON
, UCOL_OFF
}, 2, UCOL_SHIFTED
},/* when turned on, this attribute */
112 UColAttribute currAttr
;
113 UColAttributeValue value
;
114 uint32_t i
= 0, j
= 0;
117 log_err_status(status
, "Unable to open collator. %s\n", u_errorName(status
));
120 for(i
= 0; i
<UPRV_LENGTHOF(attrs
); i
++) {
121 currAttr
= attrs
[i
].att
;
122 ucol_setAttribute(coll
, currAttr
, UCOL_DEFAULT
, &status
);
123 if(U_FAILURE(status
)) {
124 log_err_status(status
, "ucol_setAttribute with the default value returned error: %s\n", u_errorName(status
));
127 value
= ucol_getAttribute(coll
, currAttr
, &status
);
128 if(U_FAILURE(status
)) {
129 log_err("ucol_getAttribute returned error: %s\n", u_errorName(status
));
132 for(j
= 0; j
<attrs
[i
].valueSize
; j
++) {
133 ucol_setAttribute(coll
, currAttr
, attrs
[i
].val
[j
], &status
);
134 if(U_FAILURE(status
)) {
135 log_err("ucol_setAttribute with the value %i returned error: %s\n", attrs
[i
].val
[j
], u_errorName(status
));
139 status
= U_ZERO_ERROR
;
140 ucol_setAttribute(coll
, currAttr
, attrs
[i
].nonValue
, &status
);
141 if(U_SUCCESS(status
)) {
142 log_err("ucol_setAttribute with the bad value didn't return an error\n");
145 status
= U_ZERO_ERROR
;
147 ucol_setAttribute(coll
, currAttr
, value
, &status
);
148 if(U_FAILURE(status
)) {
149 log_err("ucol_setAttribute with the default valuereturned error: %s\n", u_errorName(status
));
153 status
= U_ZERO_ERROR
;
154 value
= ucol_getAttribute(coll
, UCOL_ATTRIBUTE_COUNT
, &status
);
155 if(U_SUCCESS(status
)) {
156 log_err("ucol_getAttribute for UCOL_ATTRIBUTE_COUNT didn't return an error\n");
158 status
= U_ZERO_ERROR
;
159 ucol_setAttribute(coll
, UCOL_ATTRIBUTE_COUNT
, UCOL_DEFAULT
, &status
);
160 if(U_SUCCESS(status
)) {
161 log_err("ucol_setAttribute for UCOL_ATTRIBUTE_COUNT didn't return an error\n");
163 status
= U_ZERO_ERROR
;
/*
 * Assertion helper used by the tests in this file: reports `message` through
 * log_err() with an "ERROR : " prefix.
 * NOTE(review): presumably the report is issued only when `condition` is 0 -
 * confirm against the guard around the log_err() call.
 */
168 static void doAssert(int condition
, const char *message
)
171 log_err("ERROR : %s\n", message
);
175 #define UTF8_BUF_SIZE 128
177 static void doStrcoll(const UCollator
* coll
, const UChar
* src
, int32_t srcLen
, const UChar
* tgt
, int32_t tgtLen
,
178 UCollationResult expected
, const char *message
) {
179 UErrorCode err
= U_ZERO_ERROR
;
180 char srcU8
[UTF8_BUF_SIZE
], tgtU8
[UTF8_BUF_SIZE
];
181 int32_t srcU8Len
= -1, tgtU8Len
= -1;
184 if (ucol_strcoll(coll
, src
, srcLen
, tgt
, tgtLen
) != expected
) {
185 log_err("ERROR : %s\n", message
);
188 u_strToUTF8(srcU8
, UTF8_BUF_SIZE
, &len
, src
, srcLen
, &err
);
189 if (U_FAILURE(err
) || len
>= UTF8_BUF_SIZE
) {
190 log_err("ERROR : UTF-8 conversion error\n");
196 u_strToUTF8(tgtU8
, UTF8_BUF_SIZE
, &len
, tgt
, tgtLen
, &err
);
197 if (U_FAILURE(err
) || len
>= UTF8_BUF_SIZE
) {
198 log_err("ERROR : UTF-8 conversion error\n");
205 if (ucol_strcollUTF8(coll
, srcU8
, srcU8Len
, tgtU8
, tgtU8Len
, &err
) != expected
207 log_err("ERROR: %s (strcollUTF8)\n", message
);
212 /* We don't have default rules, at least not in the previous sense */
213 void TestGetDefaultRules(){
215 UErrorCode status
=U_ZERO_ERROR
;
216 UCollator
*coll
=NULL
;
217 int32_t len1
= 0, len2
=0;
218 uint8_t *binColData
= NULL
;
220 UResourceBundle
*res
= NULL
;
221 UResourceBundle
*binColl
= NULL
;
222 uint8_t *binResult
= NULL
;
225 const UChar
* defaultRulesArray
=ucol_getDefaultRulesArray(&size
);
226 log_verbose("Test the function ucol_getDefaultRulesArray()\n");
228 coll
= ucol_openRules(defaultRulesArray
, size
, UCOL_ON
, UCOL_PRIMARY
, &status
);
229 if(U_SUCCESS(status
) && coll
!=NULL
) {
230 binColData
= (uint8_t*)ucol_cloneRuleData(coll
, &len1
, &status
);
236 res
=ures_open(NULL
, "root", &status
);
237 if(U_FAILURE(status
)){
238 log_err("ERROR: Failed to get resource for \"root Locale\" with %s", myErrorName(status
));
241 binColl
=ures_getByKey(res
, "%%Collation", binColl
, &status
);
242 if(U_SUCCESS(status
)){
243 binResult
=(uint8_t*)ures_getBinary(binColl
, &len2
, &status
);
244 if(U_FAILURE(status
)){
245 log_err("ERROR: ures_getBinary() failed\n");
248 log_err("ERROR: ures_getByKey(locale(default), %%Collation) failed");
253 log_err("Error: ucol_getDefaultRulesArray() failed to return the correct length.\n");
255 if(memcmp(binColData
, binResult
, len1
) != 0){
256 log_err("Error: ucol_getDefaultRulesArray() failed\n");
267 /* Collator Properties
268 ucol_open, ucol_strcoll, getStrength/setStrength
269 getDecomposition/setDecomposition, getDisplayName*/
272 UCollator
*col
, *ruled
;
276 UChar source
[12], target
[12];
278 UErrorCode status
= U_ZERO_ERROR
;
280 * Expected version of the English collator.
281 * Currently, the major/minor version numbers change when the builder code
283 * number 2 is from the tailoring data version and
284 * number 3 is the UCA version.
285 * This changes with every UCA version change, and the expected value
286 * needs to be adjusted.
287 * Same in intltest/apicoll.cpp.
289 UVersionInfo currVersionArray
= {0x31, 0xC0, 0x05, 0x2A}; /* from ICU 4.4/UCA 5.2 */
290 UVersionInfo versionArray
= {0, 0, 0, 0};
291 UVersionInfo versionUCAArray
= {0, 0, 0, 0};
292 UVersionInfo versionUCDArray
= {0, 0, 0, 0};
294 log_verbose("The property tests begin : \n");
295 log_verbose("Test ucol_strcoll : \n");
296 col
= ucol_open("en_US", &status
);
297 if (U_FAILURE(status
)) {
298 log_err_status(status
, "Default Collator creation failed.: %s\n", myErrorName(status
));
302 ucol_getVersion(col
, versionArray
);
303 /* Check for a version greater than some value rather than equality
304 * so that we need not update the expected version each time. */
305 if (uprv_memcmp(versionArray
, currVersionArray
, 4)<0) {
306 log_err("Testing ucol_getVersion() - unexpected result: %02x.%02x.%02x.%02x\n",
307 versionArray
[0], versionArray
[1], versionArray
[2], versionArray
[3]);
309 log_info("ucol_getVersion() en_US result: %02x.%02x.%02x.%02x\n",
310 versionArray
[0], versionArray
[1], versionArray
[2], versionArray
[3]);
313 /* Assume that the UCD and UCA versions are the same,
314 * rather than hardcoding (and updating each time) a particular UCA version. */
315 u_getUnicodeVersion(versionUCDArray
);
316 ucol_getUCAVersion(col
, versionUCAArray
);
317 if (0!=uprv_memcmp(versionUCAArray
, versionUCDArray
, 4)) {
318 log_err("Testing ucol_getUCAVersion() - unexpected result: %hu.%hu.%hu.%hu\n",
319 versionUCAArray
[0], versionUCAArray
[1], versionUCAArray
[2], versionUCAArray
[3]);
322 u_uastrcpy(source
, "ab");
323 u_uastrcpy(target
, "abc");
325 doStrcoll(col
, source
, u_strlen(source
), target
, u_strlen(target
), UCOL_LESS
, "ab < abc comparison failed");
327 u_uastrcpy(source
, "ab");
328 u_uastrcpy(target
, "AB");
330 doStrcoll(col
, source
, u_strlen(source
), target
, u_strlen(target
), UCOL_LESS
, "ab < AB comparison failed");
332 u_uastrcpy(source
, "blackbird");
333 u_uastrcpy(target
, "black-bird");
335 doStrcoll(col
, source
, u_strlen(source
), target
, u_strlen(target
), UCOL_GREATER
, "black-bird > blackbird comparison failed");
337 u_uastrcpy(source
, "black bird");
338 u_uastrcpy(target
, "black-bird");
340 doStrcoll(col
, source
, u_strlen(source
), target
, u_strlen(target
), UCOL_LESS
, "black bird < black-bird comparison failed");
342 u_uastrcpy(source
, "Hello");
343 u_uastrcpy(target
, "hello");
345 doStrcoll(col
, source
, u_strlen(source
), target
, u_strlen(target
), UCOL_GREATER
, "Hello > hello comparison failed");
347 log_verbose("Test ucol_strcoll ends.\n");
349 log_verbose("testing ucol_getStrength() method ...\n");
350 doAssert( (ucol_getStrength(col
) == UCOL_TERTIARY
), "collation object has the wrong strength");
351 doAssert( (ucol_getStrength(col
) != UCOL_PRIMARY
), "collation object's strength is primary difference");
353 log_verbose("testing ucol_setStrength() method ...\n");
354 ucol_setStrength(col
, UCOL_SECONDARY
);
355 doAssert( (ucol_getStrength(col
) != UCOL_TERTIARY
), "collation object's strength is secondary difference");
356 doAssert( (ucol_getStrength(col
) != UCOL_PRIMARY
), "collation object's strength is primary difference");
357 doAssert( (ucol_getStrength(col
) == UCOL_SECONDARY
), "collation object has the wrong strength");
360 log_verbose("Get display name for the default collation in German : \n");
362 len
=ucol_getDisplayName("en_US", "de_DE", NULL
, 0, &status
);
363 if(status
==U_BUFFER_OVERFLOW_ERROR
){
365 disName
=(UChar
*)malloc(sizeof(UChar
) * (len
+1));
366 ucol_getDisplayName("en_US", "de_DE", disName
, len
+1, &status
);
367 log_verbose("the display name for default collation in german: %s\n", austrdup(disName
) );
370 if(U_FAILURE(status
)){
371 log_err("ERROR: in getDisplayName: %s\n", myErrorName(status
));
374 log_verbose("Default collation getDisplayName ended.\n");
376 ruled
= ucol_open("da_DK", &status
);
377 if(U_FAILURE(status
)) {
378 log_data_err("ucol_open(\"da_DK\") failed - %s\n", u_errorName(status
));
382 log_verbose("ucol_getRules() testing ...\n");
383 rules
= ucol_getRules(ruled
, &tempLength
);
384 if(tempLength
== 0) {
385 log_data_err("missing da_DK tailoring rule string\n");
387 UChar aa
[2] = { 0x61, 0x61 };
388 doAssert(u_strFindFirst(rules
, tempLength
, aa
, 2) != NULL
,
389 "da_DK rules do not contain 'aa'");
391 log_verbose("getRules tests end.\n");
393 UChar
*buffer
= (UChar
*)malloc(200000*sizeof(UChar
));
394 int32_t bufLen
= 200000;
396 log_verbose("ucol_getRulesEx() testing ...\n");
397 tempLength
= ucol_getRulesEx(col
,UCOL_TAILORING_ONLY
,buffer
,bufLen
);
398 doAssert( tempLength
== 0x00, "getRulesEx() result incorrect" );
399 log_verbose("getRules tests end.\n");
401 log_verbose("ucol_getRulesEx() testing ...\n");
402 tempLength
=ucol_getRulesEx(col
,UCOL_FULL_RULES
,buffer
,bufLen
);
403 if(tempLength
== 0) {
404 log_data_err("missing *full* rule string\n");
406 log_verbose("getRulesEx tests end.\n");
412 log_verbose("open an collator for french locale");
413 col
= ucol_open("fr_FR", &status
);
414 if (U_FAILURE(status
)) {
415 log_err("ERROR: Creating French collation failed.: %s\n", myErrorName(status
));
418 ucol_setStrength(col
, UCOL_PRIMARY
);
419 log_verbose("testing ucol_getStrength() method again ...\n");
420 doAssert( (ucol_getStrength(col
) != UCOL_TERTIARY
), "collation object has the wrong strength");
421 doAssert( (ucol_getStrength(col
) == UCOL_PRIMARY
), "collation object's strength is not primary difference");
423 log_verbose("testing French ucol_setStrength() method ...\n");
424 ucol_setStrength(col
, UCOL_TERTIARY
);
425 doAssert( (ucol_getStrength(col
) == UCOL_TERTIARY
), "collation object's strength is not tertiary difference");
426 doAssert( (ucol_getStrength(col
) != UCOL_PRIMARY
), "collation object's strength is primary difference");
427 doAssert( (ucol_getStrength(col
) != UCOL_SECONDARY
), "collation object's strength is secondary difference");
430 log_verbose("Get display name for the french collation in english : \n");
431 len
=ucol_getDisplayName("fr_FR", "en_US", NULL
, 0, &status
);
432 if(status
==U_BUFFER_OVERFLOW_ERROR
){
434 disName
=(UChar
*)malloc(sizeof(UChar
) * (len
+1));
435 ucol_getDisplayName("fr_FR", "en_US", disName
, len
+1, &status
);
436 log_verbose("the display name for french collation in english: %s\n", austrdup(disName
) );
439 if(U_FAILURE(status
)){
440 log_err("ERROR: in getDisplayName: %s\n", myErrorName(status
));
443 log_verbose("Default collation getDisplayName ended.\n");
447 /* Test RuleBasedCollator and getRules*/
448 void TestRuleBasedColl()
450 UCollator
*col1
, *col2
, *col3
, *col4
;
451 UCollationElements
*iter1
, *iter2
;
455 const UChar
*rule1
, *rule2
, *rule3
, *rule4
;
457 UErrorCode status
= U_ZERO_ERROR
;
458 u_uastrcpy(ruleset1
, "&9 < a, A < b, B < c, C; ch, cH, Ch, CH < d, D, e, E");
459 u_uastrcpy(ruleset2
, "&9 < a, A < b, B < c, C < d, D, e, E");
462 col1
= ucol_openRules(ruleset1
, u_strlen(ruleset1
), UCOL_DEFAULT
, UCOL_DEFAULT_STRENGTH
, NULL
,&status
);
463 if (U_FAILURE(status
)) {
464 log_err_status(status
, "RuleBased Collator creation failed.: %s\n", myErrorName(status
));
468 log_verbose("PASS: RuleBased Collator creation passed\n");
470 status
= U_ZERO_ERROR
;
471 col2
= ucol_openRules(ruleset2
, u_strlen(ruleset2
), UCOL_DEFAULT
, UCOL_DEFAULT_STRENGTH
, NULL
, &status
);
472 if (U_FAILURE(status
)) {
473 log_err("RuleBased Collator creation failed.: %s\n", myErrorName(status
));
477 log_verbose("PASS: RuleBased Collator creation passed\n");
480 status
= U_ZERO_ERROR
;
481 col3
= ucol_open(NULL
, &status
);
482 if (U_FAILURE(status
)) {
483 log_err("Default Collator creation failed.: %s\n", myErrorName(status
));
487 log_verbose("PASS: Default Collator creation passed\n");
489 rule1
= ucol_getRules(col1
, &tempLength
);
490 rule2
= ucol_getRules(col2
, &tempLength
);
491 rule3
= ucol_getRules(col3
, &tempLength
);
493 doAssert((u_strcmp(rule1
, rule2
) != 0), "Default collator getRules failed");
494 doAssert((u_strcmp(rule2
, rule3
) != 0), "Default collator getRules failed");
495 doAssert((u_strcmp(rule1
, rule3
) != 0), "Default collator getRules failed");
497 col4
=ucol_openRules(rule2
, u_strlen(rule2
), UCOL_DEFAULT
, UCOL_DEFAULT_STRENGTH
, NULL
, &status
);
498 if (U_FAILURE(status
)) {
499 log_err("RuleBased Collator creation failed.: %s\n", myErrorName(status
));
502 rule4
= ucol_getRules(col4
, &tempLength
);
503 doAssert((u_strcmp(rule2
, rule4
) == 0), "Default collator getRules failed");
510 /* tests that modifier ! is always ignored */
511 u_uastrcpy(ruleset1
, "!&a<b");
515 col1
= ucol_openRules(ruleset1
, u_strlen(ruleset1
), UCOL_DEFAULT
, UCOL_DEFAULT_STRENGTH
, NULL
, &status
);
516 if (U_FAILURE(status
)) {
517 log_err("RuleBased Collator creation failed.: %s\n", myErrorName(status
));
520 col2
= ucol_open("en_US", &status
);
521 if (U_FAILURE(status
)) {
522 log_err("en_US Collator creation failed.: %s\n", myErrorName(status
));
525 iter1
= ucol_openElements(col1
, teststr
, 3, &status
);
526 iter2
= ucol_openElements(col2
, teststr
, 3, &status
);
527 if(U_FAILURE(status
)) {
528 log_err("ERROR: CollationElement iterator creation failed.: %s\n", myErrorName(status
));
532 /* testing with en since thai has its own tailoring */
533 uint32_t ce
= ucol_next(iter1
, &status
);
534 uint32_t ce2
= ucol_next(iter2
, &status
);
535 if(U_FAILURE(status
)) {
536 log_err("ERROR: CollationElement iterator creation failed.: %s\n", myErrorName(status
));
540 log_err("! modifier test failed");
542 if (ce
== UCOL_NULLORDER
) {
546 ucol_closeElements(iter1
);
547 ucol_closeElements(iter2
);
550 /* CLDR 24+ requires a reset before the first relation */
551 u_uastrcpy(ruleset1
, "< z < a");
552 col1
= ucol_openRules(ruleset1
, u_strlen(ruleset1
), UCOL_DEFAULT
, UCOL_DEFAULT_STRENGTH
, NULL
, &status
);
553 if (status
!= U_PARSE_ERROR
&& status
!= U_INVALID_FORMAT_ERROR
) {
554 log_err("ucol_openRules(without initial reset: '< z < a') "
555 "should fail with U_PARSE_ERROR or U_INVALID_FORMAT_ERROR but yielded %s\n",
556 myErrorName(status
));
563 UErrorCode status
= U_ZERO_ERROR
;
568 log_verbose("The compare tests begin : \n");
570 col
= ucol_open("en_US", &status
);
571 if(U_FAILURE(status
)) {
572 log_err_status(status
, "ucal_open() collation creation failed.: %s\n", myErrorName(status
));
575 test1
=(UChar
*)malloc(sizeof(UChar
) * 6);
576 test2
=(UChar
*)malloc(sizeof(UChar
) * 6);
577 u_uastrcpy(test1
, "Abcda");
578 u_uastrcpy(test2
, "abcda");
580 log_verbose("Use tertiary comparison level testing ....\n");
582 doAssert( (!ucol_equal(col
, test1
, u_strlen(test1
), test2
, u_strlen(test2
))), "Result should be \"Abcda\" != \"abcda\" ");
583 doAssert( (ucol_greater(col
, test1
, u_strlen(test1
), test2
, u_strlen(test2
))), "Result should be \"Abcda\" >>> \"abcda\" ");
584 doAssert( (ucol_greaterOrEqual(col
, test1
, u_strlen(test1
), test2
, u_strlen(test2
))), "Result should be \"Abcda\" >>> \"abcda\"");
586 ucol_setStrength(col
, UCOL_SECONDARY
);
587 log_verbose("Use secondary comparison level testing ....\n");
589 doAssert( (ucol_equal(col
, test1
, u_strlen(test1
), test2
, u_strlen(test2
) )), "Result should be \"Abcda\" == \"abcda\"");
590 doAssert( (!ucol_greater(col
, test1
, u_strlen(test1
), test2
, u_strlen(test2
))), "Result should be \"Abcda\" == \"abcda\"");
591 doAssert( (ucol_greaterOrEqual(col
, test1
, u_strlen(test1
), test2
, u_strlen(test2
) )), "Result should be \"Abcda\" == \"abcda\"");
593 ucol_setStrength(col
, UCOL_PRIMARY
);
594 log_verbose("Use primary comparison level testing ....\n");
596 doAssert( (ucol_equal(col
, test1
, u_strlen(test1
), test2
, u_strlen(test2
))), "Result should be \"Abcda\" == \"abcda\"");
597 doAssert( (!ucol_greater(col
, test1
, u_strlen(test1
), test2
, u_strlen(test2
))), "Result should be \"Abcda\" == \"abcda\"");
598 doAssert( (ucol_greaterOrEqual(col
, test1
, u_strlen(test1
), test2
, u_strlen(test2
))), "Result should be \"Abcda\" == \"abcda\"");
601 log_verbose("The compare tests end.\n");
608 ---------------------------------------------
609 tests decomposition setting
/*
 * Checks the UCOL_NORMALIZATION_MODE attribute reported by three locale
 * collators: the error messages below expect it to be UCOL_ON for vi_VN and
 * el_GR and UCOL_OFF for en_US.
 */
611 void TestDecomposition() {
612 UErrorCode status
= U_ZERO_ERROR
;
613 UCollator
*en_US
, *el_GR
, *vi_VN
;
/* All three opens share one status, so a single check afterwards covers a
 * failure of any of them. */
614 en_US
= ucol_open("en_US", &status
);
615 el_GR
= ucol_open("el_GR", &status
);
616 vi_VN
= ucol_open("vi_VN", &status
);
618 if (U_FAILURE(status
)) {
619 log_err_status(status
, "ERROR: collation creation failed.: %s\n", myErrorName(status
));
/* vi_VN: anything other than UCOL_ON is reported as an error. */
623 if (ucol_getAttribute(vi_VN
, UCOL_NORMALIZATION_MODE
, &status
) != UCOL_ON
||
626 log_err("ERROR: vi_VN collation did not have canonical decomposition for normalization!\n");
/* Start the next query from a clean status. */
629 status
= U_ZERO_ERROR
;
/* el_GR: anything other than UCOL_ON is reported as an error. */
630 if (ucol_getAttribute(el_GR
, UCOL_NORMALIZATION_MODE
, &status
) != UCOL_ON
||
633 log_err("ERROR: el_GR collation did not have canonical decomposition for normalization!\n");
636 status
= U_ZERO_ERROR
;
/* en_US: anything other than UCOL_OFF is reported as an error. */
637 if (ucol_getAttribute(en_US
, UCOL_NORMALIZATION_MODE
, &status
) != UCOL_OFF
||
640 log_err("ERROR: en_US collation had canonical decomposition for normalization!\n");
648 #define CLONETEST_COLLATOR_COUNT 4
650 void TestSafeClone() {
653 static const UChar umlautUStr
[] = {0x00DC, 0};
654 static const UChar oeStr
[] = {0x0055, 0x0045, 0};
655 UCollator
* someCollators
[CLONETEST_COLLATOR_COUNT
];
656 UCollator
* someClonedCollators
[CLONETEST_COLLATOR_COUNT
];
658 UErrorCode err
= U_ZERO_ERROR
;
659 int8_t idx
= 6; /* Leave this here to test buffer alingment in memory*/
660 uint8_t buffer
[CLONETEST_COLLATOR_COUNT
] [U_COL_SAFECLONE_BUFFERSIZE
];
661 int32_t bufferSize
= U_COL_SAFECLONE_BUFFERSIZE
;
662 const char sampleRuleChars
[] = "&Z < CH";
663 UChar sampleRule
[sizeof(sampleRuleChars
)];
665 u_uastrcpy(test1
, "abCda");
666 u_uastrcpy(test2
, "abcda");
667 u_uastrcpy(sampleRule
, sampleRuleChars
);
669 /* one default collator, two complex ones & one opened from a rule string */
670 someCollators
[0] = ucol_open("en_US", &err
);
671 someCollators
[1] = ucol_open("ko", &err
);
672 someCollators
[2] = ucol_open("ja_JP", &err
);
673 someCollators
[3] = ucol_openRules(sampleRule
, -1, UCOL_ON
, UCOL_TERTIARY
, NULL
, &err
);
675 for (idx
= 0; idx
< CLONETEST_COLLATOR_COUNT
; idx
++) {
676 ucol_close(someCollators
[idx
]);
678 log_data_err("Couldn't open one or more collators\n");
682 /* Check the various error & informational states: */
684 /* Null status - just returns NULL */
685 if (NULL
!= ucol_safeClone(someCollators
[0], buffer
[0], &bufferSize
, NULL
))
687 log_err("FAIL: Cloned Collator failed to deal correctly with null status\n");
689 /* error status - should return 0 & keep error the same */
690 err
= U_MEMORY_ALLOCATION_ERROR
;
691 if (NULL
!= ucol_safeClone(someCollators
[0], buffer
[0], &bufferSize
, &err
) || err
!= U_MEMORY_ALLOCATION_ERROR
)
693 log_err("FAIL: Cloned Collator failed to deal correctly with incoming error status\n");
697 /* Null buffer size pointer is ok */
698 if (NULL
== (col
= ucol_safeClone(someCollators
[0], buffer
[0], NULL
, &err
)) || U_FAILURE(err
))
700 log_err("FAIL: Cloned Collator failed to deal correctly with null bufferSize pointer\n");
705 /* buffer size pointer is 0 - fill in pbufferSize with a size */
707 if (NULL
!= ucol_safeClone(someCollators
[0], buffer
[0], &bufferSize
, &err
) ||
708 U_FAILURE(err
) || bufferSize
<= 0)
710 log_err("FAIL: Cloned Collator failed a sizing request ('preflighting')\n");
712 /* Verify our define is large enough */
713 if (U_COL_SAFECLONE_BUFFERSIZE
< bufferSize
)
715 log_err("FAIL: Pre-calculated buffer size is too small\n");
717 /* Verify we can use this run-time calculated size */
718 if (NULL
== (col
= ucol_safeClone(someCollators
[0], buffer
[0], &bufferSize
, &err
)) || U_FAILURE(err
))
720 log_err("FAIL: Collator can't be cloned with run-time size\n");
722 if (col
) ucol_close(col
);
723 /* size one byte too small - should allocate & let us know */
724 if (bufferSize
> 1) {
727 if (NULL
== (col
= ucol_safeClone(someCollators
[0], 0, &bufferSize
, &err
)) || err
!= U_SAFECLONE_ALLOCATED_WARNING
)
729 log_err("FAIL: Cloned Collator failed to deal correctly with too-small buffer size\n");
731 if (col
) ucol_close(col
);
733 bufferSize
= U_COL_SAFECLONE_BUFFERSIZE
;
736 /* Null buffer pointer - return Collator & set error to U_SAFECLONE_ALLOCATED_WARNING */
737 if (NULL
== (col
= ucol_safeClone(someCollators
[0], 0, &bufferSize
, &err
)) || err
!= U_SAFECLONE_ALLOCATED_WARNING
)
739 log_err("FAIL: Cloned Collator failed to deal correctly with null buffer pointer\n");
741 if (col
) ucol_close(col
);
744 /* Null Collator - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */
745 if (NULL
!= ucol_safeClone(NULL
, buffer
[0], &bufferSize
, &err
) || err
!= U_ILLEGAL_ARGUMENT_ERROR
)
747 log_err("FAIL: Cloned Collator failed to deal correctly with null Collator pointer\n");
752 /* Test that a cloned collator doesn't accidentally use UCA. */
753 col
=ucol_open("de@collation=phonebook", &err
);
754 bufferSize
= U_COL_SAFECLONE_BUFFERSIZE
;
755 someClonedCollators
[0] = ucol_safeClone(col
, buffer
[0], &bufferSize
, &err
);
756 doAssert( (ucol_greater(col
, umlautUStr
, u_strlen(umlautUStr
), oeStr
, u_strlen(oeStr
))), "Original German phonebook collation sorts differently than expected");
757 doAssert( (ucol_greater(someClonedCollators
[0], umlautUStr
, u_strlen(umlautUStr
), oeStr
, u_strlen(oeStr
))), "Cloned German phonebook collation sorts differently than expected");
758 if (!ucol_equals(someClonedCollators
[0], col
)) {
759 log_err("FAIL: Cloned German phonebook collator is not equal to original.\n");
762 ucol_close(someClonedCollators
[0]);
766 /* change orig & clone & make sure they are independent */
768 for (idx
= 0; idx
< CLONETEST_COLLATOR_COUNT
; idx
++)
770 ucol_setStrength(someCollators
[idx
], UCOL_IDENTICAL
);
773 ucol_close(ucol_safeClone(someCollators
[idx
], buffer
[idx
], &bufferSize
, &err
));
774 if (err
!= U_SAFECLONE_ALLOCATED_WARNING
) {
775 log_err("FAIL: collator number %d was not allocated.\n", idx
);
776 log_err("FAIL: status of Collator[%d] is %d (hex: %x).\n", idx
, err
, err
);
779 bufferSize
= U_COL_SAFECLONE_BUFFERSIZE
;
781 someClonedCollators
[idx
] = ucol_safeClone(someCollators
[idx
], buffer
[idx
], &bufferSize
, &err
);
782 if (U_FAILURE(err
)) {
783 log_err("FAIL: Unable to clone collator %d - %s\n", idx
, u_errorName(err
));
786 if (!ucol_equals(someClonedCollators
[idx
], someCollators
[idx
])) {
787 log_err("FAIL: Cloned collator is not equal to original at index = %d.\n", idx
);
790 /* Check the usability */
791 ucol_setStrength(someCollators
[idx
], UCOL_PRIMARY
);
792 ucol_setAttribute(someCollators
[idx
], UCOL_CASE_LEVEL
, UCOL_OFF
, &err
);
794 doAssert( (ucol_equal(someCollators
[idx
], test1
, u_strlen(test1
), test2
, u_strlen(test2
))), "Result should be \"abcda\" == \"abCda\"");
796 /* Close the original to make sure that the clone is usable. */
797 ucol_close(someCollators
[idx
]);
799 ucol_setStrength(someClonedCollators
[idx
], UCOL_TERTIARY
);
800 ucol_setAttribute(someClonedCollators
[idx
], UCOL_CASE_LEVEL
, UCOL_OFF
, &err
);
801 doAssert( (ucol_greater(someClonedCollators
[idx
], test1
, u_strlen(test1
), test2
, u_strlen(test2
))), "Result should be \"abCda\" >>> \"abcda\" ");
803 ucol_close(someClonedCollators
[idx
]);
807 void TestCloneBinary(){
808 UErrorCode err
= U_ZERO_ERROR
;
809 UCollator
* col
= ucol_open("en_US", &err
);
814 if (U_FAILURE(err
)) {
815 log_data_err("Couldn't open collator. Error: %s\n", u_errorName(err
));
819 size
= ucol_cloneBinary(col
, NULL
, 0, &err
);
820 if(size
==0 || err
!=U_BUFFER_OVERFLOW_ERROR
) {
821 log_err("ucol_cloneBinary - couldn't check size. Error: %s\n", u_errorName(err
));
826 buffer
= (uint8_t *) malloc(size
);
827 ucol_cloneBinary(col
, buffer
, size
, &err
);
829 log_err("ucol_cloneBinary - couldn't clone.. Error: %s\n", u_errorName(err
));
834 /* how to check binary result ? */
836 c
= ucol_openBinary(buffer
, size
, col
, &err
);
838 log_err("ucol_openBinary failed. Error: %s\n", u_errorName(err
));
840 UChar t
[] = {0x41, 0x42, 0x43, 0}; /* ABC */
843 l1
= ucol_getSortKey(col
, t
, -1, NULL
,0);
844 l2
= ucol_getSortKey(c
, t
, -1, NULL
,0);
845 k1
= (uint8_t *) malloc(sizeof(uint8_t) * l1
);
846 k2
= (uint8_t *) malloc(sizeof(uint8_t) * l2
);
847 ucol_getSortKey(col
, t
, -1, k1
, l1
);
848 ucol_getSortKey(col
, t
, -1, k2
, l2
);
849 if (strcmp((char *)k1
,(char *)k2
) != 0){
850 log_err("ucol_openBinary - new collator should equal to old one\n");
861 static void TestBengaliSortKey(void)
863 const char *curLoc
= "bn";
864 UChar str1
[] = { 0x09BE, 0 };
865 UChar str2
[] = { 0x0B70, 0 };
866 UCollator
*c2
= NULL
;
868 int32_t rulesLength
=-1;
870 int32_t sortKeyLen1
= 0;
872 int32_t sortKeyLen2
= 0;
873 UErrorCode status
= U_ZERO_ERROR
;
874 char sortKeyStr1
[2048];
875 uint32_t sortKeyStrLen1
= UPRV_LENGTHOF(sortKeyStr1
);
876 char sortKeyStr2
[2048];
877 uint32_t sortKeyStrLen2
= UPRV_LENGTHOF(sortKeyStr2
);
878 UCollationResult result
;
880 static UChar preRules
[41] = { 0x26, 0x9fa, 0x3c, 0x98c, 0x3c, 0x9e1, 0x3c, 0x98f, 0x3c, 0x990, 0x3c, 0x993, 0x3c, 0x994, 0x3c, 0x9bc, 0x3c, 0x982, 0x3c, 0x983, 0x3c, 0x981, 0x3c, 0x9b0, 0x3c, 0x9b8, 0x3c, 0x9b9, 0x3c, 0x9bd, 0x3c, 0x9be, 0x3c, 0x9bf, 0x3c, 0x9c8, 0x3c, 0x9cb, 0x3d, 0x9cb , 0};
884 log_verbose("Rules: %s\n", aescstrdup(rules
, rulesLength
));
886 c2
= ucol_openRules(rules
, rulesLength
, UCOL_DEFAULT
, UCOL_DEFAULT_STRENGTH
, NULL
, &status
);
887 if (U_FAILURE(status
)) {
888 log_data_err("ERROR: Creating collator from rules failed with locale: %s : %s\n", curLoc
, myErrorName(status
));
892 sortKeyLen1
= ucol_getSortKey(c2
, str1
, -1, NULL
, 0);
893 sortKey1
= (uint8_t*)malloc(sortKeyLen1
+1);
894 ucol_getSortKey(c2
,str1
,-1,sortKey1
, sortKeyLen1
+1);
895 ucol_sortKeyToString(c2
, sortKey1
, sortKeyStr1
, sortKeyStrLen1
);
898 sortKeyLen2
= ucol_getSortKey(c2
, str2
, -1, NULL
, 0);
899 sortKey2
= (uint8_t*)malloc(sortKeyLen2
+1);
900 ucol_getSortKey(c2
,str2
,-1,sortKey2
, sortKeyLen2
+1);
902 ucol_sortKeyToString(c2
, sortKey2
, sortKeyStr2
, sortKeyStrLen2
);
906 result
=ucol_strcoll(c2
, str1
, -1, str2
, -1);
907 if(result
!=UCOL_LESS
) {
908 log_err("Error: %s was not less than %s: result=%d.\n", aescstrdup(str1
,-1), aescstrdup(str2
,-1), result
);
909 log_info("[%s] -> %s (%d, from rule)\n", aescstrdup(str1
,-1), sortKeyStr1
, sortKeyLen1
);
910 log_info("[%s] -> %s (%d, from rule)\n", aescstrdup(str2
,-1), sortKeyStr2
, sortKeyLen2
);
912 log_verbose("OK: %s was less than %s: result=%d.\n", aescstrdup(str1
,-1), aescstrdup(str2
,-1), result
);
913 log_verbose("[%s] -> %s (%d, from rule)\n", aescstrdup(str1
,-1), sortKeyStr1
, sortKeyLen1
);
914 log_verbose("[%s] -> %s (%d, from rule)\n", aescstrdup(str2
,-1), sortKeyStr2
, sortKeyLen2
);
924 TestOpenVsOpenRules ensures that collators from ucol_open and ucol_openRules
925 will generate identical sort keys
927 void TestOpenVsOpenRules(){
929 /* create an array of all the locales */
930 int32_t numLocales
= uloc_countAvailable();
931 int32_t sizeOfStdSet
;
933 UChar str
[41]; /* create an array of UChar of size maximum strSize + 1 */
940 int32_t sortKeyLen1
, sortKeyLen2
;
941 uint8_t *sortKey1
= NULL
, *sortKey2
= NULL
;
942 char sortKeyStr1
[512], sortKeyStr2
[512];
943 uint32_t sortKeyStrLen1
= UPRV_LENGTHOF(sortKeyStr1
),
944 sortKeyStrLen2
= UPRV_LENGTHOF(sortKeyStr2
);
951 UErrorCode err
= U_ZERO_ERROR
;
953 /* create a set of standard characters that aren't very interesting...
954 and then we can find some interesting ones later */
956 stdSet
= uset_open(0x61, 0x7A);
957 uset_addRange(stdSet
, 0x41, 0x5A);
958 uset_addRange(stdSet
, 0x30, 0x39);
959 sizeOfStdSet
= uset_size(stdSet
);
960 (void)sizeOfStdSet
; /* Suppress set but not used warning. */
963 if(getTestOption(QUICK_OPTION
))
968 for(x
= 0; x
< numLocales
; x
+=adder
){
969 curLoc
= (char *)uloc_getAvailable(x
);
970 log_verbose("Processing %s\n", curLoc
);
972 /* create a collator the normal API way */
973 c1
= ucol_open(curLoc
, &err
);
974 if (U_FAILURE(err
)) {
975 log_err("ERROR: Normal collation creation failed with locale: %s : %s\n", curLoc
, myErrorName(err
));
980 rules
= ucol_getRules(c1
, &rulesLength
);
981 if (rulesLength
== 0) {
982 /* The optional tailoring rule string is either empty (boring) or missing. */
987 /* use those rules to create a collator from rules */
988 c2
= ucol_openRules(rules
, rulesLength
, UCOL_DEFAULT
, UCOL_DEFAULT_STRENGTH
, NULL
, &err
);
989 if (U_FAILURE(err
)) {
990 log_err("ERROR: Creating collator from rules failed with locale: %s : %s\n", curLoc
, myErrorName(err
));
995 uld
= ulocdata_open(curLoc
, &err
);
997 /*now that we have some collators, we get several strings */
999 for(y
= 0; y
< 5; y
++){
1001 /* get a set of ALL the characters in this locale */
1002 eSet
= ulocdata_getExemplarSet(uld
, NULL
, 0, ULOCDATA_ES_STANDARD
, &err
);
1003 eSize
= uset_size(eSet
);
1005 /* make a string with these characters in it */
1006 strSize
= (rand()%40
) + 1;
1008 for(z
= 0; z
< strSize
; z
++){
1009 str
[z
] = uset_charAt(eSet
, rand()%eSize
);
/* change the set to only include 'abnormal' characters (not A-Z, a-z, 0-9) */
1013 uset_removeAll(eSet
, stdSet
);
1014 eSize
= uset_size(eSet
);
1016 /* if there are some non-normal characters left, put a few into the string, just to make sure we have some */
1018 str
[2%strSize
] = uset_charAt(eSet
, rand()%eSize
);
1019 str
[3%strSize
] = uset_charAt(eSet
, rand()%eSize
);
1020 str
[5%strSize
] = uset_charAt(eSet
, rand()%eSize
);
1021 str
[10%strSize
] = uset_charAt(eSet
, rand()%eSize
);
1022 str
[13%strSize
] = uset_charAt(eSet
, rand()%eSize
);
1024 /* terminate the string */
1025 str
[strSize
-1] = '\0';
1026 log_verbose("String used: %S\n", str
);
/* get sort keys for both of them, and check that the keys are identical */
1029 sortKeyLen1
= ucol_getSortKey(c1
, str
, u_strlen(str
), NULL
, 0);
1030 sortKey1
= (uint8_t*)malloc(sizeof(uint8_t) * (sortKeyLen1
+ 1));
1031 /*memset(sortKey1, 0xFE, sortKeyLen1);*/
1032 ucol_getSortKey(c1
, str
, u_strlen(str
), sortKey1
, sortKeyLen1
+ 1);
1033 ucol_sortKeyToString(c1
, sortKey1
, sortKeyStr1
, sortKeyStrLen1
);
1035 sortKeyLen2
= ucol_getSortKey(c2
, str
, u_strlen(str
), NULL
, 0);
1036 sortKey2
= (uint8_t*)malloc(sizeof(uint8_t) * (sortKeyLen2
+ 1));
1037 /*memset(sortKey2, 0xFE, sortKeyLen2);*/
1038 ucol_getSortKey(c2
, str
, u_strlen(str
), sortKey2
, sortKeyLen2
+ 1);
1039 ucol_sortKeyToString(c2
, sortKey2
, sortKeyStr2
, sortKeyStrLen2
);
1041 /* Check that the lengths are the same */
1042 if (sortKeyLen1
!= sortKeyLen2
) {
1043 log_err("ERROR : Sort key lengths %d and %d for text '%s' in locale '%s' do not match.\n",
1044 sortKeyLen1
, sortKeyLen2
, str
, curLoc
);
1047 /* check that the keys are the same */
1048 if (memcmp(sortKey1
, sortKey2
, sortKeyLen1
) != 0) {
1049 log_err("ERROR : Sort keys '%s' and '%s' for text '%s' in locale '%s' are not equivalent.\n",
1050 sortKeyStr1
, sortKeyStr2
, str
, curLoc
);
1053 /* clean up after each string */
1058 /* clean up after each locale */
1059 ulocdata_close(uld
);
1063 /* final clean up */
1067 ----------------------------------------------------------------------------
1068 ctor -- Tests the getSortKey
1072 uint8_t *sortk1
= NULL
, *sortk2
= NULL
, *sortk3
= NULL
, *sortkEmpty
= NULL
;
1073 int32_t sortklen
, osortklen
;
1075 UChar
*test1
, *test2
, *test3
;
1076 UErrorCode status
= U_ZERO_ERROR
;
1077 char toStringBuffer
[256], *resultP
;
1078 uint32_t toStringLen
=UPRV_LENGTHOF(toStringBuffer
);
1081 uint8_t s1
[] = { 0x9f, 0x00 };
1082 uint8_t s2
[] = { 0x61, 0x00 };
1085 strcmpResult
= strcmp((const char *)s1
, (const char *)s2
);
1086 log_verbose("strcmp(0x9f..., 0x61...) = %d\n", strcmpResult
);
1088 if(strcmpResult
<= 0) {
1089 log_err("ERR: expected strcmp(\"9f 00\", \"61 00\") to be >=0 (GREATER).. got %d. Calling strcmp() for sortkeys may not work! \n",
1094 log_verbose("testing SortKey begins...\n");
/* this is supposed to open the default collator, but later on the test treats it like it is "en_US"
   - very bad if you try to run the tests on a machine where the default locale is NOT "en_US" */
1097 /* col = ucol_open(NULL, &status); */
1098 col
= ucol_open("en_US", &status
);
1099 if (U_FAILURE(status
)) {
1100 log_err_status(status
, "ERROR: Default collation creation failed.: %s\n", myErrorName(status
));
1105 if(ucol_getStrength(col
) != UCOL_DEFAULT_STRENGTH
)
1107 log_err("ERROR: default collation did not have UCOL_DEFAULT_STRENGTH !\n");
1109 /* Need to use identical strength */
1110 ucol_setAttribute(col
, UCOL_STRENGTH
, UCOL_IDENTICAL
, &status
);
1112 test1
=(UChar
*)malloc(sizeof(UChar
) * 6);
1113 test2
=(UChar
*)malloc(sizeof(UChar
) * 6);
1114 test3
=(UChar
*)malloc(sizeof(UChar
) * 6);
1116 memset(test1
,0xFE, sizeof(UChar
)*6);
1117 memset(test2
,0xFE, sizeof(UChar
)*6);
1118 memset(test3
,0xFE, sizeof(UChar
)*6);
1121 u_uastrcpy(test1
, "Abcda");
1122 u_uastrcpy(test2
, "abcda");
1123 u_uastrcpy(test3
, "abcda");
1125 log_verbose("Use tertiary comparison level testing ....\n");
1127 sortklen
=ucol_getSortKey(col
, test1
, u_strlen(test1
), NULL
, 0);
1128 sortk1
=(uint8_t*)malloc(sizeof(uint8_t) * (sortklen
+1));
1129 memset(sortk1
,0xFE, sortklen
);
1130 ucol_getSortKey(col
, test1
, u_strlen(test1
), sortk1
, sortklen
+1);
1132 sortklen
=ucol_getSortKey(col
, test2
, u_strlen(test2
), NULL
, 0);
1133 sortk2
=(uint8_t*)malloc(sizeof(uint8_t) * (sortklen
+1));
1134 memset(sortk2
,0xFE, sortklen
);
1135 ucol_getSortKey(col
, test2
, u_strlen(test2
), sortk2
, sortklen
+1);
1137 osortklen
= sortklen
;
1138 sortklen
=ucol_getSortKey(col
, test2
, u_strlen(test3
), NULL
, 0);
1139 sortk3
=(uint8_t*)malloc(sizeof(uint8_t) * (sortklen
+1));
1140 memset(sortk3
,0xFE, sortklen
);
1141 ucol_getSortKey(col
, test2
, u_strlen(test2
), sortk3
, sortklen
+1);
1143 doAssert( (sortklen
== osortklen
), "Sortkey length should be the same (abcda, abcda)");
1145 doAssert( (memcmp(sortk1
, sortk2
, sortklen
) > 0), "Result should be \"Abcda\" > \"abcda\"");
1146 doAssert( (memcmp(sortk2
, sortk1
, sortklen
) < 0), "Result should be \"abcda\" < \"Abcda\"");
1147 doAssert( (memcmp(sortk2
, sortk3
, sortklen
) == 0), "Result should be \"abcda\" == \"abcda\"");
1149 resultP
= ucol_sortKeyToString(col
, sortk3
, toStringBuffer
, toStringLen
);
1150 doAssert( (resultP
!= 0), "sortKeyToString failed!");
1152 #if 1 /* verobse log of sortkeys */
1158 strcpy(junk2
, "abcda[2] ");
1159 strcpy(junk3
, " abcda[3] ");
1161 for(i
=0;i
<sortklen
;i
++)
1163 sprintf(junk2
+strlen(junk2
), "%02X ",(int)( 0xFF & sortk2
[i
]));
1164 sprintf(junk3
+strlen(junk3
), "%02X ",(int)( 0xFF & sortk3
[i
]));
1167 log_verbose("%s\n", junk2
);
1168 log_verbose("%s\n", junk3
);
1176 log_verbose("Use secondary comparision level testing ...\n");
1177 ucol_setStrength(col
, UCOL_SECONDARY
);
1178 sortklen
=ucol_getSortKey(col
, test1
, u_strlen(test1
), NULL
, 0);
1179 sortk1
=(uint8_t*)malloc(sizeof(uint8_t) * (sortklen
+1));
1180 ucol_getSortKey(col
, test1
, u_strlen(test1
), sortk1
, sortklen
+1);
1181 sortklen
=ucol_getSortKey(col
, test2
, u_strlen(test2
), NULL
, 0);
1182 sortk2
=(uint8_t*)malloc(sizeof(uint8_t) * (sortklen
+1));
1183 ucol_getSortKey(col
, test2
, u_strlen(test2
), sortk2
, sortklen
+1);
1185 doAssert( !(memcmp(sortk1
, sortk2
, sortklen
) > 0), "Result should be \"Abcda\" == \"abcda\"");
1186 doAssert( !(memcmp(sortk2
, sortk1
, sortklen
) < 0), "Result should be \"abcda\" == \"Abcda\"");
1187 doAssert( (memcmp(sortk1
, sortk2
, sortklen
) == 0), "Result should be \"abcda\" == \"abcda\"");
1189 log_verbose("getting sortkey for an empty string\n");
1190 ucol_setAttribute(col
, UCOL_STRENGTH
, UCOL_TERTIARY
, &status
);
1191 sortklen
= ucol_getSortKey(col
, test1
, 0, NULL
, 0);
1192 sortkEmpty
= (uint8_t*)malloc(sizeof(uint8_t) * sortklen
+1);
1193 sortklen
= ucol_getSortKey(col
, test1
, 0, sortkEmpty
, sortklen
+1);
1194 if(sortklen
!= 3 || sortkEmpty
[0] != 1 || sortkEmpty
[0] != 1 || sortkEmpty
[2] != 0) {
1195 log_err("Empty string generated wrong sortkey!\n");
1199 log_verbose("testing passing invalid string\n");
1200 sortklen
= ucol_getSortKey(col
, NULL
, 10, NULL
, 0);
1202 log_err("Invalid string didn't return sortkey size of 0\n");
1206 log_verbose("testing sortkey ends...\n");
1217 uint8_t *sortk1
, *sortk2
, *sortk3
;
1218 int32_t sortk1len
, sortk2len
, sortk3len
;
1220 UChar
*test1
, *test2
, *test3
;
1221 UErrorCode status
= U_ZERO_ERROR
;
1222 log_verbose("testing getHashCode begins...\n");
1223 col
= ucol_open("en_US", &status
);
1224 if (U_FAILURE(status
)) {
1225 log_err_status(status
, "ERROR: Default collation creation failed.: %s\n", myErrorName(status
));
1228 test1
=(UChar
*)malloc(sizeof(UChar
) * 6);
1229 test2
=(UChar
*)malloc(sizeof(UChar
) * 6);
1230 test3
=(UChar
*)malloc(sizeof(UChar
) * 6);
1231 u_uastrcpy(test1
, "Abcda");
1232 u_uastrcpy(test2
, "abcda");
1233 u_uastrcpy(test3
, "abcda");
1235 log_verbose("Use tertiary comparison level testing ....\n");
1236 sortk1len
=ucol_getSortKey(col
, test1
, u_strlen(test1
), NULL
, 0);
1237 sortk1
=(uint8_t*)malloc(sizeof(uint8_t) * (sortk1len
+1));
1238 ucol_getSortKey(col
, test1
, u_strlen(test1
), sortk1
, sortk1len
+1);
1239 sortk2len
=ucol_getSortKey(col
, test2
, u_strlen(test2
), NULL
, 0);
1240 sortk2
=(uint8_t*)malloc(sizeof(uint8_t) * (sortk2len
+1));
1241 ucol_getSortKey(col
, test2
, u_strlen(test2
), sortk2
, sortk2len
+1);
1242 sortk3len
=ucol_getSortKey(col
, test2
, u_strlen(test3
), NULL
, 0);
1243 sortk3
=(uint8_t*)malloc(sizeof(uint8_t) * (sortk3len
+1));
1244 ucol_getSortKey(col
, test2
, u_strlen(test2
), sortk3
, sortk3len
+1);
1247 log_verbose("ucol_hashCode() testing ...\n");
1249 doAssert( ucol_keyHashCode(sortk1
, sortk1len
) != ucol_keyHashCode(sortk2
, sortk2len
), "Hash test1 result incorrect" );
1250 doAssert( !(ucol_keyHashCode(sortk1
, sortk1len
) == ucol_keyHashCode(sortk2
, sortk2len
)), "Hash test2 result incorrect" );
1251 doAssert( ucol_keyHashCode(sortk2
, sortk2len
) == ucol_keyHashCode(sortk3
, sortk3len
), "Hash result not equal" );
1253 log_verbose("hashCode tests end.\n");
1265 *----------------------------------------------------------------------------
1266 * Tests the UCollatorElements API.
1272 int32_t order1
, order2
, order3
;
1273 UChar
*testString1
, *testString2
;
1275 UCollationElements
*iterator1
, *iterator2
, *iterator3
;
1276 UErrorCode status
= U_ZERO_ERROR
;
1277 log_verbose("testing UCollatorElements begins...\n");
1278 col
= ucol_open("en_US", &status
);
1279 ucol_setAttribute(col
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &status
);
1280 if (U_FAILURE(status
)) {
1281 log_err_status(status
, "ERROR: Default collation creation failed.: %s\n", myErrorName(status
));
1285 testString1
=(UChar
*)malloc(sizeof(UChar
) * 150);
1286 testString2
=(UChar
*)malloc(sizeof(UChar
) * 150);
1287 u_uastrcpy(testString1
, "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?");
1288 u_uastrcpy(testString2
, "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?");
1290 log_verbose("Constructors and comparison testing....\n");
1292 iterator1
= ucol_openElements(col
, testString1
, u_strlen(testString1
), &status
);
1293 if(U_FAILURE(status
)) {
1294 log_err("ERROR: Default collationElement iterator creation failed.: %s\n", myErrorName(status
));
1298 else{ log_verbose("PASS: Default collationElement iterator1 creation passed\n");}
1300 iterator2
= ucol_openElements(col
, testString1
, u_strlen(testString1
), &status
);
1301 if(U_FAILURE(status
)) {
1302 log_err("ERROR: Default collationElement iterator creation failed.: %s\n", myErrorName(status
));
1306 else{ log_verbose("PASS: Default collationElement iterator2 creation passed\n");}
1308 iterator3
= ucol_openElements(col
, testString2
, u_strlen(testString2
), &status
);
1309 if(U_FAILURE(status
)) {
1310 log_err("ERROR: Default collationElement iterator creation failed.: %s\n", myErrorName(status
));
1314 else{ log_verbose("PASS: Default collationElement iterator3 creation passed\n");}
1316 offset
=ucol_getOffset(iterator1
);
1317 (void)offset
; /* Suppress set but not used warning. */
1318 ucol_setOffset(iterator1
, 6, &status
);
1319 if (U_FAILURE(status
)) {
1320 log_err("Error in setOffset for UCollatorElements iterator.: %s\n", myErrorName(status
));
1323 if(ucol_getOffset(iterator1
)==6)
1324 log_verbose("setOffset and getOffset working fine\n");
1326 log_err("error in set and get Offset got %d instead of 6\n", ucol_getOffset(iterator1
));
1329 ucol_setOffset(iterator1
, 0, &status
);
1330 order1
= ucol_next(iterator1
, &status
);
1331 if (U_FAILURE(status
)) {
1332 log_err("Somehow ran out of memory stepping through the iterator1.: %s\n", myErrorName(status
));
1335 order2
=ucol_getOffset(iterator2
);
1336 doAssert((order1
!= order2
), "The first iterator advance failed");
1337 order2
= ucol_next(iterator2
, &status
);
1338 if (U_FAILURE(status
)) {
1339 log_err("Somehow ran out of memory stepping through the iterator2.: %s\n", myErrorName(status
));
1342 order3
= ucol_next(iterator3
, &status
);
1343 if (U_FAILURE(status
)) {
1344 log_err("Somehow ran out of memory stepping through the iterator3.: %s\n", myErrorName(status
));
1348 doAssert((order1
== order2
), "The second iterator advance failed should be the same as first one");
1350 doAssert( (ucol_primaryOrder(order1
) == ucol_primaryOrder(order3
)), "The primary orders should be identical");
1351 doAssert( (ucol_secondaryOrder(order1
) == ucol_secondaryOrder(order3
)), "The secondary orders should be identical");
1352 doAssert( (ucol_tertiaryOrder(order1
) == ucol_tertiaryOrder(order3
)), "The tertiary orders should be identical");
1354 order1
=ucol_next(iterator1
, &status
);
1355 if (U_FAILURE(status
)) {
1356 log_err("Somehow ran out of memory stepping through the iterator2.: %s\n", myErrorName(status
));
1359 order3
=ucol_next(iterator3
, &status
);
1360 if (U_FAILURE(status
)) {
1361 log_err("Somehow ran out of memory stepping through the iterator2.: %s\n", myErrorName(status
));
1364 doAssert( (ucol_primaryOrder(order1
) == ucol_primaryOrder(order3
)), "The primary orders should be identical");
1365 doAssert( (ucol_tertiaryOrder(order1
) != ucol_tertiaryOrder(order3
)), "The tertiary orders should be different");
1367 order1
=ucol_next(iterator1
, &status
);
1368 if (U_FAILURE(status
)) {
1369 log_err("Somehow ran out of memory stepping through the iterator2.: %s\n", myErrorName(status
));
1372 order3
=ucol_next(iterator3
, &status
);
1373 if (U_FAILURE(status
)) {
1374 log_err("Somehow ran out of memory stepping through the iterator2.: %s\n", myErrorName(status
));
/* this here, my friends, is either pure lunacy or something so obsolete that even its mother
 * doesn't care about it. Essentially, this test complains if secondary values for 'I' and '_'
1379 * are the same. According to the UCA, this is not true. Therefore, remove the test.
1380 * Besides, if primary strengths for two code points are different, it doesn't matter one bit
1381 * what is the relation between secondary or any other strengths.
1382 * killed by weiv 06/11/2002.
1385 doAssert( ((order1 & UCOL_SECONDARYMASK) != (order3 & UCOL_SECONDARYMASK)), "The secondary orders should be different");
1387 doAssert( (order1
!= UCOL_NULLORDER
), "Unexpected end of iterator reached");
1391 ucol_closeElements(iterator1
);
1392 ucol_closeElements(iterator2
);
1393 ucol_closeElements(iterator3
);
1396 log_verbose("testing CollationElementIterator ends...\n");
1399 void TestGetLocale() {
1400 UErrorCode status
= U_ZERO_ERROR
;
1401 const char *rules
= "&a<x<y<z";
1402 UChar rlz
[256] = {0};
1403 uint32_t rlzLen
= u_unescape(rules
, rlz
, 256);
1405 UCollator
*coll
= NULL
;
1406 const char *locale
= NULL
;
1410 static const struct {
1411 const char* requestedLocale
;
1412 const char* validLocale
;
1413 const char* actualLocale
;
1415 { "sr_RS", "sr_Cyrl_RS", "sr" },
1416 { "sh_YU", "sr_Latn_RS", "sr_Latn" }, /* was sh, then aliased to hr, now sr_Latn via import per cldrbug 5647: */
1417 { "en_BE_FOO", "en", "root" },
1418 { "sv_SE_NONEXISTANT", "sv", "sv" }
1421 /* test opening collators for different locales */
1422 for(i
= 0; i
<UPRV_LENGTHOF(testStruct
); i
++) {
1423 status
= U_ZERO_ERROR
;
1424 coll
= ucol_open(testStruct
[i
].requestedLocale
, &status
);
1425 if(U_FAILURE(status
)) {
1426 log_err_status(status
, "Failed to open collator for %s with %s\n", testStruct
[i
].requestedLocale
, u_errorName(status
));
1431 * The requested locale may be the same as the valid locale,
1432 * or may not be supported at all. See ticket #10477.
1434 locale
= ucol_getLocaleByType(coll
, ULOC_REQUESTED_LOCALE
, &status
);
1435 if(U_SUCCESS(status
) &&
1436 strcmp(locale
, testStruct
[i
].requestedLocale
) != 0 && strcmp(locale
, testStruct
[i
].validLocale
) != 0) {
1437 log_err("[Coll %s]: Error in requested locale, expected %s, got %s\n", testStruct
[i
].requestedLocale
, testStruct
[i
].requestedLocale
, locale
);
1439 status
= U_ZERO_ERROR
;
1440 locale
= ucol_getLocaleByType(coll
, ULOC_VALID_LOCALE
, &status
);
1441 if(strcmp(locale
, testStruct
[i
].validLocale
) != 0) {
1442 log_err("[Coll %s]: Error in valid locale, expected %s, got %s\n", testStruct
[i
].requestedLocale
, testStruct
[i
].validLocale
, locale
);
1444 locale
= ucol_getLocaleByType(coll
, ULOC_ACTUAL_LOCALE
, &status
);
1445 if(strcmp(locale
, testStruct
[i
].actualLocale
) != 0) {
1446 log_err("[Coll %s]: Error in actual locale, expected %s, got %s\n", testStruct
[i
].requestedLocale
, testStruct
[i
].actualLocale
, locale
);
1451 /* completely non-existent locale for collator should get a root collator */
1453 UCollator
*defaultColl
= ucol_open(NULL
, &status
);
1454 coll
= ucol_open("blahaha", &status
);
1455 if(U_SUCCESS(status
)) {
1456 /* See comment above about ticket #10477.
1457 if(strcmp(ucol_getLocaleByType(coll, ULOC_REQUESTED_LOCALE, &status), "blahaha")) {
1458 log_err("Nonexisting locale didn't preserve the requested locale\n");
1460 const char *name
= ucol_getLocaleByType(coll
, ULOC_VALID_LOCALE
, &status
);
1461 if(*name
!= 0 && strcmp(name
, "root") != 0) {
1462 log_err("Valid locale for nonexisting-locale collator is \"%s\" not root\n", name
);
1464 name
= ucol_getLocaleByType(coll
, ULOC_ACTUAL_LOCALE
, &status
);
1465 if(*name
!= 0 && strcmp(name
, "root") != 0) {
1466 log_err("Actual locale for nonexisting-locale collator is \"%s\" not root\n", name
);
1469 ucol_close(defaultColl
);
1471 log_data_err("Couldn't open collators\n");
1477 /* collator instantiated from rules should have all three locales NULL */
1478 coll
= ucol_openRules(rlz
, rlzLen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
1480 locale
= ucol_getLocaleByType(coll
, ULOC_REQUESTED_LOCALE
, &status
);
1481 if(U_SUCCESS(status
) && locale
!= NULL
) {
1482 log_err("For collator instantiated from rules, requested locale returned %s instead of NULL\n", locale
);
1484 status
= U_ZERO_ERROR
;
1485 locale
= ucol_getLocaleByType(coll
, ULOC_VALID_LOCALE
, &status
);
1486 if(locale
!= NULL
) {
1487 log_err("For collator instantiated from rules, valid locale returned %s instead of NULL\n", locale
);
1489 locale
= ucol_getLocaleByType(coll
, ULOC_ACTUAL_LOCALE
, &status
);
1490 if(locale
!= NULL
) {
1491 log_err("For collator instantiated from rules, actual locale returned %s instead of NULL\n", locale
);
1495 log_data_err("Couldn't get collator from ucol_openRules() - %s\n", u_errorName(status
));
1503 count
=ucol_countAvailable();
1504 /* use something sensible w/o hardcoding the count */
1506 log_err("Error in countAvailable(), it returned %d\n", count
);
1509 log_verbose("PASS: countAvailable() successful, it returned %d\n", count
);
1511 for(i
=0;i
<count
;i
++)
1512 log_verbose("%s\n", ucol_getAvailable(i
));
1519 const char *original
;
1523 static int compare_teststruct(const void *string1
, const void *string2
) {
1524 return(strcmp((const char *)((struct teststruct
*)string1
)->key
, (const char *)((struct teststruct
*)string2
)->key
));
1528 UErrorCode status
= U_ZERO_ERROR
;
1530 UCollator
*coll
= ucol_open("sh", &status
);
1532 uint8_t sortkey
[512], lower
[512], upper
[512];
1535 static const char * const test
[] = {
1539 "j\\u00F6hn sm\\u00EFth",
1540 "J\\u00F6hn Sm\\u00EFth",
1541 "J\\u00D6HN SM\\u00CFTH",
1546 struct teststruct tests
[] = {
1547 {"\\u010CAKI MIHALJ" } ,
1548 {"\\u010CAKI MIHALJ" } ,
1549 {"\\u010CAKI PIRO\\u0160KA" },
1550 {"\\u010CABAI ANDRIJA" } ,
1551 {"\\u010CABAI LAJO\\u0160" } ,
1552 {"\\u010CABAI MARIJA" } ,
1553 {"\\u010CABAI STEVAN" } ,
1554 {"\\u010CABAI STEVAN" } ,
1555 {"\\u010CABARKAPA BRANKO" } ,
1556 {"\\u010CABARKAPA MILENKO" } ,
1557 {"\\u010CABARKAPA MIROSLAV" } ,
1558 {"\\u010CABARKAPA SIMO" } ,
1559 {"\\u010CABARKAPA STANKO" } ,
1560 {"\\u010CABARKAPA TAMARA" } ,
1561 {"\\u010CABARKAPA TOMA\\u0160" } ,
1562 {"\\u010CABDARI\\u0106 NIKOLA" } ,
1563 {"\\u010CABDARI\\u0106 ZORICA" } ,
1564 {"\\u010CABI NANDOR" } ,
1565 {"\\u010CABOVI\\u0106 MILAN" } ,
1566 {"\\u010CABRADI AGNEZIJA" } ,
1567 {"\\u010CABRADI IVAN" } ,
1568 {"\\u010CABRADI JELENA" } ,
1569 {"\\u010CABRADI LJUBICA" } ,
1570 {"\\u010CABRADI STEVAN" } ,
1571 {"\\u010CABRDA MARTIN" } ,
1572 {"\\u010CABRILO BOGDAN" } ,
1573 {"\\u010CABRILO BRANISLAV" } ,
1574 {"\\u010CABRILO LAZAR" } ,
1575 {"\\u010CABRILO LJUBICA" } ,
1576 {"\\u010CABRILO SPASOJA" } ,
1577 {"\\u010CADE\\u0160 ZDENKA" } ,
1578 {"\\u010CADESKI BLAGOJE" } ,
1579 {"\\u010CADOVSKI VLADIMIR" } ,
1580 {"\\u010CAGLJEVI\\u0106 TOMA" } ,
1581 {"\\u010CAGOROVI\\u0106 VLADIMIR" } ,
1582 {"\\u010CAJA VANKA" } ,
1583 {"\\u010CAJI\\u0106 BOGOLJUB" } ,
1584 {"\\u010CAJI\\u0106 BORISLAV" } ,
1585 {"\\u010CAJI\\u0106 RADOSLAV" } ,
1586 {"\\u010CAK\\u0160IRAN MILADIN" } ,
1587 {"\\u010CAKAN EUGEN" } ,
1588 {"\\u010CAKAN EVGENIJE" } ,
1589 {"\\u010CAKAN IVAN" } ,
1590 {"\\u010CAKAN JULIJAN" } ,
1591 {"\\u010CAKAN MIHAJLO" } ,
1592 {"\\u010CAKAN STEVAN" } ,
1593 {"\\u010CAKAN VLADIMIR" } ,
1594 {"\\u010CAKAN VLADIMIR" } ,
1595 {"\\u010CAKAN VLADIMIR" } ,
1596 {"\\u010CAKARA ANA" } ,
1597 {"\\u010CAKAREVI\\u0106 MOMIR" } ,
1598 {"\\u010CAKAREVI\\u0106 NEDELJKO" } ,
1599 {"\\u010CAKI \\u0160ANDOR" } ,
1600 {"\\u010CAKI AMALIJA" } ,
1601 {"\\u010CAKI ANDRA\\u0160" } ,
1602 {"\\u010CAKI LADISLAV" } ,
1603 {"\\u010CAKI LAJO\\u0160" } ,
1604 {"\\u010CAKI LASLO" } ,
1609 int32_t i
= 0, j
= 0, k
= 0, buffSize
= 0, skSize
= 0, lowerSize
= 0, upperSize
= 0;
1610 int32_t arraySize
= UPRV_LENGTHOF(tests
);
1612 if(U_SUCCESS(status
) && coll
) {
1613 for(i
= 0; i
<arraySize
; i
++) {
1614 buffSize
= u_unescape(tests
[i
].original
, buffer
, 512);
1615 skSize
= ucol_getSortKey(coll
, buffer
, buffSize
, tests
[i
].key
, 512);
1618 qsort(tests
, arraySize
, sizeof(struct teststruct
), compare_teststruct
);
1620 for(i
= 0; i
< arraySize
-1; i
++) {
1621 for(j
= i
+1; j
< arraySize
; j
++) {
1622 lowerSize
= ucol_getBound(tests
[i
].key
, -1, UCOL_BOUND_LOWER
, 1, lower
, 512, &status
);
1623 upperSize
= ucol_getBound(tests
[j
].key
, -1, UCOL_BOUND_UPPER
, 1, upper
, 512, &status
);
1624 (void)lowerSize
; /* Suppress set but not used warning. */
1626 for(k
= i
; k
<= j
; k
++) {
1627 if(strcmp((const char *)lower
, (const char *)tests
[k
].key
) > 0) {
1628 log_err("Problem with lower! j = %i (%s vs %s)\n", k
, tests
[k
].original
, tests
[i
].original
);
1630 if(strcmp((const char *)upper
, (const char *)tests
[k
].key
) <= 0) {
1631 log_err("Problem with upper! j = %i (%s vs %s)\n", k
, tests
[k
].original
, tests
[j
].original
);
1639 for(i
= 0; i
< 1000; i
++) {
1640 lowerRND
= (rand()/(RAND_MAX
/arraySize
));
1641 upperRND
= lowerRND
+ (rand()/(RAND_MAX
/(arraySize
-lowerRND
)));
1643 lowerSize
= ucol_getBound(tests
[lowerRND
].key
, -1, UCOL_BOUND_LOWER
, 1, lower
, 512, &status
);
1644 upperSize
= ucol_getBound(tests
[upperRND
].key
, -1, UCOL_BOUND_UPPER_LONG
, 1, upper
, 512, &status
);
1646 for(j
= lowerRND
; j
<=upperRND
; j
++) {
1647 if(strcmp(lower
, tests
[j
].key
) > 0) {
1648 log_err("Problem with lower! j = %i (%s vs %s)\n", j
, tests
[j
].original
, tests
[lowerRND
].original
);
1650 if(strcmp(upper
, tests
[j
].key
) <= 0) {
1651 log_err("Problem with upper! j = %i (%s vs %s)\n", j
, tests
[j
].original
, tests
[upperRND
].original
);
1661 for(i
= 0; i
<UPRV_LENGTHOF(test
); i
++) {
1662 buffSize
= u_unescape(test
[i
], buffer
, 512);
1663 skSize
= ucol_getSortKey(coll
, buffer
, buffSize
, sortkey
, 512);
1664 lowerSize
= ucol_getBound(sortkey
, skSize
, UCOL_BOUND_LOWER
, 1, lower
, 512, &status
);
1665 upperSize
= ucol_getBound(sortkey
, skSize
, UCOL_BOUND_UPPER_LONG
, 1, upper
, 512, &status
);
1666 for(j
= i
+1; j
<UPRV_LENGTHOF(test
); j
++) {
1667 buffSize
= u_unescape(test
[j
], buffer
, 512);
1668 skSize
= ucol_getSortKey(coll
, buffer
, buffSize
, sortkey
, 512);
1669 if(strcmp((const char *)lower
, (const char *)sortkey
) > 0) {
1670 log_err("Problem with lower! i = %i, j = %i (%s vs %s)\n", i
, j
, test
[i
], test
[j
]);
1672 if(strcmp((const char *)upper
, (const char *)sortkey
) <= 0) {
1673 log_err("Problem with upper! i = %i, j = %i (%s vs %s)\n", i
, j
, test
[i
], test
[j
]);
1679 log_data_err("Couldn't open collator\n");
1684 static void doOverrunTest(UCollator
*coll
, const UChar
*uString
, int32_t strLen
) {
1685 int32_t skLen
= 0, skLen2
= 0;
1686 uint8_t sortKey
[256];
1688 uint8_t filler
= 0xFF;
1690 skLen
= ucol_getSortKey(coll
, uString
, strLen
, NULL
, 0);
1692 for(i
= 0; i
< skLen
; i
++) {
1693 memset(sortKey
, filler
, 256);
1694 skLen2
= ucol_getSortKey(coll
, uString
, strLen
, sortKey
, i
);
1695 if(skLen
!= skLen2
) {
1696 log_err("For buffer size %i, got different sortkey length. Expected %i got %i\n", i
, skLen
, skLen2
);
1698 for(j
= i
; j
< 256; j
++) {
1699 if(sortKey
[j
] != filler
) {
1700 log_err("Something run over index %i\n", j
);
/* j1865 reports that if a shorter buffer is passed
 * to get sort key, a buffer overrun happens in some
 * cases. This test tries to check this.
 */
1711 void TestSortKeyBufferOverrun(void) {
1712 UErrorCode status
= U_ZERO_ERROR
;
1713 const char* cString
= "A very Merry liTTle-lamB..";
1716 UCollator
*coll
= ucol_open("root", &status
);
1717 strLen
= u_unescape(cString
, uString
, 256);
1719 if(U_SUCCESS(status
)) {
1720 log_verbose("testing non ignorable\n");
1721 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_NON_IGNORABLE
, &status
);
1722 doOverrunTest(coll
, uString
, strLen
);
1724 log_verbose("testing shifted\n");
1725 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
1726 doOverrunTest(coll
, uString
, strLen
);
1728 log_verbose("testing shifted quaternary\n");
1729 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_QUATERNARY
, &status
);
1730 doOverrunTest(coll
, uString
, strLen
);
1732 log_verbose("testing with french secondaries\n");
1733 ucol_setAttribute(coll
, UCOL_FRENCH_COLLATION
, UCOL_ON
, &status
);
1734 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_TERTIARY
, &status
);
1735 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_NON_IGNORABLE
, &status
);
1736 doOverrunTest(coll
, uString
, strLen
);
1742 static void TestAttribute()
1744 UErrorCode error
= U_ZERO_ERROR
;
1745 UCollator
*coll
= ucol_open(NULL
, &error
);
1747 if (U_FAILURE(error
)) {
1748 log_err_status(error
, "Creation of default collator failed\n");
1752 ucol_setAttribute(coll
, UCOL_FRENCH_COLLATION
, UCOL_OFF
, &error
);
1753 if (ucol_getAttribute(coll
, UCOL_FRENCH_COLLATION
, &error
) != UCOL_OFF
||
1755 log_err_status(error
, "Setting and retrieving of the french collation failed\n");
1758 ucol_setAttribute(coll
, UCOL_FRENCH_COLLATION
, UCOL_ON
, &error
);
1759 if (ucol_getAttribute(coll
, UCOL_FRENCH_COLLATION
, &error
) != UCOL_ON
||
1761 log_err_status(error
, "Setting and retrieving of the french collation failed\n");
1764 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &error
);
1765 if (ucol_getAttribute(coll
, UCOL_ALTERNATE_HANDLING
, &error
) != UCOL_SHIFTED
||
1767 log_err_status(error
, "Setting and retrieving of the alternate handling failed\n");
1770 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_NON_IGNORABLE
, &error
);
1771 if (ucol_getAttribute(coll
, UCOL_ALTERNATE_HANDLING
, &error
) != UCOL_NON_IGNORABLE
||
1773 log_err_status(error
, "Setting and retrieving of the alternate handling failed\n");
1776 ucol_setAttribute(coll
, UCOL_CASE_FIRST
, UCOL_LOWER_FIRST
, &error
);
1777 if (ucol_getAttribute(coll
, UCOL_CASE_FIRST
, &error
) != UCOL_LOWER_FIRST
||
1779 log_err_status(error
, "Setting and retrieving of the case first attribute failed\n");
1782 ucol_setAttribute(coll
, UCOL_CASE_FIRST
, UCOL_UPPER_FIRST
, &error
);
1783 if (ucol_getAttribute(coll
, UCOL_CASE_FIRST
, &error
) != UCOL_UPPER_FIRST
||
1785 log_err_status(error
, "Setting and retrieving of the case first attribute failed\n");
1788 ucol_setAttribute(coll
, UCOL_CASE_LEVEL
, UCOL_ON
, &error
);
1789 if (ucol_getAttribute(coll
, UCOL_CASE_LEVEL
, &error
) != UCOL_ON
||
1791 log_err_status(error
, "Setting and retrieving of the case level attribute failed\n");
1794 ucol_setAttribute(coll
, UCOL_CASE_LEVEL
, UCOL_OFF
, &error
);
1795 if (ucol_getAttribute(coll
, UCOL_CASE_LEVEL
, &error
) != UCOL_OFF
||
1797 log_err_status(error
, "Setting and retrieving of the case level attribute failed\n");
1800 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &error
);
1801 if (ucol_getAttribute(coll
, UCOL_NORMALIZATION_MODE
, &error
) != UCOL_ON
||
1803 log_err_status(error
, "Setting and retrieving of the normalization on/off attribute failed\n");
1806 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &error
);
1807 if (ucol_getAttribute(coll
, UCOL_NORMALIZATION_MODE
, &error
) != UCOL_OFF
||
1809 log_err_status(error
, "Setting and retrieving of the normalization on/off attribute failed\n");
1812 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_PRIMARY
, &error
);
1813 if (ucol_getAttribute(coll
, UCOL_STRENGTH
, &error
) != UCOL_PRIMARY
||
1815 log_err_status(error
, "Setting and retrieving of the collation strength failed\n");
1818 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_SECONDARY
, &error
);
1819 if (ucol_getAttribute(coll
, UCOL_STRENGTH
, &error
) != UCOL_SECONDARY
||
1821 log_err_status(error
, "Setting and retrieving of the collation strength failed\n");
1824 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_TERTIARY
, &error
);
1825 if (ucol_getAttribute(coll
, UCOL_STRENGTH
, &error
) != UCOL_TERTIARY
||
1827 log_err_status(error
, "Setting and retrieving of the collation strength failed\n");
1830 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_QUATERNARY
, &error
);
1831 if (ucol_getAttribute(coll
, UCOL_STRENGTH
, &error
) != UCOL_QUATERNARY
||
1833 log_err_status(error
, "Setting and retrieving of the collation strength failed\n");
1836 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_IDENTICAL
, &error
);
1837 if (ucol_getAttribute(coll
, UCOL_STRENGTH
, &error
) != UCOL_IDENTICAL
||
1839 log_err_status(error
, "Setting and retrieving of the collation strength failed\n");
1845 void TestGetTailoredSet() {
1848 const char *tests
[20];
1851 { "&a < \\u212b", { "\\u212b", "A\\u030a", "\\u00c5" }, 3},
1852 { "& S < \\u0161 <<< \\u0160", { "\\u0161", "s\\u030C", "\\u0160", "S\\u030C" }, 4}
1855 int32_t i
= 0, j
= 0;
1856 UErrorCode status
= U_ZERO_ERROR
;
1859 UCollator
*coll
= NULL
;
1861 int32_t buffLen
= 0;
1864 for(i
= 0; i
< UPRV_LENGTHOF(setTest
); i
++) {
1865 buffLen
= u_unescape(setTest
[i
].rules
, buff
, 1024);
1866 coll
= ucol_openRules(buff
, buffLen
, UCOL_DEFAULT
, UCOL_DEFAULT
, &pError
, &status
);
1867 if(U_SUCCESS(status
)) {
1868 set
= ucol_getTailoredSet(coll
, &status
);
1869 if(uset_size(set
) < setTest
[i
].testsize
) {
1870 log_err("Tailored set size smaller (%d) than expected (%d)\n", uset_size(set
), setTest
[i
].testsize
);
1872 for(j
= 0; j
< setTest
[i
].testsize
; j
++) {
1873 buffLen
= u_unescape(setTest
[i
].tests
[j
], buff
, 1024);
1874 if(!uset_containsString(set
, buff
, buffLen
)) {
1875 log_err("Tailored set doesn't contain %s... It should\n", setTest
[i
].tests
[j
]);
1880 log_err_status(status
, "Couldn't open collator with rules %s\n", setTest
[i
].rules
);
/**
 * Compares two zero-terminated sort keys over the length of the shorter one.
 *
 * Returns the memcmp() result for that common prefix, so a key that is a
 * prefix of the other one compares as equal (0).
 */
static int tMemCmp(const uint8_t *first, const uint8_t *second) {
    const int32_t lenA = (int32_t)strlen((const char *)first);
    const int32_t lenB = (int32_t)strlen((const char *)second);
    /* Smaller of the two lengths, spelled out instead of calling a min helper. */
    const int32_t common = (lenB < lenA) ? lenB : lenA;

    return memcmp(first, second, (size_t)common);
}
/*
 * Strength names for log messages. TestMergeSortKeys() indexes this table
 * with the strength value itself and uses index 4 for any strength above
 * UCOL_QUATERNARY.
 */
1891 static const char * strengthsC
[] = {
1899 void TestMergeSortKeys(void) {
1900 UErrorCode status
= U_ZERO_ERROR
;
1901 UCollator
*coll
= ucol_open("en", &status
);
1902 if(U_SUCCESS(status
)) {
1904 const char* cases
[] = {
1909 uint32_t casesSize
= UPRV_LENGTHOF(cases
);
1910 const char* prefix
= "foo";
1911 const char* suffix
= "egg";
1912 char outBuff1
[256], outBuff2
[256];
1914 uint8_t **sortkeys
= (uint8_t **)malloc(casesSize
*sizeof(uint8_t *));
1915 uint8_t **mergedPrefixkeys
= (uint8_t **)malloc(casesSize
*sizeof(uint8_t *));
1916 uint8_t **mergedSuffixkeys
= (uint8_t **)malloc(casesSize
*sizeof(uint8_t *));
1917 uint32_t *sortKeysLen
= (uint32_t *)malloc(casesSize
*sizeof(uint32_t));
1918 uint8_t prefixKey
[256], suffixKey
[256];
1919 uint32_t prefixKeyLen
= 0, suffixKeyLen
= 0, i
= 0;
1921 uint32_t unescapedLen
= 0, l1
= 0, l2
= 0;
1922 UColAttributeValue strength
;
1924 log_verbose("ucol_mergeSortkeys test\n");
1925 log_verbose("Testing order of the test cases\n");
1926 genericLocaleStarter("en", cases
, casesSize
);
1928 for(i
= 0; i
<casesSize
; i
++) {
1929 sortkeys
[i
] = (uint8_t *)malloc(256*sizeof(uint8_t));
1930 mergedPrefixkeys
[i
] = (uint8_t *)malloc(256*sizeof(uint8_t));
1931 mergedSuffixkeys
[i
] = (uint8_t *)malloc(256*sizeof(uint8_t));
1934 unescapedLen
= u_unescape(prefix
, buffer
, 256);
1935 prefixKeyLen
= ucol_getSortKey(coll
, buffer
, unescapedLen
, prefixKey
, 256);
1937 unescapedLen
= u_unescape(suffix
, buffer
, 256);
1938 suffixKeyLen
= ucol_getSortKey(coll
, buffer
, unescapedLen
, suffixKey
, 256);
1940 log_verbose("Massaging data with prefixes and different strengths\n");
1941 strength
= UCOL_PRIMARY
;
1942 while(strength
<= UCOL_IDENTICAL
) {
1943 log_verbose("Strength %s\n", strengthsC
[strength
<=UCOL_QUATERNARY
?strength
:4]);
1944 ucol_setAttribute(coll
, UCOL_STRENGTH
, strength
, &status
);
1945 for(i
= 0; i
<casesSize
; i
++) {
1946 unescapedLen
= u_unescape(cases
[i
], buffer
, 256);
1947 sortKeysLen
[i
] = ucol_getSortKey(coll
, buffer
, unescapedLen
, sortkeys
[i
], 256);
1948 ucol_mergeSortkeys(prefixKey
, prefixKeyLen
, sortkeys
[i
], sortKeysLen
[i
], mergedPrefixkeys
[i
], 256);
1949 ucol_mergeSortkeys(sortkeys
[i
], sortKeysLen
[i
], suffixKey
, suffixKeyLen
, mergedSuffixkeys
[i
], 256);
1951 if(tMemCmp(mergedPrefixkeys
[i
-1], mergedPrefixkeys
[i
]) >= 0) {
1952 log_err("Error while comparing prefixed keys @ strength %s:\n", strengthsC
[strength
<=UCOL_QUATERNARY
?strength
:4]);
1954 ucol_sortKeyToString(coll
, mergedPrefixkeys
[i
-1], outBuff1
, l1
),
1955 ucol_sortKeyToString(coll
, mergedPrefixkeys
[i
], outBuff2
, l2
));
1957 if(tMemCmp(mergedSuffixkeys
[i
-1], mergedSuffixkeys
[i
]) >= 0) {
1958 log_err("Error while comparing suffixed keys @ strength %s:\n", strengthsC
[strength
<=UCOL_QUATERNARY
?strength
:4]);
1960 ucol_sortKeyToString(coll
, mergedSuffixkeys
[i
-1], outBuff1
, l1
),
1961 ucol_sortKeyToString(coll
, mergedSuffixkeys
[i
], outBuff2
, l2
));
1965 if(strength
== UCOL_QUATERNARY
) {
1966 strength
= UCOL_IDENTICAL
;
1973 uint8_t smallBuf
[3];
1974 uint32_t reqLen
= 0;
1975 log_verbose("testing buffer overflow\n");
1976 reqLen
= ucol_mergeSortkeys(prefixKey
, prefixKeyLen
, suffixKey
, suffixKeyLen
, smallBuf
, 3);
1977 if(reqLen
!= (prefixKeyLen
+suffixKeyLen
)) {
1978 log_err("Wrong preflight size for merged sortkey\n");
1984 uint8_t emptyKey
[20], abcKey
[50], mergedKey
[100];
1985 int32_t emptyKeyLen
= 0, abcKeyLen
= 0, mergedKeyLen
= 0;
1987 log_verbose("testing merging with sortkeys generated for empty strings\n");
1988 emptyKeyLen
= ucol_getSortKey(coll
, &empty
, 0, emptyKey
, 20);
1989 unescapedLen
= u_unescape(cases
[0], buffer
, 256);
1990 abcKeyLen
= ucol_getSortKey(coll
, buffer
, unescapedLen
, abcKey
, 50);
1991 mergedKeyLen
= ucol_mergeSortkeys(emptyKey
, emptyKeyLen
, abcKey
, abcKeyLen
, mergedKey
, 100);
1992 if(mergedKey
[0] != 2) {
1993 log_err("Empty sortkey didn't produce a level separator\n");
1995 /* try with zeros */
1996 mergedKeyLen
= ucol_mergeSortkeys(emptyKey
, 0, abcKey
, abcKeyLen
, mergedKey
, 100);
1997 if(mergedKeyLen
!= 0 || mergedKey
[0] != 0) {
1998 log_err("Empty key didn't produce null mergedKey\n");
2000 mergedKeyLen
= ucol_mergeSortkeys(abcKey
, abcKeyLen
, emptyKey
, 0, mergedKey
, 100);
2001 if(mergedKeyLen
!= 0 || mergedKey
[0] != 0) {
2002 log_err("Empty key didn't produce null mergedKey\n");
2007 for(i
= 0; i
<casesSize
; i
++) {
2009 free(mergedPrefixkeys
[i
]);
2010 free(mergedSuffixkeys
[i
]);
2013 free(mergedPrefixkeys
);
2014 free(mergedSuffixkeys
);
2017 /* need to finish this up */
2019 log_data_err("Couldn't open collator");
2022 static void TestShortString(void)
2026 const char *expectedOutput
;
2028 UErrorCode expectedStatus
;
2029 int32_t expectedOffset
;
2030 uint32_t expectedIdentifier
;
2033 * Note: The first test case sets variableTop to the dollar sign '$'.
2034 * We have agreed to drop support for variableTop in ucol_getShortDefinitionString(),
2035 * related to ticket #10372 "deprecate collation APIs for short definition strings",
2036 * and because it did not work for most spaces/punctuation/symbols,
2037 * as documented in ticket #10386 "collation short definition strings issues":
2038 * The old code wrote only 3 hex digits for primary weights below 0x0FFF,
2039 * which is a syntax error, and then failed to normalize the result.
2041 * The "B2700" was removed from the expected result ("B2700_KPHONEBOOK_LDE").
2043 * Previously, this test had to be adjusted for root collator changes because the
2044 * primary weight of the variable top character naturally changed
2045 * but was baked into the expected result.
2047 {"LDE_RDE_KPHONEBOOK_T0024_ZLATN","KPHONEBOOK_LDE", "de@collation=phonebook", U_USING_FALLBACK_WARNING
, 0, 0 },
2049 {"LEN_RUS_NO_AS_S4","AS_LROOT_NO_S4", NULL
, U_USING_DEFAULT_WARNING
, 0, 0 },
2050 // uloc_canonicalize("de__PHONEBOOK") used to return "de@collation=phonebook"
2051 // and we got U_ZERO_ERROR.
2052 // Since ICU-20187 "drop support for long-obsolete locale ID variants..."
2053 // we actually load the "de__PHONEBOOK" bundle and fall back to "de".
2054 {"LDE_VPHONEBOOK_EO_SI","EO_KPHONEBOOK_LDE_SI", "de@collation=phonebook", U_USING_FALLBACK_WARNING
, 0, 0 },
2055 {"LDE_Kphonebook","KPHONEBOOK_LDE", "de@collation=phonebook", U_ZERO_ERROR
, 0, 0 },
2056 {"Xqde_DE@collation=phonebookq_S3_EX","KPHONEBOOK_LDE", "de@collation=phonebook", U_USING_FALLBACK_WARNING
, 0, 0 },
2057 {"LFR_FO", "FO_LROOT", NULL
, U_USING_DEFAULT_WARNING
, 0, 0 },
2058 {"SO_LX_AS", "", NULL
, U_ILLEGAL_ARGUMENT_ERROR
, 8, 0 },
2059 {"S3_ASS_MMM", "", NULL
, U_ILLEGAL_ARGUMENT_ERROR
, 5, 0 }
2063 UCollator
*coll
= NULL
, *fromNormalized
= NULL
;
2064 UParseError parseError
;
2065 UErrorCode status
= U_ZERO_ERROR
;
2066 char fromShortBuffer
[256], normalizedBuffer
[256], fromNormalizedBuffer
[256];
2067 const char* locale
= NULL
;
2070 for(i
= 0; i
< UPRV_LENGTHOF(testCases
); i
++) {
2071 status
= U_ZERO_ERROR
;
2072 if(testCases
[i
].locale
) {
2073 locale
= testCases
[i
].locale
;
2078 coll
= ucol_openFromShortString(testCases
[i
].input
, FALSE
, &parseError
, &status
);
2079 if(status
!= testCases
[i
].expectedStatus
) {
2080 log_err_status(status
, "Got status '%s' that is different from expected '%s' for '%s'\n",
2081 u_errorName(status
), u_errorName(testCases
[i
].expectedStatus
), testCases
[i
].input
);
2085 if(U_SUCCESS(status
)) {
2086 ucol_getShortDefinitionString(coll
, locale
, fromShortBuffer
, 256, &status
);
2088 if(strcmp(fromShortBuffer
, testCases
[i
].expectedOutput
)) {
2089 log_err("Got short string '%s' from the collator. Expected '%s' for input '%s'\n",
2090 fromShortBuffer
, testCases
[i
].expectedOutput
, testCases
[i
].input
);
2093 ucol_normalizeShortDefinitionString(testCases
[i
].input
, normalizedBuffer
, 256, &parseError
, &status
);
2094 fromNormalized
= ucol_openFromShortString(normalizedBuffer
, FALSE
, &parseError
, &status
);
2095 ucol_getShortDefinitionString(fromNormalized
, locale
, fromNormalizedBuffer
, 256, &status
);
2097 if(strcmp(fromShortBuffer
, fromNormalizedBuffer
)) {
2098 log_err("Strings obtained from collators instantiated by short string ('%s') and from normalized string ('%s') differ\n",
2099 fromShortBuffer
, fromNormalizedBuffer
);
2103 if(!ucol_equals(coll
, fromNormalized
)) {
2104 log_err("Collator from short string ('%s') differs from one obtained through a normalized version ('%s')\n",
2105 testCases
[i
].input
, normalizedBuffer
);
2108 ucol_close(fromNormalized
);
2112 if(parseError
.offset
!= testCases
[i
].expectedOffset
) {
2113 log_err("Got parse error offset %i, but expected %i instead for '%s'\n",
2114 parseError
.offset
, testCases
[i
].expectedOffset
, testCases
[i
].input
);
2122 doSetsTest(const char *locale
, const USet
*ref
, USet
*set
, const char* inSet
, const char* outSet
, UErrorCode
*status
) {
2123 UChar buffer
[65536];
2127 bufLen
= u_unescape(inSet
, buffer
, 512);
2128 uset_applyPattern(set
, buffer
, bufLen
, 0, status
);
2129 if(U_FAILURE(*status
)) {
2130 log_err("%s: Failure setting pattern %s\n", locale
, u_errorName(*status
));
2133 if(!uset_containsAll(ref
, set
)) {
2134 log_err("%s: Some stuff from %s is not present in the set\n", locale
, inSet
);
2135 uset_removeAll(set
, ref
);
2136 bufLen
= uset_toPattern(set
, buffer
, UPRV_LENGTHOF(buffer
), TRUE
, status
);
2137 log_info(" missing: %s\n", aescstrdup(buffer
, bufLen
));
2138 bufLen
= uset_toPattern(ref
, buffer
, UPRV_LENGTHOF(buffer
), TRUE
, status
);
2139 log_info(" total: size=%i %s\n", uset_getItemCount(ref
), aescstrdup(buffer
, bufLen
));
2143 bufLen
= u_unescape(outSet
, buffer
, 512);
2144 uset_applyPattern(set
, buffer
, bufLen
, 0, status
);
2145 if(U_FAILURE(*status
)) {
2146 log_err("%s: Failure setting pattern %s\n", locale
, u_errorName(*status
));
2149 if(!uset_containsNone(ref
, set
)) {
2150 log_err("%s: Some stuff from %s is present in the set\n", locale
, outSet
);
/*
 * Tests ucol_getContractionsAndExpansions() and ucol_getUnsafeSet().
 *
 * For each entry of tests[] a collator is opened for tests[i].locale. The
 * contractions and expansions are fetched into `conts` and `exp` and each is
 * checked with doSetsTest() against a pattern that must be contained
 * (inConts / inExp) and a pattern that must be absent (outConts / outExp).
 * The unsafe set is then fetched into `conts` and checked the same way
 * against unsafeCodeUnits / safeCodeUnits.
 * After each check the set is converted with uset_toPattern() into `buffer`;
 * the "error %s. %i" message and the status reset that follow appear to
 * belong to the failure branch of that conversion.
 */
2158 TestGetContractionsAndUnsafes(void)
2162 const char* inConts
;
2163 const char* outConts
;
2166 const char* unsafeCodeUnits
;
2167 const char* safeCodeUnits
;
2170 "[{\\u0418\\u0306}{\\u0438\\u0306}]",
2175 "[aAbB\\u0430\\u0410\\u0433\\u0413]"
2178 "[{\\u0406\\u0308}{\\u0456\\u0308}{\\u0418\\u0306}{\\u0438\\u0306}]",
2179 "[\\u0407\\u0419\\u0439\\u0457]",
2182 "[\\u0406\\u0456\\u0418\\u0438]",
2186 "[{C\\u0301}{C\\u030C}{C\\u0341}{DZ\\u030C}{Dz\\u030C}{D\\u017D}{D\\u017E}{lj}{nj}]",
2187 "[{\\u309d\\u3099}{\\u30fd\\u3099}]",
2195 * The "collv2" builder omits mappings if the collator maps their
2196 * character sequences to the same CEs.
2197 * For example, it omits Japanese contractions for NFD forms
2198 * of the voiced iteration mark (U+309E = U+309D + U+3099), such as
2199 * {\\u3053\\u3099\\u309D\\u3099}{\\u3053\\u309D\\u3099}
2200 * {\\u30B3\\u3099\\u30FD\\u3099}{\\u30B3\\u30FD\\u3099}.
2201 * It does add mappings for the precomposed forms.
2203 "[{\\u3053\\u3099\\u309D}{\\u3053\\u3099\\u309E}{\\u3053\\u3099\\u30FC}"
2204 "{\\u3053\\u309D}{\\u3053\\u309E}{\\u3053\\u30FC}"
2205 "{\\u30B3\\u3099\\u30FC}{\\u30B3\\u3099\\u30FD}{\\u30B3\\u3099\\u30FE}"
2206 "{\\u30B3\\u30FC}{\\u30B3\\u30FD}{\\u30B3\\u30FE}]",
2207 "[{\\u30FD\\u3099}{\\u309D\\u3099}{\\u3053\\u3099}{\\u30B3\\u3099}{lj}{nj}]",
2215 UErrorCode status
= U_ZERO_ERROR
;
2216 UCollator
*coll
= NULL
;
2218 int32_t noConts
= 0;
2219 USet
*conts
= uset_open(0,0);
2220 USet
*exp
= uset_open(0, 0);
2221 USet
*set
= uset_open(0,0);
2222 int32_t setBufferLen
= 65536;
2223 UChar buffer
[65536];
2226 for(i
= 0; i
< UPRV_LENGTHOF(tests
); i
++) {
2227 log_verbose("Testing locale: %s\n", tests
[i
].locale
);
2228 coll
= ucol_open(tests
[i
].locale
, &status
);
2229 if (coll
== NULL
|| U_FAILURE(status
)) {
2230 log_err_status(status
, "Unable to open collator for locale %s ==> %s\n", tests
[i
].locale
, u_errorName(status
));
/* TRUE is passed as the fourth argument of ucol_getContractionsAndExpansions(). */
2233 ucol_getContractionsAndExpansions(coll
, conts
, exp
, TRUE
, &status
);
2234 doSetsTest(tests
[i
].locale
, conts
, set
, tests
[i
].inConts
, tests
[i
].outConts
, &status
);
2235 setLen
= uset_toPattern(conts
, buffer
, setBufferLen
, TRUE
, &status
);
2236 if(U_SUCCESS(status
)) {
2237 /*log_verbose("Contractions %i: %s\n", uset_getItemCount(conts), aescstrdup(buffer, setLen));*/
2239 log_err("error %s. %i\n", u_errorName(status
), setLen
);
2240 status
= U_ZERO_ERROR
;
2242 doSetsTest(tests
[i
].locale
, exp
, set
, tests
[i
].inExp
, tests
[i
].outExp
, &status
);
2243 setLen
= uset_toPattern(exp
, buffer
, setBufferLen
, TRUE
, &status
);
2244 if(U_SUCCESS(status
)) {
2245 /*log_verbose("Expansions %i: %s\n", uset_getItemCount(exp), aescstrdup(buffer, setLen));*/
2247 log_err("error %s. %i\n", u_errorName(status
), setLen
);
2248 status
= U_ZERO_ERROR
;
/* From here on `conts` is reused to receive the unsafe set. */
2251 noConts
= ucol_getUnsafeSet(coll
, conts
, &status
);
2252 (void)noConts
; /* Suppress set but not used warning */
2253 doSetsTest(tests
[i
].locale
, conts
, set
, tests
[i
].unsafeCodeUnits
, tests
[i
].safeCodeUnits
, &status
);
2254 setLen
= uset_toPattern(conts
, buffer
, setBufferLen
, TRUE
, &status
);
2255 if(U_SUCCESS(status
)) {
/* NOTE(review): the pattern logged below comes from `conts`, but the item
 * count is taken from `exp`; this looks like it should be
 * uset_getItemCount(conts) - confirm and fix. */
2256 log_verbose("Unsafe %i: %s\n", uset_getItemCount(exp
), aescstrdup(buffer
, setLen
));
2258 log_err("error %s. %i\n", u_errorName(status
), setLen
);
2259 status
= U_ZERO_ERROR
;
2272 TestOpenBinary(void)
2275 * ucol_openBinary() documents:
2276 * "The API also takes a base collator which usually should be UCA."
2278 * "Currently it cannot be NULL."
2280 * However, the check for NULL was commented out in ICU 3.4 (r18149).
2281 * Ticket #4355 requested "Make collation work with minimal data.
2282 * Optionally without UCA, with relevant parts of UCA copied into the tailoring table."
2284 * The ICU team agreed with ticket #10517 "require base collator in ucol_openBinary() etc."
2285 * to require base!=NULL again.
2287 #define OPEN_BINARY_ACCEPTS_NULL_BASE 0
2288 UErrorCode status
= U_ZERO_ERROR
;
2290 char rule[] = "&h < d < c < b";
2291 char *wUCA[] = { "a", "h", "d", "c", "b", "i" };
2292 char *noUCA[] = {"d", "c", "b", "a", "h", "i" };
2294 /* we have to use Cyrillic letters because latin-1 always gets copied */
2295 const char rule
[] = "&\\u0452 < \\u0434 < \\u0433 < \\u0432"; /* &dje < d < g < v */
2296 const char *wUCA
[] = { "\\u0430", "\\u0452", "\\u0434", "\\u0433", "\\u0432", "\\u0435" }; /* a, dje, d, g, v, e */
2297 #if OPEN_BINARY_ACCEPTS_NULL_BASE
2298 const char *noUCA
[] = {"\\u0434", "\\u0433", "\\u0432", "\\u0430", "\\u0435", "\\u0452" }; /* d, g, v, a, e, dje */
2302 int32_t uRulesLen
= u_unescape(rule
, uRules
, 256);
2304 UCollator
*coll
= ucol_openRules(uRules
, uRulesLen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
2305 UCollator
*UCA
= NULL
;
2306 UCollator
*cloneNOUCA
= NULL
, *cloneWUCA
= NULL
;
2308 uint8_t imageBuffer
[32768];
2309 uint8_t *image
= imageBuffer
;
2310 int32_t imageBufferCapacity
= 32768;
2314 if((coll
==NULL
)||(U_FAILURE(status
))) {
2315 log_data_err("could not load collators or error occured: %s\n",
2316 u_errorName(status
));
2319 UCA
= ucol_open("root", &status
);
2320 if((UCA
==NULL
)||(U_FAILURE(status
))) {
2321 log_data_err("could not load UCA collator or error occured: %s\n",
2322 u_errorName(status
));
2325 imageSize
= ucol_cloneBinary(coll
, image
, imageBufferCapacity
, &status
);
2326 if(U_FAILURE(status
)) {
2327 image
= (uint8_t *)malloc(imageSize
*sizeof(uint8_t));
2328 status
= U_ZERO_ERROR
;
2329 imageSize
= ucol_cloneBinary(coll
, imageBuffer
, imageSize
, &status
);
2333 cloneWUCA
= ucol_openBinary(image
, imageSize
, UCA
, &status
);
2334 cloneNOUCA
= ucol_openBinary(image
, imageSize
, NULL
, &status
);
2335 #if !OPEN_BINARY_ACCEPTS_NULL_BASE
2336 if(status
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2337 log_err("ucol_openBinary(base=NULL) unexpectedly did not fail - %s\n", u_errorName(status
));
2341 genericOrderingTest(coll
, wUCA
, UPRV_LENGTHOF(wUCA
));
2343 genericOrderingTest(cloneWUCA
, wUCA
, UPRV_LENGTHOF(wUCA
));
2344 #if OPEN_BINARY_ACCEPTS_NULL_BASE
2345 genericOrderingTest(cloneNOUCA
, noUCA
, UPRV_LENGTHOF(noUCA
));
2348 if(image
!= imageBuffer
) {
2352 ucol_close(cloneNOUCA
);
2353 ucol_close(cloneWUCA
);
2357 static void TestDefault(void) {
2358 /* Tests for code coverage. */
2359 UErrorCode status
= U_ZERO_ERROR
;
2360 UCollator
*coll
= ucol_open("es@collation=pinyin", &status
);
2361 if (coll
== NULL
|| status
== U_FILE_ACCESS_ERROR
) {
2362 log_data_err("Unable to open collator es@collation=pinyin\n");
2365 if (status
!= U_USING_DEFAULT_WARNING
) {
2366 /* What do you mean that you know about using pinyin collation in Spanish!? This should be in the zh locale. */
2367 log_err("es@collation=pinyin should return U_USING_DEFAULT_WARNING, but returned %s\n", u_errorName(status
));
2370 if (ucol_getKeywordValues("funky", &status
) != NULL
) {
2371 log_err("Collators should not know about the funky keyword.\n");
2373 if (status
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2374 log_err("funky keyword didn't fail as expected %s\n", u_errorName(status
));
2376 if (ucol_getKeywordValues("collation", &status
) != NULL
) {
2377 log_err("ucol_getKeywordValues should not work when given a bad status.\n");
/*
 * Coverage test: opens a collator for "zh_TW@collation=default".
 * A failure status is only reported through log_info(); the remaining branch
 * handles a success status other than U_USING_FALLBACK_WARNING.
 */
2381 static void TestDefaultKeyword(void) {
2382 /* Tests for code coverage. */
2383 UErrorCode status
= U_ZERO_ERROR
;
2384 const char *loc
= "zh_TW@collation=default";
2385 UCollator
*coll
= ucol_open(loc
, &status
);
2386 if(U_FAILURE(status
)) {
2387 log_info("Warning: ucol_open(%s, ...) returned %s, at least it didn't crash.\n", loc
, u_errorName(status
));
2388 } else if (status
!= U_USING_FALLBACK_WARNING
) {
2389 /* Hmm, skip the following test for CLDR 1.9 data and/or ICU 4.6, no longer seems to apply */
/* NOTE(review): the comment above says the check below is skipped, but no
 * guard around the log_err is visible here - confirm that it is compiled out. */
2391 log_err("ucol_open(%s, ...) should return an error or some sort of U_USING_FALLBACK_WARNING, but returned %s\n", loc
, u_errorName(status
));
2397 static UBool
uenum_contains(UEnumeration
*e
, const char *s
, UErrorCode
*status
) {
2399 uenum_reset(e
, status
);
2400 while(((t
= uenum_next(e
, NULL
, status
)) != NULL
) && U_SUCCESS(*status
)) {
2401 if(uprv_strcmp(s
, t
) == 0) {
2408 static void TestGetKeywordValuesForLocale(void) {
2409 #define MAX_NUMBER_OF_KEYWORDS 9
2410 const char *PREFERRED
[][MAX_NUMBER_OF_KEYWORDS
+1] = {
2411 { "und", "standard", "eor", "search", NULL
, NULL
, NULL
, NULL
, NULL
, NULL
},
2412 { "en_US", "standard", "eor", "search", NULL
, NULL
, NULL
, NULL
, NULL
, NULL
},
2413 { "en_029", "standard", "eor", "search", NULL
, NULL
, NULL
, NULL
, NULL
, NULL
},
2414 { "de_DE", "standard", "phonebook", "search", "eor", NULL
, NULL
, NULL
, NULL
, NULL
},
2415 { "de_Latn_DE", "standard", "phonebook", "search", "eor", NULL
, NULL
, NULL
, NULL
, NULL
},
2416 { "zh", "pinyin", "stroke", "eor", "search", "standard", NULL
},
2417 { "zh_Hans", "pinyin", "stroke", "eor", "search", "standard", NULL
},
2418 { "zh_CN", "pinyin", "stroke", "eor", "search", "standard", NULL
},
2419 { "zh_Hant", "stroke", "pinyin", "eor", "search", "standard", NULL
},
2420 { "zh_TW", "stroke", "pinyin", "eor", "search", "standard", NULL
},
2421 { "zh__PINYIN", "pinyin", "stroke", "eor", "search", "standard", NULL
},
2422 { "es_ES", "standard", "search", "traditional", "eor", NULL
, NULL
, NULL
, NULL
, NULL
},
2423 { "es__TRADITIONAL","traditional", "search", "standard", "eor", NULL
, NULL
, NULL
, NULL
, NULL
},
2424 { "und@collation=phonebook", "standard", "eor", "search", NULL
, NULL
, NULL
, NULL
, NULL
, NULL
},
2425 { "de_DE@collation=pinyin", "standard", "phonebook", "search", "eor", NULL
, NULL
, NULL
, NULL
, NULL
},
2426 { "zzz@collation=xxx", "standard", "eor", "search", NULL
, NULL
, NULL
, NULL
, NULL
, NULL
}
2429 UErrorCode status
= U_ZERO_ERROR
;
2430 UEnumeration
*keywordValues
= NULL
;
2432 const char *locale
= NULL
, *value
= NULL
;
2433 UBool errorOccurred
= FALSE
;
2435 for (i
= 0; i
< UPRV_LENGTHOF(PREFERRED
) && !errorOccurred
; i
++) {
2436 locale
= PREFERRED
[i
][0];
2440 keywordValues
= ucol_getKeywordValuesForLocale("collation", locale
, TRUE
, &status
);
2441 if (keywordValues
== NULL
|| U_FAILURE(status
)) {
2442 log_err_status(status
, "Error getting keyword values: %s\n", u_errorName(status
));
2445 size
= uenum_count(keywordValues
, &status
);
2448 for (n
= 0; (value
= PREFERRED
[i
][n
+1]) != NULL
; n
++) {
2449 if (!uenum_contains(keywordValues
, value
, &status
)) {
2450 if (U_SUCCESS(status
)) {
2451 log_err("Keyword value \"%s\" missing for locale: %s\n", value
, locale
);
2453 log_err("While getting keyword value from locale: %s got this error: %s\n", locale
, u_errorName(status
));
2454 errorOccurred
= TRUE
;
2459 uenum_close(keywordValues
);
2460 keywordValues
= NULL
;
2462 uenum_close(keywordValues
);
2465 static void TestStrcollNull(void) {
2466 UErrorCode status
= U_ZERO_ERROR
;
2469 const UChar u16asc
[] = {0x0049, 0x0042, 0x004D, 0};
2470 const int32_t u16ascLen
= 3;
2472 const UChar u16han
[] = {0x5c71, 0x5ddd, 0};
2473 const int32_t u16hanLen
= 2;
2475 const char *u8asc
= "\x49\x42\x4D";
2476 const int32_t u8ascLen
= 3;
2478 const char *u8han
= "\xE5\xB1\xB1\xE5\xB7\x9D";
2479 const int32_t u8hanLen
= 6;
2481 coll
= ucol_open(NULL
, &status
);
2482 if (U_FAILURE(status
)) {
2483 log_err_status(status
, "Default Collator creation failed.: %s\n", myErrorName(status
));
2488 if (ucol_strcoll(coll
, NULL
, 0, NULL
, 0) != 0) {
2489 log_err("ERROR : ucol_strcoll NULL/0 and NULL/0");
2492 if (ucol_strcoll(coll
, NULL
, -1, NULL
, 0) != 0) {
2493 /* No error arg, should return equal without crash */
2494 log_err("ERROR : ucol_strcoll NULL/-1 and NULL/0");
2497 if (ucol_strcoll(coll
, u16asc
, -1, NULL
, 10) != 0) {
2498 /* No error arg, should return equal without crash */
2499 log_err("ERROR : ucol_strcoll u16asc/u16ascLen and NULL/10");
2502 if (ucol_strcoll(coll
, u16asc
, -1, NULL
, 0) <= 0) {
2503 log_err("ERROR : ucol_strcoll u16asc/-1 and NULL/0");
2505 if (ucol_strcoll(coll
, NULL
, 0, u16asc
, -1) >= 0) {
2506 log_err("ERROR : ucol_strcoll NULL/0 and u16asc/-1");
2508 if (ucol_strcoll(coll
, u16asc
, u16ascLen
, NULL
, 0) <= 0) {
2509 log_err("ERROR : ucol_strcoll u16asc/u16ascLen and NULL/0");
2512 if (ucol_strcoll(coll
, u16han
, -1, NULL
, 0) <= 0) {
2513 log_err("ERROR : ucol_strcoll u16han/-1 and NULL/0");
2515 if (ucol_strcoll(coll
, NULL
, 0, u16han
, -1) >= 0) {
2516 log_err("ERROR : ucol_strcoll NULL/0 and u16han/-1");
2518 if (ucol_strcoll(coll
, NULL
, 0, u16han
, u16hanLen
) >= 0) {
2519 log_err("ERROR : ucol_strcoll NULL/0 and u16han/u16hanLen");
2523 status
= U_ZERO_ERROR
;
2524 if (ucol_strcollUTF8(coll
, NULL
, 0, NULL
, 0, &status
) != 0 || U_FAILURE(status
)) {
2525 log_err("ERROR : ucol_strcollUTF8 NULL/0 and NULL/0");
2527 status
= U_ZERO_ERROR
;
2528 ucol_strcollUTF8(coll
, NULL
, -1, NULL
, 0, &status
);
2529 if (status
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2530 log_err("ERROR: ucol_strcollUTF8 NULL/-1 and NULL/0, should return U_ILLEGAL_ARGUMENT_ERROR");
2532 status
= U_ZERO_ERROR
;
2533 ucol_strcollUTF8(coll
, u8asc
, u8ascLen
, NULL
, 10, &status
);
2534 if (status
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2535 log_err("ERROR: ucol_strcollUTF8 u8asc/u8ascLen and NULL/10, should return U_ILLEGAL_ARGUMENT_ERROR");
2538 status
= U_ZERO_ERROR
;
2539 if (ucol_strcollUTF8(coll
, u8asc
, -1, NULL
, 0, &status
) <= 0 || U_FAILURE(status
)) {
2540 log_err("ERROR : ucol_strcollUTF8 u8asc/-1 and NULL/0");
2542 status
= U_ZERO_ERROR
;
2543 if (ucol_strcollUTF8(coll
, NULL
, 0, u8asc
, -1, &status
) >= 0 || U_FAILURE(status
)) {
2544 log_err("ERROR : ucol_strcollUTF8 NULL/0 and u8asc/-1");
2546 status
= U_ZERO_ERROR
;
2547 if (ucol_strcollUTF8(coll
, u8asc
, u8ascLen
, NULL
, 0, &status
) <= 0 || U_FAILURE(status
)) {
2548 log_err("ERROR : ucol_strcollUTF8 u8asc/u8ascLen and NULL/0");
2551 status
= U_ZERO_ERROR
;
2552 if (ucol_strcollUTF8(coll
, u8han
, -1, NULL
, 0, &status
) <= 0 || U_FAILURE(status
)) {
2553 log_err("ERROR : ucol_strcollUTF8 u8han/-1 and NULL/0");
2555 status
= U_ZERO_ERROR
;
2556 if (ucol_strcollUTF8(coll
, NULL
, 0, u8han
, -1, &status
) >= 0 || U_FAILURE(status
)) {
2557 log_err("ERROR : ucol_strcollUTF8 NULL/0 and u8han/-1");
2559 status
= U_ZERO_ERROR
;
2560 if (ucol_strcollUTF8(coll
, NULL
, 0, u8han
, u8hanLen
, &status
) >= 0 || U_FAILURE(status
)) {
2561 log_err("ERROR : ucol_strcollUTF8 NULL/0 and u8han/u8hanLen");
2567 #endif /* #if !UCONFIG_NO_COLLATION */