1 /********************************************************************
2 * Copyright (c) 1997-2016, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************/
5 /*****************************************************************************
9 * Modification History:
11 * Madhu Katragadda Ported for C API
12 * Brian Rower Added TestOpenVsOpenRules
13 ******************************************************************************
14 *//* C API TEST For COLLATOR */
16 #include "unicode/utypes.h"
18 #if !UCONFIG_NO_COLLATION
23 #include "unicode/uloc.h"
24 #include "unicode/ulocdata.h"
25 #include "unicode/ustring.h"
26 #include "unicode/ures.h"
27 #include "unicode/ucoleitr.h"
36 static void TestAttribute(void);
37 static void TestDefault(void);
38 static void TestDefaultKeyword(void);
39 static void TestBengaliSortKey(void);
42 static char* U_EXPORT2
ucol_sortKeyToString(const UCollator
*coll
, const uint8_t *sortkey
, char *buffer
, uint32_t len
) {
43 uint32_t position
= 0;
46 if (position
+ 1 < len
)
47 position
+= sprintf(buffer
+ position
, "[");
48 while ((b
= *sortkey
++) != 0) {
49 if (b
== 1 && position
+ 5 < len
) {
50 position
+= sprintf(buffer
+ position
, "%02X . ", b
);
51 } else if (b
!= 1 && position
+ 3 < len
) {
52 position
+= sprintf(buffer
+ position
, "%02X ", b
);
55 if (position
+ 3 < len
)
56 position
+= sprintf(buffer
+ position
, "%02X]", b
);
60 void addCollAPITest(TestNode
** root
)
62 /* WEIVTODO: return tests here */
63 addTest(root
, &TestProperty
, "tscoll/capitst/TestProperty");
64 addTest(root
, &TestRuleBasedColl
, "tscoll/capitst/TestRuleBasedColl");
65 addTest(root
, &TestCompare
, "tscoll/capitst/TestCompare");
66 addTest(root
, &TestSortKey
, "tscoll/capitst/TestSortKey");
67 addTest(root
, &TestHashCode
, "tscoll/capitst/TestHashCode");
68 addTest(root
, &TestElemIter
, "tscoll/capitst/TestElemIter");
69 addTest(root
, &TestGetAll
, "tscoll/capitst/TestGetAll");
70 /*addTest(root, &TestGetDefaultRules, "tscoll/capitst/TestGetDefaultRules");*/
71 addTest(root
, &TestDecomposition
, "tscoll/capitst/TestDecomposition");
72 addTest(root
, &TestSafeClone
, "tscoll/capitst/TestSafeClone");
73 addTest(root
, &TestCloneBinary
, "tscoll/capitst/TestCloneBinary");
74 addTest(root
, &TestGetSetAttr
, "tscoll/capitst/TestGetSetAttr");
75 addTest(root
, &TestBounds
, "tscoll/capitst/TestBounds");
76 addTest(root
, &TestGetLocale
, "tscoll/capitst/TestGetLocale");
77 addTest(root
, &TestSortKeyBufferOverrun
, "tscoll/capitst/TestSortKeyBufferOverrun");
78 addTest(root
, &TestAttribute
, "tscoll/capitst/TestAttribute");
79 addTest(root
, &TestGetTailoredSet
, "tscoll/capitst/TestGetTailoredSet");
80 addTest(root
, &TestMergeSortKeys
, "tscoll/capitst/TestMergeSortKeys");
81 addTest(root
, &TestShortString
, "tscoll/capitst/TestShortString");
82 addTest(root
, &TestGetContractionsAndUnsafes
, "tscoll/capitst/TestGetContractionsAndUnsafes");
83 addTest(root
, &TestOpenBinary
, "tscoll/capitst/TestOpenBinary");
84 addTest(root
, &TestDefault
, "tscoll/capitst/TestDefault");
85 addTest(root
, &TestDefaultKeyword
, "tscoll/capitst/TestDefaultKeyword");
86 addTest(root
, &TestOpenVsOpenRules
, "tscoll/capitst/TestOpenVsOpenRules");
87 addTest(root
, &TestBengaliSortKey
, "tscoll/capitst/TestBengaliSortKey");
88 addTest(root
, &TestGetKeywordValuesForLocale
, "tscoll/capitst/TestGetKeywordValuesForLocale");
89 addTest(root
, &TestStrcollNull
, "tscoll/capitst/TestStrcollNull");
92 void TestGetSetAttr(void) {
93 UErrorCode status
= U_ZERO_ERROR
;
94 UCollator
*coll
= ucol_open(NULL
, &status
);
97 UColAttributeValue val
[5];
99 UColAttributeValue nonValue
;
101 {UCOL_FRENCH_COLLATION
, {UCOL_ON
, UCOL_OFF
}, 2, UCOL_SHIFTED
},
102 {UCOL_ALTERNATE_HANDLING
, {UCOL_NON_IGNORABLE
, UCOL_SHIFTED
}, 2, UCOL_OFF
},/* attribute for handling variable elements*/
103 {UCOL_CASE_FIRST
, {UCOL_OFF
, UCOL_LOWER_FIRST
, UCOL_UPPER_FIRST
}, 3, UCOL_SHIFTED
},/* who goes first, lower case or uppercase */
104 {UCOL_CASE_LEVEL
, {UCOL_ON
, UCOL_OFF
}, 2, UCOL_SHIFTED
},/* do we have an extra case level */
105 {UCOL_NORMALIZATION_MODE
, {UCOL_ON
, UCOL_OFF
}, 2, UCOL_SHIFTED
},/* attribute for normalization */
106 {UCOL_DECOMPOSITION_MODE
, {UCOL_ON
, UCOL_OFF
}, 2, UCOL_SHIFTED
},
107 {UCOL_STRENGTH
, {UCOL_PRIMARY
, UCOL_SECONDARY
, UCOL_TERTIARY
, UCOL_QUATERNARY
, UCOL_IDENTICAL
}, 5, UCOL_SHIFTED
},/* attribute for strength */
108 {UCOL_HIRAGANA_QUATERNARY_MODE
, {UCOL_ON
, UCOL_OFF
}, 2, UCOL_SHIFTED
},/* when turned on, this attribute */
110 UColAttribute currAttr
;
111 UColAttributeValue value
;
112 uint32_t i
= 0, j
= 0;
115 log_err_status(status
, "Unable to open collator. %s\n", u_errorName(status
));
118 for(i
= 0; i
<UPRV_LENGTHOF(attrs
); i
++) {
119 currAttr
= attrs
[i
].att
;
120 ucol_setAttribute(coll
, currAttr
, UCOL_DEFAULT
, &status
);
121 if(U_FAILURE(status
)) {
122 log_err_status(status
, "ucol_setAttribute with the default value returned error: %s\n", u_errorName(status
));
125 value
= ucol_getAttribute(coll
, currAttr
, &status
);
126 if(U_FAILURE(status
)) {
127 log_err("ucol_getAttribute returned error: %s\n", u_errorName(status
));
130 for(j
= 0; j
<attrs
[i
].valueSize
; j
++) {
131 ucol_setAttribute(coll
, currAttr
, attrs
[i
].val
[j
], &status
);
132 if(U_FAILURE(status
)) {
133 log_err("ucol_setAttribute with the value %i returned error: %s\n", attrs
[i
].val
[j
], u_errorName(status
));
137 status
= U_ZERO_ERROR
;
138 ucol_setAttribute(coll
, currAttr
, attrs
[i
].nonValue
, &status
);
139 if(U_SUCCESS(status
)) {
140 log_err("ucol_setAttribute with the bad value didn't return an error\n");
143 status
= U_ZERO_ERROR
;
145 ucol_setAttribute(coll
, currAttr
, value
, &status
);
146 if(U_FAILURE(status
)) {
147 log_err("ucol_setAttribute with the default valuereturned error: %s\n", u_errorName(status
));
151 status
= U_ZERO_ERROR
;
152 value
= ucol_getAttribute(coll
, UCOL_ATTRIBUTE_COUNT
, &status
);
153 if(U_SUCCESS(status
)) {
154 log_err("ucol_getAttribute for UCOL_ATTRIBUTE_COUNT didn't return an error\n");
156 status
= U_ZERO_ERROR
;
157 ucol_setAttribute(coll
, UCOL_ATTRIBUTE_COUNT
, UCOL_DEFAULT
, &status
);
158 if(U_SUCCESS(status
)) {
159 log_err("ucol_setAttribute for UCOL_ATTRIBUTE_COUNT didn't return an error\n");
161 status
= U_ZERO_ERROR
;
166 static void doAssert(int condition
, const char *message
)
169 log_err("ERROR : %s\n", message
);
173 #define UTF8_BUF_SIZE 128
175 static void doStrcoll(const UCollator
* coll
, const UChar
* src
, int32_t srcLen
, const UChar
* tgt
, int32_t tgtLen
,
176 UCollationResult expected
, const char *message
) {
177 UErrorCode err
= U_ZERO_ERROR
;
178 char srcU8
[UTF8_BUF_SIZE
], tgtU8
[UTF8_BUF_SIZE
];
179 int32_t srcU8Len
= -1, tgtU8Len
= -1;
182 if (ucol_strcoll(coll
, src
, srcLen
, tgt
, tgtLen
) != expected
) {
183 log_err("ERROR : %s\n", message
);
186 u_strToUTF8(srcU8
, UTF8_BUF_SIZE
, &len
, src
, srcLen
, &err
);
187 if (U_FAILURE(err
) || len
>= UTF8_BUF_SIZE
) {
188 log_err("ERROR : UTF-8 conversion error\n");
194 u_strToUTF8(tgtU8
, UTF8_BUF_SIZE
, &len
, tgt
, tgtLen
, &err
);
195 if (U_FAILURE(err
) || len
>= UTF8_BUF_SIZE
) {
196 log_err("ERROR : UTF-8 conversion error\n");
203 if (ucol_strcollUTF8(coll
, srcU8
, srcU8Len
, tgtU8
, tgtU8Len
, &err
) != expected
205 log_err("ERROR: %s (strcollUTF8)\n", message
);
210 /* We don't have default rules, at least not in the previous sense */
211 void TestGetDefaultRules(){
213 UErrorCode status
=U_ZERO_ERROR
;
214 UCollator
*coll
=NULL
;
215 int32_t len1
= 0, len2
=0;
216 uint8_t *binColData
= NULL
;
218 UResourceBundle
*res
= NULL
;
219 UResourceBundle
*binColl
= NULL
;
220 uint8_t *binResult
= NULL
;
223 const UChar
* defaultRulesArray
=ucol_getDefaultRulesArray(&size
);
224 log_verbose("Test the function ucol_getDefaultRulesArray()\n");
226 coll
= ucol_openRules(defaultRulesArray
, size
, UCOL_ON
, UCOL_PRIMARY
, &status
);
227 if(U_SUCCESS(status
) && coll
!=NULL
) {
228 binColData
= (uint8_t*)ucol_cloneRuleData(coll
, &len1
, &status
);
234 res
=ures_open(NULL
, "root", &status
);
235 if(U_FAILURE(status
)){
236 log_err("ERROR: Failed to get resource for \"root Locale\" with %s", myErrorName(status
));
239 binColl
=ures_getByKey(res
, "%%Collation", binColl
, &status
);
240 if(U_SUCCESS(status
)){
241 binResult
=(uint8_t*)ures_getBinary(binColl
, &len2
, &status
);
242 if(U_FAILURE(status
)){
243 log_err("ERROR: ures_getBinary() failed\n");
246 log_err("ERROR: ures_getByKey(locale(default), %%Collation) failed");
251 log_err("Error: ucol_getDefaultRulesArray() failed to return the correct length.\n");
253 if(memcmp(binColData
, binResult
, len1
) != 0){
254 log_err("Error: ucol_getDefaultRulesArray() failed\n");
265 /* Collator Properties
266 ucol_open, ucol_strcoll, getStrength/setStrength
267 getDecomposition/setDecomposition, getDisplayName*/
270 UCollator
*col
, *ruled
;
274 UChar source
[12], target
[12];
276 UErrorCode status
= U_ZERO_ERROR
;
278 * Expected version of the English collator.
279 * Currently, the major/minor version numbers change when the builder code
281 * number 2 is from the tailoring data version and
282 * number 3 is the UCA version.
283 * This changes with every UCA version change, and the expected value
284 * needs to be adjusted.
285 * Same in intltest/apicoll.cpp.
287 UVersionInfo currVersionArray
= {0x31, 0xC0, 0x05, 0x2A}; /* from ICU 4.4/UCA 5.2 */
288 UVersionInfo versionArray
= {0, 0, 0, 0};
289 UVersionInfo versionUCAArray
= {0, 0, 0, 0};
290 UVersionInfo versionUCDArray
= {0, 0, 0, 0};
292 log_verbose("The property tests begin : \n");
293 log_verbose("Test ucol_strcoll : \n");
294 col
= ucol_open("en_US", &status
);
295 if (U_FAILURE(status
)) {
296 log_err_status(status
, "Default Collator creation failed.: %s\n", myErrorName(status
));
300 ucol_getVersion(col
, versionArray
);
301 /* Check that the version is at least some baseline value rather than testing equality,
302 * so that we need not update the expected version each time. */
303 if (uprv_memcmp(versionArray
, currVersionArray
, 4)<0) {
304 log_err("Testing ucol_getVersion() - unexpected result: %02x.%02x.%02x.%02x\n",
305 versionArray
[0], versionArray
[1], versionArray
[2], versionArray
[3]);
307 log_verbose("ucol_getVersion() result: %02x.%02x.%02x.%02x\n",
308 versionArray
[0], versionArray
[1], versionArray
[2], versionArray
[3]);
311 /* Assume that the UCD and UCA versions are the same,
312 * rather than hardcoding (and updating each time) a particular UCA version. */
313 u_getUnicodeVersion(versionUCDArray
);
314 ucol_getUCAVersion(col
, versionUCAArray
);
315 if (0!=uprv_memcmp(versionUCAArray
, versionUCDArray
, 4)) {
316 log_err("Testing ucol_getUCAVersion() - unexpected result: %hu.%hu.%hu.%hu\n",
317 versionUCAArray
[0], versionUCAArray
[1], versionUCAArray
[2], versionUCAArray
[3]);
320 u_uastrcpy(source
, "ab");
321 u_uastrcpy(target
, "abc");
323 doStrcoll(col
, source
, u_strlen(source
), target
, u_strlen(target
), UCOL_LESS
, "ab < abc comparison failed");
325 u_uastrcpy(source
, "ab");
326 u_uastrcpy(target
, "AB");
328 doStrcoll(col
, source
, u_strlen(source
), target
, u_strlen(target
), UCOL_LESS
, "ab < AB comparison failed");
330 u_uastrcpy(source
, "blackbird");
331 u_uastrcpy(target
, "black-bird");
333 doStrcoll(col
, source
, u_strlen(source
), target
, u_strlen(target
), UCOL_GREATER
, "black-bird > blackbird comparison failed");
335 u_uastrcpy(source
, "black bird");
336 u_uastrcpy(target
, "black-bird");
338 doStrcoll(col
, source
, u_strlen(source
), target
, u_strlen(target
), UCOL_LESS
, "black bird < black-bird comparison failed");
340 u_uastrcpy(source
, "Hello");
341 u_uastrcpy(target
, "hello");
343 doStrcoll(col
, source
, u_strlen(source
), target
, u_strlen(target
), UCOL_GREATER
, "Hello > hello comparison failed");
345 log_verbose("Test ucol_strcoll ends.\n");
347 log_verbose("testing ucol_getStrength() method ...\n");
348 doAssert( (ucol_getStrength(col
) == UCOL_TERTIARY
), "collation object has the wrong strength");
349 doAssert( (ucol_getStrength(col
) != UCOL_PRIMARY
), "collation object's strength is primary difference");
351 log_verbose("testing ucol_setStrength() method ...\n");
352 ucol_setStrength(col
, UCOL_SECONDARY
);
353 doAssert( (ucol_getStrength(col
) != UCOL_TERTIARY
), "collation object's strength is secondary difference");
354 doAssert( (ucol_getStrength(col
) != UCOL_PRIMARY
), "collation object's strength is primary difference");
355 doAssert( (ucol_getStrength(col
) == UCOL_SECONDARY
), "collation object has the wrong strength");
358 log_verbose("Get display name for the default collation in German : \n");
360 len
=ucol_getDisplayName("en_US", "de_DE", NULL
, 0, &status
);
361 if(status
==U_BUFFER_OVERFLOW_ERROR
){
363 disName
=(UChar
*)malloc(sizeof(UChar
) * (len
+1));
364 ucol_getDisplayName("en_US", "de_DE", disName
, len
+1, &status
);
365 log_verbose("the display name for default collation in german: %s\n", austrdup(disName
) );
368 if(U_FAILURE(status
)){
369 log_err("ERROR: in getDisplayName: %s\n", myErrorName(status
));
372 log_verbose("Default collation getDisplayName ended.\n");
374 ruled
= ucol_open("da_DK", &status
);
375 if(U_FAILURE(status
)) {
376 log_data_err("ucol_open(\"da_DK\") failed - %s\n", u_errorName(status
));
380 log_verbose("ucol_getRules() testing ...\n");
381 rules
= ucol_getRules(ruled
, &tempLength
);
382 if(tempLength
== 0) {
383 log_data_err("missing da_DK tailoring rule string\n");
385 UChar aa
[2] = { 0x61, 0x61 };
386 doAssert(u_strFindFirst(rules
, tempLength
, aa
, 2) != NULL
,
387 "da_DK rules do not contain 'aa'");
389 log_verbose("getRules tests end.\n");
391 UChar
*buffer
= (UChar
*)malloc(200000*sizeof(UChar
));
392 int32_t bufLen
= 200000;
394 log_verbose("ucol_getRulesEx() testing ...\n");
395 tempLength
= ucol_getRulesEx(col
,UCOL_TAILORING_ONLY
,buffer
,bufLen
);
396 doAssert( tempLength
== 0x00, "getRulesEx() result incorrect" );
397 log_verbose("getRules tests end.\n");
399 log_verbose("ucol_getRulesEx() testing ...\n");
400 tempLength
=ucol_getRulesEx(col
,UCOL_FULL_RULES
,buffer
,bufLen
);
401 if(tempLength
== 0) {
402 log_data_err("missing *full* rule string\n");
404 log_verbose("getRulesEx tests end.\n");
410 log_verbose("open an collator for french locale");
411 col
= ucol_open("fr_FR", &status
);
412 if (U_FAILURE(status
)) {
413 log_err("ERROR: Creating French collation failed.: %s\n", myErrorName(status
));
416 ucol_setStrength(col
, UCOL_PRIMARY
);
417 log_verbose("testing ucol_getStrength() method again ...\n");
418 doAssert( (ucol_getStrength(col
) != UCOL_TERTIARY
), "collation object has the wrong strength");
419 doAssert( (ucol_getStrength(col
) == UCOL_PRIMARY
), "collation object's strength is not primary difference");
421 log_verbose("testing French ucol_setStrength() method ...\n");
422 ucol_setStrength(col
, UCOL_TERTIARY
);
423 doAssert( (ucol_getStrength(col
) == UCOL_TERTIARY
), "collation object's strength is not tertiary difference");
424 doAssert( (ucol_getStrength(col
) != UCOL_PRIMARY
), "collation object's strength is primary difference");
425 doAssert( (ucol_getStrength(col
) != UCOL_SECONDARY
), "collation object's strength is secondary difference");
428 log_verbose("Get display name for the french collation in english : \n");
429 len
=ucol_getDisplayName("fr_FR", "en_US", NULL
, 0, &status
);
430 if(status
==U_BUFFER_OVERFLOW_ERROR
){
432 disName
=(UChar
*)malloc(sizeof(UChar
) * (len
+1));
433 ucol_getDisplayName("fr_FR", "en_US", disName
, len
+1, &status
);
434 log_verbose("the display name for french collation in english: %s\n", austrdup(disName
) );
437 if(U_FAILURE(status
)){
438 log_err("ERROR: in getDisplayName: %s\n", myErrorName(status
));
441 log_verbose("Default collation getDisplayName ended.\n");
445 /* Test RuleBasedCollator and getRules*/
446 void TestRuleBasedColl()
448 UCollator
*col1
, *col2
, *col3
, *col4
;
449 UCollationElements
*iter1
, *iter2
;
453 const UChar
*rule1
, *rule2
, *rule3
, *rule4
;
455 UErrorCode status
= U_ZERO_ERROR
;
456 u_uastrcpy(ruleset1
, "&9 < a, A < b, B < c, C; ch, cH, Ch, CH < d, D, e, E");
457 u_uastrcpy(ruleset2
, "&9 < a, A < b, B < c, C < d, D, e, E");
460 col1
= ucol_openRules(ruleset1
, u_strlen(ruleset1
), UCOL_DEFAULT
, UCOL_DEFAULT_STRENGTH
, NULL
,&status
);
461 if (U_FAILURE(status
)) {
462 log_err_status(status
, "RuleBased Collator creation failed.: %s\n", myErrorName(status
));
466 log_verbose("PASS: RuleBased Collator creation passed\n");
468 status
= U_ZERO_ERROR
;
469 col2
= ucol_openRules(ruleset2
, u_strlen(ruleset2
), UCOL_DEFAULT
, UCOL_DEFAULT_STRENGTH
, NULL
, &status
);
470 if (U_FAILURE(status
)) {
471 log_err("RuleBased Collator creation failed.: %s\n", myErrorName(status
));
475 log_verbose("PASS: RuleBased Collator creation passed\n");
478 status
= U_ZERO_ERROR
;
479 col3
= ucol_open(NULL
, &status
);
480 if (U_FAILURE(status
)) {
481 log_err("Default Collator creation failed.: %s\n", myErrorName(status
));
485 log_verbose("PASS: Default Collator creation passed\n");
487 rule1
= ucol_getRules(col1
, &tempLength
);
488 rule2
= ucol_getRules(col2
, &tempLength
);
489 rule3
= ucol_getRules(col3
, &tempLength
);
491 doAssert((u_strcmp(rule1
, rule2
) != 0), "Default collator getRules failed");
492 doAssert((u_strcmp(rule2
, rule3
) != 0), "Default collator getRules failed");
493 doAssert((u_strcmp(rule1
, rule3
) != 0), "Default collator getRules failed");
495 col4
=ucol_openRules(rule2
, u_strlen(rule2
), UCOL_DEFAULT
, UCOL_DEFAULT_STRENGTH
, NULL
, &status
);
496 if (U_FAILURE(status
)) {
497 log_err("RuleBased Collator creation failed.: %s\n", myErrorName(status
));
500 rule4
= ucol_getRules(col4
, &tempLength
);
501 doAssert((u_strcmp(rule2
, rule4
) == 0), "Default collator getRules failed");
508 /* tests that modifier ! is always ignored */
509 u_uastrcpy(ruleset1
, "!&a<b");
513 col1
= ucol_openRules(ruleset1
, u_strlen(ruleset1
), UCOL_DEFAULT
, UCOL_DEFAULT_STRENGTH
, NULL
, &status
);
514 if (U_FAILURE(status
)) {
515 log_err("RuleBased Collator creation failed.: %s\n", myErrorName(status
));
518 col2
= ucol_open("en_US", &status
);
519 if (U_FAILURE(status
)) {
520 log_err("en_US Collator creation failed.: %s\n", myErrorName(status
));
523 iter1
= ucol_openElements(col1
, teststr
, 3, &status
);
524 iter2
= ucol_openElements(col2
, teststr
, 3, &status
);
525 if(U_FAILURE(status
)) {
526 log_err("ERROR: CollationElement iterator creation failed.: %s\n", myErrorName(status
));
530 /* testing with en since thai has its own tailoring */
531 uint32_t ce
= ucol_next(iter1
, &status
);
532 uint32_t ce2
= ucol_next(iter2
, &status
);
533 if(U_FAILURE(status
)) {
534 log_err("ERROR: CollationElement iterator creation failed.: %s\n", myErrorName(status
));
538 log_err("! modifier test failed");
540 if (ce
== UCOL_NULLORDER
) {
544 ucol_closeElements(iter1
);
545 ucol_closeElements(iter2
);
548 /* CLDR 24+ requires a reset before the first relation */
549 u_uastrcpy(ruleset1
, "< z < a");
550 col1
= ucol_openRules(ruleset1
, u_strlen(ruleset1
), UCOL_DEFAULT
, UCOL_DEFAULT_STRENGTH
, NULL
, &status
);
551 if (status
!= U_PARSE_ERROR
&& status
!= U_INVALID_FORMAT_ERROR
) {
552 log_err("ucol_openRules(without initial reset: '< z < a') "
553 "should fail with U_PARSE_ERROR or U_INVALID_FORMAT_ERROR but yielded %s\n",
554 myErrorName(status
));
561 UErrorCode status
= U_ZERO_ERROR
;
566 log_verbose("The compare tests begin : \n");
568 col
= ucol_open("en_US", &status
);
569 if(U_FAILURE(status
)) {
570 log_err_status(status
, "ucal_open() collation creation failed.: %s\n", myErrorName(status
));
573 test1
=(UChar
*)malloc(sizeof(UChar
) * 6);
574 test2
=(UChar
*)malloc(sizeof(UChar
) * 6);
575 u_uastrcpy(test1
, "Abcda");
576 u_uastrcpy(test2
, "abcda");
578 log_verbose("Use tertiary comparison level testing ....\n");
580 doAssert( (!ucol_equal(col
, test1
, u_strlen(test1
), test2
, u_strlen(test2
))), "Result should be \"Abcda\" != \"abcda\" ");
581 doAssert( (ucol_greater(col
, test1
, u_strlen(test1
), test2
, u_strlen(test2
))), "Result should be \"Abcda\" >>> \"abcda\" ");
582 doAssert( (ucol_greaterOrEqual(col
, test1
, u_strlen(test1
), test2
, u_strlen(test2
))), "Result should be \"Abcda\" >>> \"abcda\"");
584 ucol_setStrength(col
, UCOL_SECONDARY
);
585 log_verbose("Use secondary comparison level testing ....\n");
587 doAssert( (ucol_equal(col
, test1
, u_strlen(test1
), test2
, u_strlen(test2
) )), "Result should be \"Abcda\" == \"abcda\"");
588 doAssert( (!ucol_greater(col
, test1
, u_strlen(test1
), test2
, u_strlen(test2
))), "Result should be \"Abcda\" == \"abcda\"");
589 doAssert( (ucol_greaterOrEqual(col
, test1
, u_strlen(test1
), test2
, u_strlen(test2
) )), "Result should be \"Abcda\" == \"abcda\"");
591 ucol_setStrength(col
, UCOL_PRIMARY
);
592 log_verbose("Use primary comparison level testing ....\n");
594 doAssert( (ucol_equal(col
, test1
, u_strlen(test1
), test2
, u_strlen(test2
))), "Result should be \"Abcda\" == \"abcda\"");
595 doAssert( (!ucol_greater(col
, test1
, u_strlen(test1
), test2
, u_strlen(test2
))), "Result should be \"Abcda\" == \"abcda\"");
596 doAssert( (ucol_greaterOrEqual(col
, test1
, u_strlen(test1
), test2
, u_strlen(test2
))), "Result should be \"Abcda\" == \"abcda\"");
599 log_verbose("The compare tests end.\n");
606 ---------------------------------------------
607 tests decomposition setting
609 void TestDecomposition() {
610 UErrorCode status
= U_ZERO_ERROR
;
611 UCollator
*en_US
, *el_GR
, *vi_VN
;
612 en_US
= ucol_open("en_US", &status
);
613 el_GR
= ucol_open("el_GR", &status
);
614 vi_VN
= ucol_open("vi_VN", &status
);
616 if (U_FAILURE(status
)) {
617 log_err_status(status
, "ERROR: collation creation failed.: %s\n", myErrorName(status
));
621 if (ucol_getAttribute(vi_VN
, UCOL_NORMALIZATION_MODE
, &status
) != UCOL_ON
||
624 log_err("ERROR: vi_VN collation did not have canonical decomposition for normalization!\n");
627 status
= U_ZERO_ERROR
;
628 if (ucol_getAttribute(el_GR
, UCOL_NORMALIZATION_MODE
, &status
) != UCOL_ON
||
631 log_err("ERROR: el_GR collation did not have canonical decomposition for normalization!\n");
634 status
= U_ZERO_ERROR
;
635 if (ucol_getAttribute(en_US
, UCOL_NORMALIZATION_MODE
, &status
) != UCOL_OFF
||
638 log_err("ERROR: en_US collation had canonical decomposition for normalization!\n");
646 #define CLONETEST_COLLATOR_COUNT 4
648 void TestSafeClone() {
651 static const UChar umlautUStr
[] = {0x00DC, 0};
652 static const UChar oeStr
[] = {0x0055, 0x0045, 0};
653 UCollator
* someCollators
[CLONETEST_COLLATOR_COUNT
];
654 UCollator
* someClonedCollators
[CLONETEST_COLLATOR_COUNT
];
656 UErrorCode err
= U_ZERO_ERROR
;
657 int8_t idx
= 6; /* Leave this here to test buffer alignment in memory */
658 uint8_t buffer
[CLONETEST_COLLATOR_COUNT
] [U_COL_SAFECLONE_BUFFERSIZE
];
659 int32_t bufferSize
= U_COL_SAFECLONE_BUFFERSIZE
;
660 const char sampleRuleChars
[] = "&Z < CH";
661 UChar sampleRule
[sizeof(sampleRuleChars
)];
663 u_uastrcpy(test1
, "abCda");
664 u_uastrcpy(test2
, "abcda");
665 u_uastrcpy(sampleRule
, sampleRuleChars
);
667 /* one default collator, two complex ones & one rule-based collator */
668 someCollators
[0] = ucol_open("en_US", &err
);
669 someCollators
[1] = ucol_open("ko", &err
);
670 someCollators
[2] = ucol_open("ja_JP", &err
);
671 someCollators
[3] = ucol_openRules(sampleRule
, -1, UCOL_ON
, UCOL_TERTIARY
, NULL
, &err
);
673 for (idx
= 0; idx
< CLONETEST_COLLATOR_COUNT
; idx
++) {
674 ucol_close(someCollators
[idx
]);
676 log_data_err("Couldn't open one or more collators\n");
680 /* Check the various error & informational states: */
682 /* Null status - just returns NULL */
683 if (NULL
!= ucol_safeClone(someCollators
[0], buffer
[0], &bufferSize
, NULL
))
685 log_err("FAIL: Cloned Collator failed to deal correctly with null status\n");
687 /* error status - should return 0 & keep error the same */
688 err
= U_MEMORY_ALLOCATION_ERROR
;
689 if (NULL
!= ucol_safeClone(someCollators
[0], buffer
[0], &bufferSize
, &err
) || err
!= U_MEMORY_ALLOCATION_ERROR
)
691 log_err("FAIL: Cloned Collator failed to deal correctly with incoming error status\n");
695 /* Null buffer size pointer is ok */
696 if (NULL
== (col
= ucol_safeClone(someCollators
[0], buffer
[0], NULL
, &err
)) || U_FAILURE(err
))
698 log_err("FAIL: Cloned Collator failed to deal correctly with null bufferSize pointer\n");
703 /* buffer size pointer is 0 - fill in pbufferSize with a size */
705 if (NULL
!= ucol_safeClone(someCollators
[0], buffer
[0], &bufferSize
, &err
) ||
706 U_FAILURE(err
) || bufferSize
<= 0)
708 log_err("FAIL: Cloned Collator failed a sizing request ('preflighting')\n");
710 /* Verify our define is large enough */
711 if (U_COL_SAFECLONE_BUFFERSIZE
< bufferSize
)
713 log_err("FAIL: Pre-calculated buffer size is too small\n");
715 /* Verify we can use this run-time calculated size */
716 if (NULL
== (col
= ucol_safeClone(someCollators
[0], buffer
[0], &bufferSize
, &err
)) || U_FAILURE(err
))
718 log_err("FAIL: Collator can't be cloned with run-time size\n");
720 if (col
) ucol_close(col
);
721 /* size one byte too small - should allocate & let us know */
722 if (bufferSize
> 1) {
725 if (NULL
== (col
= ucol_safeClone(someCollators
[0], 0, &bufferSize
, &err
)) || err
!= U_SAFECLONE_ALLOCATED_WARNING
)
727 log_err("FAIL: Cloned Collator failed to deal correctly with too-small buffer size\n");
729 if (col
) ucol_close(col
);
731 bufferSize
= U_COL_SAFECLONE_BUFFERSIZE
;
734 /* Null buffer pointer - return Collator & set error to U_SAFECLONE_ALLOCATED_WARNING */
735 if (NULL
== (col
= ucol_safeClone(someCollators
[0], 0, &bufferSize
, &err
)) || err
!= U_SAFECLONE_ALLOCATED_WARNING
)
737 log_err("FAIL: Cloned Collator failed to deal correctly with null buffer pointer\n");
739 if (col
) ucol_close(col
);
742 /* Null Collator - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */
743 if (NULL
!= ucol_safeClone(NULL
, buffer
[0], &bufferSize
, &err
) || err
!= U_ILLEGAL_ARGUMENT_ERROR
)
745 log_err("FAIL: Cloned Collator failed to deal correctly with null Collator pointer\n");
750 /* Test that a cloned collator doesn't accidentally use UCA. */
751 col
=ucol_open("de@collation=phonebook", &err
);
752 bufferSize
= U_COL_SAFECLONE_BUFFERSIZE
;
753 someClonedCollators
[0] = ucol_safeClone(col
, buffer
[0], &bufferSize
, &err
);
754 doAssert( (ucol_greater(col
, umlautUStr
, u_strlen(umlautUStr
), oeStr
, u_strlen(oeStr
))), "Original German phonebook collation sorts differently than expected");
755 doAssert( (ucol_greater(someClonedCollators
[0], umlautUStr
, u_strlen(umlautUStr
), oeStr
, u_strlen(oeStr
))), "Cloned German phonebook collation sorts differently than expected");
756 if (!ucol_equals(someClonedCollators
[0], col
)) {
757 log_err("FAIL: Cloned German phonebook collator is not equal to original.\n");
760 ucol_close(someClonedCollators
[0]);
764 /* change orig & clone & make sure they are independent */
766 for (idx
= 0; idx
< CLONETEST_COLLATOR_COUNT
; idx
++)
768 ucol_setStrength(someCollators
[idx
], UCOL_IDENTICAL
);
771 ucol_close(ucol_safeClone(someCollators
[idx
], buffer
[idx
], &bufferSize
, &err
));
772 if (err
!= U_SAFECLONE_ALLOCATED_WARNING
) {
773 log_err("FAIL: collator number %d was not allocated.\n", idx
);
774 log_err("FAIL: status of Collator[%d] is %d (hex: %x).\n", idx
, err
, err
);
777 bufferSize
= U_COL_SAFECLONE_BUFFERSIZE
;
779 someClonedCollators
[idx
] = ucol_safeClone(someCollators
[idx
], buffer
[idx
], &bufferSize
, &err
);
780 if (U_FAILURE(err
)) {
781 log_err("FAIL: Unable to clone collator %d - %s\n", idx
, u_errorName(err
));
784 if (!ucol_equals(someClonedCollators
[idx
], someCollators
[idx
])) {
785 log_err("FAIL: Cloned collator is not equal to original at index = %d.\n", idx
);
788 /* Check the usability */
789 ucol_setStrength(someCollators
[idx
], UCOL_PRIMARY
);
790 ucol_setAttribute(someCollators
[idx
], UCOL_CASE_LEVEL
, UCOL_OFF
, &err
);
792 doAssert( (ucol_equal(someCollators
[idx
], test1
, u_strlen(test1
), test2
, u_strlen(test2
))), "Result should be \"abcda\" == \"abCda\"");
794 /* Close the original to make sure that the clone is usable. */
795 ucol_close(someCollators
[idx
]);
797 ucol_setStrength(someClonedCollators
[idx
], UCOL_TERTIARY
);
798 ucol_setAttribute(someClonedCollators
[idx
], UCOL_CASE_LEVEL
, UCOL_OFF
, &err
);
799 doAssert( (ucol_greater(someClonedCollators
[idx
], test1
, u_strlen(test1
), test2
, u_strlen(test2
))), "Result should be \"abCda\" >>> \"abcda\" ");
801 ucol_close(someClonedCollators
[idx
]);
805 void TestCloneBinary(){
806 UErrorCode err
= U_ZERO_ERROR
;
807 UCollator
* col
= ucol_open("en_US", &err
);
812 if (U_FAILURE(err
)) {
813 log_data_err("Couldn't open collator. Error: %s\n", u_errorName(err
));
817 size
= ucol_cloneBinary(col
, NULL
, 0, &err
);
818 if(size
==0 || err
!=U_BUFFER_OVERFLOW_ERROR
) {
819 log_err("ucol_cloneBinary - couldn't check size. Error: %s\n", u_errorName(err
));
824 buffer
= (uint8_t *) malloc(size
);
825 ucol_cloneBinary(col
, buffer
, size
, &err
);
827 log_err("ucol_cloneBinary - couldn't clone.. Error: %s\n", u_errorName(err
));
832 /* how to check binary result ? */
834 c
= ucol_openBinary(buffer
, size
, col
, &err
);
836 log_err("ucol_openBinary failed. Error: %s\n", u_errorName(err
));
838 UChar t
[] = {0x41, 0x42, 0x43, 0}; /* ABC */
841 l1
= ucol_getSortKey(col
, t
, -1, NULL
,0);
842 l2
= ucol_getSortKey(c
, t
, -1, NULL
,0);
843 k1
= (uint8_t *) malloc(sizeof(uint8_t) * l1
);
844 k2
= (uint8_t *) malloc(sizeof(uint8_t) * l2
);
845 ucol_getSortKey(col
, t
, -1, k1
, l1
);
846 ucol_getSortKey(col
, t
, -1, k2
, l2
);
847 if (strcmp((char *)k1
,(char *)k2
) != 0){
848 log_err("ucol_openBinary - new collator should equal to old one\n");
859 static void TestBengaliSortKey(void)
861 const char *curLoc
= "bn";
862 UChar str1
[] = { 0x09BE, 0 };
863 UChar str2
[] = { 0x0B70, 0 };
864 UCollator
*c2
= NULL
;
866 int32_t rulesLength
=-1;
868 int32_t sortKeyLen1
= 0;
870 int32_t sortKeyLen2
= 0;
871 UErrorCode status
= U_ZERO_ERROR
;
872 char sortKeyStr1
[2048];
873 uint32_t sortKeyStrLen1
= UPRV_LENGTHOF(sortKeyStr1
);
874 char sortKeyStr2
[2048];
875 uint32_t sortKeyStrLen2
= UPRV_LENGTHOF(sortKeyStr2
);
876 UCollationResult result
;
878 static UChar preRules
[41] = { 0x26, 0x9fa, 0x3c, 0x98c, 0x3c, 0x9e1, 0x3c, 0x98f, 0x3c, 0x990, 0x3c, 0x993, 0x3c, 0x994, 0x3c, 0x9bc, 0x3c, 0x982, 0x3c, 0x983, 0x3c, 0x981, 0x3c, 0x9b0, 0x3c, 0x9b8, 0x3c, 0x9b9, 0x3c, 0x9bd, 0x3c, 0x9be, 0x3c, 0x9bf, 0x3c, 0x9c8, 0x3c, 0x9cb, 0x3d, 0x9cb , 0};
882 log_verbose("Rules: %s\n", aescstrdup(rules
, rulesLength
));
884 c2
= ucol_openRules(rules
, rulesLength
, UCOL_DEFAULT
, UCOL_DEFAULT_STRENGTH
, NULL
, &status
);
885 if (U_FAILURE(status
)) {
886 log_data_err("ERROR: Creating collator from rules failed with locale: %s : %s\n", curLoc
, myErrorName(status
));
890 sortKeyLen1
= ucol_getSortKey(c2
, str1
, -1, NULL
, 0);
891 sortKey1
= (uint8_t*)malloc(sortKeyLen1
+1);
892 ucol_getSortKey(c2
,str1
,-1,sortKey1
, sortKeyLen1
+1);
893 ucol_sortKeyToString(c2
, sortKey1
, sortKeyStr1
, sortKeyStrLen1
);
896 sortKeyLen2
= ucol_getSortKey(c2
, str2
, -1, NULL
, 0);
897 sortKey2
= (uint8_t*)malloc(sortKeyLen2
+1);
898 ucol_getSortKey(c2
,str2
,-1,sortKey2
, sortKeyLen2
+1);
900 ucol_sortKeyToString(c2
, sortKey2
, sortKeyStr2
, sortKeyStrLen2
);
904 result
=ucol_strcoll(c2
, str1
, -1, str2
, -1);
905 if(result
!=UCOL_LESS
) {
906 log_err("Error: %s was not less than %s: result=%d.\n", aescstrdup(str1
,-1), aescstrdup(str2
,-1), result
);
907 log_info("[%s] -> %s (%d, from rule)\n", aescstrdup(str1
,-1), sortKeyStr1
, sortKeyLen1
);
908 log_info("[%s] -> %s (%d, from rule)\n", aescstrdup(str2
,-1), sortKeyStr2
, sortKeyLen2
);
910 log_verbose("OK: %s was less than %s: result=%d.\n", aescstrdup(str1
,-1), aescstrdup(str2
,-1), result
);
911 log_verbose("[%s] -> %s (%d, from rule)\n", aescstrdup(str1
,-1), sortKeyStr1
, sortKeyLen1
);
912 log_verbose("[%s] -> %s (%d, from rule)\n", aescstrdup(str2
,-1), sortKeyStr2
, sortKeyLen2
);
922 TestOpenVsOpenRules ensures that collators from ucol_open and ucol_openRules
923 will generate identical sort keys
925 void TestOpenVsOpenRules(){
927 /* create an array of all the locales */
928 int32_t numLocales
= uloc_countAvailable();
929 int32_t sizeOfStdSet
;
931 UChar str
[41]; /* create an array of UChar of size maximum strSize + 1 */
938 int32_t sortKeyLen1
, sortKeyLen2
;
939 uint8_t *sortKey1
= NULL
, *sortKey2
= NULL
;
940 char sortKeyStr1
[512], sortKeyStr2
[512];
941 uint32_t sortKeyStrLen1
= UPRV_LENGTHOF(sortKeyStr1
),
942 sortKeyStrLen2
= UPRV_LENGTHOF(sortKeyStr2
);
949 UErrorCode err
= U_ZERO_ERROR
;
951 /* create a set of standard characters that aren't very interesting...
952 and then we can find some interesting ones later */
954 stdSet
= uset_open(0x61, 0x7A);
955 uset_addRange(stdSet
, 0x41, 0x5A);
956 uset_addRange(stdSet
, 0x30, 0x39);
957 sizeOfStdSet
= uset_size(stdSet
);
958 (void)sizeOfStdSet
; /* Suppress set but not used warning. */
961 if(getTestOption(QUICK_OPTION
))
966 for(x
= 0; x
< numLocales
; x
+=adder
){
967 curLoc
= (char *)uloc_getAvailable(x
);
968 log_verbose("Processing %s\n", curLoc
);
970 /* create a collator the normal API way */
971 c1
= ucol_open(curLoc
, &err
);
972 if (U_FAILURE(err
)) {
973 log_err("ERROR: Normal collation creation failed with locale: %s : %s\n", curLoc
, myErrorName(err
));
978 rules
= ucol_getRules(c1
, &rulesLength
);
979 if (rulesLength
== 0) {
980 /* The optional tailoring rule string is either empty (boring) or missing. */
985 /* use those rules to create a collator from rules */
986 c2
= ucol_openRules(rules
, rulesLength
, UCOL_DEFAULT
, UCOL_DEFAULT_STRENGTH
, NULL
, &err
);
987 if (U_FAILURE(err
)) {
988 log_err("ERROR: Creating collator from rules failed with locale: %s : %s\n", curLoc
, myErrorName(err
));
993 uld
= ulocdata_open(curLoc
, &err
);
995 /*now that we have some collators, we get several strings */
997 for(y
= 0; y
< 5; y
++){
999 /* get a set of ALL the characters in this locale */
1000 eSet
= ulocdata_getExemplarSet(uld
, NULL
, 0, ULOCDATA_ES_STANDARD
, &err
);
1001 eSize
= uset_size(eSet
);
1003 /* make a string with these characters in it */
1004 strSize
= (rand()%40
) + 1;
1006 for(z
= 0; z
< strSize
; z
++){
1007 str
[z
] = uset_charAt(eSet
, rand()%eSize
);
1010 /* change the set to only include 'abnormal' characters (not A-Z, a-z, 0-9 */
1011 uset_removeAll(eSet
, stdSet
);
1012 eSize
= uset_size(eSet
);
1014 /* if there are some non-normal characters left, put a few into the string, just to make sure we have some */
1016 str
[2%strSize
] = uset_charAt(eSet
, rand()%eSize
);
1017 str
[3%strSize
] = uset_charAt(eSet
, rand()%eSize
);
1018 str
[5%strSize
] = uset_charAt(eSet
, rand()%eSize
);
1019 str
[10%strSize
] = uset_charAt(eSet
, rand()%eSize
);
1020 str
[13%strSize
] = uset_charAt(eSet
, rand()%eSize
);
1022 /* terminate the string */
1023 str
[strSize
-1] = '\0';
1024 log_verbose("String used: %S\n", str
);
1026 /* get sort keys for both of them, and check that the keys are identicle */
1027 sortKeyLen1
= ucol_getSortKey(c1
, str
, u_strlen(str
), NULL
, 0);
1028 sortKey1
= (uint8_t*)malloc(sizeof(uint8_t) * (sortKeyLen1
+ 1));
1029 /*memset(sortKey1, 0xFE, sortKeyLen1);*/
1030 ucol_getSortKey(c1
, str
, u_strlen(str
), sortKey1
, sortKeyLen1
+ 1);
1031 ucol_sortKeyToString(c1
, sortKey1
, sortKeyStr1
, sortKeyStrLen1
);
1033 sortKeyLen2
= ucol_getSortKey(c2
, str
, u_strlen(str
), NULL
, 0);
1034 sortKey2
= (uint8_t*)malloc(sizeof(uint8_t) * (sortKeyLen2
+ 1));
1035 /*memset(sortKey2, 0xFE, sortKeyLen2);*/
1036 ucol_getSortKey(c2
, str
, u_strlen(str
), sortKey2
, sortKeyLen2
+ 1);
1037 ucol_sortKeyToString(c2
, sortKey2
, sortKeyStr2
, sortKeyStrLen2
);
1039 /* Check that the lengths are the same */
1040 if (sortKeyLen1
!= sortKeyLen2
) {
1041 log_err("ERROR : Sort key lengths %d and %d for text '%s' in locale '%s' do not match.\n",
1042 sortKeyLen1
, sortKeyLen2
, str
, curLoc
);
1045 /* check that the keys are the same */
1046 if (memcmp(sortKey1
, sortKey2
, sortKeyLen1
) != 0) {
1047 log_err("ERROR : Sort keys '%s' and '%s' for text '%s' in locale '%s' are not equivalent.\n",
1048 sortKeyStr1
, sortKeyStr2
, str
, curLoc
);
1051 /* clean up after each string */
1056 /* clean up after each locale */
1057 ulocdata_close(uld
);
1061 /* final clean up */
1065 ----------------------------------------------------------------------------
1066 ctor -- Tests the getSortKey
1070 uint8_t *sortk1
= NULL
, *sortk2
= NULL
, *sortk3
= NULL
, *sortkEmpty
= NULL
;
1071 int32_t sortklen
, osortklen
;
1073 UChar
*test1
, *test2
, *test3
;
1074 UErrorCode status
= U_ZERO_ERROR
;
1075 char toStringBuffer
[256], *resultP
;
1076 uint32_t toStringLen
=UPRV_LENGTHOF(toStringBuffer
);
1079 uint8_t s1
[] = { 0x9f, 0x00 };
1080 uint8_t s2
[] = { 0x61, 0x00 };
1083 strcmpResult
= strcmp((const char *)s1
, (const char *)s2
);
1084 log_verbose("strcmp(0x9f..., 0x61...) = %d\n", strcmpResult
);
1086 if(strcmpResult
<= 0) {
1087 log_err("ERR: expected strcmp(\"9f 00\", \"61 00\") to be >=0 (GREATER).. got %d. Calling strcmp() for sortkeys may not work! \n",
1092 log_verbose("testing SortKey begins...\n");
1093 /* this is supposed to open default date format, but later on it treats it like it is "en_US"
1094 - very bad if you try to run the tests on machine where default locale is NOT "en_US" */
1095 /* col = ucol_open(NULL, &status); */
1096 col
= ucol_open("en_US", &status
);
1097 if (U_FAILURE(status
)) {
1098 log_err_status(status
, "ERROR: Default collation creation failed.: %s\n", myErrorName(status
));
1103 if(ucol_getStrength(col
) != UCOL_DEFAULT_STRENGTH
)
1105 log_err("ERROR: default collation did not have UCOL_DEFAULT_STRENGTH !\n");
1107 /* Need to use identical strength */
1108 ucol_setAttribute(col
, UCOL_STRENGTH
, UCOL_IDENTICAL
, &status
);
1110 test1
=(UChar
*)malloc(sizeof(UChar
) * 6);
1111 test2
=(UChar
*)malloc(sizeof(UChar
) * 6);
1112 test3
=(UChar
*)malloc(sizeof(UChar
) * 6);
1114 memset(test1
,0xFE, sizeof(UChar
)*6);
1115 memset(test2
,0xFE, sizeof(UChar
)*6);
1116 memset(test3
,0xFE, sizeof(UChar
)*6);
1119 u_uastrcpy(test1
, "Abcda");
1120 u_uastrcpy(test2
, "abcda");
1121 u_uastrcpy(test3
, "abcda");
1123 log_verbose("Use tertiary comparison level testing ....\n");
1125 sortklen
=ucol_getSortKey(col
, test1
, u_strlen(test1
), NULL
, 0);
1126 sortk1
=(uint8_t*)malloc(sizeof(uint8_t) * (sortklen
+1));
1127 memset(sortk1
,0xFE, sortklen
);
1128 ucol_getSortKey(col
, test1
, u_strlen(test1
), sortk1
, sortklen
+1);
1130 sortklen
=ucol_getSortKey(col
, test2
, u_strlen(test2
), NULL
, 0);
1131 sortk2
=(uint8_t*)malloc(sizeof(uint8_t) * (sortklen
+1));
1132 memset(sortk2
,0xFE, sortklen
);
1133 ucol_getSortKey(col
, test2
, u_strlen(test2
), sortk2
, sortklen
+1);
1135 osortklen
= sortklen
;
1136 sortklen
=ucol_getSortKey(col
, test2
, u_strlen(test3
), NULL
, 0);
1137 sortk3
=(uint8_t*)malloc(sizeof(uint8_t) * (sortklen
+1));
1138 memset(sortk3
,0xFE, sortklen
);
1139 ucol_getSortKey(col
, test2
, u_strlen(test2
), sortk3
, sortklen
+1);
1141 doAssert( (sortklen
== osortklen
), "Sortkey length should be the same (abcda, abcda)");
1143 doAssert( (memcmp(sortk1
, sortk2
, sortklen
) > 0), "Result should be \"Abcda\" > \"abcda\"");
1144 doAssert( (memcmp(sortk2
, sortk1
, sortklen
) < 0), "Result should be \"abcda\" < \"Abcda\"");
1145 doAssert( (memcmp(sortk2
, sortk3
, sortklen
) == 0), "Result should be \"abcda\" == \"abcda\"");
1147 resultP
= ucol_sortKeyToString(col
, sortk3
, toStringBuffer
, toStringLen
);
1148 doAssert( (resultP
!= 0), "sortKeyToString failed!");
1150 #if 1 /* verobse log of sortkeys */
1156 strcpy(junk2
, "abcda[2] ");
1157 strcpy(junk3
, " abcda[3] ");
1159 for(i
=0;i
<sortklen
;i
++)
1161 sprintf(junk2
+strlen(junk2
), "%02X ",(int)( 0xFF & sortk2
[i
]));
1162 sprintf(junk3
+strlen(junk3
), "%02X ",(int)( 0xFF & sortk3
[i
]));
1165 log_verbose("%s\n", junk2
);
1166 log_verbose("%s\n", junk3
);
1174 log_verbose("Use secondary comparision level testing ...\n");
1175 ucol_setStrength(col
, UCOL_SECONDARY
);
1176 sortklen
=ucol_getSortKey(col
, test1
, u_strlen(test1
), NULL
, 0);
1177 sortk1
=(uint8_t*)malloc(sizeof(uint8_t) * (sortklen
+1));
1178 ucol_getSortKey(col
, test1
, u_strlen(test1
), sortk1
, sortklen
+1);
1179 sortklen
=ucol_getSortKey(col
, test2
, u_strlen(test2
), NULL
, 0);
1180 sortk2
=(uint8_t*)malloc(sizeof(uint8_t) * (sortklen
+1));
1181 ucol_getSortKey(col
, test2
, u_strlen(test2
), sortk2
, sortklen
+1);
1183 doAssert( !(memcmp(sortk1
, sortk2
, sortklen
) > 0), "Result should be \"Abcda\" == \"abcda\"");
1184 doAssert( !(memcmp(sortk2
, sortk1
, sortklen
) < 0), "Result should be \"abcda\" == \"Abcda\"");
1185 doAssert( (memcmp(sortk1
, sortk2
, sortklen
) == 0), "Result should be \"abcda\" == \"abcda\"");
1187 log_verbose("getting sortkey for an empty string\n");
1188 ucol_setAttribute(col
, UCOL_STRENGTH
, UCOL_TERTIARY
, &status
);
1189 sortklen
= ucol_getSortKey(col
, test1
, 0, NULL
, 0);
1190 sortkEmpty
= (uint8_t*)malloc(sizeof(uint8_t) * sortklen
+1);
1191 sortklen
= ucol_getSortKey(col
, test1
, 0, sortkEmpty
, sortklen
+1);
1192 if(sortklen
!= 3 || sortkEmpty
[0] != 1 || sortkEmpty
[0] != 1 || sortkEmpty
[2] != 0) {
1193 log_err("Empty string generated wrong sortkey!\n");
1197 log_verbose("testing passing invalid string\n");
1198 sortklen
= ucol_getSortKey(col
, NULL
, 10, NULL
, 0);
1200 log_err("Invalid string didn't return sortkey size of 0\n");
1204 log_verbose("testing sortkey ends...\n");
1215 uint8_t *sortk1
, *sortk2
, *sortk3
;
1216 int32_t sortk1len
, sortk2len
, sortk3len
;
1218 UChar
*test1
, *test2
, *test3
;
1219 UErrorCode status
= U_ZERO_ERROR
;
1220 log_verbose("testing getHashCode begins...\n");
1221 col
= ucol_open("en_US", &status
);
1222 if (U_FAILURE(status
)) {
1223 log_err_status(status
, "ERROR: Default collation creation failed.: %s\n", myErrorName(status
));
1226 test1
=(UChar
*)malloc(sizeof(UChar
) * 6);
1227 test2
=(UChar
*)malloc(sizeof(UChar
) * 6);
1228 test3
=(UChar
*)malloc(sizeof(UChar
) * 6);
1229 u_uastrcpy(test1
, "Abcda");
1230 u_uastrcpy(test2
, "abcda");
1231 u_uastrcpy(test3
, "abcda");
1233 log_verbose("Use tertiary comparison level testing ....\n");
1234 sortk1len
=ucol_getSortKey(col
, test1
, u_strlen(test1
), NULL
, 0);
1235 sortk1
=(uint8_t*)malloc(sizeof(uint8_t) * (sortk1len
+1));
1236 ucol_getSortKey(col
, test1
, u_strlen(test1
), sortk1
, sortk1len
+1);
1237 sortk2len
=ucol_getSortKey(col
, test2
, u_strlen(test2
), NULL
, 0);
1238 sortk2
=(uint8_t*)malloc(sizeof(uint8_t) * (sortk2len
+1));
1239 ucol_getSortKey(col
, test2
, u_strlen(test2
), sortk2
, sortk2len
+1);
1240 sortk3len
=ucol_getSortKey(col
, test2
, u_strlen(test3
), NULL
, 0);
1241 sortk3
=(uint8_t*)malloc(sizeof(uint8_t) * (sortk3len
+1));
1242 ucol_getSortKey(col
, test2
, u_strlen(test2
), sortk3
, sortk3len
+1);
1245 log_verbose("ucol_hashCode() testing ...\n");
1247 doAssert( ucol_keyHashCode(sortk1
, sortk1len
) != ucol_keyHashCode(sortk2
, sortk2len
), "Hash test1 result incorrect" );
1248 doAssert( !(ucol_keyHashCode(sortk1
, sortk1len
) == ucol_keyHashCode(sortk2
, sortk2len
)), "Hash test2 result incorrect" );
1249 doAssert( ucol_keyHashCode(sortk2
, sortk2len
) == ucol_keyHashCode(sortk3
, sortk3len
), "Hash result not equal" );
1251 log_verbose("hashCode tests end.\n");
1263 *----------------------------------------------------------------------------
1264 * Tests the UCollatorElements API.
1270 int32_t order1
, order2
, order3
;
1271 UChar
*testString1
, *testString2
;
1273 UCollationElements
*iterator1
, *iterator2
, *iterator3
;
1274 UErrorCode status
= U_ZERO_ERROR
;
1275 log_verbose("testing UCollatorElements begins...\n");
1276 col
= ucol_open("en_US", &status
);
1277 ucol_setAttribute(col
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &status
);
1278 if (U_FAILURE(status
)) {
1279 log_err_status(status
, "ERROR: Default collation creation failed.: %s\n", myErrorName(status
));
1283 testString1
=(UChar
*)malloc(sizeof(UChar
) * 150);
1284 testString2
=(UChar
*)malloc(sizeof(UChar
) * 150);
1285 u_uastrcpy(testString1
, "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?");
1286 u_uastrcpy(testString2
, "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?");
1288 log_verbose("Constructors and comparison testing....\n");
1290 iterator1
= ucol_openElements(col
, testString1
, u_strlen(testString1
), &status
);
1291 if(U_FAILURE(status
)) {
1292 log_err("ERROR: Default collationElement iterator creation failed.: %s\n", myErrorName(status
));
1296 else{ log_verbose("PASS: Default collationElement iterator1 creation passed\n");}
1298 iterator2
= ucol_openElements(col
, testString1
, u_strlen(testString1
), &status
);
1299 if(U_FAILURE(status
)) {
1300 log_err("ERROR: Default collationElement iterator creation failed.: %s\n", myErrorName(status
));
1304 else{ log_verbose("PASS: Default collationElement iterator2 creation passed\n");}
1306 iterator3
= ucol_openElements(col
, testString2
, u_strlen(testString2
), &status
);
1307 if(U_FAILURE(status
)) {
1308 log_err("ERROR: Default collationElement iterator creation failed.: %s\n", myErrorName(status
));
1312 else{ log_verbose("PASS: Default collationElement iterator3 creation passed\n");}
1314 offset
=ucol_getOffset(iterator1
);
1315 (void)offset
; /* Suppress set but not used warning. */
1316 ucol_setOffset(iterator1
, 6, &status
);
1317 if (U_FAILURE(status
)) {
1318 log_err("Error in setOffset for UCollatorElements iterator.: %s\n", myErrorName(status
));
1321 if(ucol_getOffset(iterator1
)==6)
1322 log_verbose("setOffset and getOffset working fine\n");
1324 log_err("error in set and get Offset got %d instead of 6\n", ucol_getOffset(iterator1
));
1327 ucol_setOffset(iterator1
, 0, &status
);
1328 order1
= ucol_next(iterator1
, &status
);
1329 if (U_FAILURE(status
)) {
1330 log_err("Somehow ran out of memory stepping through the iterator1.: %s\n", myErrorName(status
));
1333 order2
=ucol_getOffset(iterator2
);
1334 doAssert((order1
!= order2
), "The first iterator advance failed");
1335 order2
= ucol_next(iterator2
, &status
);
1336 if (U_FAILURE(status
)) {
1337 log_err("Somehow ran out of memory stepping through the iterator2.: %s\n", myErrorName(status
));
1340 order3
= ucol_next(iterator3
, &status
);
1341 if (U_FAILURE(status
)) {
1342 log_err("Somehow ran out of memory stepping through the iterator3.: %s\n", myErrorName(status
));
1346 doAssert((order1
== order2
), "The second iterator advance failed should be the same as first one");
1348 doAssert( (ucol_primaryOrder(order1
) == ucol_primaryOrder(order3
)), "The primary orders should be identical");
1349 doAssert( (ucol_secondaryOrder(order1
) == ucol_secondaryOrder(order3
)), "The secondary orders should be identical");
1350 doAssert( (ucol_tertiaryOrder(order1
) == ucol_tertiaryOrder(order3
)), "The tertiary orders should be identical");
1352 order1
=ucol_next(iterator1
, &status
);
1353 if (U_FAILURE(status
)) {
1354 log_err("Somehow ran out of memory stepping through the iterator2.: %s\n", myErrorName(status
));
1357 order3
=ucol_next(iterator3
, &status
);
1358 if (U_FAILURE(status
)) {
1359 log_err("Somehow ran out of memory stepping through the iterator2.: %s\n", myErrorName(status
));
1362 doAssert( (ucol_primaryOrder(order1
) == ucol_primaryOrder(order3
)), "The primary orders should be identical");
1363 doAssert( (ucol_tertiaryOrder(order1
) != ucol_tertiaryOrder(order3
)), "The tertiary orders should be different");
1365 order1
=ucol_next(iterator1
, &status
);
1366 if (U_FAILURE(status
)) {
1367 log_err("Somehow ran out of memory stepping through the iterator2.: %s\n", myErrorName(status
));
1370 order3
=ucol_next(iterator3
, &status
);
1371 if (U_FAILURE(status
)) {
1372 log_err("Somehow ran out of memory stepping through the iterator2.: %s\n", myErrorName(status
));
1375 /* this here, my friends, is either pure lunacy or something so obsolete that even its mother
1376 * doesn't care about it. Essentially, this test complains if secondary values for 'I' and '_'
1377 * are the same. According to the UCA, this is not true. Therefore, remove the test.
1378 * Besides, if primary strengths for two code points are different, it doesn't matter one bit
1379 * what is the relation between secondary or any other strengths.
1380 * killed by weiv 06/11/2002.
1383 doAssert( ((order1 & UCOL_SECONDARYMASK) != (order3 & UCOL_SECONDARYMASK)), "The secondary orders should be different");
1385 doAssert( (order1
!= UCOL_NULLORDER
), "Unexpected end of iterator reached");
1389 ucol_closeElements(iterator1
);
1390 ucol_closeElements(iterator2
);
1391 ucol_closeElements(iterator3
);
1394 log_verbose("testing CollationElementIterator ends...\n");
1397 void TestGetLocale() {
1398 UErrorCode status
= U_ZERO_ERROR
;
1399 const char *rules
= "&a<x<y<z";
1400 UChar rlz
[256] = {0};
1401 uint32_t rlzLen
= u_unescape(rules
, rlz
, 256);
1403 UCollator
*coll
= NULL
;
1404 const char *locale
= NULL
;
1408 static const struct {
1409 const char* requestedLocale
;
1410 const char* validLocale
;
1411 const char* actualLocale
;
1413 { "sr_RS", "sr_Cyrl_RS", "sr" },
1414 { "sh_YU", "sr_Latn_RS", "sr_Latn" }, /* was sh, then aliased to hr, now sr_Latn via import per cldrbug 5647: */
1415 { "en_BE_FOO", "en", "root" },
1416 { "sv_SE_NONEXISTANT", "sv", "sv" }
1419 /* test opening collators for different locales */
1420 for(i
= 0; i
<UPRV_LENGTHOF(testStruct
); i
++) {
1421 status
= U_ZERO_ERROR
;
1422 coll
= ucol_open(testStruct
[i
].requestedLocale
, &status
);
1423 if(U_FAILURE(status
)) {
1424 log_err_status(status
, "Failed to open collator for %s with %s\n", testStruct
[i
].requestedLocale
, u_errorName(status
));
1429 * The requested locale may be the same as the valid locale,
1430 * or may not be supported at all. See ticket #10477.
1432 locale
= ucol_getLocaleByType(coll
, ULOC_REQUESTED_LOCALE
, &status
);
1433 if(U_SUCCESS(status
) &&
1434 strcmp(locale
, testStruct
[i
].requestedLocale
) != 0 && strcmp(locale
, testStruct
[i
].validLocale
) != 0) {
1435 log_err("[Coll %s]: Error in requested locale, expected %s, got %s\n", testStruct
[i
].requestedLocale
, testStruct
[i
].requestedLocale
, locale
);
1437 status
= U_ZERO_ERROR
;
1438 locale
= ucol_getLocaleByType(coll
, ULOC_VALID_LOCALE
, &status
);
1439 if(strcmp(locale
, testStruct
[i
].validLocale
) != 0) {
1440 log_err("[Coll %s]: Error in valid locale, expected %s, got %s\n", testStruct
[i
].requestedLocale
, testStruct
[i
].validLocale
, locale
);
1442 locale
= ucol_getLocaleByType(coll
, ULOC_ACTUAL_LOCALE
, &status
);
1443 if(strcmp(locale
, testStruct
[i
].actualLocale
) != 0) {
1444 log_err("[Coll %s]: Error in actual locale, expected %s, got %s\n", testStruct
[i
].requestedLocale
, testStruct
[i
].actualLocale
, locale
);
1449 /* completely non-existent locale for collator should get a root collator */
1451 UCollator
*defaultColl
= ucol_open(NULL
, &status
);
1452 coll
= ucol_open("blahaha", &status
);
1453 if(U_SUCCESS(status
)) {
1454 /* See comment above about ticket #10477.
1455 if(strcmp(ucol_getLocaleByType(coll, ULOC_REQUESTED_LOCALE, &status), "blahaha")) {
1456 log_err("Nonexisting locale didn't preserve the requested locale\n");
1458 const char *name
= ucol_getLocaleByType(coll
, ULOC_VALID_LOCALE
, &status
);
1459 if(*name
!= 0 && strcmp(name
, "root") != 0) {
1460 log_err("Valid locale for nonexisting-locale collator is \"%s\" not root\n", name
);
1462 name
= ucol_getLocaleByType(coll
, ULOC_ACTUAL_LOCALE
, &status
);
1463 if(*name
!= 0 && strcmp(name
, "root") != 0) {
1464 log_err("Actual locale for nonexisting-locale collator is \"%s\" not root\n", name
);
1467 ucol_close(defaultColl
);
1469 log_data_err("Couldn't open collators\n");
1475 /* collator instantiated from rules should have all three locales NULL */
1476 coll
= ucol_openRules(rlz
, rlzLen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
1478 locale
= ucol_getLocaleByType(coll
, ULOC_REQUESTED_LOCALE
, &status
);
1479 if(U_SUCCESS(status
) && locale
!= NULL
) {
1480 log_err("For collator instantiated from rules, requested locale returned %s instead of NULL\n", locale
);
1482 status
= U_ZERO_ERROR
;
1483 locale
= ucol_getLocaleByType(coll
, ULOC_VALID_LOCALE
, &status
);
1484 if(locale
!= NULL
) {
1485 log_err("For collator instantiated from rules, valid locale returned %s instead of NULL\n", locale
);
1487 locale
= ucol_getLocaleByType(coll
, ULOC_ACTUAL_LOCALE
, &status
);
1488 if(locale
!= NULL
) {
1489 log_err("For collator instantiated from rules, actual locale returned %s instead of NULL\n", locale
);
1493 log_data_err("Couldn't get collator from ucol_openRules() - %s\n", u_errorName(status
));
1501 count
=ucol_countAvailable();
1502 /* use something sensible w/o hardcoding the count */
1504 log_err("Error in countAvailable(), it returned %d\n", count
);
1507 log_verbose("PASS: countAvailable() successful, it returned %d\n", count
);
1509 for(i
=0;i
<count
;i
++)
1510 log_verbose("%s\n", ucol_getAvailable(i
));
1517 const char *original
;
1521 static int compare_teststruct(const void *string1
, const void *string2
) {
1522 return(strcmp((const char *)((struct teststruct
*)string1
)->key
, (const char *)((struct teststruct
*)string2
)->key
));
1526 UErrorCode status
= U_ZERO_ERROR
;
1528 UCollator
*coll
= ucol_open("sh", &status
);
1530 uint8_t sortkey
[512], lower
[512], upper
[512];
1533 static const char * const test
[] = {
1537 "j\\u00F6hn sm\\u00EFth",
1538 "J\\u00F6hn Sm\\u00EFth",
1539 "J\\u00D6HN SM\\u00CFTH",
1544 struct teststruct tests
[] = {
1545 {"\\u010CAKI MIHALJ" } ,
1546 {"\\u010CAKI MIHALJ" } ,
1547 {"\\u010CAKI PIRO\\u0160KA" },
1548 {"\\u010CABAI ANDRIJA" } ,
1549 {"\\u010CABAI LAJO\\u0160" } ,
1550 {"\\u010CABAI MARIJA" } ,
1551 {"\\u010CABAI STEVAN" } ,
1552 {"\\u010CABAI STEVAN" } ,
1553 {"\\u010CABARKAPA BRANKO" } ,
1554 {"\\u010CABARKAPA MILENKO" } ,
1555 {"\\u010CABARKAPA MIROSLAV" } ,
1556 {"\\u010CABARKAPA SIMO" } ,
1557 {"\\u010CABARKAPA STANKO" } ,
1558 {"\\u010CABARKAPA TAMARA" } ,
1559 {"\\u010CABARKAPA TOMA\\u0160" } ,
1560 {"\\u010CABDARI\\u0106 NIKOLA" } ,
1561 {"\\u010CABDARI\\u0106 ZORICA" } ,
1562 {"\\u010CABI NANDOR" } ,
1563 {"\\u010CABOVI\\u0106 MILAN" } ,
1564 {"\\u010CABRADI AGNEZIJA" } ,
1565 {"\\u010CABRADI IVAN" } ,
1566 {"\\u010CABRADI JELENA" } ,
1567 {"\\u010CABRADI LJUBICA" } ,
1568 {"\\u010CABRADI STEVAN" } ,
1569 {"\\u010CABRDA MARTIN" } ,
1570 {"\\u010CABRILO BOGDAN" } ,
1571 {"\\u010CABRILO BRANISLAV" } ,
1572 {"\\u010CABRILO LAZAR" } ,
1573 {"\\u010CABRILO LJUBICA" } ,
1574 {"\\u010CABRILO SPASOJA" } ,
1575 {"\\u010CADE\\u0160 ZDENKA" } ,
1576 {"\\u010CADESKI BLAGOJE" } ,
1577 {"\\u010CADOVSKI VLADIMIR" } ,
1578 {"\\u010CAGLJEVI\\u0106 TOMA" } ,
1579 {"\\u010CAGOROVI\\u0106 VLADIMIR" } ,
1580 {"\\u010CAJA VANKA" } ,
1581 {"\\u010CAJI\\u0106 BOGOLJUB" } ,
1582 {"\\u010CAJI\\u0106 BORISLAV" } ,
1583 {"\\u010CAJI\\u0106 RADOSLAV" } ,
1584 {"\\u010CAK\\u0160IRAN MILADIN" } ,
1585 {"\\u010CAKAN EUGEN" } ,
1586 {"\\u010CAKAN EVGENIJE" } ,
1587 {"\\u010CAKAN IVAN" } ,
1588 {"\\u010CAKAN JULIJAN" } ,
1589 {"\\u010CAKAN MIHAJLO" } ,
1590 {"\\u010CAKAN STEVAN" } ,
1591 {"\\u010CAKAN VLADIMIR" } ,
1592 {"\\u010CAKAN VLADIMIR" } ,
1593 {"\\u010CAKAN VLADIMIR" } ,
1594 {"\\u010CAKARA ANA" } ,
1595 {"\\u010CAKAREVI\\u0106 MOMIR" } ,
1596 {"\\u010CAKAREVI\\u0106 NEDELJKO" } ,
1597 {"\\u010CAKI \\u0160ANDOR" } ,
1598 {"\\u010CAKI AMALIJA" } ,
1599 {"\\u010CAKI ANDRA\\u0160" } ,
1600 {"\\u010CAKI LADISLAV" } ,
1601 {"\\u010CAKI LAJO\\u0160" } ,
1602 {"\\u010CAKI LASLO" } ,
1607 int32_t i
= 0, j
= 0, k
= 0, buffSize
= 0, skSize
= 0, lowerSize
= 0, upperSize
= 0;
1608 int32_t arraySize
= UPRV_LENGTHOF(tests
);
1610 if(U_SUCCESS(status
) && coll
) {
1611 for(i
= 0; i
<arraySize
; i
++) {
1612 buffSize
= u_unescape(tests
[i
].original
, buffer
, 512);
1613 skSize
= ucol_getSortKey(coll
, buffer
, buffSize
, tests
[i
].key
, 512);
1616 qsort(tests
, arraySize
, sizeof(struct teststruct
), compare_teststruct
);
1618 for(i
= 0; i
< arraySize
-1; i
++) {
1619 for(j
= i
+1; j
< arraySize
; j
++) {
1620 lowerSize
= ucol_getBound(tests
[i
].key
, -1, UCOL_BOUND_LOWER
, 1, lower
, 512, &status
);
1621 upperSize
= ucol_getBound(tests
[j
].key
, -1, UCOL_BOUND_UPPER
, 1, upper
, 512, &status
);
1622 (void)lowerSize
; /* Suppress set but not used warning. */
1624 for(k
= i
; k
<= j
; k
++) {
1625 if(strcmp((const char *)lower
, (const char *)tests
[k
].key
) > 0) {
1626 log_err("Problem with lower! j = %i (%s vs %s)\n", k
, tests
[k
].original
, tests
[i
].original
);
1628 if(strcmp((const char *)upper
, (const char *)tests
[k
].key
) <= 0) {
1629 log_err("Problem with upper! j = %i (%s vs %s)\n", k
, tests
[k
].original
, tests
[j
].original
);
1637 for(i
= 0; i
< 1000; i
++) {
1638 lowerRND
= (rand()/(RAND_MAX
/arraySize
));
1639 upperRND
= lowerRND
+ (rand()/(RAND_MAX
/(arraySize
-lowerRND
)));
1641 lowerSize
= ucol_getBound(tests
[lowerRND
].key
, -1, UCOL_BOUND_LOWER
, 1, lower
, 512, &status
);
1642 upperSize
= ucol_getBound(tests
[upperRND
].key
, -1, UCOL_BOUND_UPPER_LONG
, 1, upper
, 512, &status
);
1644 for(j
= lowerRND
; j
<=upperRND
; j
++) {
1645 if(strcmp(lower
, tests
[j
].key
) > 0) {
1646 log_err("Problem with lower! j = %i (%s vs %s)\n", j
, tests
[j
].original
, tests
[lowerRND
].original
);
1648 if(strcmp(upper
, tests
[j
].key
) <= 0) {
1649 log_err("Problem with upper! j = %i (%s vs %s)\n", j
, tests
[j
].original
, tests
[upperRND
].original
);
1659 for(i
= 0; i
<UPRV_LENGTHOF(test
); i
++) {
1660 buffSize
= u_unescape(test
[i
], buffer
, 512);
1661 skSize
= ucol_getSortKey(coll
, buffer
, buffSize
, sortkey
, 512);
1662 lowerSize
= ucol_getBound(sortkey
, skSize
, UCOL_BOUND_LOWER
, 1, lower
, 512, &status
);
1663 upperSize
= ucol_getBound(sortkey
, skSize
, UCOL_BOUND_UPPER_LONG
, 1, upper
, 512, &status
);
1664 for(j
= i
+1; j
<UPRV_LENGTHOF(test
); j
++) {
1665 buffSize
= u_unescape(test
[j
], buffer
, 512);
1666 skSize
= ucol_getSortKey(coll
, buffer
, buffSize
, sortkey
, 512);
1667 if(strcmp((const char *)lower
, (const char *)sortkey
) > 0) {
1668 log_err("Problem with lower! i = %i, j = %i (%s vs %s)\n", i
, j
, test
[i
], test
[j
]);
1670 if(strcmp((const char *)upper
, (const char *)sortkey
) <= 0) {
1671 log_err("Problem with upper! i = %i, j = %i (%s vs %s)\n", i
, j
, test
[i
], test
[j
]);
1677 log_data_err("Couldn't open collator\n");
1682 static void doOverrunTest(UCollator
*coll
, const UChar
*uString
, int32_t strLen
) {
1683 int32_t skLen
= 0, skLen2
= 0;
1684 uint8_t sortKey
[256];
1686 uint8_t filler
= 0xFF;
1688 skLen
= ucol_getSortKey(coll
, uString
, strLen
, NULL
, 0);
1690 for(i
= 0; i
< skLen
; i
++) {
1691 memset(sortKey
, filler
, 256);
1692 skLen2
= ucol_getSortKey(coll
, uString
, strLen
, sortKey
, i
);
1693 if(skLen
!= skLen2
) {
1694 log_err("For buffer size %i, got different sortkey length. Expected %i got %i\n", i
, skLen
, skLen2
);
1696 for(j
= i
; j
< 256; j
++) {
1697 if(sortKey
[j
] != filler
) {
1698 log_err("Something run over index %i\n", j
);
1705 /* j1865 reports that if a shorter buffer is passed to
1706 * to get sort key, a buffer overrun happens in some
1707 * cases. This test tries to check this.
1709 void TestSortKeyBufferOverrun(void) {
1710 UErrorCode status
= U_ZERO_ERROR
;
1711 const char* cString
= "A very Merry liTTle-lamB..";
1714 UCollator
*coll
= ucol_open("root", &status
);
1715 strLen
= u_unescape(cString
, uString
, 256);
1717 if(U_SUCCESS(status
)) {
1718 log_verbose("testing non ignorable\n");
1719 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_NON_IGNORABLE
, &status
);
1720 doOverrunTest(coll
, uString
, strLen
);
1722 log_verbose("testing shifted\n");
1723 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
1724 doOverrunTest(coll
, uString
, strLen
);
1726 log_verbose("testing shifted quaternary\n");
1727 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_QUATERNARY
, &status
);
1728 doOverrunTest(coll
, uString
, strLen
);
1730 log_verbose("testing with french secondaries\n");
1731 ucol_setAttribute(coll
, UCOL_FRENCH_COLLATION
, UCOL_ON
, &status
);
1732 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_TERTIARY
, &status
);
1733 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_NON_IGNORABLE
, &status
);
1734 doOverrunTest(coll
, uString
, strLen
);
1740 static void TestAttribute()
1742 UErrorCode error
= U_ZERO_ERROR
;
1743 UCollator
*coll
= ucol_open(NULL
, &error
);
1745 if (U_FAILURE(error
)) {
1746 log_err_status(error
, "Creation of default collator failed\n");
1750 ucol_setAttribute(coll
, UCOL_FRENCH_COLLATION
, UCOL_OFF
, &error
);
1751 if (ucol_getAttribute(coll
, UCOL_FRENCH_COLLATION
, &error
) != UCOL_OFF
||
1753 log_err_status(error
, "Setting and retrieving of the french collation failed\n");
1756 ucol_setAttribute(coll
, UCOL_FRENCH_COLLATION
, UCOL_ON
, &error
);
1757 if (ucol_getAttribute(coll
, UCOL_FRENCH_COLLATION
, &error
) != UCOL_ON
||
1759 log_err_status(error
, "Setting and retrieving of the french collation failed\n");
1762 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &error
);
1763 if (ucol_getAttribute(coll
, UCOL_ALTERNATE_HANDLING
, &error
) != UCOL_SHIFTED
||
1765 log_err_status(error
, "Setting and retrieving of the alternate handling failed\n");
1768 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_NON_IGNORABLE
, &error
);
1769 if (ucol_getAttribute(coll
, UCOL_ALTERNATE_HANDLING
, &error
) != UCOL_NON_IGNORABLE
||
1771 log_err_status(error
, "Setting and retrieving of the alternate handling failed\n");
1774 ucol_setAttribute(coll
, UCOL_CASE_FIRST
, UCOL_LOWER_FIRST
, &error
);
1775 if (ucol_getAttribute(coll
, UCOL_CASE_FIRST
, &error
) != UCOL_LOWER_FIRST
||
1777 log_err_status(error
, "Setting and retrieving of the case first attribute failed\n");
1780 ucol_setAttribute(coll
, UCOL_CASE_FIRST
, UCOL_UPPER_FIRST
, &error
);
1781 if (ucol_getAttribute(coll
, UCOL_CASE_FIRST
, &error
) != UCOL_UPPER_FIRST
||
1783 log_err_status(error
, "Setting and retrieving of the case first attribute failed\n");
1786 ucol_setAttribute(coll
, UCOL_CASE_LEVEL
, UCOL_ON
, &error
);
1787 if (ucol_getAttribute(coll
, UCOL_CASE_LEVEL
, &error
) != UCOL_ON
||
1789 log_err_status(error
, "Setting and retrieving of the case level attribute failed\n");
1792 ucol_setAttribute(coll
, UCOL_CASE_LEVEL
, UCOL_OFF
, &error
);
1793 if (ucol_getAttribute(coll
, UCOL_CASE_LEVEL
, &error
) != UCOL_OFF
||
1795 log_err_status(error
, "Setting and retrieving of the case level attribute failed\n");
1798 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &error
);
1799 if (ucol_getAttribute(coll
, UCOL_NORMALIZATION_MODE
, &error
) != UCOL_ON
||
1801 log_err_status(error
, "Setting and retrieving of the normalization on/off attribute failed\n");
1804 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &error
);
1805 if (ucol_getAttribute(coll
, UCOL_NORMALIZATION_MODE
, &error
) != UCOL_OFF
||
1807 log_err_status(error
, "Setting and retrieving of the normalization on/off attribute failed\n");
1810 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_PRIMARY
, &error
);
1811 if (ucol_getAttribute(coll
, UCOL_STRENGTH
, &error
) != UCOL_PRIMARY
||
1813 log_err_status(error
, "Setting and retrieving of the collation strength failed\n");
1816 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_SECONDARY
, &error
);
1817 if (ucol_getAttribute(coll
, UCOL_STRENGTH
, &error
) != UCOL_SECONDARY
||
1819 log_err_status(error
, "Setting and retrieving of the collation strength failed\n");
1822 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_TERTIARY
, &error
);
1823 if (ucol_getAttribute(coll
, UCOL_STRENGTH
, &error
) != UCOL_TERTIARY
||
1825 log_err_status(error
, "Setting and retrieving of the collation strength failed\n");
1828 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_QUATERNARY
, &error
);
1829 if (ucol_getAttribute(coll
, UCOL_STRENGTH
, &error
) != UCOL_QUATERNARY
||
1831 log_err_status(error
, "Setting and retrieving of the collation strength failed\n");
1834 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_IDENTICAL
, &error
);
1835 if (ucol_getAttribute(coll
, UCOL_STRENGTH
, &error
) != UCOL_IDENTICAL
||
1837 log_err_status(error
, "Setting and retrieving of the collation strength failed\n");
1843 void TestGetTailoredSet() {
1846 const char *tests
[20];
1849 { "&a < \\u212b", { "\\u212b", "A\\u030a", "\\u00c5" }, 3},
1850 { "& S < \\u0161 <<< \\u0160", { "\\u0161", "s\\u030C", "\\u0160", "S\\u030C" }, 4}
1853 int32_t i
= 0, j
= 0;
1854 UErrorCode status
= U_ZERO_ERROR
;
1857 UCollator
*coll
= NULL
;
1859 int32_t buffLen
= 0;
1862 for(i
= 0; i
< UPRV_LENGTHOF(setTest
); i
++) {
1863 buffLen
= u_unescape(setTest
[i
].rules
, buff
, 1024);
1864 coll
= ucol_openRules(buff
, buffLen
, UCOL_DEFAULT
, UCOL_DEFAULT
, &pError
, &status
);
1865 if(U_SUCCESS(status
)) {
1866 set
= ucol_getTailoredSet(coll
, &status
);
1867 if(uset_size(set
) < setTest
[i
].testsize
) {
1868 log_err("Tailored set size smaller (%d) than expected (%d)\n", uset_size(set
), setTest
[i
].testsize
);
1870 for(j
= 0; j
< setTest
[i
].testsize
; j
++) {
1871 buffLen
= u_unescape(setTest
[i
].tests
[j
], buff
, 1024);
1872 if(!uset_containsString(set
, buff
, buffLen
)) {
1873 log_err("Tailored set doesn't contain %s... It should\n", setTest
[i
].tests
[j
]);
1878 log_err_status(status
, "Couldn't open collator with rules %s\n", setTest
[i
].rules
);
/*
 * Byte-compares two zero-terminated sort keys over their common length only.
 *
 * Returns the memcmp() result for the first min(strlen(first), strlen(second))
 * bytes, so a key that is a pure prefix of the other one compares as equal (0).
 */
static int tMemCmp(const uint8_t *first, const uint8_t *second) {
    const int32_t lenOfFirst = (int32_t)strlen((const char *)first);
    const int32_t lenOfSecond = (int32_t)strlen((const char *)second);
    /* Only the bytes that both keys actually have take part in the comparison. */
    const int32_t shared = (lenOfSecond < lenOfFirst) ? lenOfSecond : lenOfFirst;

    return memcmp(first, second, shared);
}
/*
 * Printable names for collation strengths, used in log messages.
 * TestMergeSortKeys indexes this table with the strength value itself for
 * strengths up to UCOL_QUATERNARY and with index 4 for anything above.
 * NOTE(review): presumably holds five entries, the last naming UCOL_IDENTICAL - confirm.
 */
1889 static const char * strengthsC
[] = {
1897 void TestMergeSortKeys(void) {
1898 UErrorCode status
= U_ZERO_ERROR
;
1899 UCollator
*coll
= ucol_open("en", &status
);
1900 if(U_SUCCESS(status
)) {
1902 const char* cases
[] = {
1907 uint32_t casesSize
= UPRV_LENGTHOF(cases
);
1908 const char* prefix
= "foo";
1909 const char* suffix
= "egg";
1910 char outBuff1
[256], outBuff2
[256];
1912 uint8_t **sortkeys
= (uint8_t **)malloc(casesSize
*sizeof(uint8_t *));
1913 uint8_t **mergedPrefixkeys
= (uint8_t **)malloc(casesSize
*sizeof(uint8_t *));
1914 uint8_t **mergedSuffixkeys
= (uint8_t **)malloc(casesSize
*sizeof(uint8_t *));
1915 uint32_t *sortKeysLen
= (uint32_t *)malloc(casesSize
*sizeof(uint32_t));
1916 uint8_t prefixKey
[256], suffixKey
[256];
1917 uint32_t prefixKeyLen
= 0, suffixKeyLen
= 0, i
= 0;
1919 uint32_t unescapedLen
= 0, l1
= 0, l2
= 0;
1920 UColAttributeValue strength
;
1922 log_verbose("ucol_mergeSortkeys test\n");
1923 log_verbose("Testing order of the test cases\n");
1924 genericLocaleStarter("en", cases
, casesSize
);
1926 for(i
= 0; i
<casesSize
; i
++) {
1927 sortkeys
[i
] = (uint8_t *)malloc(256*sizeof(uint8_t));
1928 mergedPrefixkeys
[i
] = (uint8_t *)malloc(256*sizeof(uint8_t));
1929 mergedSuffixkeys
[i
] = (uint8_t *)malloc(256*sizeof(uint8_t));
1932 unescapedLen
= u_unescape(prefix
, buffer
, 256);
1933 prefixKeyLen
= ucol_getSortKey(coll
, buffer
, unescapedLen
, prefixKey
, 256);
1935 unescapedLen
= u_unescape(suffix
, buffer
, 256);
1936 suffixKeyLen
= ucol_getSortKey(coll
, buffer
, unescapedLen
, suffixKey
, 256);
1938 log_verbose("Massaging data with prefixes and different strengths\n");
1939 strength
= UCOL_PRIMARY
;
1940 while(strength
<= UCOL_IDENTICAL
) {
1941 log_verbose("Strength %s\n", strengthsC
[strength
<=UCOL_QUATERNARY
?strength
:4]);
1942 ucol_setAttribute(coll
, UCOL_STRENGTH
, strength
, &status
);
1943 for(i
= 0; i
<casesSize
; i
++) {
1944 unescapedLen
= u_unescape(cases
[i
], buffer
, 256);
1945 sortKeysLen
[i
] = ucol_getSortKey(coll
, buffer
, unescapedLen
, sortkeys
[i
], 256);
1946 ucol_mergeSortkeys(prefixKey
, prefixKeyLen
, sortkeys
[i
], sortKeysLen
[i
], mergedPrefixkeys
[i
], 256);
1947 ucol_mergeSortkeys(sortkeys
[i
], sortKeysLen
[i
], suffixKey
, suffixKeyLen
, mergedSuffixkeys
[i
], 256);
1949 if(tMemCmp(mergedPrefixkeys
[i
-1], mergedPrefixkeys
[i
]) >= 0) {
1950 log_err("Error while comparing prefixed keys @ strength %s:\n", strengthsC
[strength
<=UCOL_QUATERNARY
?strength
:4]);
1952 ucol_sortKeyToString(coll
, mergedPrefixkeys
[i
-1], outBuff1
, l1
),
1953 ucol_sortKeyToString(coll
, mergedPrefixkeys
[i
], outBuff2
, l2
));
1955 if(tMemCmp(mergedSuffixkeys
[i
-1], mergedSuffixkeys
[i
]) >= 0) {
1956 log_err("Error while comparing suffixed keys @ strength %s:\n", strengthsC
[strength
<=UCOL_QUATERNARY
?strength
:4]);
1958 ucol_sortKeyToString(coll
, mergedSuffixkeys
[i
-1], outBuff1
, l1
),
1959 ucol_sortKeyToString(coll
, mergedSuffixkeys
[i
], outBuff2
, l2
));
1963 if(strength
== UCOL_QUATERNARY
) {
1964 strength
= UCOL_IDENTICAL
;
1971 uint8_t smallBuf
[3];
1972 uint32_t reqLen
= 0;
1973 log_verbose("testing buffer overflow\n");
1974 reqLen
= ucol_mergeSortkeys(prefixKey
, prefixKeyLen
, suffixKey
, suffixKeyLen
, smallBuf
, 3);
1975 if(reqLen
!= (prefixKeyLen
+suffixKeyLen
)) {
1976 log_err("Wrong preflight size for merged sortkey\n");
1982 uint8_t emptyKey
[20], abcKey
[50], mergedKey
[100];
1983 int32_t emptyKeyLen
= 0, abcKeyLen
= 0, mergedKeyLen
= 0;
1985 log_verbose("testing merging with sortkeys generated for empty strings\n");
1986 emptyKeyLen
= ucol_getSortKey(coll
, &empty
, 0, emptyKey
, 20);
1987 unescapedLen
= u_unescape(cases
[0], buffer
, 256);
1988 abcKeyLen
= ucol_getSortKey(coll
, buffer
, unescapedLen
, abcKey
, 50);
1989 mergedKeyLen
= ucol_mergeSortkeys(emptyKey
, emptyKeyLen
, abcKey
, abcKeyLen
, mergedKey
, 100);
1990 if(mergedKey
[0] != 2) {
1991 log_err("Empty sortkey didn't produce a level separator\n");
1993 /* try with zeros */
1994 mergedKeyLen
= ucol_mergeSortkeys(emptyKey
, 0, abcKey
, abcKeyLen
, mergedKey
, 100);
1995 if(mergedKeyLen
!= 0 || mergedKey
[0] != 0) {
1996 log_err("Empty key didn't produce null mergedKey\n");
1998 mergedKeyLen
= ucol_mergeSortkeys(abcKey
, abcKeyLen
, emptyKey
, 0, mergedKey
, 100);
1999 if(mergedKeyLen
!= 0 || mergedKey
[0] != 0) {
2000 log_err("Empty key didn't produce null mergedKey\n");
2005 for(i
= 0; i
<casesSize
; i
++) {
2007 free(mergedPrefixkeys
[i
]);
2008 free(mergedSuffixkeys
[i
]);
2011 free(mergedPrefixkeys
);
2012 free(mergedSuffixkeys
);
2015 /* need to finish this up */
2017 log_data_err("Couldn't open collator");
2020 static void TestShortString(void)
2024 const char *expectedOutput
;
2026 UErrorCode expectedStatus
;
2027 int32_t expectedOffset
;
2028 uint32_t expectedIdentifier
;
2031 * Note: The first test case sets variableTop to the dollar sign '$'.
2032 * We have agreed to drop support for variableTop in ucol_getShortDefinitionString(),
2033 * related to ticket #10372 "deprecate collation APIs for short definition strings",
2034 * and because it did not work for most spaces/punctuation/symbols,
2035 * as documented in ticket #10386 "collation short definition strings issues":
2036 * The old code wrote only 3 hex digits for primary weights below 0x0FFF,
2037 * which is a syntax error, and then failed to normalize the result.
2039 * The "B2700" was removed from the expected result ("B2700_KPHONEBOOK_LDE").
2041 * Previously, this test had to be adjusted for root collator changes because the
2042 * primary weight of the variable top character naturally changed
2043 * but was baked into the expected result.
2045 {"LDE_RDE_KPHONEBOOK_T0024_ZLATN","KPHONEBOOK_LDE", "de@collation=phonebook", U_USING_FALLBACK_WARNING
, 0, 0 },
2047 {"LEN_RUS_NO_AS_S4","AS_LROOT_NO_S4", NULL
, U_USING_DEFAULT_WARNING
, 0, 0 },
2048 {"LDE_VPHONEBOOK_EO_SI","EO_KPHONEBOOK_LDE_SI", "de@collation=phonebook", U_ZERO_ERROR
, 0, 0 },
2049 {"LDE_Kphonebook","KPHONEBOOK_LDE", "de@collation=phonebook", U_ZERO_ERROR
, 0, 0 },
2050 {"Xqde_DE@collation=phonebookq_S3_EX","KPHONEBOOK_LDE", "de@collation=phonebook", U_USING_FALLBACK_WARNING
, 0, 0 },
2051 {"LFR_FO", "FO_LROOT", NULL
, U_USING_DEFAULT_WARNING
, 0, 0 },
2052 {"SO_LX_AS", "", NULL
, U_ILLEGAL_ARGUMENT_ERROR
, 8, 0 },
2053 {"S3_ASS_MMM", "", NULL
, U_ILLEGAL_ARGUMENT_ERROR
, 5, 0 }
2057 UCollator
*coll
= NULL
, *fromNormalized
= NULL
;
2058 UParseError parseError
;
2059 UErrorCode status
= U_ZERO_ERROR
;
2060 char fromShortBuffer
[256], normalizedBuffer
[256], fromNormalizedBuffer
[256];
2061 const char* locale
= NULL
;
2064 for(i
= 0; i
< UPRV_LENGTHOF(testCases
); i
++) {
2065 status
= U_ZERO_ERROR
;
2066 if(testCases
[i
].locale
) {
2067 locale
= testCases
[i
].locale
;
2072 coll
= ucol_openFromShortString(testCases
[i
].input
, FALSE
, &parseError
, &status
);
2073 if(status
!= testCases
[i
].expectedStatus
) {
2074 log_err_status(status
, "Got status '%s' that is different from expected '%s' for '%s'\n",
2075 u_errorName(status
), u_errorName(testCases
[i
].expectedStatus
), testCases
[i
].input
);
2079 if(U_SUCCESS(status
)) {
2080 ucol_getShortDefinitionString(coll
, locale
, fromShortBuffer
, 256, &status
);
2082 if(strcmp(fromShortBuffer
, testCases
[i
].expectedOutput
)) {
2083 log_err("Got short string '%s' from the collator. Expected '%s' for input '%s'\n",
2084 fromShortBuffer
, testCases
[i
].expectedOutput
, testCases
[i
].input
);
2087 ucol_normalizeShortDefinitionString(testCases
[i
].input
, normalizedBuffer
, 256, &parseError
, &status
);
2088 fromNormalized
= ucol_openFromShortString(normalizedBuffer
, FALSE
, &parseError
, &status
);
2089 ucol_getShortDefinitionString(fromNormalized
, locale
, fromNormalizedBuffer
, 256, &status
);
2091 if(strcmp(fromShortBuffer
, fromNormalizedBuffer
)) {
2092 log_err("Strings obtained from collators instantiated by short string ('%s') and from normalized string ('%s') differ\n",
2093 fromShortBuffer
, fromNormalizedBuffer
);
2097 if(!ucol_equals(coll
, fromNormalized
)) {
2098 log_err("Collator from short string ('%s') differs from one obtained through a normalized version ('%s')\n",
2099 testCases
[i
].input
, normalizedBuffer
);
2102 ucol_close(fromNormalized
);
2106 if(parseError
.offset
!= testCases
[i
].expectedOffset
) {
2107 log_err("Got parse error offset %i, but expected %i instead for '%s'\n",
2108 parseError
.offset
, testCases
[i
].expectedOffset
, testCases
[i
].input
);
2116 doSetsTest(const char *locale
, const USet
*ref
, USet
*set
, const char* inSet
, const char* outSet
, UErrorCode
*status
) {
2117 UChar buffer
[65536];
2121 bufLen
= u_unescape(inSet
, buffer
, 512);
2122 uset_applyPattern(set
, buffer
, bufLen
, 0, status
);
2123 if(U_FAILURE(*status
)) {
2124 log_err("%s: Failure setting pattern %s\n", locale
, u_errorName(*status
));
2127 if(!uset_containsAll(ref
, set
)) {
2128 log_err("%s: Some stuff from %s is not present in the set\n", locale
, inSet
);
2129 uset_removeAll(set
, ref
);
2130 bufLen
= uset_toPattern(set
, buffer
, UPRV_LENGTHOF(buffer
), TRUE
, status
);
2131 log_info(" missing: %s\n", aescstrdup(buffer
, bufLen
));
2132 bufLen
= uset_toPattern(ref
, buffer
, UPRV_LENGTHOF(buffer
), TRUE
, status
);
2133 log_info(" total: size=%i %s\n", uset_getItemCount(ref
), aescstrdup(buffer
, bufLen
));
2137 bufLen
= u_unescape(outSet
, buffer
, 512);
2138 uset_applyPattern(set
, buffer
, bufLen
, 0, status
);
2139 if(U_FAILURE(*status
)) {
2140 log_err("%s: Failure setting pattern %s\n", locale
, u_errorName(*status
));
2143 if(!uset_containsNone(ref
, set
)) {
2144 log_err("%s: Some stuff from %s is present in the set\n", locale
, outSet
);
/*
 * Checks the contraction, expansion and unsafe sets of several locales.
 *
 * Each entry of tests[] carries a locale plus pairs of escaped set patterns
 * (inConts/outConts, inExp/outExp, unsafeCodeUnits/safeCodeUnits). For every
 * entry a collator is opened for the locale, then:
 *  - ucol_getContractionsAndExpansions() fills conts and exp, and each is
 *    checked with doSetsTest() against its in/out pattern pair;
 *  - ucol_getUnsafeSet() refills conts, which is checked against the
 *    unsafe/safe pattern pair.
 * After each check the set is converted with uset_toPattern(); a failure of
 * that conversion is logged and status is reset before the next step.
 */
2152 TestGetContractionsAndUnsafes(void)
2156 const char* inConts
;
2157 const char* outConts
;
2160 const char* unsafeCodeUnits
;
2161 const char* safeCodeUnits
;
2164 "[{\\u0418\\u0306}{\\u0438\\u0306}]",
2169 "[aAbB\\u0430\\u0410\\u0433\\u0413]"
2172 "[{\\u0406\\u0308}{\\u0456\\u0308}{\\u0418\\u0306}{\\u0438\\u0306}]",
2173 "[\\u0407\\u0419\\u0439\\u0457]",
2176 "[\\u0406\\u0456\\u0418\\u0438]",
2180 "[{C\\u0301}{C\\u030C}{C\\u0341}{DZ\\u030C}{Dz\\u030C}{D\\u017D}{D\\u017E}{lj}{nj}]",
2181 "[{\\u309d\\u3099}{\\u30fd\\u3099}]",
2189 * The "collv2" builder omits mappings if the collator maps their
2190 * character sequences to the same CEs.
2191 * For example, it omits Japanese contractions for NFD forms
2192 * of the voiced iteration mark (U+309E = U+309D + U+3099), such as
2193 * {\\u3053\\u3099\\u309D\\u3099}{\\u3053\\u309D\\u3099}
2194 * {\\u30B3\\u3099\\u30FD\\u3099}{\\u30B3\\u30FD\\u3099}.
2195 * It does add mappings for the precomposed forms.
2197 "[{\\u3053\\u3099\\u309D}{\\u3053\\u3099\\u309E}{\\u3053\\u3099\\u30FC}"
2198 "{\\u3053\\u309D}{\\u3053\\u309E}{\\u3053\\u30FC}"
2199 "{\\u30B3\\u3099\\u30FC}{\\u30B3\\u3099\\u30FD}{\\u30B3\\u3099\\u30FE}"
2200 "{\\u30B3\\u30FC}{\\u30B3\\u30FD}{\\u30B3\\u30FE}]",
2201 "[{\\u30FD\\u3099}{\\u309D\\u3099}{\\u3053\\u3099}{\\u30B3\\u3099}{lj}{nj}]",
2209 UErrorCode status
= U_ZERO_ERROR
;
2210 UCollator
*coll
= NULL
;
2212 int32_t noConts
= 0;
2213 USet
*conts
= uset_open(0,0);
2214 USet
*exp
= uset_open(0, 0);
2215 USet
*set
= uset_open(0,0);
2216 int32_t setBufferLen
= 65536;
2217 UChar buffer
[65536];
2220 for(i
= 0; i
< UPRV_LENGTHOF(tests
); i
++) {
2221 log_verbose("Testing locale: %s\n", tests
[i
].locale
);
2222 coll
= ucol_open(tests
[i
].locale
, &status
);
2223 if (coll
== NULL
|| U_FAILURE(status
)) {
2224 log_err_status(status
, "Unable to open collator for locale %s ==> %s\n", tests
[i
].locale
, u_errorName(status
));
2227 ucol_getContractionsAndExpansions(coll
, conts
, exp
, TRUE
, &status
);
2228 doSetsTest(tests
[i
].locale
, conts
, set
, tests
[i
].inConts
, tests
[i
].outConts
, &status
);
2229 setLen
= uset_toPattern(conts
, buffer
, setBufferLen
, TRUE
, &status
);
2230 if(U_SUCCESS(status
)) {
2231 /*log_verbose("Contractions %i: %s\n", uset_getItemCount(conts), aescstrdup(buffer, setLen));*/
2233 log_err("error %s. %i\n", u_errorName(status
), setLen
);
2234 status
= U_ZERO_ERROR
;
2236 doSetsTest(tests
[i
].locale
, exp
, set
, tests
[i
].inExp
, tests
[i
].outExp
, &status
);
2237 setLen
= uset_toPattern(exp
, buffer
, setBufferLen
, TRUE
, &status
);
2238 if(U_SUCCESS(status
)) {
2239 /*log_verbose("Expansions %i: %s\n", uset_getItemCount(exp), aescstrdup(buffer, setLen));*/
2241 log_err("error %s. %i\n", u_errorName(status
), setLen
);
2242 status
= U_ZERO_ERROR
;
2245 noConts
= ucol_getUnsafeSet(coll
, conts
, &status
);
2246 (void)noConts
; /* Suppress set but not used warning */
2247 doSetsTest(tests
[i
].locale
, conts
, set
, tests
[i
].unsafeCodeUnits
, tests
[i
].safeCodeUnits
, &status
);
2248 setLen
= uset_toPattern(conts
, buffer
, setBufferLen
, TRUE
, &status
);
2249 if(U_SUCCESS(status
)) {
/* NOTE(review): the pattern logged here was produced from conts (the unsafe
   set), but the item count is taken from exp - probably meant conts; confirm. */
2250 log_verbose("Unsafe %i: %s\n", uset_getItemCount(exp
), aescstrdup(buffer
, setLen
));
2252 log_err("error %s. %i\n", u_errorName(status
), setLen
);
2253 status
= U_ZERO_ERROR
;
2266 TestOpenBinary(void)
2269 * ucol_openBinary() documents:
2270 * "The API also takes a base collator which usually should be UCA."
2272 * "Currently it cannot be NULL."
2274 * However, the check for NULL was commented out in ICU 3.4 (r18149).
2275 * Ticket #4355 requested "Make collation work with minimal data.
2276 * Optionally without UCA, with relevant parts of UCA copied into the tailoring table."
2278 * The ICU team agreed with ticket #10517 "require base collator in ucol_openBinary() etc."
2279 * to require base!=NULL again.
2281 #define OPEN_BINARY_ACCEPTS_NULL_BASE 0
2282 UErrorCode status
= U_ZERO_ERROR
;
2284 char rule[] = "&h < d < c < b";
2285 char *wUCA[] = { "a", "h", "d", "c", "b", "i" };
2286 char *noUCA[] = {"d", "c", "b", "a", "h", "i" };
2288 /* we have to use Cyrillic letters because latin-1 always gets copied */
2289 const char rule
[] = "&\\u0452 < \\u0434 < \\u0433 < \\u0432"; /* &dje < d < g < v */
2290 const char *wUCA
[] = { "\\u0430", "\\u0452", "\\u0434", "\\u0433", "\\u0432", "\\u0435" }; /* a, dje, d, g, v, e */
2291 #if OPEN_BINARY_ACCEPTS_NULL_BASE
2292 const char *noUCA
[] = {"\\u0434", "\\u0433", "\\u0432", "\\u0430", "\\u0435", "\\u0452" }; /* d, g, v, a, e, dje */
2296 int32_t uRulesLen
= u_unescape(rule
, uRules
, 256);
2298 UCollator
*coll
= ucol_openRules(uRules
, uRulesLen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
2299 UCollator
*UCA
= NULL
;
2300 UCollator
*cloneNOUCA
= NULL
, *cloneWUCA
= NULL
;
2302 uint8_t imageBuffer
[32768];
2303 uint8_t *image
= imageBuffer
;
2304 int32_t imageBufferCapacity
= 32768;
2308 if((coll
==NULL
)||(U_FAILURE(status
))) {
2309 log_data_err("could not load collators or error occured: %s\n",
2310 u_errorName(status
));
2313 UCA
= ucol_open("root", &status
);
2314 if((UCA
==NULL
)||(U_FAILURE(status
))) {
2315 log_data_err("could not load UCA collator or error occured: %s\n",
2316 u_errorName(status
));
2319 imageSize
= ucol_cloneBinary(coll
, image
, imageBufferCapacity
, &status
);
2320 if(U_FAILURE(status
)) {
2321 image
= (uint8_t *)malloc(imageSize
*sizeof(uint8_t));
2322 status
= U_ZERO_ERROR
;
2323 imageSize
= ucol_cloneBinary(coll
, imageBuffer
, imageSize
, &status
);
2327 cloneWUCA
= ucol_openBinary(image
, imageSize
, UCA
, &status
);
2328 cloneNOUCA
= ucol_openBinary(image
, imageSize
, NULL
, &status
);
2329 #if !OPEN_BINARY_ACCEPTS_NULL_BASE
2330 if(status
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2331 log_err("ucol_openBinary(base=NULL) unexpectedly did not fail - %s\n", u_errorName(status
));
2335 genericOrderingTest(coll
, wUCA
, UPRV_LENGTHOF(wUCA
));
2337 genericOrderingTest(cloneWUCA
, wUCA
, UPRV_LENGTHOF(wUCA
));
2338 #if OPEN_BINARY_ACCEPTS_NULL_BASE
2339 genericOrderingTest(cloneNOUCA
, noUCA
, UPRV_LENGTHOF(noUCA
));
2342 if(image
!= imageBuffer
) {
2346 ucol_close(cloneNOUCA
);
2347 ucol_close(cloneWUCA
);
2351 static void TestDefault(void) {
2352 /* Tests for code coverage. */
2353 UErrorCode status
= U_ZERO_ERROR
;
2354 UCollator
*coll
= ucol_open("es@collation=pinyin", &status
);
2355 if (coll
== NULL
|| status
== U_FILE_ACCESS_ERROR
) {
2356 log_data_err("Unable to open collator es@collation=pinyin\n");
2359 if (status
!= U_USING_DEFAULT_WARNING
) {
2360 /* What do you mean that you know about using pinyin collation in Spanish!? This should be in the zh locale. */
2361 log_err("es@collation=pinyin should return U_USING_DEFAULT_WARNING, but returned %s\n", u_errorName(status
));
2364 if (ucol_getKeywordValues("funky", &status
) != NULL
) {
2365 log_err("Collators should not know about the funky keyword.\n");
2367 if (status
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2368 log_err("funky keyword didn't fail as expected %s\n", u_errorName(status
));
2370 if (ucol_getKeywordValues("collation", &status
) != NULL
) {
2371 log_err("ucol_getKeywordValues should not work when given a bad status.\n");
2375 static void TestDefaultKeyword(void) {
2376 /* Tests for code coverage. */
2377 UErrorCode status
= U_ZERO_ERROR
;
2378 const char *loc
= "zh_TW@collation=default";
2379 UCollator
*coll
= ucol_open(loc
, &status
);
2380 if(U_FAILURE(status
)) {
2381 log_info("Warning: ucol_open(%s, ...) returned %s, at least it didn't crash.\n", loc
, u_errorName(status
));
2382 } else if (status
!= U_USING_FALLBACK_WARNING
) {
2383 /* Hmm, skip the following test for CLDR 1.9 data and/or ICU 4.6, no longer seems to apply */
2385 log_err("ucol_open(%s, ...) should return an error or some sort of U_USING_FALLBACK_WARNING, but returned %s\n", loc
, u_errorName(status
));
2391 static UBool
uenum_contains(UEnumeration
*e
, const char *s
, UErrorCode
*status
) {
2393 uenum_reset(e
, status
);
2394 while(((t
= uenum_next(e
, NULL
, status
)) != NULL
) && U_SUCCESS(*status
)) {
2395 if(uprv_strcmp(s
, t
) == 0) {
2402 static void TestGetKeywordValuesForLocale(void) {
2403 #define MAX_NUMBER_OF_KEYWORDS 9
2404 const char *PREFERRED
[][MAX_NUMBER_OF_KEYWORDS
+1] = {
2405 { "und", "standard", "eor", "search", NULL
, NULL
, NULL
, NULL
, NULL
, NULL
},
2406 { "en_US", "standard", "eor", "search", NULL
, NULL
, NULL
, NULL
, NULL
, NULL
},
2407 { "en_029", "standard", "eor", "search", NULL
, NULL
, NULL
, NULL
, NULL
, NULL
},
2408 { "de_DE", "standard", "phonebook", "search", "eor", NULL
, NULL
, NULL
, NULL
, NULL
},
2409 { "de_Latn_DE", "standard", "phonebook", "search", "eor", NULL
, NULL
, NULL
, NULL
, NULL
},
2410 { "zh", "pinyin", "stroke", "eor", "search", "standard", NULL
},
2411 { "zh_Hans", "pinyin", "stroke", "eor", "search", "standard", NULL
},
2412 { "zh_CN", "pinyin", "stroke", "eor", "search", "standard", NULL
},
2413 { "zh_Hant", "stroke", "pinyin", "eor", "search", "standard", NULL
},
2414 { "zh_TW", "stroke", "pinyin", "eor", "search", "standard", NULL
},
2415 { "zh__PINYIN", "pinyin", "stroke", "eor", "search", "standard", NULL
},
2416 { "es_ES", "standard", "search", "traditional", "eor", NULL
, NULL
, NULL
, NULL
, NULL
},
2417 { "es__TRADITIONAL","traditional", "search", "standard", "eor", NULL
, NULL
, NULL
, NULL
, NULL
},
2418 { "und@collation=phonebook", "standard", "eor", "search", NULL
, NULL
, NULL
, NULL
, NULL
, NULL
},
2419 { "de_DE@collation=pinyin", "standard", "phonebook", "search", "eor", NULL
, NULL
, NULL
, NULL
, NULL
},
2420 { "zzz@collation=xxx", "standard", "eor", "search", NULL
, NULL
, NULL
, NULL
, NULL
, NULL
}
2423 UErrorCode status
= U_ZERO_ERROR
;
2424 UEnumeration
*keywordValues
= NULL
;
2426 const char *locale
= NULL
, *value
= NULL
;
2427 UBool errorOccurred
= FALSE
;
2429 for (i
= 0; i
< UPRV_LENGTHOF(PREFERRED
) && !errorOccurred
; i
++) {
2430 locale
= PREFERRED
[i
][0];
2434 keywordValues
= ucol_getKeywordValuesForLocale("collation", locale
, TRUE
, &status
);
2435 if (keywordValues
== NULL
|| U_FAILURE(status
)) {
2436 log_err_status(status
, "Error getting keyword values: %s\n", u_errorName(status
));
2439 size
= uenum_count(keywordValues
, &status
);
2442 for (n
= 0; (value
= PREFERRED
[i
][n
+1]) != NULL
; n
++) {
2443 if (!uenum_contains(keywordValues
, value
, &status
)) {
2444 if (U_SUCCESS(status
)) {
2445 log_err("Keyword value \"%s\" missing for locale: %s\n", value
, locale
);
2447 log_err("While getting keyword value from locale: %s got this error: %s\n", locale
, u_errorName(status
));
2448 errorOccurred
= TRUE
;
2453 uenum_close(keywordValues
);
2454 keywordValues
= NULL
;
2456 uenum_close(keywordValues
);
2459 static void TestStrcollNull(void) {
2460 UErrorCode status
= U_ZERO_ERROR
;
2463 const UChar u16asc
[] = {0x0049, 0x0042, 0x004D, 0};
2464 const int32_t u16ascLen
= 3;
2466 const UChar u16han
[] = {0x5c71, 0x5ddd, 0};
2467 const int32_t u16hanLen
= 2;
2469 const char *u8asc
= "\x49\x42\x4D";
2470 const int32_t u8ascLen
= 3;
2472 const char *u8han
= "\xE5\xB1\xB1\xE5\xB7\x9D";
2473 const int32_t u8hanLen
= 6;
2475 coll
= ucol_open(NULL
, &status
);
2476 if (U_FAILURE(status
)) {
2477 log_err_status(status
, "Default Collator creation failed.: %s\n", myErrorName(status
));
2482 if (ucol_strcoll(coll
, NULL
, 0, NULL
, 0) != 0) {
2483 log_err("ERROR : ucol_strcoll NULL/0 and NULL/0");
2486 if (ucol_strcoll(coll
, NULL
, -1, NULL
, 0) != 0) {
2487 /* No error arg, should return equal without crash */
2488 log_err("ERROR : ucol_strcoll NULL/-1 and NULL/0");
2491 if (ucol_strcoll(coll
, u16asc
, -1, NULL
, 10) != 0) {
2492 /* No error arg, should return equal without crash */
2493 log_err("ERROR : ucol_strcoll u16asc/u16ascLen and NULL/10");
2496 if (ucol_strcoll(coll
, u16asc
, -1, NULL
, 0) <= 0) {
2497 log_err("ERROR : ucol_strcoll u16asc/-1 and NULL/0");
2499 if (ucol_strcoll(coll
, NULL
, 0, u16asc
, -1) >= 0) {
2500 log_err("ERROR : ucol_strcoll NULL/0 and u16asc/-1");
2502 if (ucol_strcoll(coll
, u16asc
, u16ascLen
, NULL
, 0) <= 0) {
2503 log_err("ERROR : ucol_strcoll u16asc/u16ascLen and NULL/0");
2506 if (ucol_strcoll(coll
, u16han
, -1, NULL
, 0) <= 0) {
2507 log_err("ERROR : ucol_strcoll u16han/-1 and NULL/0");
2509 if (ucol_strcoll(coll
, NULL
, 0, u16han
, -1) >= 0) {
2510 log_err("ERROR : ucol_strcoll NULL/0 and u16han/-1");
2512 if (ucol_strcoll(coll
, NULL
, 0, u16han
, u16hanLen
) >= 0) {
2513 log_err("ERROR : ucol_strcoll NULL/0 and u16han/u16hanLen");
2517 status
= U_ZERO_ERROR
;
2518 if (ucol_strcollUTF8(coll
, NULL
, 0, NULL
, 0, &status
) != 0 || U_FAILURE(status
)) {
2519 log_err("ERROR : ucol_strcollUTF8 NULL/0 and NULL/0");
2521 status
= U_ZERO_ERROR
;
2522 ucol_strcollUTF8(coll
, NULL
, -1, NULL
, 0, &status
);
2523 if (status
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2524 log_err("ERROR: ucol_strcollUTF8 NULL/-1 and NULL/0, should return U_ILLEGAL_ARGUMENT_ERROR");
2526 status
= U_ZERO_ERROR
;
2527 ucol_strcollUTF8(coll
, u8asc
, u8ascLen
, NULL
, 10, &status
);
2528 if (status
!= U_ILLEGAL_ARGUMENT_ERROR
) {
2529 log_err("ERROR: ucol_strcollUTF8 u8asc/u8ascLen and NULL/10, should return U_ILLEGAL_ARGUMENT_ERROR");
2532 status
= U_ZERO_ERROR
;
2533 if (ucol_strcollUTF8(coll
, u8asc
, -1, NULL
, 0, &status
) <= 0 || U_FAILURE(status
)) {
2534 log_err("ERROR : ucol_strcollUTF8 u8asc/-1 and NULL/0");
2536 status
= U_ZERO_ERROR
;
2537 if (ucol_strcollUTF8(coll
, NULL
, 0, u8asc
, -1, &status
) >= 0 || U_FAILURE(status
)) {
2538 log_err("ERROR : ucol_strcollUTF8 NULL/0 and u8asc/-1");
2540 status
= U_ZERO_ERROR
;
2541 if (ucol_strcollUTF8(coll
, u8asc
, u8ascLen
, NULL
, 0, &status
) <= 0 || U_FAILURE(status
)) {
2542 log_err("ERROR : ucol_strcollUTF8 u8asc/u8ascLen and NULL/0");
2545 status
= U_ZERO_ERROR
;
2546 if (ucol_strcollUTF8(coll
, u8han
, -1, NULL
, 0, &status
) <= 0 || U_FAILURE(status
)) {
2547 log_err("ERROR : ucol_strcollUTF8 u8han/-1 and NULL/0");
2549 status
= U_ZERO_ERROR
;
2550 if (ucol_strcollUTF8(coll
, NULL
, 0, u8han
, -1, &status
) >= 0 || U_FAILURE(status
)) {
2551 log_err("ERROR : ucol_strcollUTF8 NULL/0 and u8han/-1");
2553 status
= U_ZERO_ERROR
;
2554 if (ucol_strcollUTF8(coll
, NULL
, 0, u8han
, u8hanLen
, &status
) >= 0 || U_FAILURE(status
)) {
2555 log_err("ERROR : ucol_strcollUTF8 NULL/0 and u8han/u8hanLen");
2561 #endif /* #if !UCONFIG_NO_COLLATION */