1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
5 * Copyright (c) 1997-2014, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /*******************************************************************************
12 * Modification History:
14 * Madhu Katragadda Ported for C API
15 ********************************************************************************
19 * Important: This file is included into intltest/allcoll.cpp so that the
20 * test data is shared. This makes it easier to maintain the test data,
21 * especially since the Unicode data must be portable and quoted character
22 * literals will not work.
23 * If it is included, then there will be a #define INCLUDE_CALLCOLL_C
24 * that must prevent the actual code in here from being part of the
25 * allcoll.cpp compilation.
29 * CollationDummyTest is a third level test class. This tests creation of
30 * a customized collator object. For example, number 1 to be sorted
31 * equivalent to word 'one'.
37 #include "unicode/utypes.h"
39 #if !UCONFIG_NO_COLLATION
41 #include "unicode/ucol.h"
42 #include "unicode/uloc.h"
43 #include "unicode/ures.h"
44 #include "unicode/udata.h"
45 #include "unicode/ucoleitr.h"
46 #include "unicode/ustring.h"
47 #include "unicode/uclean.h"
48 #include "unicode/putil.h"
49 #include "unicode/uenum.h"
58 /* set to 1 to test offsets in backAndForth() */
59 #define TEST_OFFSETS 0
/*
 * Forward declarations of the file-local test entry points. Every function
 * declared here is registered with the test tree by addAllCollTest() under
 * the path "tscoll/callcoll/<function name>".
 */
61 /* perform test with strength PRIMARY */
62 static void TestPrimary(void);
64 /* perform test with strength SECONDARY */
65 static void TestSecondary(void);
67 /* perform test with strength tertiary */
68 static void TestTertiary(void);
70 /*perform tests with strength Identical */
71 static void TestIdentical(void);
73 /* perform extra tests */
74 static void TestExtra(void);
76 /* Test jitterbug 581 */
77 static void TestJB581(void);
79 /* Test jitterbug 1401 */
80 static void TestJB1401(void);
82 /* Test [variable top] in the rule syntax */
83 static void TestVariableTop(void);
/* Test surrogate support (English collator and tailored rules) */
86 static void TestSurrogates(void);
88 static void TestInvalidRules(void);
90 static void TestJitterbug1098(void);
92 static void TestFCDCrash(void);
94 static void TestJ5298(void);
96 static void TestBadKey(void);
/*
 * Table of expected collation results. The Test* functions pass results[i]
 * to doTest() together with testSourceCases[i] and testTargetCases[i].
 * The inline markers record where the ranges used by TestPrimary (starting
 * at index 17) and TestSecondary (starting at index 26) begin.
 */
98 const UCollationResult results
[] = {
100 UCOL_LESS
, /*UCOL_GREATER,*/
116 /* test primary > 17 */
126 /* test secondary > 26 */
/*
 * Appends the hexadecimal representation of one byte to a C string.
 *
 * dst  - NUL-terminated string; its current length is taken with
 *        uprv_strlen() and the new characters are written starting there.
 *        NOTE(review): the caller must provide room for the extra
 *        characters; no capacity is passed in or checked here.
 * val  - byte to append; the high nibble (val >> 4) is written at dst[len]
 *        and the low nibble (val & 0xF) at dst[len+1].
 *
 * T_CString_itosOffset() presumably maps a value 0..15 to its digit
 * character - confirm against its definition.
 */
142 void uprv_appendByteToHexString(char *dst
, uint8_t val
) {
143 uint32_t len
= (uint32_t)uprv_strlen(dst
);
144 *(dst
+len
) = T_CString_itosOffset((val
>> 4));
145 *(dst
+len
+1) = T_CString_itosOffset((val
& 0xF));
149 /* this function makes a string with representation of a sortkey */
/*
 * Writes a bracketed, human-readable hex dump of a sort key into buffer.
 *
 * coll    - collator that produced the key; its strength and the
 *           UCOL_CASE_LEVEL / UCOL_ALTERNATE_HANDLING attributes decide
 *           how many levels are printed.
 * sortkey - sort key bytes; each level is read up to the next 0x01 or
 *           0x00 byte.
 * buffer  - output string, started with "[" and closed with "]".
 *           NOTE(review): strcpy/strcat are used without a capacity check
 *           while writing; the caller must size the buffer generously.
 * len     - compared against res_size once the string is built.
 *
 * Levels from UCOL_PRIMARY up to min(UCOL_QUATERNARY, collator strength)
 * are printed, separated by " . ", each byte followed by a space. When the
 * collator strength is UCOL_IDENTICAL an additional section is printed up
 * to the terminating 0 byte.
 */
150 static char* U_EXPORT2
sortKeyToString(const UCollator
*coll
, const uint8_t *sortkey
, char *buffer
, uint32_t *len
) {
151 int32_t strength
= UCOL_PRIMARY
;
152 uint32_t res_size
= 0;
153 UBool doneCase
= FALSE
;
154 UErrorCode errorCode
= U_ZERO_ERROR
;
156 char *current
= buffer
;
157 const uint8_t *currentSk
= sortkey
;
159 uprv_strcpy(current
, "[");
161 while(strength
<= UCOL_QUATERNARY
&& strength
<= ucol_getStrength(coll
)) {
162 if(strength
> UCOL_PRIMARY
) {
163 uprv_strcat(current
, " . ");
165 while(*currentSk
!= 0x01 && *currentSk
!= 0x00) { /* print a level */
166 uprv_appendByteToHexString(current
, *currentSk
++);
167 uprv_strcat(current
, " ");
169 if(ucol_getAttribute(coll
, UCOL_CASE_LEVEL
, &errorCode
) == UCOL_ON
&& strength
== UCOL_SECONDARY
&& doneCase
== FALSE
) {
171 } else if(ucol_getAttribute(coll
, UCOL_CASE_LEVEL
, &errorCode
) == UCOL_OFF
|| doneCase
== TRUE
|| strength
!= UCOL_SECONDARY
) {
175 uprv_appendByteToHexString(current
, *currentSk
++); /* This should print '01' */
177 if(strength
== UCOL_QUATERNARY
&& ucol_getAttribute(coll
, UCOL_ALTERNATE_HANDLING
, &errorCode
) == UCOL_NON_IGNORABLE
) {
182 if(ucol_getStrength(coll
) == UCOL_IDENTICAL
) {
183 uprv_strcat(current
, " . ");
184 while(*currentSk
!= 0) {
185 uprv_appendByteToHexString(current
, *currentSk
++);
186 uprv_strcat(current
, " ");
189 uprv_appendByteToHexString(current
, *currentSk
++);
191 uprv_strcat(current
, "]");
193 if(res_size
> *len
) {
/*
 * Registers every test function of this file with the test tree.
 *
 * root - test tree root handed to addTest(); each test is added under the
 *        path "tscoll/callcoll/<function name>".
 */
200 void addAllCollTest(TestNode
** root
)
202 addTest(root
, &TestPrimary
, "tscoll/callcoll/TestPrimary");
203 addTest(root
, &TestSecondary
, "tscoll/callcoll/TestSecondary");
204 addTest(root
, &TestTertiary
, "tscoll/callcoll/TestTertiary");
205 addTest(root
, &TestIdentical
, "tscoll/callcoll/TestIdentical");
206 addTest(root
, &TestExtra
, "tscoll/callcoll/TestExtra");
207 addTest(root
, &TestJB581
, "tscoll/callcoll/TestJB581");
208 addTest(root
, &TestVariableTop
, "tscoll/callcoll/TestVariableTop");
209 addTest(root
, &TestSurrogates
, "tscoll/callcoll/TestSurrogates");
210 addTest(root
, &TestInvalidRules
, "tscoll/callcoll/TestInvalidRules");
211 addTest(root
, &TestJB1401
, "tscoll/callcoll/TestJB1401");
212 addTest(root
, &TestJitterbug1098
, "tscoll/callcoll/TestJitterbug1098");
213 addTest(root
, &TestFCDCrash
, "tscoll/callcoll/TestFCDCrash");
214 addTest(root
, &TestJ5298
, "tscoll/callcoll/TestJ5298");
215 addTest(root
, &TestBadKey
, "tscoll/callcoll/TestBadKey");
/*
 * Reports whether the locale locName has its own collation data.
 *
 * Opens the "coll" resource tree of the ICU data package for locName and,
 * if that succeeds, looks up the "collations" entry (reusing the same
 * bundle as the fill-in). The final check compares status against
 * U_ZERO_ERROR exactly rather than using U_SUCCESS(), so only a lookup
 * that finishes with that exact status counts as "real elements".
 */
218 UBool
hasCollationElements(const char *locName
) {
220 UErrorCode status
= U_ZERO_ERROR
;
222 UResourceBundle
*loc
= ures_open(U_ICUDATA_NAME U_TREE_SEPARATOR_STRING
"coll", locName
, &status
);;
224 if(U_SUCCESS(status
)) {
225 status
= U_ZERO_ERROR
;
226 loc
= ures_getByKey(loc
, "collations", loc
, &status
);
228 if(status
== U_ZERO_ERROR
) { /* do the test - there are real elements */
/*
 * Compares two strings by generating their sort keys piece by piece.
 *
 * coll         - collator used for ucol_nextSortKeyPart().
 * source/sLen  - first string and its length.
 * target/tLen  - second string and its length.
 * pieceSize    - number of sort key bytes requested per step; values above
 *                16384 (the size of the local buffers) are rejected with a
 *                logged error and *status = U_BUFFER_OVERFLOW_ERROR.
 * status       - reset to U_ZERO_ERROR before the comparison starts.
 *
 * The loop continues while both sides returned a full piece and the pieces
 * compared equal with memcmp(). Both iterators are re-initialised with
 * uiter_setString() on every pass; progress through the key is carried in
 * the sState/tState arrays. The result is chosen from the sign of the
 * last memcmp() (presumably UCOL_LESS / UCOL_GREATER / UCOL_EQUAL).
 */
235 static UCollationResult
compareUsingPartials(UCollator
*coll
, const UChar source
[], int32_t sLen
, const UChar target
[], int32_t tLen
, int32_t pieceSize
, UErrorCode
*status
) {
236 int32_t partialSKResult
= 0;
237 UCharIterator sIter
, tIter
;
238 uint32_t sState
[2], tState
[2];
239 int32_t sSize
= pieceSize
, tSize
= pieceSize
;
241 uint8_t sBuf
[16384], tBuf
[16384];
242 if(pieceSize
> 16384) {
243 log_err("Partial sortkey size buffer too small. Please consider increasing the buffer!\n");
244 *status
= U_BUFFER_OVERFLOW_ERROR
;
247 *status
= U_ZERO_ERROR
;
248 sState
[0] = 0; sState
[1] = 0;
249 tState
[0] = 0; tState
[1] = 0;
250 while(sSize
== pieceSize
&& tSize
== pieceSize
&& partialSKResult
== 0) {
251 uiter_setString(&sIter
, source
, sLen
);
252 uiter_setString(&tIter
, target
, tLen
);
253 sSize
= ucol_nextSortKeyPart(coll
, &sIter
, sState
, sBuf
, pieceSize
, status
);
254 tSize
= ucol_nextSortKeyPart(coll
, &tIter
, tState
, tBuf
, pieceSize
, status
);
256 if(sState
[0] != 0 || tState
[0] != 0) {
257 /*log_verbose("State != 0 : %08X %08X\n", sState[0], tState[0]);*/
259 /*log_verbose("%i ", i++);*/
261 partialSKResult
= memcmp(sBuf
, tBuf
, pieceSize
);
264 if(partialSKResult
< 0) {
266 } else if(partialSKResult
> 0) {
/*
 * Checks that every comparison path of the collator agrees with the
 * expected result for one (source, target) pair.
 *
 * myCollation - collator under test; its normalization mode is switched on
 *               temporarily for some checks and restored to the saved
 *               value afterwards.
 * source      - NUL-terminated first string.
 * target      - NUL-terminated second string.
 * result      - expected outcome of comparing source with target.
 *
 * Paths exercised, each logging an error on mismatch:
 *   - ucol_strcoll() with explicit lengths and with -1 lengths
 *   - ucol_strcollIter() over UTF-16 iterators
 *   - ucol_strcollUTF8() with explicit and -1 lengths, plus
 *     ucol_strcollIter() over UTF-8 iterators with and without
 *     normalization (skipped if the 256-byte UTF-8 buffers are too small)
 *   - partial sort keys via compareUsingPartials() (piece size 3 only in
 *     quick mode, otherwise all sizes of partialSizes)
 *   - full sort keys via ucol_getSortKey(), including that keys built from
 *     explicit-length and NUL-terminated input are identical
 *
 * The message for the NUL-terminated ucol_strcoll() check reports
 * compareResulta, the value that was actually tested.
 */
273 static void doTestVariant(UCollator
* myCollation
, const UChar source
[], const UChar target
[], UCollationResult result
)
275 int32_t sortklen1
, sortklen2
, sortklenmax
, sortklenmin
;
276 int temp
=0, gSortklen1
=0,gSortklen2
=0;
277 UCollationResult compareResult
, compareResulta
, keyResult
, compareResultIter
= result
;
278 uint8_t *sortKey1
, *sortKey2
, *sortKey1a
, *sortKey2a
;
279 uint32_t sLen
= u_strlen(source
);
280 uint32_t tLen
= u_strlen(target
);
283 UErrorCode status
= U_ZERO_ERROR
;
284 UColAttributeValue norm
= ucol_getAttribute(myCollation
, UCOL_NORMALIZATION_MODE
, &status
);
286 UCharIterator sIter
, tIter
;
288 compareResult
= ucol_strcoll(myCollation
, source
, sLen
, target
, tLen
);
289 if (compareResult
!= result
) {
290 log_err("ucol_strcoll with explicit length returned wrong result (%i exp. %i): %s, %s\n",
291 compareResult
, result
, aescstrdup(source
,-1), aescstrdup(target
,-1));
293 compareResulta
= ucol_strcoll(myCollation
, source
, -1, target
, -1);
294 if (compareResulta
!= result
) {
295 log_err("ucol_strcoll with null terminated strings returned wrong result (%i exp. %i): %s, %s\n",
296 compareResulta
, result
, aescstrdup(source
,-1), aescstrdup(target
,-1));
299 uiter_setString(&sIter
, source
, sLen
);
300 uiter_setString(&tIter
, target
, tLen
);
301 compareResultIter
= ucol_strcollIter(myCollation
, &sIter
, &tIter
, &status
);
302 if(compareResultIter
!= result
) {
303 log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source
,-1), aescstrdup(target
,-1));
306 /* convert the strings to UTF-8 and do try comparing with char iterator and ucol_strcollUTF8 */
308 char utf8Source
[256], utf8Target
[256];
309 int32_t utf8SourceLen
= 0, utf8TargetLen
= 0;
311 u_strToUTF8(utf8Source
, 256, &utf8SourceLen
, source
, sLen
, &status
);
312 if(U_FAILURE(status
)) { /* probably buffer is not big enough */
313 log_verbose("Src UTF-8 buffer too small! Will not compare!\n");
315 u_strToUTF8(utf8Target
, 256, &utf8TargetLen
, target
, tLen
, &status
);
316 if(U_SUCCESS(status
)) {
318 /* ucol_strcollUTF8 */
319 compareResulta
= ucol_strcollUTF8(myCollation
, utf8Source
, utf8SourceLen
, utf8Target
, utf8TargetLen
, &status
);
320 if (U_FAILURE(status
)) {
321 log_err("Error in ucol_strcollUTF8 with explicit length\n");
322 status
= U_ZERO_ERROR
;
323 } else if (compareResulta
!= result
) {
324 log_err("ucol_strcollUTF8 with explicit length returned wrong result (%i exp. %i): %s, %s\n",
325 compareResulta
, result
, aescstrdup(source
,-1), aescstrdup(target
,-1));
327 compareResulta
= ucol_strcollUTF8(myCollation
, utf8Source
, -1, utf8Target
, -1, &status
);
328 if (U_FAILURE(status
)) {
329 log_err("Error in ucol_strcollUTF8 with null terminated strings\n");
330 status
= U_ZERO_ERROR
;
331 } else if (compareResulta
!= result
) {
332 log_err("ucol_strcollUTF8 with null terminated strings returned wrong result (%i exp. %i): %s, %s\n",
333 compareResulta
, result
, aescstrdup(source
,-1), aescstrdup(target
,-1));
338 /* char iterator over UTF8 */
339 UCollationResult compareResultUTF8Iter
= result
, compareResultUTF8IterNorm
= result
;
341 uiter_setUTF8(&sIter
, utf8Source
, utf8SourceLen
);
342 uiter_setUTF8(&tIter
, utf8Target
, utf8TargetLen
);
343 compareResultUTF8Iter
= ucol_strcollIter(myCollation
, &sIter
, &tIter
, &status
);
345 ucol_setAttribute(myCollation
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
346 sIter
.move(&sIter
, 0, UITER_START
);
347 tIter
.move(&tIter
, 0, UITER_START
);
348 compareResultUTF8IterNorm
= ucol_strcollIter(myCollation
, &sIter
, &tIter
, &status
);
350 ucol_setAttribute(myCollation
, UCOL_NORMALIZATION_MODE
, norm
, &status
);
351 if(compareResultUTF8Iter
!= compareResultIter
) {
352 log_err("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. %s, %s\n", aescstrdup(source
,-1), aescstrdup(target
,-1));
354 if(compareResultUTF8Iter
!= compareResultUTF8IterNorm
) {
355 log_err("different results in iterative when normalization is turned on with UTF-8 strings. %s, %s\n", aescstrdup(source
,-1), aescstrdup(target
,-1));
359 log_verbose("Target UTF-8 buffer too small! Did not compare!\n");
361 if(U_FAILURE(status
)) {
362 log_verbose("UTF-8 strcoll failed! Ignoring result\n");
367 /* testing the partial sortkeys */
370 int32_t partialSizes
[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
371 int32_t partialSizesSize
= 1;
372 if(getTestOption(QUICK_OPTION
) <= 0) {
373 partialSizesSize
= 7;
375 /*log_verbose("partial sortkey test piecesize=");*/
376 for(i
= 0; i
< partialSizesSize
; i
++) {
377 UCollationResult partialSKResult
= result
, partialNormalizedSKResult
= result
;
378 /*log_verbose("%i ", partialSizes[i]);*/
380 partialSKResult
= compareUsingPartials(myCollation
, source
, sLen
, target
, tLen
, partialSizes
[i
], &status
);
381 if(partialSKResult
!= result
) {
382 log_err("Partial sortkey comparison returned wrong result (%i exp. %i): %s, %s (size %i)\n",
383 partialSKResult
, result
,
384 aescstrdup(source
,-1), aescstrdup(target
,-1), partialSizes
[i
]);
387 if(getTestOption(QUICK_OPTION
) <= 0 && norm
!= UCOL_ON
) {
388 /*log_verbose("N ");*/
389 ucol_setAttribute(myCollation
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
390 partialNormalizedSKResult
= compareUsingPartials(myCollation
, source
, sLen
, target
, tLen
, partialSizes
[i
], &status
);
391 ucol_setAttribute(myCollation
, UCOL_NORMALIZATION_MODE
, norm
, &status
);
392 if(partialSKResult
!= partialNormalizedSKResult
) {
393 log_err("Partial sortkey comparison gets different result when normalization is on: %s, %s (size %i)\n",
394 aescstrdup(source
,-1), aescstrdup(target
,-1), partialSizes
[i
]);
398 /*log_verbose("\n");*/
401 sortklen1
=ucol_getSortKey(myCollation
, source
, sLen
, NULL
, 0);
402 sortklen2
=ucol_getSortKey(myCollation
, target
, tLen
, NULL
, 0);
404 sortklenmax
= (sortklen1
>sortklen2
?sortklen1
:sortklen2
);
405 sortklenmin
= (sortklen1
<sortklen2
?sortklen1
:sortklen2
);
406 (void)sortklenmin
; /* Suppress set but not used warning. */
408 sortKey1
=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax
+1));
409 sortKey1a
=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax
+1));
410 ucol_getSortKey(myCollation
, source
, sLen
, sortKey1
, sortklen1
+1);
411 ucol_getSortKey(myCollation
, source
, -1, sortKey1a
, sortklen1
+1);
413 sortKey2
=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax
+1));
414 sortKey2a
=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax
+1));
415 ucol_getSortKey(myCollation
, target
, tLen
, sortKey2
, sortklen2
+1);
416 ucol_getSortKey(myCollation
, target
, -1, sortKey2a
, sortklen2
+1);
418 /* Check that sort key generated with null terminated string is identical */
419 /* to that generated with a length specified. */
420 if (uprv_strcmp((const char *)sortKey1
, (const char *)sortKey1a
) != 0 ||
421 uprv_strcmp((const char *)sortKey2
, (const char *)sortKey2a
) != 0 ) {
422 log_err("Sort Keys from null terminated and explicit length strings differ.\n");
425 /*memcmp(sortKey1, sortKey2,sortklenmax);*/
426 temp
= uprv_strcmp((const char *)sortKey1
, (const char *)sortKey2
);
427 gSortklen1
= uprv_strlen((const char *)sortKey1
)+1;
428 gSortklen2
= uprv_strlen((const char *)sortKey2
)+1;
429 if(sortklen1
!= gSortklen1
){
430 log_err("SortKey length does not match Expected: %i Got: %i\n",sortklen1
, gSortklen1
);
431 log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation
, sortKey1
, buffer
, &len
));
433 if(sortklen2
!= gSortklen2
){
434 log_err("SortKey length does not match Expected: %i Got: %i\n", sortklen2
, gSortklen2
);
435 log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation
, sortKey2
, buffer
, &len
));
442 keyResult
= UCOL_GREATER
;
445 keyResult
= UCOL_EQUAL
;
447 reportCResult( source
, target
, sortKey1
, sortKey2
, compareResult
, keyResult
, compareResultIter
, result
);
/*
 * Runs doTestVariant() for a pair of strings in both argument orders.
 *
 * myCollation - collator under test.
 * source      - first string.
 * target      - second string.
 * result      - expected outcome of comparing source with target.
 *
 * After the direct check the pair is swapped and the expectation mirrored:
 * UCOL_LESS becomes UCOL_GREATER, UCOL_GREATER becomes UCOL_LESS, and any
 * other value is checked as UCOL_EQUAL. A data error is logged on the
 * "No collator!" path (presumably when myCollation is NULL - confirm).
 */
455 void doTest(UCollator
* myCollation
, const UChar source
[], const UChar target
[], UCollationResult result
)
458 doTestVariant(myCollation
, source
, target
, result
);
459 if(result
== UCOL_LESS
) {
460 doTestVariant(myCollation
, target
, source
, UCOL_GREATER
);
461 } else if(result
== UCOL_GREATER
) {
462 doTestVariant(myCollation
, target
, source
, UCOL_LESS
);
464 doTestVariant(myCollation
, target
, source
, UCOL_EQUAL
);
467 log_data_err("No collator! Any data around?\n");
473 * Return an integer array containing all of the collation orders
474 * returned by calls to next on the specified iterator
/*
 * Collects the collation elements produced by ucol_next() on iter.
 *
 * iter        - collation element iterator to drain.
 * orderLength - out parameter (presumably receives the number of stored
 *               entries - confirm against the full function).
 *
 * Each stored entry pairs the element with the offset that
 * ucol_getOffset() reported before that element was fetched. The array
 * starts with room for 100 entries and is grown by allocating a new block
 * and memcpy-ing the old contents; at the end it is copied into a block of
 * exactly `size` entries when fewer than maxSize were used.
 * NOTE(review): the malloc() results are used without a NULL check.
 */
476 OrderAndOffset
* getOrders(UCollationElements
*iter
, int32_t *orderLength
)
480 int32_t maxSize
= 100;
482 int32_t offset
= ucol_getOffset(iter
);
483 OrderAndOffset
*temp
;
484 OrderAndOffset
*orders
=(OrderAndOffset
*)malloc(sizeof(OrderAndOffset
) * maxSize
);
485 status
= U_ZERO_ERROR
;
488 while ((order
=ucol_next(iter
, &status
)) != UCOL_NULLORDER
)
493 temp
= (OrderAndOffset
*)malloc(sizeof(OrderAndOffset
) * maxSize
);
495 memcpy(temp
, orders
, size
* sizeof(OrderAndOffset
));
501 orders
[size
].order
= order
;
502 orders
[size
].offset
= offset
;
504 offset
= ucol_getOffset(iter
);
508 if (maxSize
> size
&& size
> 0)
510 temp
= (OrderAndOffset
*)malloc(sizeof(OrderAndOffset
) * size
);
512 memcpy(temp
, orders
, size
* sizeof(OrderAndOffset
));
/*
 * Verifies that iterating a collation element iterator backwards yields
 * the same elements as iterating it forwards.
 *
 * iter - iterator positioned at the start of its text.
 *
 * The forward pass is recorded with getOrders(). The backward pass then
 * calls ucol_previous() and compares each element with the recorded one,
 * skipping recorded entries whose order is 0 when a mismatch is seen.
 * Offsets are compared as well (see TEST_OFFSETS). Errors are logged for
 * mismatched orders or offsets, for not arriving back at index 0, and
 * for a non-NULLORDER element from the ucol_next()/ucol_previous() probes
 * made at the end of the function.
 */
525 backAndForth(UCollationElements
*iter
)
527 /* Run through the iterator forwards and stick it into an array */
529 UErrorCode status
= U_ZERO_ERROR
;
530 int32_t orderLength
= 0;
531 OrderAndOffset
*orders
= getOrders(iter
, &orderLength
);
534 /* Now go through it backwards and make sure we get the same values */
538 /* synwee : changed */
539 while ((o
= ucol_previous(iter
, &status
)) != UCOL_NULLORDER
) {
543 ucol_getOffset(iter
);
546 if (o
!= orders
[idx
].order
) {
550 while (idx
> 0 && orders
[-- idx
].order
== 0) {
554 if (o
!= orders
[idx
].order
) {
555 log_err("Mismatched order at index %d: 0x%8.8X vs. 0x%8.8X\n", idx
,
556 orders
[idx
].order
, o
);
563 if (offset
!= orders
[idx
].offset
) {
564 log_err("Mismatched offset at index %d: %d vs. %d\n", idx
,
565 orders
[idx
].offset
, offset
);
572 while (idx
!= 0 && orders
[idx
- 1].order
== 0) {
577 log_err("Didn't get back to beginning - index is %d\n", idx
);
582 if ((o
= ucol_next(iter
, &status
)) != UCOL_NULLORDER
) {
583 log_err("Error at %x\n", o
);
588 if ((o
= ucol_previous(iter
, &status
)) != UCOL_NULLORDER
) {
589 log_err("Error at %x\n", o
);
/*
 * Checks that every ordered pair of an escaped string list compares with
 * the given result.
 *
 * coll   - collator under test.
 * s      - array of escaped strings (expanded with u_unescape()).
 * size   - number of entries in s.
 * result - expected outcome of comparing s[i] with s[j] for every i < j.
 *
 * Each string is unescaped into a 2048-UChar buffer. For every pair
 * doTest() is called, and a collation element iterator opened once up
 * front is re-pointed at each string with ucol_setText(). The iterator is
 * closed before returning.
 * NOTE(review): `size - 1` is unsigned, so size == 0 would make the outer
 * loop bound wrap; callers are expected to pass a non-empty list.
 */
599 void genericOrderingTestWithResult(UCollator
*coll
, const char * const s
[], uint32_t size
, UCollationResult result
) {
600 UChar t1
[2048] = {0};
601 UChar t2
[2048] = {0};
602 UCollationElements
*iter
;
603 UErrorCode status
= U_ZERO_ERROR
;
605 uint32_t i
= 0, j
= 0;
606 log_verbose("testing sequence:\n");
607 for(i
= 0; i
< size
; i
++) {
608 log_verbose("%s\n", s
[i
]);
611 iter
= ucol_openElements(coll
, t1
, u_strlen(t1
), &status
);
612 if (U_FAILURE(status
)) {
613 log_err("Creation of iterator failed\n");
615 for(i
= 0; i
< size
-1; i
++) {
616 for(j
= i
+1; j
< size
; j
++) {
617 u_unescape(s
[i
], t1
, 2048);
618 u_unescape(s
[j
], t2
, 2048);
619 doTest(coll
, t1
, t2
, result
);
620 /* synwee : added collation element iterator test */
621 ucol_setText(iter
, t1
, u_strlen(t1
), &status
);
623 ucol_setText(iter
, t2
, u_strlen(t2
), &status
);
627 ucol_closeElements(iter
);
/*
 * Convenience wrapper: checks that the strings in s are in strictly
 * ascending order, i.e. calls genericOrderingTestWithResult() with
 * UCOL_LESS as the expected result.
 */
630 void genericOrderingTest(UCollator
*coll
, const char * const s
[], uint32_t size
) {
631 genericOrderingTestWithResult(coll
, s
, size
, UCOL_LESS
);
/*
 * Opens a collator for `locale` and runs genericOrderingTest() on s.
 *
 * locale - locale ID passed to ucol_open().
 * s/size - escaped test strings and their count.
 *
 * If opening fails with U_FILE_ACCESS_ERROR a data error is logged;
 * any other failure is logged as a regular error.
 */
634 void genericLocaleStarter(const char *locale
, const char * const s
[], uint32_t size
) {
635 UErrorCode status
= U_ZERO_ERROR
;
636 UCollator
*coll
= ucol_open(locale
, &status
);
638 log_verbose("Locale starter for %s\n", locale
);
640 if(U_SUCCESS(status
)) {
641 genericOrderingTest(coll
, s
, size
);
642 } else if(status
== U_FILE_ACCESS_ERROR
) {
643 log_data_err("Is your data around?\n");
646 log_err("Unable to open collator for locale %s\n", locale
);
/*
 * Opens a collator for `locale` and runs genericOrderingTestWithResult()
 * on s with the caller-supplied expected result.
 *
 * locale - locale ID passed to ucol_open().
 * s/size - escaped test strings and their count.
 * result - expected comparison result for every pair s[i], s[j], i < j.
 *
 * If opening fails with U_FILE_ACCESS_ERROR a data error is logged;
 * any other failure is logged as a regular error.
 */
651 void genericLocaleStarterWithResult(const char *locale
, const char * const s
[], uint32_t size
, UCollationResult result
) {
652 UErrorCode status
= U_ZERO_ERROR
;
653 UCollator
*coll
= ucol_open(locale
, &status
);
655 log_verbose("Locale starter for %s\n", locale
);
657 if(U_SUCCESS(status
)) {
658 genericOrderingTestWithResult(coll
, s
, size
, result
);
659 } else if(status
== U_FILE_ACCESS_ERROR
) {
660 log_data_err("Is your data around?\n");
663 log_err("Unable to open collator for locale %s\n", locale
);
668 /* currently not used with options */
/*
 * Builds a collator from escaped rules, applies attributes, and runs
 * genericOrderingTestWithResult() on s.
 *
 * rules   - escaped rule string; expanded with u_unescape() into a buffer
 *           of RULE_BUFFER_LEN UChars.
 * s/size  - escaped test strings and their count.
 * attrs   - attributes to set on the collator.
 * values  - values for attrs, index for index.
 * attsize - number of entries in attrs/values.
 * result  - expected comparison result for every pair s[i], s[j], i < j.
 *
 * The collator is opened with UCOL_DEFAULT normalization and strength.
 * When opening fails the status is reported through log_err_status().
 */
669 void genericRulesStarterWithOptionsAndResult(const char *rules
, const char * const s
[], uint32_t size
, const UColAttribute
*attrs
, const UColAttributeValue
*values
, uint32_t attsize
, UCollationResult result
) {
670 UErrorCode status
= U_ZERO_ERROR
;
671 UChar rlz
[RULE_BUFFER_LEN
] = { 0 };
672 uint32_t rlen
= u_unescape(rules
, rlz
, RULE_BUFFER_LEN
);
675 UCollator
*coll
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
,NULL
, &status
);
677 log_verbose("Rules starter for %s\n", rules
);
679 if(U_SUCCESS(status
)) {
680 log_verbose("Setting attributes\n");
681 for(i
= 0; i
< attsize
; i
++) {
682 ucol_setAttribute(coll
, attrs
[i
], values
[i
], &status
);
685 genericOrderingTestWithResult(coll
, s
, size
, result
);
687 log_err_status(status
, "Unable to open collator with rules %s\n", rules
);
/*
 * Opens a collator for `locale`, applies attributes, and runs
 * genericOrderingTestWithResult() on s.
 *
 * locale  - locale ID passed to ucol_open().
 * s/size  - escaped test strings and their count.
 * attrs   - attributes to set on the collator.
 * values  - values for attrs, index for index.
 * attsize - number of entries in attrs/values.
 * result  - expected comparison result for every pair s[i], s[j], i < j.
 *
 * When opening fails the status is reported through log_err_status().
 */
692 void genericLocaleStarterWithOptionsAndResult(const char *locale
, const char * const s
[], uint32_t size
, const UColAttribute
*attrs
, const UColAttributeValue
*values
, uint32_t attsize
, UCollationResult result
) {
693 UErrorCode status
= U_ZERO_ERROR
;
696 UCollator
*coll
= ucol_open(locale
, &status
);
698 log_verbose("Locale starter for %s\n", locale
);
700 if(U_SUCCESS(status
)) {
702 log_verbose("Setting attributes\n");
703 for(i
= 0; i
< attsize
; i
++) {
704 ucol_setAttribute(coll
, attrs
[i
], values
[i
], &status
);
707 genericOrderingTestWithResult(coll
, s
, size
, result
);
709 log_err_status(status
, "Unable to open collator for locale %s\n", locale
);
/*
 * Convenience wrapper around genericLocaleStarterWithOptionsAndResult()
 * that expects the strings in s to be in strictly ascending order
 * (UCOL_LESS for every pair).
 */
714 void genericLocaleStarterWithOptions(const char *locale
, const char * const s
[], uint32_t size
, const UColAttribute
*attrs
, const UColAttributeValue
*values
, uint32_t attsize
) {
715 genericLocaleStarterWithOptionsAndResult(locale
, s
, size
, attrs
, values
, attsize
, UCOL_LESS
);
/*
 * Builds a collator from escaped rules and runs
 * genericOrderingTestWithResult() on s.
 *
 * rules  - escaped rule string; expanded with u_unescape() into a buffer
 *          of RULE_BUFFER_LEN UChars.
 * s/size - escaped test strings and their count.
 * result - expected comparison result for every pair s[i], s[j], i < j.
 *
 * The collator is opened with UCOL_DEFAULT normalization and strength.
 * If opening fails with U_FILE_ACCESS_ERROR a data error is logged;
 * any other failure is logged as a regular error.
 */
718 void genericRulesStarterWithResult(const char *rules
, const char * const s
[], uint32_t size
, UCollationResult result
) {
719 UErrorCode status
= U_ZERO_ERROR
;
720 UChar rlz
[RULE_BUFFER_LEN
] = { 0 };
721 uint32_t rlen
= u_unescape(rules
, rlz
, RULE_BUFFER_LEN
);
723 UCollator
*coll
= NULL
;
724 coll
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
,NULL
, &status
);
725 log_verbose("Rules starter for %s\n", rules
);
727 if(U_SUCCESS(status
)) {
728 genericOrderingTestWithResult(coll
, s
, size
, result
);
730 } else if(status
== U_FILE_ACCESS_ERROR
) {
731 log_data_err("Is your data around?\n");
733 log_err("Unable to open collator with rules %s\n", rules
);
/*
 * Convenience wrapper around genericRulesStarterWithResult() that expects
 * the strings in s to be in strictly ascending order (UCOL_LESS for every
 * pair) under the given rules.
 */
737 void genericRulesStarter(const char *rules
, const char * const s
[], uint32_t size
) {
738 genericRulesStarterWithResult(rules
, s
, size
, UCOL_LESS
);
/*
 * Tertiary-strength test: builds a rule-based collator from the shared
 * tailoring string (normalization UCOL_OFF), sets its strength to
 * UCOL_TERTIARY and runs doTest() on test case indices 0..16 of
 * testSourceCases/testTargetCases with the expected values from results.
 * A failure to create the collator is reported through log_err_status().
 */
741 static void TestTertiary()
744 UCollator
*myCollation
;
745 UErrorCode status
=U_ZERO_ERROR
;
746 static const char str
[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
747 UChar rules
[sizeof(str
)];
749 u_uastrcpy(rules
, str
);
751 myCollation
=ucol_openRules(rules
, len
, UCOL_OFF
, UCOL_DEFAULT_STRENGTH
, NULL
, &status
);
752 if(U_FAILURE(status
)){
753 log_err_status(status
, "ERROR: in creation of rule based collator :%s\n", myErrorName(status
));
757 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
758 for (i
= 0; i
< 17 ; i
++)
760 doTest(myCollation
, testSourceCases
[i
], testTargetCases
[i
], results
[i
]);
762 ucol_close(myCollation
);
/*
 * Primary-strength test: builds a rule-based collator from the shared
 * tailoring string (normalization UCOL_OFF), sets its strength to
 * UCOL_PRIMARY and runs doTest() on test case indices 17..25 of
 * testSourceCases/testTargetCases with the expected values from results.
 * A failure to create the collator is reported through log_err_status().
 */
766 static void TestPrimary( )
769 UCollator
*myCollation
;
770 UErrorCode status
=U_ZERO_ERROR
;
771 static const char str
[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
772 UChar rules
[sizeof(str
)];
774 u_uastrcpy(rules
, str
);
776 myCollation
=ucol_openRules(rules
, len
, UCOL_OFF
, UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
777 if(U_FAILURE(status
)){
778 log_err_status(status
, "ERROR: in creation of rule based collator :%s\n", myErrorName(status
));
781 ucol_setStrength(myCollation
, UCOL_PRIMARY
);
783 for (i
= 17; i
< 26 ; i
++)
786 doTest(myCollation
, testSourceCases
[i
], testTargetCases
[i
], results
[i
]);
788 ucol_close(myCollation
);
/*
 * Secondary-strength test: builds a rule-based collator from the shared
 * tailoring string (normalization UCOL_OFF), sets its strength to
 * UCOL_SECONDARY and runs doTest() on test case indices 26..33 of
 * testSourceCases/testTargetCases with the expected values from results.
 * A failure to create the collator is reported through log_err_status().
 */
792 static void TestSecondary()
796 UCollator
*myCollation
;
797 UErrorCode status
=U_ZERO_ERROR
;
798 static const char str
[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
799 UChar rules
[sizeof(str
)];
801 u_uastrcpy(rules
, str
);
803 myCollation
=ucol_openRules(rules
, len
, UCOL_OFF
, UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
804 if(U_FAILURE(status
)){
805 log_err_status(status
, "ERROR: in creation of rule based collator :%s\n", myErrorName(status
));
808 ucol_setStrength(myCollation
, UCOL_SECONDARY
);
809 for (i
= 26; i
< 34 ; i
++)
811 doTest(myCollation
, testSourceCases
[i
], testTargetCases
[i
], results
[i
]);
813 ucol_close(myCollation
);
/*
 * Identical-strength test: builds a rule-based collator from the shared
 * tailoring string, opened directly with UCOL_IDENTICAL strength and
 * normalization UCOL_OFF, and runs doTest() on test case indices 34..36
 * of testSourceCases/testTargetCases with the expected values from
 * results. A failure to create the collator is reported through
 * log_err_status().
 */
817 static void TestIdentical()
821 UCollator
*myCollation
;
822 UErrorCode status
=U_ZERO_ERROR
;
823 static const char str
[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
824 UChar rules
[sizeof(str
)];
826 u_uastrcpy(rules
, str
);
828 myCollation
=ucol_openRules(rules
, len
, UCOL_OFF
, UCOL_IDENTICAL
, NULL
,&status
);
829 if(U_FAILURE(status
)){
830 log_err_status(status
, "ERROR: in creation of rule based collator :%s\n", myErrorName(status
));
833 for(i
= 34; i
<37; i
++)
835 doTest(myCollation
, testSourceCases
[i
], testTargetCases
[i
], results
[i
]);
837 ucol_close(myCollation
);
/*
 * Extra ordering test: builds a rule-based collator from the shared
 * tailoring string (normalization UCOL_OFF), sets its strength to
 * UCOL_TERTIARY and checks that for every pair i < j of the
 * COUNT_TEST_CASES entries in testCases, testCases[i] sorts before
 * testCases[j] (UCOL_LESS). A failure to create the collator is reported
 * through log_err_status().
 */
841 static void TestExtra()
845 UCollator
*myCollation
;
846 UErrorCode status
= U_ZERO_ERROR
;
847 static const char str
[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
848 UChar rules
[sizeof(str
)];
850 u_uastrcpy(rules
, str
);
852 myCollation
=ucol_openRules(rules
, len
, UCOL_OFF
, UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
853 if(U_FAILURE(status
)){
854 log_err_status(status
, "ERROR: in creation of rule based collator :%s\n", myErrorName(status
));
857 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
858 for (i
= 0; i
< COUNT_TEST_CASES
-1 ; i
++)
860 for (j
= i
+ 1; j
< COUNT_TEST_CASES
; j
+= 1)
863 doTest(myCollation
, testCases
[i
], testCases
[j
], UCOL_LESS
);
866 ucol_close(myCollation
);
/*
 * Regression test for Jitterbug 581.
 *
 * Compares "THISISATEST." with "Thisisatest." using an "en_US" collator:
 * first with ucol_strcoll() at the collator's initial strength, then again
 * after switching to UCOL_PRIMARY, and finally by generating both sort
 * keys into 100-byte buffers and comparing them with memcmp().
 * The memcmp() length is the target key length capped at 100.
 * Collator creation failure is reported through log_err_status().
 */
870 static void TestJB581(void)
872 int32_t bufferLen
= 0;
875 UCollationResult result
= UCOL_EQUAL
;
876 uint8_t sourceKeyArray
[100];
877 uint8_t targetKeyArray
[100];
878 int32_t sourceKeyOut
= 0,
880 UCollator
*myCollator
= 0;
881 UErrorCode status
= U_ZERO_ERROR
;
883 /*u_uastrcpy(source, "This is a test.");*/
884 /*u_uastrcpy(target, "THISISATEST.");*/
885 u_uastrcpy(source
, "THISISATEST.");
886 u_uastrcpy(target
, "Thisisatest.");
888 myCollator
= ucol_open("en_US", &status
);
889 if (U_FAILURE(status
)){
890 log_err_status(status
, "ERROR: Failed to create the collator : %s\n", u_errorName(status
));
893 result
= ucol_strcoll(myCollator
, source
, -1, target
, -1);
894 /* result is 1, secondary differences only for ignorable space characters*/
897 log_err("Comparing two strings with only secondary differences in C failed.\n");
899 /* To compare them with just primary differences */
900 ucol_setStrength(myCollator
, UCOL_PRIMARY
);
901 result
= ucol_strcoll(myCollator
, source
, -1, target
, -1);
905 log_err("Comparing two strings with no differences in C failed.\n");
907 /* Now, do the same comparison with keys */
908 sourceKeyOut
= ucol_getSortKey(myCollator
, source
, -1, sourceKeyArray
, 100);
909 (void)sourceKeyOut
; /* Suppress set but not used warning. */
910 targetKeyOut
= ucol_getSortKey(myCollator
, target
, -1, targetKeyArray
, 100);
911 bufferLen
= ((targetKeyOut
> 100) ? 100 : targetKeyOut
);
912 if (memcmp(sourceKeyArray
, targetKeyArray
, bufferLen
) != 0)
914 log_err("Comparing two strings with sort keys in C failed.\n");
916 ucol_close(myCollator
);
/*
 * Regression test for Jitterbug 1401.
 *
 * Uses an "en_US" collator with UCOL_NORMALIZATION_MODE set to UCOL_ON.
 * For each character c of NFD_UnsafeStartChars it builds X = U+00C0
 * followed by c, Y = NFD(X) and Z = NFC(Y), then checks with doTest()
 * that X, Y and Z all compare equal, and that collation element iterators
 * opened over X, Y and Z return the same element at every step until
 * UCOL_NULLORDER. Errors are logged for collator creation, attribute,
 * normalization, iterator-open and ucol_next() failures.
 */
919 static void TestJB1401(void)
921 UCollator
*myCollator
= 0;
922 UErrorCode status
= U_ZERO_ERROR
;
923 static UChar NFD_UnsafeStartChars
[] = {
924 0x0f73, /* Tibetan Vowel Sign II */
925 0x0f75, /* Tibetan Vowel Sign UU */
926 0x0f81, /* Tibetan Vowel Sign Reversed II */
932 myCollator
= ucol_open("en_US", &status
);
933 if (U_FAILURE(status
)){
934 log_err_status(status
, "ERROR: Failed to create the collator : %s\n", u_errorName(status
));
937 ucol_setAttribute(myCollator
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
938 if (U_FAILURE(status
)){
939 log_err("ERROR: Failed to set normalization mode ON for collator.\n");
949 /* Get the next funny character to be tested, and set up the
950 * three test strings X, Y, Z, consisting of an A-grave + test char,
951 * in original form, NFD, and then NFC form.
953 c
= NFD_UnsafeStartChars
[i
];
956 X
[0]=0xC0; X
[1]=c
; X
[2]=0; /* \u00C0 is A Grave*/
958 unorm_normalize(X
, -1, UNORM_NFD
, 0, Y
, 20, &status
);
959 unorm_normalize(Y
, -1, UNORM_NFC
, 0, Z
, 20, &status
);
960 if (U_FAILURE(status
)){
961 log_err("ERROR: Failed to normalize test of character %x\n", c
);
965 /* Collation test. All three strings should be equal.
966 * doTest does both strcoll and sort keys, with params in both orders.
968 doTest(myCollator
, X
, Y
, UCOL_EQUAL
);
969 doTest(myCollator
, X
, Z
, UCOL_EQUAL
);
970 doTest(myCollator
, Y
, Z
, UCOL_EQUAL
);
972 /* Run collation element iterators over the three strings. Results should be same for each.
975 UCollationElements
*ceiX
, *ceiY
, *ceiZ
;
976 int32_t ceX
, ceY
, ceZ
;
979 ceiX
= ucol_openElements(myCollator
, X
, -1, &status
);
980 ceiY
= ucol_openElements(myCollator
, Y
, -1, &status
);
981 ceiZ
= ucol_openElements(myCollator
, Z
, -1, &status
);
982 if (U_FAILURE(status
)) {
983 log_err("ERROR: uucol_openElements failed.\n");
988 ceX
= ucol_next(ceiX
, &status
);
989 ceY
= ucol_next(ceiY
, &status
);
990 ceZ
= ucol_next(ceiZ
, &status
);
991 if (U_FAILURE(status
)) {
992 log_err("ERROR: ucol_next failed for iteration #%d.\n", j
);
995 if (ceX
!= ceY
|| ceY
!= ceZ
) {
996 log_err("ERROR: ucol_next failed for iteration #%d.\n", j
);
999 if (ceX
== UCOL_NULLORDER
) {
1003 ucol_closeElements(ceiX
);
1004 ucol_closeElements(ceiY
);
1005 ucol_closeElements(ceiZ
);
1008 ucol_close(myCollator
);
1014 * Tests the [variable top] tag in rule syntax. Since the default [alternate]
1015 * tag has the value shifted, any codepoints before [variable top] should give
1016 * a primary ce of 0.
// Tests alternate handling in combination with the "[variable top]" rule.
//
// Opens an "en_US" collator and a rule-based collator built from
// "&z = [variable top]" (normalization UCOL_OFF, strength UCOL_PRIMARY),
// sets UCOL_ALTERNATE_HANDLING to UCOL_SHIFTED on both and verifies the
// attribute reads back as UCOL_SHIFTED on the rule-based collator.
// Sort keys of single characters are then compared against a zero-filled
// 20-byte `expected` buffer, first with the English collator and then
// with the rule-based one. On a rule-collator creation failure the English
// collator is closed before the error is logged.
1018 static void TestVariableTop(void)
1022 * Starting with ICU 53, setting the variable top via a pseudo relation string
1023 * is not supported any more.
1024 * It was replaced by the [maxVariable symbol] setting.
1025 * See ICU tickets #9958 and #8032.
1027 static const char str
[] = "&z = [variable top]";
1028 int len
= strlen(str
);
1029 UChar rules
[sizeof(str
)];
1030 UCollator
*myCollation
;
1031 UCollator
*enCollation
;
1032 UErrorCode status
= U_ZERO_ERROR
;
1036 uint8_t expected
[20];
1038 u_uastrcpy(rules
, str
);
1040 enCollation
= ucol_open("en_US", &status
);
1041 if (U_FAILURE(status
)) {
1042 log_err_status(status
, "ERROR: in creation of collator :%s\n",
1043 myErrorName(status
));
1046 myCollation
= ucol_openRules(rules
, len
, UCOL_OFF
,
1047 UCOL_PRIMARY
,NULL
, &status
);
1048 if (U_FAILURE(status
)) {
1049 ucol_close(enCollation
);
1050 log_err("ERROR: in creation of rule based collator :%s\n",
1051 myErrorName(status
));
1055 ucol_setStrength(enCollation
, UCOL_PRIMARY
);
1056 ucol_setAttribute(enCollation
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
,
1058 ucol_setAttribute(myCollation
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
,
1061 if (ucol_getAttribute(myCollation
, UCOL_ALTERNATE_HANDLING
, &status
) !=
1062 UCOL_SHIFTED
|| U_FAILURE(status
)) {
1063 log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n");
1066 uprv_memset(expected
, 0, 20);
1068 /* space is supposed to be a variable */
1070 len
= ucol_getSortKey(enCollation
, source
, 1, result
,
1073 if (uprv_memcmp(expected
, result
, len
) != 0) {
1074 log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n");
1080 len
= ucol_getSortKey(myCollation
, source
, 1, result
,
1082 if (uprv_memcmp(expected
, result
, len
) != 0) {
1083 log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n",
1089 ucol_close(enCollation
);
1090 ucol_close(myCollation
);
1097 * Tests surrogate support.
1098 * NOTE: This test used the \\uD801\\uDC01 pair, which is now assigned to Deseret
1099 * Therefore, another (unassigned) code point was used for this test.
/*
 * Tests collation of supplementary characters (surrogate pairs).
 *
 * Opens an "en_US" collator and a rule-based collator tailored with
 * "&z<'\uD800\uDC00'<'\uD800\uDC0A\u0308'<A" (normalization UCOL_OFF,
 * strength UCOL_TERTIARY; the rule text is expanded with u_unescape()).
 * The source/target tables are compared entry by entry with doTest():
 * against the English collator expecting UCOL_LESS or UCOL_GREATER, and
 * against the tailored collator expecting UCOL_LESS. Finally the sort key
 * of source[3] (two code units) is generated with both collators and must
 * be identical in length and content, i.e. a non-tailored supplementary
 * character keeps its value; a difference is only reported with
 * log_verbose(). On a rule-collator creation failure the English collator
 * is closed before the error is logged.
 */
1101 static void TestSurrogates(void)
1103 static const char str
[] =
1104 "&z<'\\uD800\\uDC00'<'\\uD800\\uDC0A\\u0308'<A";
1105 int len
= strlen(str
);
1107 UChar rules
[sizeof(str
)];
1108 UCollator
*myCollation
;
1109 UCollator
*enCollation
;
1110 UErrorCode status
= U_ZERO_ERROR
;
1112 {{'z', 0, 0}, {0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {0xD800, 0xDC02}};
1114 {{0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {'A', 0, 0}, {0xD800, 0xDC03}};
1116 uint8_t enresult
[20], myresult
[20];
1119 /* tests for open rules with surrogate rules */
1120 rlen
= u_unescape(str
, rules
, len
);
1122 enCollation
= ucol_open("en_US", &status
);
1123 if (U_FAILURE(status
)) {
1124 log_err_status(status
, "ERROR: in creation of collator :%s\n",
1125 myErrorName(status
));
1128 myCollation
= ucol_openRules(rules
, rlen
, UCOL_OFF
,
1129 UCOL_TERTIARY
,NULL
, &status
);
1130 if (U_FAILURE(status
)) {
1131 ucol_close(enCollation
);
1132 log_err("ERROR: in creation of rule based collator :%s\n",
1133 myErrorName(status
));
1138 this test is to verify the supplementary sort key order in the english
1141 log_verbose("start of english collation supplementary characters test\n");
1143 doTest(enCollation
, source
[count
], target
[count
], UCOL_LESS
);
1146 doTest(enCollation
, source
[count
], target
[count
], UCOL_GREATER
);
1148 log_verbose("start of tailored collation supplementary characters test\n");
1150 /* tests getting collation elements for surrogates for tailored rules */
1152 doTest(myCollation
, source
[count
], target
[count
], UCOL_LESS
);
1156 /* tests that \uD800\uDC02 still has the same value, not changed */
1157 enlen
= ucol_getSortKey(enCollation
, source
[3], 2, enresult
, 20);
1158 mylen
= ucol_getSortKey(myCollation
, source
[3], 2, myresult
, 20);
1159 if (enlen
!= mylen
||
1160 uprv_memcmp(enresult
, myresult
, enlen
) != 0) {
1161 log_verbose("Failed : non-tailored supplementary characters should have the same value\n");
1164 ucol_close(enCollation
);
1165 ucol_close(myCollation
);
1171 *### TODO: Add more invalid rules to test all different scenarios.
1176 #define MAX_ERROR_STATES 2
1178 static const char* rulesArr
[MAX_ERROR_STATES
] = {
1179 "& C < ch, cH, Ch[this should fail]<d",
1180 "& C < ch, cH, & Ch[variable top]"
1182 static const char* preContextArr
[MAX_ERROR_STATES
] = {
1187 static const char* postContextArr
[MAX_ERROR_STATES
] = {
1193 for(i
= 0;i
<MAX_ERROR_STATES
;i
++){
1194 UChar rules
[1000] = { '\0' };
1195 UChar preContextExp
[1000] = { '\0' };
1196 UChar postContextExp
[1000] = { '\0' };
1197 UParseError parseError
;
1198 UErrorCode status
= U_ZERO_ERROR
;
1200 u_charsToUChars(rulesArr
[i
],rules
,uprv_strlen(rulesArr
[i
])+1);
1201 u_charsToUChars(preContextArr
[i
],preContextExp
,uprv_strlen(preContextArr
[i
])+1);
1202 u_charsToUChars(postContextArr
[i
],postContextExp
,uprv_strlen(postContextArr
[i
])+1);
1203 /* clean up stuff in parseError */
1204 u_memset(parseError
.preContext
,0x0000,U_PARSE_CONTEXT_LEN
);
1205 u_memset(parseError
.postContext
,0x0000,U_PARSE_CONTEXT_LEN
);
1206 /* open the rules and test */
1207 coll
= ucol_openRules(rules
,u_strlen(rules
),UCOL_OFF
,UCOL_DEFAULT_STRENGTH
,&parseError
,&status
);
1208 (void)coll
; /* Suppress set but not used warning. */
1209 if(u_strcmp(parseError
.preContext
,preContextExp
)!=0){
1210 log_err_status(status
, "preContext in UParseError for ucol_openRules does not match: \"%s\"\n",
1211 aescstrdup(parseError
.preContext
, -1));
1213 if(u_strcmp(parseError
.postContext
,postContextExp
)!=0){
1214 log_err_status(status
, "postContext in UParseError for ucol_openRules does not match: \"%s\"\n",
1215 aescstrdup(parseError
.postContext
, -1));
1221 TestJitterbug1098(){
1223 UCollator
* c1
= NULL
;
1224 UErrorCode status
= U_ZERO_ERROR
;
1225 UParseError parseError
;
1226 char preContext
[200]={0};
1227 char postContext
[200]={0};
1229 const char* rules
[] = {
1237 const UCollationResult results1098
[] = {
1243 const UChar input
[][2]= {
1251 u_memset(parseError
.preContext
,0x0000,U_PARSE_CONTEXT_LEN
);
1252 u_memset(parseError
.postContext
,0x0000,U_PARSE_CONTEXT_LEN
);
1253 for(;rules
[i
]!=0;i
++){
1254 u_uastrcpy(rule
, rules
[i
]);
1255 c1
= ucol_openRules(rule
, u_strlen(rule
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
, &parseError
, &status
);
1256 if(U_FAILURE(status
)){
1257 log_err_status(status
, "Could not parse the rules syntax. Error: %s\n", u_errorName(status
));
1259 if (status
== U_PARSE_ERROR
) {
1260 u_UCharsToChars(parseError
.preContext
,preContext
,20);
1261 u_UCharsToChars(parseError
.postContext
,postContext
,20);
1262 log_verbose("\n\tPre-Context: %s \n\tPost-Context:%s \n",preContext
,postContext
);
1269 doTest(c1
,X
,Y
,results1098
[i
]);
1275 TestFCDCrash(void) {
1276 static const char *test
[] = {
1277 "Gr\\u00F6\\u00DFe",
1281 UErrorCode status
= U_ZERO_ERROR
;
1282 UCollator
*coll
= ucol_open("es", &status
);
1283 if(U_FAILURE(status
)) {
1284 log_err_status(status
, "Couldn't open collator -> %s\n", u_errorName(status
));
1290 coll
= ucol_open("de_DE", &status
);
1291 if(U_FAILURE(status
)) {
1292 log_err_status(status
, "Couldn't open collator -> %s\n", u_errorName(status
));
1295 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
1296 genericOrderingTest(coll
, test
, 2);
1301 find(UEnumeration* list, const char* str, UErrorCode* status){
1302 const char* value = NULL;
1304 if(U_FAILURE(*status)){
1307 uenum_reset(list, status);
1308 while( (value= uenum_next(list, &length, status))!=NULL){
1309 if(strcmp(value, str)==0){
1316 static void TestJ5298(void)
1318 UErrorCode status
= U_ZERO_ERROR
;
1319 char input
[256], output
[256];
1322 UEnumeration
* values
= NULL
;
1323 const char *keywordValue
= NULL
;
1324 log_verbose("Number of collator locales returned : %i \n", ucol_countAvailable());
1325 values
= ucol_getKeywordValues("collation", &status
);
1326 while ((keywordValue
= uenum_next(values
, NULL
, &status
)) != NULL
) {
1327 if (strncmp(keywordValue
, "private-", 8) == 0) {
1328 log_err("ucol_getKeywordValues() returns private collation keyword: %s\n", keywordValue
);
1331 for (i
= 0; i
< ucol_countAvailable(); i
++) {
1332 uenum_reset(values
, &status
);
1333 while ((keywordValue
= uenum_next(values
, NULL
, &status
)) != NULL
) {
1334 strcpy(input
, ucol_getAvailable(i
));
1335 if (strcmp(keywordValue
, "standard") != 0) {
1336 strcat(input
, "@collation=");
1337 strcat(input
, keywordValue
);
1340 ucol_getFunctionalEquivalent(output
, 256, "collation", input
, &isAvailable
, &status
);
1341 if (strcmp(input
, output
) == 0) { /* Unique locale, print it out */
1342 log_verbose("%s, \n", output
);
1346 uenum_close(values
);
1350 static const char* badKeyLocales
[] = {
1351 "@calendar=japanese;collation=search", // OK
1352 "@calendar=japanese", // OK
1353 "en@calendar=x", // OK
1354 "ja@calendar=x", // OK
1355 "en@collation=x", // OK
1356 "ja@collation=x", // OK
1357 "en@calendar=\x81", // OK
1358 "ja@collation=private-kana", // fail, this string is cause of <rdar://problem/40930320>
1359 "en@collation=\x81", // fail
1360 "ja@calendar=japanese;collation=\x81", // fail
1364 static void TestBadKey(void)
1367 const char** badLocsPtr
= badKeyLocales
;
1368 while ((badLoc
= *badLocsPtr
++) != NULL
) {
1369 UErrorCode status
= U_ZERO_ERROR
;
1370 UCollator
* uc
= ucol_open(badLoc
, &status
);
1371 if ( U_SUCCESS(status
) ) {
1373 log_err("ucol_open sets SUCCESS but returns NULL, locale: %s\n", badLoc
);
1376 } else if (uc
!= NULL
) {
1377 log_err("ucol_open sets FAILURE but returns non-NULL, locale: %s\n", badLoc
);
1381 #endif /* #if !UCONFIG_NO_COLLATION */