1 /********************************************************************
3 * Copyright (c) 1997-2008, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /*******************************************************************************
10 * Modification History:
12 * Madhu Katragadda Ported for C API
13 ********************************************************************************
17 * Important: This file is included into intltest/allcoll.cpp so that the
18 * test data is shared. This makes it easier to maintain the test data,
19 * especially since the Unicode data must be portable and quoted character
20 * literals will not work.
21 * If it is included, then there will be a #define INCLUDE_CALLCOLL_C
22 * that must prevent the actual code in here from being part of the
23 * allcoll.cpp compilation.
27 * CollationDummyTest is a third level test class. This tests creation of
28 * a customized collator object. For example, number 1 to be sorted
29 * equivalent to word 'one'.
35 #include "unicode/utypes.h"
37 #if !UCONFIG_NO_COLLATION
39 #include "unicode/ucol.h"
40 #include "unicode/uloc.h"
41 #include "unicode/ures.h"
42 #include "unicode/udata.h"
43 #include "unicode/ucoleitr.h"
44 #include "unicode/ustring.h"
45 #include "unicode/uclean.h"
46 #include "unicode/putil.h"
47 #include "unicode/uenum.h"
57 /* set to 1 to test offsets in backAndForth() */
58 #define TEST_OFFSETS 0
60 /* perform test with strength PRIMARY */
61 static void TestPrimary(void);
63 /* perform test with strength SECONDARY */
64 static void TestSecondary(void);
66 /* perform test with strength tertiary */
67 static void TestTertiary(void);
69 /*perform tests with strength Identical */
70 static void TestIdentical(void);
72 /* perform extra tests */
73 static void TestExtra(void);
75 /* Test jitterbug 581 */
76 static void TestJB581(void);
78 /* Test jitterbug 1401 */
79 static void TestJB1401(void);
81 /* Test [variable top] in the rule syntax */
82 static void TestVariableTop(void);
85 static void TestSurrogates(void);
87 static void TestInvalidRules(void);
89 static void TestJitterbug1098(void);
91 static void TestFCDCrash(void);
93 static void TestJ5298(void);
95 const UCollationResult results
[] = {
97 UCOL_LESS
, /*UCOL_GREATER,*/
113 /* test primary > 17 */
123 /* test secondary > 26 */
139 void uprv_appendByteToHexString(char *dst
, uint8_t val
) {
140 uint32_t len
= (uint32_t)uprv_strlen(dst
);
141 *(dst
+len
) = T_CString_itosOffset((val
>> 4));
142 *(dst
+len
+1) = T_CString_itosOffset((val
& 0xF));
146 /* this function makes a string with representation of a sortkey */
147 static char* U_EXPORT2
sortKeyToString(const UCollator
*coll
, const uint8_t *sortkey
, char *buffer
, uint32_t *len
) {
148 int32_t strength
= UCOL_PRIMARY
;
149 uint32_t res_size
= 0;
150 UBool doneCase
= FALSE
;
152 char *current
= buffer
;
153 const uint8_t *currentSk
= sortkey
;
155 uprv_strcpy(current
, "[");
157 while(strength
<= UCOL_QUATERNARY
&& strength
<= coll
->strength
) {
158 if(strength
> UCOL_PRIMARY
) {
159 uprv_strcat(current
, " . ");
161 while(*currentSk
!= 0x01 && *currentSk
!= 0x00) { /* print a level */
162 uprv_appendByteToHexString(current
, *currentSk
++);
163 uprv_strcat(current
, " ");
165 if(coll
->caseLevel
== UCOL_ON
&& strength
== UCOL_SECONDARY
&& doneCase
== FALSE
) {
167 } else if(coll
->caseLevel
== UCOL_OFF
|| doneCase
== TRUE
|| strength
!= UCOL_SECONDARY
) {
171 uprv_appendByteToHexString(current
, *currentSk
++); /* This should print '01' */
173 if(strength
== UCOL_QUATERNARY
&& coll
->alternateHandling
== UCOL_NON_IGNORABLE
) {
178 if(coll
->strength
== UCOL_IDENTICAL
) {
179 uprv_strcat(current
, " . ");
180 while(*currentSk
!= 0) {
181 uprv_appendByteToHexString(current
, *currentSk
++);
182 uprv_strcat(current
, " ");
185 uprv_appendByteToHexString(current
, *currentSk
++);
187 uprv_strcat(current
, "]");
189 if(res_size
> *len
) {
196 void addAllCollTest(TestNode
** root
)
198 addTest(root
, &TestPrimary
, "tscoll/callcoll/TestPrimary");
199 addTest(root
, &TestSecondary
, "tscoll/callcoll/TestSecondary");
200 addTest(root
, &TestTertiary
, "tscoll/callcoll/TestTertiary");
201 addTest(root
, &TestIdentical
, "tscoll/callcoll/TestIdentical");
202 addTest(root
, &TestExtra
, "tscoll/callcoll/TestExtra");
203 addTest(root
, &TestJB581
, "tscoll/callcoll/TestJB581");
204 addTest(root
, &TestVariableTop
, "tscoll/callcoll/TestVariableTop");
205 addTest(root
, &TestSurrogates
, "tscoll/callcoll/TestSurrogates");
206 addTest(root
, &TestInvalidRules
, "tscoll/callcoll/TestInvalidRules");
207 addTest(root
, &TestJB1401
, "tscoll/callcoll/TestJB1401");
208 addTest(root
, &TestJitterbug1098
, "tscoll/callcoll/TestJitterbug1098");
209 addTest(root
, &TestFCDCrash
, "tscoll/callcoll/TestFCDCrash");
210 addTest(root
, &TestJ5298
, "tscoll/callcoll/TestJ5298");
213 UBool
hasCollationElements(const char *locName
) {
215 UErrorCode status
= U_ZERO_ERROR
;
217 UResourceBundle
*loc
= ures_open(U_ICUDATA_COLL
, locName
, &status
);;
219 if(U_SUCCESS(status
)) {
220 status
= U_ZERO_ERROR
;
221 loc
= ures_getByKey(loc
, "collations", loc
, &status
);
223 if(status
== U_ZERO_ERROR
) { /* do the test - there are real elements */
230 static UCollationResult
compareUsingPartials(UCollator
*coll
, const UChar source
[], int32_t sLen
, const UChar target
[], int32_t tLen
, int32_t pieceSize
, UErrorCode
*status
) {
231 int32_t partialSKResult
= 0;
232 UCharIterator sIter
, tIter
;
233 uint32_t sState
[2], tState
[2];
234 int32_t sSize
= pieceSize
, tSize
= pieceSize
;
236 uint8_t sBuf
[16384], tBuf
[16384];
237 if(pieceSize
> 16384) {
238 log_err("Partial sortkey size buffer too small. Please consider increasing the buffer!\n");
239 *status
= U_BUFFER_OVERFLOW_ERROR
;
242 *status
= U_ZERO_ERROR
;
243 sState
[0] = 0; sState
[1] = 0;
244 tState
[0] = 0; tState
[1] = 0;
245 while(sSize
== pieceSize
&& tSize
== pieceSize
&& partialSKResult
== 0) {
246 uiter_setString(&sIter
, source
, sLen
);
247 uiter_setString(&tIter
, target
, tLen
);
248 sSize
= ucol_nextSortKeyPart(coll
, &sIter
, sState
, sBuf
, pieceSize
, status
);
249 tSize
= ucol_nextSortKeyPart(coll
, &tIter
, tState
, tBuf
, pieceSize
, status
);
251 if(sState
[0] != 0 || tState
[0] != 0) {
252 /*log_verbose("State != 0 : %08X %08X\n", sState[0], tState[0]);*/
254 /*log_verbose("%i ", i++);*/
256 partialSKResult
= memcmp(sBuf
, tBuf
, pieceSize
);
259 if(partialSKResult
< 0) {
261 } else if(partialSKResult
> 0) {
268 static void doTestVariant(UCollator
* myCollation
, const UChar source
[], const UChar target
[], UCollationResult result
)
270 int32_t sortklen1
, sortklen2
, sortklenmax
, sortklenmin
;
271 int temp
=0, gSortklen1
=0,gSortklen2
=0;
272 UCollationResult compareResult
, compareResulta
, keyResult
, compareResultIter
= result
;
273 uint8_t *sortKey1
, *sortKey2
, *sortKey1a
, *sortKey2a
;
274 uint32_t sLen
= u_strlen(source
);
275 uint32_t tLen
= u_strlen(target
);
278 UErrorCode status
= U_ZERO_ERROR
;
279 UColAttributeValue norm
= ucol_getAttribute(myCollation
, UCOL_NORMALIZATION_MODE
, &status
);
281 UCharIterator sIter
, tIter
;
282 uiter_setString(&sIter
, source
, sLen
);
283 uiter_setString(&tIter
, target
, tLen
);
284 compareResultIter
= ucol_strcollIter(myCollation
, &sIter
, &tIter
, &status
);
285 if(compareResultIter
!= result
) {
286 log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source
,-1), aescstrdup(target
,-1));
289 /* convert the strings to UTF-8 and do try comparing with char iterator */
290 if(QUICK
<= 0) { /*!QUICK*/
291 char utf8Source
[256], utf8Target
[256];
292 int32_t utf8SourceLen
= 0, utf8TargetLen
= 0;
293 u_strToUTF8(utf8Source
, 256, &utf8SourceLen
, source
, sLen
, &status
);
294 if(U_FAILURE(status
)) { /* probably buffer is not big enough */
295 log_verbose("Src UTF-8 buffer too small! Will not compare!\n");
297 u_strToUTF8(utf8Target
, 256, &utf8TargetLen
, target
, tLen
, &status
);
298 if(U_SUCCESS(status
)) { /* probably buffer is not big enough */
299 UCollationResult compareResultUTF8
= result
, compareResultUTF8Norm
= result
;
300 /*UCharIterator sIter, tIter;*/
301 /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/
302 uiter_setUTF8(&sIter
, utf8Source
, utf8SourceLen
);
303 uiter_setUTF8(&tIter
, utf8Target
, utf8TargetLen
);
304 /*uiter_setString(&sIter, source, sLen);
305 uiter_setString(&tIter, target, tLen);*/
306 compareResultUTF8
= ucol_strcollIter(myCollation
, &sIter
, &tIter
, &status
);
307 ucol_setAttribute(myCollation
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
308 sIter
.move(&sIter
, 0, UITER_START
);
309 tIter
.move(&tIter
, 0, UITER_START
);
310 compareResultUTF8Norm
= ucol_strcollIter(myCollation
, &sIter
, &tIter
, &status
);
311 ucol_setAttribute(myCollation
, UCOL_NORMALIZATION_MODE
, norm
, &status
);
312 if(compareResultUTF8
!= compareResultIter
) {
313 log_err("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. %s, %s\n", aescstrdup(source
,-1), aescstrdup(target
,-1));
315 if(compareResultUTF8
!= compareResultUTF8Norm
) {
316 log_err("different results in iterative when normalization is turned on with UTF-8 strings. %s, %s\n", aescstrdup(source
,-1), aescstrdup(target
,-1));
319 log_verbose("Target UTF-8 buffer too small! Did not compare!\n");
321 if(U_FAILURE(status
)) {
322 log_verbose("UTF-8 strcoll failed! Ignoring result\n");
327 /* testing the partial sortkeys */
330 int32_t partialSizes
[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
331 int32_t partialSizesSize
= 1;
333 partialSizesSize
= 7;
335 /*log_verbose("partial sortkey test piecesize=");*/
336 for(i
= 0; i
< partialSizesSize
; i
++) {
337 UCollationResult partialSKResult
= result
, partialNormalizedSKResult
= result
;
338 /*log_verbose("%i ", partialSizes[i]);*/
340 partialSKResult
= compareUsingPartials(myCollation
, source
, sLen
, target
, tLen
, partialSizes
[i
], &status
);
341 if(partialSKResult
!= result
) {
342 log_err("Partial sortkey comparison returned wrong result (%i exp. %i): %s, %s (size %i)\n",
343 partialSKResult
, result
,
344 aescstrdup(source
,-1), aescstrdup(target
,-1), partialSizes
[i
]);
347 if(QUICK
<= 0 && norm
!= UCOL_ON
) {
348 /*log_verbose("N ");*/
349 ucol_setAttribute(myCollation
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
350 partialNormalizedSKResult
= compareUsingPartials(myCollation
, source
, sLen
, target
, tLen
, partialSizes
[i
], &status
);
351 ucol_setAttribute(myCollation
, UCOL_NORMALIZATION_MODE
, norm
, &status
);
352 if(partialSKResult
!= partialNormalizedSKResult
) {
353 log_err("Partial sortkey comparison gets different result when normalization is on: %s, %s (size %i)\n",
354 aescstrdup(source
,-1), aescstrdup(target
,-1), partialSizes
[i
]);
358 /*log_verbose("\n");*/
362 compareResult
= ucol_strcoll(myCollation
, source
, sLen
, target
, tLen
);
363 compareResulta
= ucol_strcoll(myCollation
, source
, -1, target
, -1);
364 if (compareResult
!= compareResulta
) {
365 log_err("ucol_strcoll result from null terminated and explicit length strings differs.\n");
368 sortklen1
=ucol_getSortKey(myCollation
, source
, sLen
, NULL
, 0);
369 sortklen2
=ucol_getSortKey(myCollation
, target
, tLen
, NULL
, 0);
371 sortklenmax
= (sortklen1
>sortklen2
?sortklen1
:sortklen2
);
372 sortklenmin
= (sortklen1
<sortklen2
?sortklen1
:sortklen2
);
374 sortKey1
=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax
+1));
375 sortKey1a
=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax
+1));
376 ucol_getSortKey(myCollation
, source
, sLen
, sortKey1
, sortklen1
+1);
377 ucol_getSortKey(myCollation
, source
, -1, sortKey1a
, sortklen1
+1);
379 sortKey2
=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax
+1));
380 sortKey2a
=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax
+1));
381 ucol_getSortKey(myCollation
, target
, tLen
, sortKey2
, sortklen2
+1);
382 ucol_getSortKey(myCollation
, target
, -1, sortKey2a
, sortklen2
+1);
384 /* Check that sort key generated with null terminated string is identical */
385 /* to that generated with a length specified. */
386 if (uprv_strcmp((const char *)sortKey1
, (const char *)sortKey1a
) != 0 ||
387 uprv_strcmp((const char *)sortKey2
, (const char *)sortKey2a
) != 0 ) {
388 log_err("Sort Keys from null terminated and explicit length strings differ.\n");
391 /*memcmp(sortKey1, sortKey2,sortklenmax);*/
392 temp
= uprv_strcmp((const char *)sortKey1
, (const char *)sortKey2
);
393 gSortklen1
= uprv_strlen((const char *)sortKey1
)+1;
394 gSortklen2
= uprv_strlen((const char *)sortKey2
)+1;
395 if(sortklen1
!= gSortklen1
){
396 log_err("SortKey length does not match Expected: %i Got: %i\n",sortklen1
, gSortklen1
);
397 log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation
, sortKey1
, buffer
, &len
));
399 if(sortklen2
!= gSortklen2
){
400 log_err("SortKey length does not match Expected: %i Got: %i\n", sortklen2
, gSortklen2
);
401 log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation
, sortKey2
, buffer
, &len
));
408 keyResult
= UCOL_GREATER
;
411 keyResult
= UCOL_EQUAL
;
413 reportCResult( source
, target
, sortKey1
, sortKey2
, compareResult
, keyResult
, compareResultIter
, result
);
421 void doTest(UCollator
* myCollation
, const UChar source
[], const UChar target
[], UCollationResult result
)
424 doTestVariant(myCollation
, source
, target
, result
);
425 if(result
== UCOL_LESS
) {
426 doTestVariant(myCollation
, target
, source
, UCOL_GREATER
);
427 } else if(result
== UCOL_GREATER
) {
428 doTestVariant(myCollation
, target
, source
, UCOL_LESS
);
430 doTestVariant(myCollation
, target
, source
, UCOL_EQUAL
);
433 log_data_err("No collator! Any data around?\n");
439 * Return an integer array containing all of the collation orders
440 * returned by calls to next on the specified iterator
442 OrderAndOffset
* getOrders(UCollationElements
*iter
, int32_t *orderLength
)
446 int32_t maxSize
= 100;
448 int32_t offset
= ucol_getOffset(iter
);
449 OrderAndOffset
*temp
;
450 OrderAndOffset
*orders
=(OrderAndOffset
*)malloc(sizeof(OrderAndOffset
) * maxSize
);
451 status
= U_ZERO_ERROR
;
454 while ((order
=ucol_next(iter
, &status
)) != UCOL_NULLORDER
)
459 temp
= (OrderAndOffset
*)malloc(sizeof(OrderAndOffset
) * maxSize
);
461 memcpy(temp
, orders
, size
* sizeof(OrderAndOffset
));
467 orders
[size
].order
= order
;
468 orders
[size
].offset
= offset
;
470 offset
= ucol_getOffset(iter
);
474 if (maxSize
> size
&& size
> 0)
476 temp
= (OrderAndOffset
*)malloc(sizeof(OrderAndOffset
) * size
);
478 memcpy(temp
, orders
, size
* sizeof(OrderAndOffset
));
491 backAndForth(UCollationElements
*iter
)
493 /* Run through the iterator forwards and stick it into an array */
495 UErrorCode status
= U_ZERO_ERROR
;
496 int32_t orderLength
= 0;
497 OrderAndOffset
*orders
= getOrders(iter
, &orderLength
);
500 /* Now go through it backwards and make sure we get the same values */
504 /* synwee : changed */
505 while ((o
= ucol_previous(iter
, &status
)) != UCOL_NULLORDER
) {
506 int32_t offset
= ucol_getOffset(iter
);
509 if (o
!= orders
[index
].order
) {
513 while (index
> 0 && orders
[-- index
].order
== 0) {
517 if (o
!= orders
[index
].order
) {
518 log_err("Mismatched order at index %d: 0x%0:8X vs. 0x%0:8X\n", index
,
519 orders
[index
].order
, o
);
526 if (offset
!= orders
[index
].offset
) {
527 log_err("Mismatched offset at index %d: %d vs. %d\n", index
,
528 orders
[index
].offset
, offset
);
535 while (index
!= 0 && orders
[index
- 1].order
== 0) {
540 log_err("Didn't get back to beginning - index is %d\n", index
);
545 if ((o
= ucol_next(iter
, &status
)) != UCOL_NULLORDER
) {
546 log_err("Error at %x\n", o
);
551 if ((o
= ucol_previous(iter
, &status
)) != UCOL_NULLORDER
) {
552 log_err("Error at %x\n", o
);
562 void genericOrderingTestWithResult(UCollator
*coll
, const char * const s
[], uint32_t size
, UCollationResult result
) {
563 UChar t1
[2048] = {0};
564 UChar t2
[2048] = {0};
565 UCollationElements
*iter
;
566 UErrorCode status
= U_ZERO_ERROR
;
568 uint32_t i
= 0, j
= 0;
569 log_verbose("testing sequence:\n");
570 for(i
= 0; i
< size
; i
++) {
571 log_verbose("%s\n", s
[i
]);
574 iter
= ucol_openElements(coll
, t1
, u_strlen(t1
), &status
);
575 if (U_FAILURE(status
)) {
576 log_err("Creation of iterator failed\n");
578 for(i
= 0; i
< size
-1; i
++) {
579 for(j
= i
+1; j
< size
; j
++) {
580 u_unescape(s
[i
], t1
, 2048);
581 u_unescape(s
[j
], t2
, 2048);
582 doTest(coll
, t1
, t2
, result
);
583 /* synwee : added collation element iterator test */
584 ucol_setText(iter
, t1
, u_strlen(t1
), &status
);
586 ucol_setText(iter
, t2
, u_strlen(t2
), &status
);
590 ucol_closeElements(iter
);
593 void genericOrderingTest(UCollator
*coll
, const char * const s
[], uint32_t size
) {
594 genericOrderingTestWithResult(coll
, s
, size
, UCOL_LESS
);
597 void genericLocaleStarter(const char *locale
, const char * const s
[], uint32_t size
) {
598 UErrorCode status
= U_ZERO_ERROR
;
599 UCollator
*coll
= ucol_open(locale
, &status
);
601 log_verbose("Locale starter for %s\n", locale
);
603 if(U_SUCCESS(status
)) {
604 genericOrderingTest(coll
, s
, size
);
605 } else if(status
== U_FILE_ACCESS_ERROR
) {
606 log_data_err("Is your data around?\n");
609 log_err("Unable to open collator for locale %s\n", locale
);
614 void genericLocaleStarterWithResult(const char *locale
, const char * const s
[], uint32_t size
, UCollationResult result
) {
615 UErrorCode status
= U_ZERO_ERROR
;
616 UCollator
*coll
= ucol_open(locale
, &status
);
618 log_verbose("Locale starter for %s\n", locale
);
620 if(U_SUCCESS(status
)) {
621 genericOrderingTestWithResult(coll
, s
, size
, result
);
622 } else if(status
== U_FILE_ACCESS_ERROR
) {
623 log_data_err("Is your data around?\n");
626 log_err("Unable to open collator for locale %s\n", locale
);
631 /* currently not used with options */
632 void genericRulesStarterWithOptionsAndResult(const char *rules
, const char * const s
[], uint32_t size
, const UColAttribute
*attrs
, const UColAttributeValue
*values
, uint32_t attsize
, UCollationResult result
) {
633 UErrorCode status
= U_ZERO_ERROR
;
634 UChar rlz
[RULE_BUFFER_LEN
] = { 0 };
635 uint32_t rlen
= u_unescape(rules
, rlz
, RULE_BUFFER_LEN
);
638 UCollator
*coll
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
,NULL
, &status
);
640 log_verbose("Rules starter for %s\n", rules
);
642 if(U_SUCCESS(status
)) {
643 log_verbose("Setting attributes\n");
644 for(i
= 0; i
< attsize
; i
++) {
645 ucol_setAttribute(coll
, attrs
[i
], values
[i
], &status
);
648 genericOrderingTestWithResult(coll
, s
, size
, result
);
650 log_err("Unable to open collator with rules %s\n", rules
);
655 void genericLocaleStarterWithOptionsAndResult(const char *locale
, const char * const s
[], uint32_t size
, const UColAttribute
*attrs
, const UColAttributeValue
*values
, uint32_t attsize
, UCollationResult result
) {
656 UErrorCode status
= U_ZERO_ERROR
;
659 UCollator
*coll
= ucol_open(locale
, &status
);
661 log_verbose("Locale starter for %s\n", locale
);
663 if(U_SUCCESS(status
)) {
665 log_verbose("Setting attributes\n");
666 for(i
= 0; i
< attsize
; i
++) {
667 ucol_setAttribute(coll
, attrs
[i
], values
[i
], &status
);
670 genericOrderingTestWithResult(coll
, s
, size
, result
);
672 log_err("Unable to open collator for locale %s\n", locale
);
677 void genericLocaleStarterWithOptions(const char *locale
, const char * const s
[], uint32_t size
, const UColAttribute
*attrs
, const UColAttributeValue
*values
, uint32_t attsize
) {
678 genericLocaleStarterWithOptionsAndResult(locale
, s
, size
, attrs
, values
, attsize
, UCOL_LESS
);
681 void genericRulesStarterWithResult(const char *rules
, const char * const s
[], uint32_t size
, UCollationResult result
) {
682 UErrorCode status
= U_ZERO_ERROR
;
683 UChar rlz
[RULE_BUFFER_LEN
] = { 0 };
684 uint32_t rlen
= u_unescape(rules
, rlz
, RULE_BUFFER_LEN
);
686 UCollator
*coll
= NULL
;
687 coll
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
,NULL
, &status
);
688 log_verbose("Rules starter for %s\n", rules
);
690 if(U_SUCCESS(status
)) {
691 genericOrderingTestWithResult(coll
, s
, size
, result
);
693 } else if(status
== U_FILE_ACCESS_ERROR
) {
694 log_data_err("Is your data around?\n");
696 log_err("Unable to open collator with rules %s\n", rules
);
700 void genericRulesStarter(const char *rules
, const char * const s
[], uint32_t size
) {
701 genericRulesStarterWithResult(rules
, s
, size
, UCOL_LESS
);
704 static void TestTertiary()
707 UCollator
*myCollation
;
708 UErrorCode status
=U_ZERO_ERROR
;
709 static const char str
[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
710 UChar rules
[sizeof(str
)];
712 u_uastrcpy(rules
, str
);
714 myCollation
=ucol_openRules(rules
, len
, UCOL_OFF
, UCOL_DEFAULT_STRENGTH
, NULL
, &status
);
715 if(U_FAILURE(status
)){
716 log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status
));
720 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
721 for (i
= 0; i
< 17 ; i
++)
723 doTest(myCollation
, testSourceCases
[i
], testTargetCases
[i
], results
[i
]);
725 ucol_close(myCollation
);
729 static void TestPrimary( )
732 UCollator
*myCollation
;
733 UErrorCode status
=U_ZERO_ERROR
;
734 static const char str
[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
735 UChar rules
[sizeof(str
)];
737 u_uastrcpy(rules
, str
);
739 myCollation
=ucol_openRules(rules
, len
, UCOL_OFF
, UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
740 if(U_FAILURE(status
)){
741 log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status
));
744 ucol_setStrength(myCollation
, UCOL_PRIMARY
);
746 for (i
= 17; i
< 26 ; i
++)
749 doTest(myCollation
, testSourceCases
[i
], testTargetCases
[i
], results
[i
]);
751 ucol_close(myCollation
);
755 static void TestSecondary()
759 UCollator
*myCollation
;
760 UErrorCode status
=U_ZERO_ERROR
;
761 static const char str
[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
762 UChar rules
[sizeof(str
)];
764 u_uastrcpy(rules
, str
);
766 myCollation
=ucol_openRules(rules
, len
, UCOL_OFF
, UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
767 if(U_FAILURE(status
)){
768 log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status
));
771 ucol_setStrength(myCollation
, UCOL_SECONDARY
);
772 for (i
= 26; i
< 34 ; i
++)
774 doTest(myCollation
, testSourceCases
[i
], testTargetCases
[i
], results
[i
]);
776 ucol_close(myCollation
);
780 static void TestIdentical()
784 UCollator
*myCollation
;
785 UErrorCode status
=U_ZERO_ERROR
;
786 static const char str
[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
787 UChar rules
[sizeof(str
)];
789 u_uastrcpy(rules
, str
);
791 myCollation
=ucol_openRules(rules
, len
, UCOL_OFF
, UCOL_IDENTICAL
, NULL
,&status
);
792 if(U_FAILURE(status
)){
793 log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status
));
796 for(i
= 34; i
<37; i
++)
798 doTest(myCollation
, testSourceCases
[i
], testTargetCases
[i
], results
[i
]);
800 ucol_close(myCollation
);
804 static void TestExtra()
808 UCollator
*myCollation
;
809 UErrorCode status
= U_ZERO_ERROR
;
810 static const char str
[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
811 UChar rules
[sizeof(str
)];
813 u_uastrcpy(rules
, str
);
815 myCollation
=ucol_openRules(rules
, len
, UCOL_OFF
, UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
816 if(U_FAILURE(status
)){
817 log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status
));
820 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
821 for (i
= 0; i
< COUNT_TEST_CASES
-1 ; i
++)
823 for (j
= i
+ 1; j
< COUNT_TEST_CASES
; j
+= 1)
826 doTest(myCollation
, testCases
[i
], testCases
[j
], UCOL_LESS
);
829 ucol_close(myCollation
);
833 static void TestJB581(void)
835 int32_t bufferLen
= 0;
838 UCollationResult result
= UCOL_EQUAL
;
839 uint8_t sourceKeyArray
[100];
840 uint8_t targetKeyArray
[100];
841 int32_t sourceKeyOut
= 0,
843 UCollator
*myCollator
= 0;
844 UErrorCode status
= U_ZERO_ERROR
;
846 /*u_uastrcpy(source, "This is a test.");*/
847 /*u_uastrcpy(target, "THISISATEST.");*/
848 u_uastrcpy(source
, "THISISATEST.");
849 u_uastrcpy(target
, "Thisisatest.");
851 myCollator
= ucol_open("en_US", &status
);
852 if (U_FAILURE(status
)){
853 log_err("ERROR: Failed to create the collator : %s\n", u_errorName(status
));
856 result
= ucol_strcoll(myCollator
, source
, -1, target
, -1);
857 /* result is 1, secondary differences only for ignorable space characters*/
860 log_err("Comparing two strings with only secondary differences in C failed.\n");
862 /* To compare them with just primary differences */
863 ucol_setStrength(myCollator
, UCOL_PRIMARY
);
864 result
= ucol_strcoll(myCollator
, source
, -1, target
, -1);
868 log_err("Comparing two strings with no differences in C failed.\n");
870 /* Now, do the same comparison with keys */
871 sourceKeyOut
= ucol_getSortKey(myCollator
, source
, -1, sourceKeyArray
, 100);
872 targetKeyOut
= ucol_getSortKey(myCollator
, target
, -1, targetKeyArray
, 100);
873 bufferLen
= ((targetKeyOut
> 100) ? 100 : targetKeyOut
);
874 if (memcmp(sourceKeyArray
, targetKeyArray
, bufferLen
) != 0)
876 log_err("Comparing two strings with sort keys in C failed.\n");
878 ucol_close(myCollator
);
881 static void TestJB1401(void)
883 UCollator
*myCollator
= 0;
884 UErrorCode status
= U_ZERO_ERROR
;
885 static UChar NFD_UnsafeStartChars
[] = {
886 0x0f73, /* Tibetan Vowel Sign II */
887 0x0f75, /* Tibetan Vowel Sign UU */
888 0x0f81, /* Tibetan Vowel Sign Reversed II */
894 myCollator
= ucol_open("en_US", &status
);
895 if (U_FAILURE(status
)){
896 log_err("ERROR: Failed to create the collator : %s\n", u_errorName(status
));
899 ucol_setAttribute(myCollator
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
900 if (U_FAILURE(status
)){
901 log_err("ERROR: Failed to set normalization mode ON for collator.\n");
911 /* Get the next funny character to be tested, and set up the
912 * three test strings X, Y, Z, consisting of an A-grave + test char,
913 * in original form, NFD, and then NFC form.
915 c
= NFD_UnsafeStartChars
[i
];
918 X
[0]=0xC0; X
[1]=c
; X
[2]=0; /* \u00C0 is A Grave*/
920 unorm_normalize(X
, -1, UNORM_NFD
, 0, Y
, 20, &status
);
921 unorm_normalize(Y
, -1, UNORM_NFC
, 0, Z
, 20, &status
);
922 if (U_FAILURE(status
)){
923 log_err("ERROR: Failed to normalize test of character %x\n", c
);
927 /* Collation test. All three strings should be equal.
928 * doTest does both strcoll and sort keys, with params in both orders.
930 doTest(myCollator
, X
, Y
, UCOL_EQUAL
);
931 doTest(myCollator
, X
, Z
, UCOL_EQUAL
);
932 doTest(myCollator
, Y
, Z
, UCOL_EQUAL
);
934 /* Run collation element iterators over the three strings. Results should be same for each.
937 UCollationElements
*ceiX
, *ceiY
, *ceiZ
;
938 int32_t ceX
, ceY
, ceZ
;
941 ceiX
= ucol_openElements(myCollator
, X
, -1, &status
);
942 ceiY
= ucol_openElements(myCollator
, Y
, -1, &status
);
943 ceiZ
= ucol_openElements(myCollator
, Z
, -1, &status
);
944 if (U_FAILURE(status
)) {
945 log_err("ERROR: uucol_openElements failed.\n");
950 ceX
= ucol_next(ceiX
, &status
);
951 ceY
= ucol_next(ceiY
, &status
);
952 ceZ
= ucol_next(ceiZ
, &status
);
953 if (U_FAILURE(status
)) {
954 log_err("ERROR: ucol_next failed for iteration #%d.\n", j
);
957 if (ceX
!= ceY
|| ceY
!= ceZ
) {
958 log_err("ERROR: ucol_next failed for iteration #%d.\n", j
);
961 if (ceX
== UCOL_NULLORDER
) {
965 ucol_closeElements(ceiX
);
966 ucol_closeElements(ceiY
);
967 ucol_closeElements(ceiZ
);
970 ucol_close(myCollator
);
976 * Tests the [variable top] tag in rule syntax. Since the default [alternate]
977 * tag has the value shifted, any codepoints before [variable top] should give
980 static void TestVariableTop(void)
982 static const char str
[] = "&z = [variable top]";
983 int len
= strlen(str
);
984 UChar rules
[sizeof(str
)];
985 UCollator
*myCollation
;
986 UCollator
*enCollation
;
987 UErrorCode status
= U_ZERO_ERROR
;
991 uint8_t expected
[20];
993 u_uastrcpy(rules
, str
);
995 enCollation
= ucol_open("en_US", &status
);
996 if (U_FAILURE(status
)) {
997 log_err("ERROR: in creation of collator :%s\n",
998 myErrorName(status
));
1001 myCollation
= ucol_openRules(rules
, len
, UCOL_OFF
,
1002 UCOL_PRIMARY
,NULL
, &status
);
1003 if (U_FAILURE(status
)) {
1004 ucol_close(enCollation
);
1005 log_err("ERROR: in creation of rule based collator :%s\n",
1006 myErrorName(status
));
1010 ucol_setStrength(enCollation
, UCOL_PRIMARY
);
1011 ucol_setAttribute(enCollation
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
,
1013 ucol_setAttribute(myCollation
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
,
1016 if (ucol_getAttribute(myCollation
, UCOL_ALTERNATE_HANDLING
, &status
) !=
1017 UCOL_SHIFTED
|| U_FAILURE(status
)) {
1018 log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n");
1021 uprv_memset(expected
, 0, 20);
1023 /* space is supposed to be a variable */
1025 len
= ucol_getSortKey(enCollation
, source
, 1, result
,
1028 if (uprv_memcmp(expected
, result
, len
) != 0) {
1029 log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n");
1035 len
= ucol_getSortKey(myCollation
, source
, 1, result
,
1037 if (uprv_memcmp(expected
, result
, len
) != 0) {
1038 log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n",
1044 ucol_close(enCollation
);
1045 ucol_close(myCollation
);
1051 * Tests surrogate support.
1052 * NOTE: This test used \\uD801\\uDC01 pair, which is now assigned to Deseret
1053 * Therefore, another (unassigned) code point was used for this test.
1055 static void TestSurrogates(void)
1057 static const char str
[] =
1058 "&z<'\\uD800\\uDC00'<'\\uD800\\uDC0A\\u0308'<A";
1059 int len
= strlen(str
);
1061 UChar rules
[sizeof(str
)];
1062 UCollator
*myCollation
;
1063 UCollator
*enCollation
;
1064 UErrorCode status
= U_ZERO_ERROR
;
1066 {{'z', 0, 0}, {0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {0xD800, 0xDC02}};
1068 {{0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {'A', 0, 0}, {0xD800, 0xDC03}};
1070 uint8_t enresult
[20], myresult
[20];
1073 /* tests for open rules with surrogate rules */
1074 rlen
= u_unescape(str
, rules
, len
);
1076 enCollation
= ucol_open("en_US", &status
);
1077 if (U_FAILURE(status
)) {
1078 log_err("ERROR: in creation of collator :%s\n",
1079 myErrorName(status
));
1082 myCollation
= ucol_openRules(rules
, rlen
, UCOL_OFF
,
1083 UCOL_TERTIARY
,NULL
, &status
);
1084 if (U_FAILURE(status
)) {
1085 ucol_close(enCollation
);
1086 log_err("ERROR: in creation of rule based collator :%s\n",
1087 myErrorName(status
));
1092 this test is to verify the supplementary sort key order in the english
1095 log_verbose("start of english collation supplementary characters test\n");
1097 doTest(enCollation
, source
[count
], target
[count
], UCOL_LESS
);
1100 doTest(enCollation
, source
[count
], target
[count
], UCOL_GREATER
);
1102 log_verbose("start of tailored collation supplementary characters test\n");
1104 /* tests getting collation elements for surrogates for tailored rules */
1106 doTest(myCollation
, source
[count
], target
[count
], UCOL_LESS
);
1110 /* tests that \uD800\uDC02 still has the same value, not changed */
1111 enlen
= ucol_getSortKey(enCollation
, source
[3], 2, enresult
, 20);
1112 mylen
= ucol_getSortKey(myCollation
, source
[3], 2, myresult
, 20);
1113 if (enlen
!= mylen
||
1114 uprv_memcmp(enresult
, myresult
, enlen
) != 0) {
1115 log_verbose("Failed : non-tailored supplementary characters should have the same value\n");
1118 ucol_close(enCollation
);
1119 ucol_close(myCollation
);
1125 *### TODO: Add more invalid rules to test all different scenarios.
/*
 * Body fragment of a test that passes rule strings to ucol_openRules() and
 * compares the preContext and postContext fields filled into UParseError
 * against expected strings, logging an error on any mismatch.
 * NOTE(review): the enclosing function header, the initializers of
 * preContextArr and postContextArr, and the declarations of i and coll are
 * not visible in this excerpt - confirm against the full file.
 */
1130 #define MAX_ERROR_STATES 2
/* Rule strings handed to ucol_openRules(), one per loop iteration. */
1132 static const char* rulesArr
[MAX_ERROR_STATES
] = {
1133 "& C < ch, cH, Ch[this should fail]<d",
1134 "& C < ch, cH, & Ch[variable top]"
/* Expected preContext per rule (initializer elided in this excerpt). */
1136 static const char* preContextArr
[MAX_ERROR_STATES
] = {
/* Expected postContext per rule (initializer elided in this excerpt). */
1141 static const char* postContextArr
[MAX_ERROR_STATES
] = {
/* One pass per entry of the three parallel arrays above. */
1147 for(i
= 0;i
<MAX_ERROR_STATES
;i
++){
/* Per-iteration UChar buffers, zero-initialised, 1000 units each. */
1148 UChar rules
[1000] = { '\0' };
1149 UChar preContextExp
[1000] = { '\0' };
1150 UChar postContextExp
[1000] = { '\0' };
1151 UParseError parseError
;
/* status is reset for every rule because it is declared inside the loop. */
1152 UErrorCode status
= U_ZERO_ERROR
;
/* Convert the char strings to UChar; the +1 copies the terminating NUL too. */
1154 u_charsToUChars(rulesArr
[i
],rules
,uprv_strlen(rulesArr
[i
])+1);
1155 u_charsToUChars(preContextArr
[i
],preContextExp
,uprv_strlen(preContextArr
[i
])+1);
1156 u_charsToUChars(postContextArr
[i
],postContextExp
,uprv_strlen(postContextArr
[i
])+1);
1157 /* clean up stuff in parseError */
/* parseError is otherwise uninitialised; zero both context arrays before use. */
1158 u_memset(parseError
.preContext
,0x0000,U_PARSE_CONTEXT_LEN
);
1159 u_memset(parseError
.postContext
,0x0000,U_PARSE_CONTEXT_LEN
);
1160 /* open the rules and test */
/* Normalization UCOL_OFF, default strength; parse details land in parseError. */
1161 coll
= ucol_openRules(rules
,u_strlen(rules
),UCOL_OFF
,UCOL_DEFAULT_STRENGTH
,&parseError
,&status
);
/* The reported contexts must equal the expected strings exactly. */
1162 if(u_strcmp(parseError
.preContext
,preContextExp
)!=0){
1163 log_err("preContext in UParseError for ucol_openRules does not match\n");
1165 if(u_strcmp(parseError
.postContext
,postContextExp
)!=0){
1166 log_err("postContext in UParseError for ucol_openRules does not match\n");
/*
 * TestJitterbug1098: for each entry of the local rules table, builds a
 * collator with ucol_openRules(), reports failures (including the parse
 * context when the status is U_PARSE_ERROR) and then calls doTest() with the
 * expected result results1098[i].
 * NOTE(review): the contents of rules, results1098 and input, and the
 * declarations of i, rule, X and Y are elided from this excerpt - confirm
 * against the full file.
 */
1172 TestJitterbug1098(){
1174 UCollator
* c1
= NULL
;
1175 UErrorCode status
= U_ZERO_ERROR
;
1176 UParseError parseError
;
/* char copies of the parse contexts for logging; 200 bytes, zero-initialised. */
1177 char preContext
[200]={0};
1178 char postContext
[200]={0};
/* Rule strings to open, in char form (entries elided in this excerpt). */
1180 const char* rules
[] = {
/* Expected comparison result for each rule, indexed in step with rules. */
1188 const UCollationResult results1098
[] = {
/* Two-unit UChar input rows (entries elided in this excerpt). */
1194 const UChar input
[][2]= {
/* parseError is uninitialised at declaration; clear both context arrays once. */
1202 u_memset(parseError
.preContext
,0x0000,U_PARSE_CONTEXT_LEN
);
1203 u_memset(parseError
.postContext
,0x0000,U_PARSE_CONTEXT_LEN
);
/* Iterate until a null entry in rules; i is initialised on a line not shown. */
1204 for(;rules
[i
]!=0;i
++){
/* Widen the char rule string into the UChar buffer rule. */
1205 u_uastrcpy(rule
, rules
[i
]);
1206 c1
= ucol_openRules(rule
, u_strlen(rule
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
, &parseError
, &status
);
1207 if(U_FAILURE(status
)){
1208 log_err("Could not parse the rules syntax. Error: %s\n", u_errorName(status
));
/* For parse errors, also log the surrounding rule text. Only the first 20 */
/* units of each context are converted into the char buffers. */
1210 if (status
== U_PARSE_ERROR
) {
1211 u_UCharsToChars(parseError
.preContext
,preContext
,20);
1212 u_UCharsToChars(parseError
.postContext
,postContext
,20);
1213 log_verbose("\n\tPre-Context: %s \n\tPost-Context:%s \n",preContext
,postContext
);
/* Compare X and Y with the collator and check against the expected result. */
1220 doTest(c1
,X
,Y
,results1098
[i
]);
/*
 * TestFCDCrash: opens a collator for locale es, later opens one for de_DE,
 * turns normalization on and runs genericOrderingTest() over the two-entry
 * test table.
 * NOTE(review): the second test string, the statements between the two
 * ucol_open() calls (including what happens to the first collator) and the
 * final cleanup are elided from this excerpt - confirm against the full file.
 */
1226 TestFCDCrash(void) {
/* Escaped strings to order; the second entry is not visible here. */
1227 static const char *test
[] = {
1228 "Gr\\u00F6\\u00DFe",
1232 UErrorCode status
= U_ZERO_ERROR
;
/* First collator: Spanish. */
1233 UCollator
*coll
= ucol_open("es", &status
);
1234 if(U_FAILURE(status
)) {
1235 log_err("Couldn't open collator\n");
/* coll is reassigned to a German collator, reusing the same status variable. */
1241 coll
= ucol_open("de_DE", &status
);
1242 if(U_FAILURE(status
)) {
1243 log_err("Couldn't open collator\n");
/* Enable normalization before running the ordering check. */
1246 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
/* The literal 2 is the number of entries of test that are passed in. */
1247 genericOrderingTest(coll
, test
, 2);
/*
 * find: scans the string enumeration list for an entry equal to str.
 * The enumeration is reset first so the scan starts from the beginning,
 * then each value from uenum_next() is compared with strcmp().
 * A failing *status on entry takes an early branch whose body is not shown.
 * NOTE(review): the return type, the declaration of length and every return
 * statement are elided from this excerpt; presumably a found / not-found
 * flag is returned - confirm against the full file.
 */
1252 find(UEnumeration* list, const char* str, UErrorCode* status){
1253 const char* value = NULL;
1255 if(U_FAILURE(*status)){
1258 uenum_reset(list, status);
/* uenum_next() yields NULL once the enumeration is exhausted. */
1259 while( (value= uenum_next(list, &length, status))!=NULL){
1260 if(strcmp(value, str)==0){
/*
 * TestJ5298: for every available collator locale and every value of the
 * collation keyword, builds a locale ID, asks
 * ucol_getFunctionalEquivalent() for its functional equivalent and logs the
 * ID (verbose) when the equivalent is the ID itself.
 * NOTE(review): the declarations of i and isAvailable and the closing braces
 * are elided from this excerpt - confirm against the full file.
 */
1267 static void TestJ5298(void)
1269 UErrorCode status
= U_ZERO_ERROR
;
/* Fixed 256-byte buffers for the requested and the equivalent locale ID. */
1270 char input
[256], output
[256];
1273 UEnumeration
* values
= NULL
;
1274 const char *keywordValue
= NULL
;
1275 log_verbose("Number of collator locales returned : %i \n", ucol_countAvailable());
/* Enumeration of all values of the collation keyword. */
1276 values
= ucol_getKeywordValues("collation", &status
);
/* Outer loop: index over the available collator locales. */
1277 for (i
= 0; i
< ucol_countAvailable(); i
++) {
/* Restart the keyword enumeration for each locale. */
1278 uenum_reset(values
, &status
);
/* Inner loop: every keyword value; the length out-parameter is not requested. */
1279 while ((keywordValue
= uenum_next(values
, NULL
, &status
)) != NULL
) {
/* NOTE(review): strcpy and strcat write into the 256-byte input buffer */
/* without a length check - confirm locale IDs always fit. */
1280 strcpy(input
, ucol_getAvailable(i
));
/* The value standard is left off; any other value is appended as a keyword. */
1281 if (strcmp(keywordValue
, "standard") != 0) {
1282 strcat(input
, "@collation=");
1283 strcat(input
, keywordValue
);
/* Resolve input to its functional equivalent; 256 is the capacity of output. */
1286 ucol_getFunctionalEquivalent(output
, 256, "collation", input
, &isAvailable
, &status
);
1287 if (strcmp(input
, output
) == 0) { /* Unique locale, print it out */
1288 log_verbose("%s, \n", output
);
/* Release the keyword enumeration obtained above. */
1292 uenum_close(values
);
1295 #endif /* #if !UCONFIG_NO_COLLATION */