1 /********************************************************************
3 * Copyright (c) 1997-2004, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
10 * Modification History:
12 * Madhu Katragadda Ported for C API
13 *********************************************************************************
17 * Important: This file is included into intltest/allcoll.cpp so that the
18 * test data is shared. This makes it easier to maintain the test data,
19 * especially since the Unicode data must be portable and quoted character
20 * literals will not work.
21 * If it is included, then there will be a #define INCLUDE_CALLCOLL_C
22 * that must prevent the actual code in here from being part of the
23 * allcoll.cpp compilation.
27 * CollationDummyTest is a third level test class. This tests creation of
28 * a customized collator object. For example, number 1 to be sorted
29 * equivalent to word 'one'.
35 #include "unicode/utypes.h"
37 #if !UCONFIG_NO_COLLATION
39 #include "unicode/ucol.h"
40 #include "unicode/uloc.h"
41 #include "unicode/ucoleitr.h"
42 #include "unicode/ustring.h"
43 #include "unicode/uclean.h"
44 #include "unicode/putil.h"
54 /* perform test with strength PRIMARY */
55 static void TestPrimary(void);
57 /* perform test with strength SECONDARY */
58 static void TestSecondary(void);
60 /* perform test with strength tertiary */
61 static void TestTertiary(void);
63 /*perform tests with strength Identical */
64 static void TestIdentical(void);
66 /* perform extra tests */
67 static void TestExtra(void);
69 /* Test jitterbug 581 */
70 static void TestJB581(void);
72 /* Test jitterbug 1401 */
73 static void TestJB1401(void);
75 /* Test [variable top] in the rule syntax */
76 static void TestVariableTop(void);
79 static void TestSurrogates(void);
81 static void TestInvalidRules(void);
83 static void TestJitterbug1098(void);
85 static void TestFCDCrash(void);
87 const UCollationResult results
[] = {
89 UCOL_LESS
, /*UCOL_GREATER,*/
105 /* test primary > 17 */
115 /* test secondary > 26 */
130 void addAllCollTest(TestNode
** root
)
134 addTest(root
, &TestPrimary
, "tscoll/callcoll/TestPrimary");
135 addTest(root
, &TestSecondary
, "tscoll/callcoll/TestSecondary");
136 addTest(root
, &TestTertiary
, "tscoll/callcoll/TestTertiary");
137 addTest(root
, &TestIdentical
, "tscoll/callcoll/TestIdentical");
138 addTest(root
, &TestExtra
, "tscoll/callcoll/TestExtra");
139 addTest(root
, &TestJB581
, "tscoll/callcoll/TestJB581");
140 addTest(root
, &TestVariableTop
, "tscoll/callcoll/TestVariableTop");
141 addTest(root
, &TestSurrogates
, "tscoll/callcoll/TestSurrogates");
142 addTest(root
, &TestInvalidRules
, "tscoll/callcoll/TestInvalidRules");
143 addTest(root
, &TestJB1401
, "tscoll/callcoll/TestJB1401");
144 addTest(root
, &TestJitterbug1098
, "tscoll/callcoll/TestJitterbug1098");
145 addTest(root
, &TestFCDCrash
, "tscoll/callcoll/TestFCDCrash");
149 UBool
hasCollationElements(const char *locName
) {
151 UErrorCode status
= U_ZERO_ERROR
;
152 UResourceBundle
*ColEl
= NULL
;
154 UResourceBundle
*loc
= ures_open(NULL
, locName
, &status
);;
156 if(U_SUCCESS(status
)) {
157 status
= U_ZERO_ERROR
;
158 ColEl
= ures_getByKey(loc
, "collations", ColEl
, &status
);
159 if(status
== U_ZERO_ERROR
) { /* do the test - there are real elements */
170 static UCollationResult
compareUsingPartials(UCollator
*coll
, const UChar source
[], int32_t sLen
, const UChar target
[], int32_t tLen
, int32_t pieceSize
, UErrorCode
*status
) {
171 int32_t partialSKResult
= 0;
172 UCharIterator sIter
, tIter
;
173 uint32_t sState
[2], tState
[2];
174 int32_t sSize
= pieceSize
, tSize
= pieceSize
;
176 uint8_t sBuf
[16384], tBuf
[16384];
177 if(pieceSize
> 16384) {
178 log_err("Partial sortkey size buffer too small. Please consider increasing the buffer!\n");
179 *status
= U_BUFFER_OVERFLOW_ERROR
;
182 *status
= U_ZERO_ERROR
;
183 sState
[0] = 0; sState
[1] = 0;
184 tState
[0] = 0; tState
[1] = 0;
185 while(sSize
== pieceSize
&& tSize
== pieceSize
&& partialSKResult
== 0) {
186 uiter_setString(&sIter
, source
, sLen
);
187 uiter_setString(&tIter
, target
, tLen
);
188 sSize
= ucol_nextSortKeyPart(coll
, &sIter
, sState
, sBuf
, pieceSize
, status
);
189 tSize
= ucol_nextSortKeyPart(coll
, &tIter
, tState
, tBuf
, pieceSize
, status
);
191 if(sState
[0] != 0 || tState
[0] != 0) {
192 /*log_verbose("State != 0 : %08X %08X\n", sState[0], tState[0]);*/
194 /*log_verbose("%i ", i++);*/
196 partialSKResult
= memcmp(sBuf
, tBuf
, pieceSize
);
199 if(partialSKResult
< 0) {
201 } else if(partialSKResult
> 0) {
208 static void doTestVariant(UCollator
* myCollation
, const UChar source
[], const UChar target
[], UCollationResult result
)
210 int32_t sortklen1
, sortklen2
, sortklenmax
, sortklenmin
;
211 int temp
=0, gSortklen1
=0,gSortklen2
=0;
212 UCollationResult compareResult
, compareResulta
, keyResult
, compareResultIter
= result
;
213 uint8_t *sortKey1
, *sortKey2
, *sortKey1a
, *sortKey2a
;
214 uint32_t sLen
= u_strlen(source
);
215 uint32_t tLen
= u_strlen(target
);
218 UErrorCode status
= U_ZERO_ERROR
;
219 UColAttributeValue norm
= ucol_getAttribute(myCollation
, UCOL_NORMALIZATION_MODE
, &status
);
221 UCharIterator sIter
, tIter
;
222 uiter_setString(&sIter
, source
, sLen
);
223 uiter_setString(&tIter
, target
, tLen
);
224 compareResultIter
= ucol_strcollIter(myCollation
, &sIter
, &tIter
, &status
);
225 if(compareResultIter
!= result
) {
226 log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source
,-1), aescstrdup(target
,-1));
229 /* convert the strings to UTF-8 and do try comparing with char iterator */
230 if(QUICK
<= 0) { /*!QUICK*/
231 char utf8Source
[256], utf8Target
[256];
232 int32_t utf8SourceLen
= 0, utf8TargetLen
= 0;
233 u_strToUTF8(utf8Source
, 256, &utf8SourceLen
, source
, sLen
, &status
);
234 if(U_FAILURE(status
)) { /* probably buffer is not big enough */
235 log_verbose("Src UTF-8 buffer too small! Will not compare!\n");
237 u_strToUTF8(utf8Target
, 256, &utf8TargetLen
, target
, tLen
, &status
);
238 if(U_SUCCESS(status
)) { /* probably buffer is not big enough */
239 UCollationResult compareResultUTF8
= result
, compareResultUTF8Norm
= result
;
240 /*UCharIterator sIter, tIter;*/
241 /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/
242 uiter_setUTF8(&sIter
, utf8Source
, utf8SourceLen
);
243 uiter_setUTF8(&tIter
, utf8Target
, utf8TargetLen
);
244 /*uiter_setString(&sIter, source, sLen);
245 uiter_setString(&tIter, target, tLen);*/
246 compareResultUTF8
= ucol_strcollIter(myCollation
, &sIter
, &tIter
, &status
);
247 ucol_setAttribute(myCollation
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
248 sIter
.move(&sIter
, 0, UITER_START
);
249 tIter
.move(&tIter
, 0, UITER_START
);
250 compareResultUTF8Norm
= ucol_strcollIter(myCollation
, &sIter
, &tIter
, &status
);
251 ucol_setAttribute(myCollation
, UCOL_NORMALIZATION_MODE
, norm
, &status
);
252 if(compareResultUTF8
!= compareResultIter
) {
253 log_err("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. %s, %s\n", aescstrdup(source
,-1), aescstrdup(target
,-1));
255 if(compareResultUTF8
!= compareResultUTF8Norm
) {
256 log_err("different results in iterative when normalization is turned on with UTF-8 strings. %s, %s\n", aescstrdup(source
,-1), aescstrdup(target
,-1));
259 log_verbose("Target UTF-8 buffer too small! Did not compare!\n");
261 if(U_FAILURE(status
)) {
262 log_verbose("UTF-8 strcoll failed! Ignoring result\n");
267 /* testing the partial sortkeys */
270 int32_t partialSizes
[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
271 int32_t partialSizesSize
= 1;
273 partialSizesSize
= 7;
275 /*log_verbose("partial sortkey test piecesize=");*/
276 for(i
= 0; i
< partialSizesSize
; i
++) {
277 UCollationResult partialSKResult
= result
, partialNormalizedSKResult
= result
;
278 /*log_verbose("%i ", partialSizes[i]);*/
280 partialSKResult
= compareUsingPartials(myCollation
, source
, sLen
, target
, tLen
, partialSizes
[i
], &status
);
281 if(partialSKResult
!= result
) {
282 log_err("Partial sortkey comparison returned wrong result: %s, %s (size %i)\n",
283 aescstrdup(source
,-1), aescstrdup(target
,-1), partialSizes
[i
]);
286 if(QUICK
<= 0 && norm
!= UCOL_ON
) {
287 /*log_verbose("N ");*/
288 ucol_setAttribute(myCollation
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
289 partialNormalizedSKResult
= compareUsingPartials(myCollation
, source
, sLen
, target
, tLen
, partialSizes
[i
], &status
);
290 ucol_setAttribute(myCollation
, UCOL_NORMALIZATION_MODE
, norm
, &status
);
291 if(partialSKResult
!= partialNormalizedSKResult
) {
292 log_err("Partial sortkey comparison gets different result when normalization is on: %s, %s (size %i)\n",
293 aescstrdup(source
,-1), aescstrdup(target
,-1), partialSizes
[i
]);
297 /*log_verbose("\n");*/
301 compareResult
= ucol_strcoll(myCollation
, source
, sLen
, target
, tLen
);
302 compareResulta
= ucol_strcoll(myCollation
, source
, -1, target
, -1);
303 if (compareResult
!= compareResulta
) {
304 log_err("ucol_strcoll result from null terminated and explicit length strings differs.\n");
307 sortklen1
=ucol_getSortKey(myCollation
, source
, sLen
, NULL
, 0);
308 sortklen2
=ucol_getSortKey(myCollation
, target
, tLen
, NULL
, 0);
310 sortklenmax
= (sortklen1
>sortklen2
?sortklen1
:sortklen2
);
311 sortklenmin
= (sortklen1
<sortklen2
?sortklen1
:sortklen2
);
313 sortKey1
=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax
+1));
314 sortKey1a
=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax
+1));
315 ucol_getSortKey(myCollation
, source
, sLen
, sortKey1
, sortklen1
+1);
316 ucol_getSortKey(myCollation
, source
, -1, sortKey1a
, sortklen1
+1);
318 sortKey2
=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax
+1));
319 sortKey2a
=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax
+1));
320 ucol_getSortKey(myCollation
, target
, tLen
, sortKey2
, sortklen2
+1);
321 ucol_getSortKey(myCollation
, target
, -1, sortKey2a
, sortklen2
+1);
323 /* Check that sort key generated with null terminated string is identical */
324 /* to that generated with a length specified. */
325 if (uprv_strcmp((const char *)sortKey1
, (const char *)sortKey1a
) != 0 ||
326 uprv_strcmp((const char *)sortKey2
, (const char *)sortKey2a
) != 0 ) {
327 log_err("Sort Keys from null terminated and explicit length strings differ.\n");
330 /*memcmp(sortKey1, sortKey2,sortklenmax);*/
331 temp
= uprv_strcmp((const char *)sortKey1
, (const char *)sortKey2
);
332 gSortklen1
= uprv_strlen((const char *)sortKey1
)+1;
333 gSortklen2
= uprv_strlen((const char *)sortKey2
)+1;
334 if(sortklen1
!= gSortklen1
){
335 log_err("SortKey length does not match Expected: %i Got: %i\n",sortklen1
, gSortklen1
);
336 log_verbose("Generated sortkey: %s\n", ucol_sortKeyToString(myCollation
, sortKey1
, buffer
, &len
));
338 if(sortklen2
!= gSortklen2
){
339 log_err("SortKey length does not match Expected: %i Got: %i\n", sortklen2
, gSortklen2
);
340 log_verbose("Generated sortkey: %s\n", ucol_sortKeyToString(myCollation
, sortKey2
, buffer
, &len
));
347 keyResult
= UCOL_GREATER
;
350 keyResult
= UCOL_EQUAL
;
352 reportCResult( source
, target
, sortKey1
, sortKey2
, compareResult
, keyResult
, compareResultIter
, result
);
360 void doTest(UCollator
* myCollation
, const UChar source
[], const UChar target
[], UCollationResult result
)
363 doTestVariant(myCollation
, source
, target
, result
);
364 if(result
== UCOL_LESS
) {
365 doTestVariant(myCollation
, target
, source
, UCOL_GREATER
);
366 } else if(result
== UCOL_GREATER
) {
367 doTestVariant(myCollation
, target
, source
, UCOL_LESS
);
369 doTestVariant(myCollation
, target
, source
, UCOL_EQUAL
);
372 log_data_err("No collator! Any data around?\n");
378 * Return an integer array containing all of the collation orders
379 * returned by calls to next on the specified iterator
381 int32_t* getOrders(UCollationElements
*iter
, int32_t *orderLength
)
385 int32_t maxSize
= 100;
388 int32_t *orders
=(int32_t*)malloc(sizeof(int32_t) * maxSize
);
389 status
= U_ZERO_ERROR
;
392 while ((order
=ucol_next(iter
, &status
)) != UCOL_NULLORDER
)
397 temp
= (int32_t*)malloc(sizeof(int32_t) * maxSize
);
399 memcpy(temp
, orders
, size
* sizeof(int32_t));
405 orders
[size
++] = order
;
408 if (maxSize
> size
&& size
> 0)
410 temp
= (int32_t*)malloc(sizeof(int32_t) * size
);
412 memcpy(temp
, orders
, size
* sizeof(int32_t));
425 backAndForth(UCollationElements
*iter
)
427 /* Run through the iterator forwards and stick it into an array */
429 UErrorCode status
= U_ZERO_ERROR
;
430 int32_t orderLength
= 0;
432 orders
= getOrders(iter
, &orderLength
);
435 /* Now go through it backwards and make sure we get the same values */
439 /* synwee : changed */
440 while ((o
= ucol_previous(iter
, &status
)) != UCOL_NULLORDER
)
442 if (o
!= orders
[-- index
])
448 while (index
> 0 && orders
[-- index
] == 0)
451 if (o
!= orders
[index
])
453 log_err("Mismatch at index : 0x%x\n", index
);
461 while (index
!= 0 && orders
[index
- 1] == 0) {
467 log_err("Didn't get back to beginning - index is %d\n", index
);
471 if ((o
= ucol_next(iter
, &status
)) != UCOL_NULLORDER
)
473 log_err("Error at %x\n", o
);
476 if ((o
= ucol_previous(iter
, &status
)) != UCOL_NULLORDER
)
478 log_err("Error at %x\n", o
);
486 void genericOrderingTestWithResult(UCollator
*coll
, const char *s
[], uint32_t size
, UCollationResult result
) {
487 UChar t1
[2048] = {0};
488 UChar t2
[2048] = {0};
489 UCollationElements
*iter
;
490 UErrorCode status
= U_ZERO_ERROR
;
492 uint32_t i
= 0, j
= 0;
493 log_verbose("testing sequence:\n");
494 for(i
= 0; i
< size
; i
++) {
495 log_verbose("%s\n", s
[i
]);
498 iter
= ucol_openElements(coll
, t1
, u_strlen(t1
), &status
);
499 if (U_FAILURE(status
)) {
500 log_err("Creation of iterator failed\n");
502 for(i
= 0; i
< size
-1; i
++) {
503 for(j
= i
+1; j
< size
; j
++) {
504 u_unescape(s
[i
], t1
, 2048);
505 u_unescape(s
[j
], t2
, 2048);
506 doTest(coll
, t1
, t2
, result
);
507 /* synwee : added collation element iterator test */
508 ucol_setText(iter
, t1
, u_strlen(t1
), &status
);
510 ucol_setText(iter
, t2
, u_strlen(t2
), &status
);
514 ucol_closeElements(iter
);
517 void genericOrderingTest(UCollator
*coll
, const char *s
[], uint32_t size
) {
518 genericOrderingTestWithResult(coll
, s
, size
, UCOL_LESS
);
521 void genericLocaleStarter(const char *locale
, const char *s
[], uint32_t size
) {
522 UErrorCode status
= U_ZERO_ERROR
;
523 UCollator
*coll
= ucol_open(locale
, &status
);
525 log_verbose("Locale starter for %s\n", locale
);
527 if(U_SUCCESS(status
)) {
528 genericOrderingTest(coll
, s
, size
);
529 } else if(status
== U_FILE_ACCESS_ERROR
) {
530 log_data_err("Is your data around?\n");
533 log_err("Unable to open collator for locale %s\n", locale
);
538 void genericLocaleStarterWithResult(const char *locale
, const char *s
[], uint32_t size
, UCollationResult result
) {
539 UErrorCode status
= U_ZERO_ERROR
;
540 UCollator
*coll
= ucol_open(locale
, &status
);
542 log_verbose("Locale starter for %s\n", locale
);
544 if(U_SUCCESS(status
)) {
545 genericOrderingTestWithResult(coll
, s
, size
, result
);
546 } else if(status
== U_FILE_ACCESS_ERROR
) {
547 log_data_err("Is your data around?\n");
550 log_err("Unable to open collator for locale %s\n", locale
);
556 /* currently not used with options */
557 void genericRulesStarterWithOptions(const char *rules
, const char *s
[], uint32_t size
, const UColAttribute
*attrs
, const UColAttributeValue
*values
, uint32_t attsize
) {
558 UErrorCode status
= U_ZERO_ERROR
;
559 UChar rlz
[RULE_BUFFER_LEN
] = { 0 };
560 uint32_t rlen
= u_unescape(rules
, rlz
, RULE_BUFFER_LEN
);
563 UCollator
*coll
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
,NULL
, &status
);
565 log_verbose("Rules starter for %s\n", rules
);
567 if(U_SUCCESS(status
)) {
568 log_verbose("Setting attributes\n");
569 for(i
= 0; i
< attsize
; i
++) {
570 ucol_setAttribute(coll
, attrs
[i
], values
[i
], &status
);
573 genericOrderingTest(coll
, s
, size
);
575 log_err("Unable to open collator with rules %s\n", rules
);
581 void genericLocaleStarterWithOptions(const char *locale
, const char *s
[], uint32_t size
, const UColAttribute
*attrs
, const UColAttributeValue
*values
, uint32_t attsize
) {
582 UErrorCode status
= U_ZERO_ERROR
;
585 UCollator
*coll
= ucol_open(locale
, &status
);
587 log_verbose("Locale starter for %s\n", locale
);
589 if(U_SUCCESS(status
)) {
591 log_verbose("Setting attributes\n");
592 for(i
= 0; i
< attsize
; i
++) {
593 ucol_setAttribute(coll
, attrs
[i
], values
[i
], &status
);
596 genericOrderingTest(coll
, s
, size
);
598 log_err("Unable to open collator for locale %s\n", locale
);
603 void genericRulesTestWithResult(const char *rules
, const char *s
[], uint32_t size
, UCollationResult result
) {
604 UErrorCode status
= U_ZERO_ERROR
;
605 UChar rlz
[RULE_BUFFER_LEN
] = { 0 };
606 uint32_t rlen
= u_unescape(rules
, rlz
, RULE_BUFFER_LEN
);
608 UCollator
*coll
= NULL
;
609 coll
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
,NULL
, &status
);
610 log_verbose("Rules starter for %s\n", rules
);
612 if(U_SUCCESS(status
)) {
613 genericOrderingTestWithResult(coll
, s
, size
, result
);
615 } else if(status
== U_FILE_ACCESS_ERROR
) {
616 log_data_err("Is your data around?\n");
618 log_err("Unable to open collator with rules %s\n", rules
);
622 void genericRulesStarter(const char *rules
, const char *s
[], uint32_t size
) {
623 genericRulesTestWithResult(rules
, s
, size
, UCOL_LESS
);
626 static void TestTertiary()
630 UCollator
*myCollation
;
631 UErrorCode status
=U_ZERO_ERROR
;
632 const char* str
="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
634 rules
=(UChar
*)malloc(sizeof(UChar
*) * (len
+1));
635 u_uastrcpy(rules
, str
);
637 myCollation
=ucol_openRules(rules
, len
, UCOL_OFF
, UCOL_DEFAULT_STRENGTH
, NULL
, &status
);
638 if(U_FAILURE(status
)){
639 log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status
));
642 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
643 for (i
= 0; i
< 17 ; i
++)
645 doTest(myCollation
, testSourceCases
[i
], testTargetCases
[i
], results
[i
]);
648 ucol_close(myCollation
);
652 static void TestPrimary( )
656 UCollator
*myCollation
;
657 UErrorCode status
=U_ZERO_ERROR
;
658 const char* str
="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
660 rules
=(UChar
*)malloc(sizeof(UChar
*) * (len
+1));
661 u_uastrcpy(rules
, str
);
663 myCollation
=ucol_openRules(rules
, len
, UCOL_OFF
, UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
664 if(U_FAILURE(status
)){
665 log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status
));
667 ucol_setStrength(myCollation
, UCOL_PRIMARY
);
669 for (i
= 17; i
< 26 ; i
++)
672 doTest(myCollation
, testSourceCases
[i
], testTargetCases
[i
], results
[i
]);
675 ucol_close(myCollation
);
679 static void TestSecondary()
684 UCollator
*myCollation
;
685 UErrorCode status
=U_ZERO_ERROR
;
686 const char* str
="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
688 rules
=(UChar
*)malloc(sizeof(UChar
*) * (len
+1));
689 u_uastrcpy(rules
, str
);
691 myCollation
=ucol_openRules(rules
, len
, UCOL_OFF
, UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
692 if(U_FAILURE(status
)){
693 log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status
));
695 ucol_setStrength(myCollation
, UCOL_SECONDARY
);
696 for (i
= 26; i
< 34 ; i
++)
698 doTest(myCollation
, testSourceCases
[i
], testTargetCases
[i
], results
[i
]);
701 ucol_close(myCollation
);
705 static void TestIdentical()
710 UCollator
*myCollation
;
711 UErrorCode status
=U_ZERO_ERROR
;
712 const char* str
="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
714 rules
=(UChar
*)malloc(sizeof(UChar
*) * (len
+1));
715 u_uastrcpy(rules
, str
);
717 myCollation
=ucol_openRules(rules
, len
, UCOL_OFF
, UCOL_IDENTICAL
, NULL
,&status
);
718 if(U_FAILURE(status
)){
719 log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status
));
721 for(i
= 34; i
<37; i
++)
723 doTest(myCollation
, testSourceCases
[i
], testTargetCases
[i
], results
[i
]);
726 ucol_close(myCollation
);
730 static void TestExtra()
735 UCollator
*myCollation
;
736 UErrorCode status
= U_ZERO_ERROR
;
737 const char* str
="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
739 rules
=(UChar
*)malloc(sizeof(UChar
*) * (len
+1));
740 u_uastrcpy(rules
, str
);
742 myCollation
=ucol_openRules(rules
, len
, UCOL_OFF
, UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
743 if(U_FAILURE(status
)){
744 log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status
));
746 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
747 for (i
= 0; i
< COUNT_TEST_CASES
-1 ; i
++)
749 for (j
= i
+ 1; j
< COUNT_TEST_CASES
; j
+= 1)
752 doTest(myCollation
, testCases
[i
], testCases
[j
], UCOL_LESS
);
756 ucol_close(myCollation
);
760 static void TestJB581(void)
762 UChar dispName
[100];
763 int32_t bufferLen
= 0;
766 UCollationResult result
= UCOL_EQUAL
;
767 uint8_t sourceKeyArray
[100];
768 uint8_t targetKeyArray
[100];
769 int32_t sourceKeyOut
= 0,
771 UCollator
*myCollator
= 0;
772 UErrorCode status
= U_ZERO_ERROR
;
774 /*u_uastrcpy(source, "This is a test.");*/
775 /*u_uastrcpy(target, "THISISATEST.");*/
776 u_uastrcpy(source
, "THISISATEST.");
777 u_uastrcpy(target
, "Thisisatest.");
779 myCollator
= ucol_open("en_US", &status
);
780 if (U_FAILURE(status
)){
781 bufferLen
= uloc_getDisplayName("en_US", 0, dispName
, 100, &status
);
782 /*Report the error with display name... */
783 log_err("ERROR: Failed to create the collator for : \"%s\"\n", dispName
);
786 result
= ucol_strcoll(myCollator
, source
, -1, target
, -1);
787 /* result is 1, secondary differences only for ignorable space characters*/
790 log_err("Comparing two strings with only secondary differences in C failed.\n");
792 /* To compare them with just primary differences */
793 ucol_setStrength(myCollator
, UCOL_PRIMARY
);
794 result
= ucol_strcoll(myCollator
, source
, -1, target
, -1);
798 log_err("Comparing two strings with no differences in C failed.\n");
800 /* Now, do the same comparison with keys */
801 sourceKeyOut
= ucol_getSortKey(myCollator
, source
, -1, sourceKeyArray
, 100);
802 targetKeyOut
= ucol_getSortKey(myCollator
, target
, -1, targetKeyArray
, 100);
804 bufferLen
= ((targetKeyOut
> 100) ? 100 : targetKeyOut
);
805 result
= memcmp(sourceKeyArray
, targetKeyArray
, bufferLen
);
808 log_err("Comparing two strings with sort keys in C failed.\n");
810 ucol_close(myCollator
);
813 static void TestJB1401(void)
815 UCollator
*myCollator
= 0;
816 UErrorCode status
= U_ZERO_ERROR
;
817 static UChar NFD_UnsafeStartChars
[] = {
818 0x0f73, /* Tibetan Vowel Sign II */
819 0x0f75, /* Tibetan Vowel Sign UU */
820 0x0f81, /* Tibetan Vowel Sign Reversed II */
826 myCollator
= ucol_open("en_US", &status
);
827 if (U_FAILURE(status
)){
828 int32_t bufferLen
= 0;
829 UChar dispName
[100];
830 bufferLen
= uloc_getDisplayName("en_US", 0, dispName
, 100, &status
);
831 /*Report the error with display name... */
832 log_err("ERROR: Failed to create the collator for : \"%s\"\n", dispName
);
835 ucol_setAttribute(myCollator
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
836 if (U_FAILURE(status
)){
837 log_err("ERROR: Failed to set normalization mode ON for collator.\n");
847 /* Get the next funny character to be tested, and set up the
848 * three test strings X, Y, Z, consisting of an A-grave + test char,
849 * in original form, NFD, and then NFC form.
851 c
= NFD_UnsafeStartChars
[i
];
854 X
[0]=0xC0; X
[1]=c
; X
[2]=0; /* \u00C0 is A Grave*/
856 unorm_normalize(X
, -1, UNORM_NFD
, 0, Y
, 20, &status
);
857 unorm_normalize(Y
, -1, UNORM_NFC
, 0, Z
, 20, &status
);
858 if (U_FAILURE(status
)){
859 log_err("ERROR: Failed to normalize test of character %x\n", c
);
863 /* Collation test. All three strings should be equal.
864 * doTest does both strcoll and sort keys, with params in both orders.
866 doTest(myCollator
, X
, Y
, UCOL_EQUAL
);
867 doTest(myCollator
, X
, Z
, UCOL_EQUAL
);
868 doTest(myCollator
, Y
, Z
, UCOL_EQUAL
);
870 /* Run collation element iterators over the three strings. Results should be same for each.
873 UCollationElements
*ceiX
, *ceiY
, *ceiZ
;
874 int32_t ceX
, ceY
, ceZ
;
877 ceiX
= ucol_openElements(myCollator
, X
, -1, &status
);
878 ceiY
= ucol_openElements(myCollator
, Y
, -1, &status
);
879 ceiZ
= ucol_openElements(myCollator
, Z
, -1, &status
);
880 if (U_FAILURE(status
)) {
881 log_err("ERROR: uucol_openElements failed.\n");
886 ceX
= ucol_next(ceiX
, &status
);
887 ceY
= ucol_next(ceiY
, &status
);
888 ceZ
= ucol_next(ceiZ
, &status
);
889 if (U_FAILURE(status
)) {
890 log_err("ERROR: ucol_next failed for iteration #%d.\n", j
);
893 if (ceX
!= ceY
|| ceY
!= ceZ
) {
894 log_err("ERROR: ucol_next failed for iteration #%d.\n", j
);
897 if (ceX
== UCOL_NULLORDER
) {
901 ucol_closeElements(ceiX
);
902 ucol_closeElements(ceiY
);
903 ucol_closeElements(ceiZ
);
906 ucol_close(myCollator
);
912 * Tests the [variable top] tag in rule syntax. Since the default [alternate]
913 * tag has the value shifted, any codepoints before [variable top] should give
916 static void TestVariableTop(void)
918 const char *str
= "&z = [variable top]";
919 int len
= strlen(str
);
921 UCollator
*myCollation
;
922 UCollator
*enCollation
;
923 UErrorCode status
= U_ZERO_ERROR
;
927 uint8_t expected
[20];
929 rules
= (UChar
*)malloc(sizeof(UChar
*) * (len
+ 1));
930 u_uastrcpy(rules
, str
);
932 enCollation
= ucol_open("en_US", &status
);
933 myCollation
= ucol_openRules(rules
, len
, UCOL_OFF
,
934 UCOL_PRIMARY
,NULL
, &status
);
935 if (U_FAILURE(status
)) {
936 log_err("ERROR: in creation of rule based collator :%s\n",
937 myErrorName(status
));
941 ucol_setStrength(enCollation
, UCOL_PRIMARY
);
942 ucol_setAttribute(enCollation
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
,
944 ucol_setAttribute(myCollation
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
,
947 if (ucol_getAttribute(myCollation
, UCOL_ALTERNATE_HANDLING
, &status
) !=
948 UCOL_SHIFTED
|| U_FAILURE(status
)) {
949 log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n");
952 uprv_memset(expected
, 0, 20);
954 /* space is supposed to be a variable */
956 len
= ucol_getSortKey(enCollation
, source
, 1, result
,
959 if (uprv_memcmp(expected
, result
, len
) != 0) {
960 log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n");
966 len
= ucol_getSortKey(myCollation
, source
, 1, result
,
968 if (uprv_memcmp(expected
, result
, len
) != 0) {
969 log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n",
976 ucol_close(enCollation
);
977 ucol_close(myCollation
);
983 * Tests surrogate support.
984 * NOTE: This test used \\uD801\\uDC01 pair, which is now assigned to Deseret
985 * Therefore, another (unassigned) code point was used for this test.
987 static void TestSurrogates(void)
990 "&z<'\\uD800\\uDC00'<'\\uD800\\uDC0A\\u0308'<A";
991 int len
= strlen(str
);
994 UCollator
*myCollation
;
995 UCollator
*enCollation
;
996 UErrorCode status
= U_ZERO_ERROR
;
998 {{'z', 0, 0}, {0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {0xD800, 0xDC02}};
1000 {{0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {'A', 0, 0}, {0xD800, 0xDC03}};
1002 uint8_t enresult
[20], myresult
[20];
1005 /* tests for open rules with surrogate rules */
1006 rules
= (UChar
*)malloc(sizeof(UChar
*) * (len
+ 1));
1007 rlen
= u_unescape(str
, rules
, len
);
1009 enCollation
= ucol_open("en_US", &status
);
1010 myCollation
= ucol_openRules(rules
, rlen
, UCOL_OFF
,
1011 UCOL_TERTIARY
,NULL
, &status
);
1012 if (U_FAILURE(status
)) {
1013 log_err("ERROR: in creation of rule based collator :%s\n",
1014 myErrorName(status
));
1019 this test is to verify the supplementary sort key order in the english
1022 log_verbose("start of english collation supplementary characters test\n");
1024 doTest(enCollation
, source
[count
], target
[count
], UCOL_LESS
);
1027 doTest(enCollation
, source
[count
], target
[count
], UCOL_GREATER
);
1029 log_verbose("start of tailored collation supplementary characters test\n");
1031 /* tests getting collation elements for surrogates for tailored rules */
1033 doTest(myCollation
, source
[count
], target
[count
], UCOL_LESS
);
1037 /* tests that \uD800\uDC02 still has the same value, not changed */
1038 enlen
= ucol_getSortKey(enCollation
, source
[3], 2, enresult
, 20);
1039 mylen
= ucol_getSortKey(myCollation
, source
[3], 2, myresult
, 20);
1040 if (enlen
!= mylen
||
1041 uprv_memcmp(enresult
, myresult
, enlen
) != 0) {
1042 log_verbose("Failed : non-tailored supplementary characters should have the same value\n");
1046 ucol_close(enCollation
);
1047 ucol_close(myCollation
);
1053 *### TODO: Add more invalid rules to test all different scenarios.
1058 #define MAX_ERROR_STATES 2
1060 static const char* rulesArr
[MAX_ERROR_STATES
] = {
1061 "& C < ch, cH, Ch[this should fail]<d",
1062 "& C < ch, cH, & Ch[variable top]"
1064 static const char* preContextArr
[MAX_ERROR_STATES
] = {
1069 static const char* postContextArr
[MAX_ERROR_STATES
] = {
1075 for(i
= 0;i
<MAX_ERROR_STATES
;i
++){
1076 UChar rules
[1000] = { '\0' };
1077 UChar preContextExp
[1000] = { '\0' };
1078 UChar postContextExp
[1000] = { '\0' };
1079 UParseError parseError
;
1080 UErrorCode status
= U_ZERO_ERROR
;
1082 u_charsToUChars(rulesArr
[i
],rules
,uprv_strlen(rulesArr
[i
])+1);
1083 u_charsToUChars(preContextArr
[i
],preContextExp
,uprv_strlen(preContextArr
[i
])+1);
1084 u_charsToUChars(postContextArr
[i
],postContextExp
,uprv_strlen(postContextArr
[i
])+1);
1085 /* clean up stuff in parseError */
1086 u_memset(parseError
.preContext
,0x0000,U_PARSE_CONTEXT_LEN
);
1087 u_memset(parseError
.postContext
,0x0000,U_PARSE_CONTEXT_LEN
);
1088 /* open the rules and test */
1089 coll
= ucol_openRules(rules
,u_strlen(rules
),UCOL_OFF
,UCOL_DEFAULT_STRENGTH
,&parseError
,&status
);
1090 if(u_strcmp(parseError
.preContext
,preContextExp
)!=0){
1091 log_err("preContext in UParseError for ucol_openRules does not match\n");
1093 if(u_strcmp(parseError
.postContext
,postContextExp
)!=0){
1094 log_err("postContext in UParseError for ucol_openRules does not match\n");
1100 TestJitterbug1098(){
1102 UCollator
* c1
= NULL
;
1103 UErrorCode status
= U_ZERO_ERROR
;
1104 UParseError parseError
;
1105 char preContext
[200]={0};
1106 char postContext
[200]={0};
1108 const char* rules
[] = {
1116 const UCollationResult results1098
[] = {
1122 const UChar input
[][2]= {
1130 u_memset(parseError
.preContext
,0x0000,U_PARSE_CONTEXT_LEN
);
1131 u_memset(parseError
.postContext
,0x0000,U_PARSE_CONTEXT_LEN
);
1132 for(;rules
[i
]!=0;i
++){
1133 u_uastrcpy(rule
, rules
[i
]);
1134 c1
= ucol_openRules(rule
, u_strlen(rule
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
, &parseError
, &status
);
1135 if(U_FAILURE(status
)){
1136 log_err("Could not parse the rules syntax. Error: %s ", u_errorName(status
));
1138 if (status
== U_PARSE_ERROR
) {
1139 u_UCharsToChars(parseError
.preContext
,preContext
,20);
1140 u_UCharsToChars(parseError
.postContext
,postContext
,20);
1141 log_verbose("\n\tPre-Context: %s \n\tPost-Context:%s \n",preContext
,postContext
);
1148 doTest(c1
,X
,Y
,results1098
[i
]);
/* These tests do cleanup and reinitialize ICU in the course of their operation.
 * The ICU data directory must be preserved across these operations.
 * Here is a helper function to assist with that.
 *
 * Returns a malloc()ed copy of the string reported by u_getDataDirectory(),
 * or NULL when no directory is reported or the copy cannot be allocated.
 * The caller owns the returned buffer.
 */
static char *safeGetICUDataDirectory() {
    const char *dataDir = u_getDataDirectory();  /* Returned string vanishes with u_cleanup */
    char *retStr = NULL;

    if (dataDir != NULL) {
        size_t nBytes = strlen(dataDir) + 1;   /* include the terminating NUL */

        retStr = (char *)malloc(nBytes);
        if (retStr != NULL) {
            /* Copy only after the allocation is known to have succeeded. */
            memcpy(retStr, dataDir, nBytes);
        }
    }
    return retStr;
}
1169 TestFCDCrash(void) {
1170 static const char *test
[] = {
1171 "Gr\\u00F6\\u00DFe",
1175 char *icuDataDir
= safeGetICUDataDirectory();
1176 UErrorCode status
= U_ZERO_ERROR
;
1177 UCollator
*coll
= ucol_open("es", &status
);
1178 if(U_FAILURE(status
)) {
1179 log_err("Couldn't open collator\n");
1185 u_setDataDirectory(icuDataDir
);
1186 coll
= ucol_open("de_DE", &status
);
1187 if(U_FAILURE(status
)) {
1188 log_err("Couldn't open collator\n");
1191 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
1192 genericOrderingTest(coll
, test
, 2);
1198 #endif /* #if !UCONFIG_NO_COLLATION */