2 /********************************************************************
4 * Copyright (c) 2001-2012, International Business Machines Corporation and
5 * others. All Rights Reserved.
6 ********************************************************************/
7 /*******************************************************************************
11 *******************************************************************************/
13 * These are the tests specific to ICU 1.8 and above, that I didn't know where
19 #include "unicode/utypes.h"
21 #if !UCONFIG_NO_COLLATION
23 #include "unicode/ucol.h"
24 #include "unicode/ucoleitr.h"
25 #include "unicode/uloc.h"
29 #include "unicode/ustring.h"
36 #include "unicode/parseerr.h"
37 #include "unicode/ucnv.h"
38 #include "unicode/ures.h"
39 #include "unicode/uscript.h"
40 #include "unicode/utf16.h"
/* Number of elements in a true array `a`. Not valid for pointers or for array
 * parameters, which have already decayed to pointers. The argument is
 * parenthesized before subscripting so that an expression argument (a cast or
 * dereference, for example) cannot re-associate with the [0]. */
#define LEN(a) (sizeof(a)/sizeof((a)[0]))
47 #define MAX_TOKEN_LEN 16
/* Function type for the string-comparison callbacks that the probing helpers
 * in this file receive as `tst_strcoll *func` (ucaTest has this shape).
 *   collator       - opaque handle; the callback casts it to its own type
 *   object         - extra integer argument (ucaTest does not use it)
 *   source, sLen   - first UChar string and its length
 *   target, tLen   - second UChar string and its length
 * Returns the comparison outcome as a UCollationResult. */
typedef UCollationResult tst_strcoll(void *collator, const int object,
                        const UChar *source, const int sLen,
                        const UChar *target, const int tLen);
55 const static char cnt1
[][10] = {
70 const static char cnt2
[][10] = {
82 static void IncompleteCntTest(void)
84 UErrorCode status
= U_ZERO_ERROR
;
89 UCollator
*coll
= NULL
;
90 uint32_t i
= 0, j
= 0;
93 u_uastrcpy(temp
, " & Z < ABC < Q < B");
95 coll
= ucol_openRules(temp
, u_strlen(temp
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
, NULL
,&status
);
97 if(U_SUCCESS(status
)) {
98 size
= sizeof(cnt1
)/sizeof(cnt1
[0]);
99 for(i
= 0; i
< size
-1; i
++) {
100 for(j
= i
+1; j
< size
; j
++) {
101 UCollationElements
*iter
;
102 u_uastrcpy(t1
, cnt1
[i
]);
103 u_uastrcpy(t2
, cnt1
[j
]);
104 doTest(coll
, t1
, t2
, UCOL_LESS
);
105 /* synwee : added collation element iterator test */
106 iter
= ucol_openElements(coll
, t2
, u_strlen(t2
), &status
);
107 if (U_FAILURE(status
)) {
108 log_err("Creation of iterator failed\n");
112 ucol_closeElements(iter
);
120 u_uastrcpy(temp
, " & Z < DAVIS < MARK <DAV");
121 coll
= ucol_openRules(temp
, u_strlen(temp
), UCOL_OFF
, UCOL_DEFAULT_STRENGTH
,NULL
, &status
);
123 if(U_SUCCESS(status
)) {
124 size
= sizeof(cnt2
)/sizeof(cnt2
[0]);
125 for(i
= 0; i
< size
-1; i
++) {
126 for(j
= i
+1; j
< size
; j
++) {
127 UCollationElements
*iter
;
128 u_uastrcpy(t1
, cnt2
[i
]);
129 u_uastrcpy(t2
, cnt2
[j
]);
130 doTest(coll
, t1
, t2
, UCOL_LESS
);
132 /* synwee : added collation element iterator test */
133 iter
= ucol_openElements(coll
, t2
, u_strlen(t2
), &status
);
134 if (U_FAILURE(status
)) {
135 log_err("Creation of iterator failed\n");
139 ucol_closeElements(iter
);
149 const static char shifted
[][20] = {
161 const static UCollationResult shiftedTert
[] = {
173 const static char nonignorable
[][20] = {
185 static void BlackBirdTest(void) {
186 UErrorCode status
= U_ZERO_ERROR
;
190 uint32_t i
= 0, j
= 0;
192 UCollator
*coll
= ucol_open("en_US", &status
);
194 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &status
);
195 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_NON_IGNORABLE
, &status
);
197 if(U_SUCCESS(status
)) {
198 size
= sizeof(nonignorable
)/sizeof(nonignorable
[0]);
199 for(i
= 0; i
< size
-1; i
++) {
200 for(j
= i
+1; j
< size
; j
++) {
201 u_uastrcpy(t1
, nonignorable
[i
]);
202 u_uastrcpy(t2
, nonignorable
[j
]);
203 doTest(coll
, t1
, t2
, UCOL_LESS
);
208 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
209 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_QUATERNARY
, &status
);
211 if(U_SUCCESS(status
)) {
212 size
= sizeof(shifted
)/sizeof(shifted
[0]);
213 for(i
= 0; i
< size
-1; i
++) {
214 for(j
= i
+1; j
< size
; j
++) {
215 u_uastrcpy(t1
, shifted
[i
]);
216 u_uastrcpy(t2
, shifted
[j
]);
217 doTest(coll
, t1
, t2
, UCOL_LESS
);
222 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_TERTIARY
, &status
);
223 if(U_SUCCESS(status
)) {
224 size
= sizeof(shifted
)/sizeof(shifted
[0]);
225 for(i
= 1; i
< size
; i
++) {
226 u_uastrcpy(t1
, shifted
[i
-1]);
227 u_uastrcpy(t2
, shifted
[i
]);
228 doTest(coll
, t1
, t2
, shiftedTert
[i
]);
235 const static UChar testSourceCases
[][MAX_TOKEN_LEN
] = {
236 {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
237 {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
238 {0x0041/*'A'*/, 0x0300, 0x0000},
239 {0x00C0, 0x0301, 0x0000},
240 /* this would work with forced normalization */
241 {0x00C0, 0x0316, 0x0000}
244 const static UChar testTargetCases
[][MAX_TOKEN_LEN
] = {
245 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
246 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
248 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
249 /* this would work with forced normalization */
250 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
253 const static UCollationResult results
[] = {
261 static void FunkyATest(void)
265 UErrorCode status
= U_ZERO_ERROR
;
266 UCollator
*myCollation
;
267 myCollation
= ucol_open("en_US", &status
);
268 if(U_FAILURE(status
)){
269 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
272 log_verbose("Testing some A letters, for some reason\n");
273 ucol_setAttribute(myCollation
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
274 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
275 for (i
= 0; i
< 4 ; i
++)
277 doTest(myCollation
, testSourceCases
[i
], testTargetCases
[i
], results
[i
]);
279 ucol_close(myCollation
);
282 UColAttributeValue caseFirst
[] = {
289 UColAttributeValue alternateHandling
[] = {
294 UColAttributeValue caseLevel
[] = {
299 UColAttributeValue strengths
[] = {
308 static const char * strengthsC
[] = {
316 static const char * caseFirstC
[] = {
323 static const char * alternateHandlingC
[] = {
324 "UCOL_NON_IGNORABLE",
328 static const char * caseLevelC
[] = {
333 /* not used currently - does not test only prints */
334 static void PrintMarkDavis(void)
336 UErrorCode status
= U_ZERO_ERROR
;
338 uint8_t sortkey
[256];
339 UCollator
*coll
= ucol_open("en_US", &status
);
340 uint32_t h
,i
,j
,k
, sortkeysize
;
345 log_verbose("PrintMarkDavis");
347 u_uastrcpy(m
, "Mark Davis");
353 for(i
= 0; i
<sizem
; i
++) {
354 fprintf(stderr
, "\\u%04X ", m
[i
]);
356 fprintf(stderr
, "\n");
358 for(h
= 0; h
<sizeof(caseFirst
)/sizeof(caseFirst
[0]); h
++) {
359 ucol_setAttribute(coll
, UCOL_CASE_FIRST
, caseFirst
[i
], &status
);
360 fprintf(stderr
, "caseFirst: %s\n", caseFirstC
[h
]);
362 for(i
= 0; i
<sizeof(alternateHandling
)/sizeof(alternateHandling
[0]); i
++) {
363 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, alternateHandling
[i
], &status
);
364 fprintf(stderr
, " AltHandling: %s\n", alternateHandlingC
[i
]);
366 for(j
= 0; j
<sizeof(caseLevel
)/sizeof(caseLevel
[0]); j
++) {
367 ucol_setAttribute(coll
, UCOL_CASE_LEVEL
, caseLevel
[j
], &status
);
368 fprintf(stderr
, " caseLevel: %s\n", caseLevelC
[j
]);
370 for(k
= 0; k
<sizeof(strengths
)/sizeof(strengths
[0]); k
++) {
371 ucol_setAttribute(coll
, UCOL_STRENGTH
, strengths
[k
], &status
);
372 sortkeysize
= ucol_getSortKey(coll
, m
, sizem
, sortkey
, 256);
373 fprintf(stderr
, " strength: %s\n Sortkey: ", strengthsC
[k
]);
374 fprintf(stderr
, "%s\n", ucol_sortKeyToString(coll
, sortkey
, buffer
, &len
));
385 static void BillFairmanTest(void) {
387 ** check for actual locale via ICU resource bundles
389 ** lp points to the original locale ("fr_FR_....")
392 UResourceBundle
*lr
,*cr
;
393 UErrorCode lec
= U_ZERO_ERROR
;
394 const char *lp
= "fr_FR_you_ll_never_find_this_locale";
396 log_verbose("BillFairmanTest\n");
398 lr
= ures_open(NULL
,lp
,&lec
);
400 cr
= ures_getByKey(lr
,"collations",0,&lec
);
402 lp
= ures_getLocaleByType(cr
, ULOC_ACTUAL_LOCALE
, &lec
);
404 if (U_SUCCESS(lec
)) {
405 if(strcmp(lp
, "fr") != 0) {
406 log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp
);
416 static void testPrimary(UCollator
* col
, const UChar
* p
,const UChar
* q
){
417 UChar source
[256] = { '\0'};
418 UChar target
[256] = { '\0'};
422 UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491;
423 UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413;
425 /*log_verbose("Testing primary\n");*/
427 doTest(col
, p
, q
, UCOL_LESS
);
429 UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));
431 if(result!=UCOL_LESS){
432 aescstrdup(p,utfSource,256);
433 aescstrdup(q,utfTarget,256);
434 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget);
438 u_strcpy(source
+1,p
);
440 u_strcpy(target
+1,q
);
441 doTest(col
, source
, target
, UCOL_LESS
);
443 fprintf(file,"Primary swamps 2nd failed source: %s target: %s \n", utfSource,utfTarget);
447 static void testSecondary(UCollator
* col
, const UChar
* p
,const UChar
* q
){
448 UChar source
[256] = { '\0'};
449 UChar target
[256] = { '\0'};
451 /*log_verbose("Testing secondary\n");*/
453 doTest(col
, p
, q
, UCOL_LESS
);
455 fprintf(file,"secondary failed source: %s target: %s \n", utfSource,utfTarget);
458 u_strcpy(source
+1,p
);
460 u_strcpy(target
+1,q
);
462 doTest(col
, source
, target
, UCOL_LESS
);
464 fprintf(file,"secondary swamps 3rd failed source: %s target: %s \n",utfSource,utfTarget);
469 source
[u_strlen(p
)] = 0x62;
470 source
[u_strlen(p
)+1] = 0;
474 target
[u_strlen(q
)] = 0x61;
475 target
[u_strlen(q
)+1] = 0;
477 doTest(col
, source
, target
, UCOL_GREATER
);
480 fprintf(file,"secondary is swamped by 1 failed source: %s target: %s \n",utfSource,utfTarget);
484 static void testTertiary(UCollator
* col
, const UChar
* p
,const UChar
* q
){
485 UChar source
[256] = { '\0'};
486 UChar target
[256] = { '\0'};
488 /*log_verbose("Testing tertiary\n");*/
490 doTest(col
, p
, q
, UCOL_LESS
);
492 fprintf(file,"Tertiary failed source: %s target: %s \n",utfSource,utfTarget);
495 u_strcpy(source
+1,p
);
497 u_strcpy(target
+1,q
);
499 doTest(col
, source
, target
, UCOL_LESS
);
501 fprintf(file,"Tertiary swamps 4th failed source: %s target: %s \n", utfSource,utfTarget);
505 source
[u_strlen(p
)] = 0xE0;
506 source
[u_strlen(p
)+1] = 0;
509 target
[u_strlen(q
)] = 0x61;
510 target
[u_strlen(q
)+1] = 0;
512 doTest(col
, source
, target
, UCOL_GREATER
);
515 fprintf(file,"Tertiary is swamped by 3rd failed source: %s target: %s \n",utfSource,utfTarget);
519 static void testEquality(UCollator
* col
, const UChar
* p
,const UChar
* q
){
521 UChar source[256] = { '\0'};
522 UChar target[256] = { '\0'};
525 doTest(col
, p
, q
, UCOL_EQUAL
);
527 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget);
531 static void testCollator(UCollator
*coll
, UErrorCode
*status
) {
532 const UChar
*rules
= NULL
, *current
= NULL
;
534 uint32_t strength
= 0;
535 uint32_t chOffset
= 0; uint32_t chLen
= 0;
536 uint32_t exOffset
= 0; uint32_t exLen
= 0;
537 uint32_t prefixOffset
= 0; uint32_t prefixLen
= 0;
538 uint32_t firstEx
= 0;
539 /* uint32_t rExpsLen = 0; */
540 uint32_t firstLen
= 0;
541 UBool varT
= FALSE
; UBool top_
= TRUE
;
543 UBool startOfRules
= TRUE
;
544 UBool lastReset
= FALSE
;
545 UBool before
= FALSE
;
546 uint32_t beforeStrength
= 0;
554 UChar
*rulesCopy
= NULL
;
555 UParseError parseError
;
557 uprv_memset(&src
, 0, sizeof(UColTokenParser
));
561 rules
= ucol_getRules(coll
, &ruleLen
);
562 if(U_SUCCESS(*status
) && ruleLen
> 0) {
563 rulesCopy
= (UChar
*)uprv_malloc((ruleLen
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
)*sizeof(UChar
));
564 uprv_memcpy(rulesCopy
, rules
, ruleLen
*sizeof(UChar
));
565 src
.current
= src
.source
= rulesCopy
;
566 src
.end
= rulesCopy
+ruleLen
;
567 src
.extraCurrent
= src
.end
;
568 src
.extraEnd
= src
.end
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
;
569 *first
= *second
= 0;
571 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
572 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
573 while ((current
= ucol_tok_parseNextToken(&src
, startOfRules
,&parseError
, status
)) != NULL
) {
574 strength
= src
.parsedToken
.strength
;
575 chOffset
= src
.parsedToken
.charsOffset
;
576 chLen
= src
.parsedToken
.charsLen
;
577 exOffset
= src
.parsedToken
.extensionOffset
;
578 exLen
= src
.parsedToken
.extensionLen
;
579 prefixOffset
= src
.parsedToken
.prefixOffset
;
580 prefixLen
= src
.parsedToken
.prefixLen
;
581 specs
= src
.parsedToken
.flags
;
583 startOfRules
= FALSE
;
584 varT
= (UBool
)((specs
& UCOL_TOK_VARIABLE_TOP
) != 0);
585 top_
= (UBool
)((specs
& UCOL_TOK_TOP
) != 0);
586 if(top_
) { /* if reset is on top, the sequence is broken. We should have an empty string */
589 u_strncpy(second
,src
.source
+chOffset
, chLen
);
592 if(exLen
> 0 && firstEx
== 0) {
593 u_strncat(first
, src
.source
+exOffset
, exLen
);
594 first
[firstLen
+exLen
] = 0;
597 if(lastReset
== TRUE
&& prefixLen
!= 0) {
598 u_strncpy(first
+prefixLen
, first
, firstLen
);
599 u_strncpy(first
, src
.source
+prefixOffset
, prefixLen
);
600 first
[firstLen
+prefixLen
] = 0;
601 firstLen
= firstLen
+prefixLen
;
604 if(before
== TRUE
) { /* swap first and second */
605 u_strcpy(tempB
, first
);
606 u_strcpy(first
, second
);
607 u_strcpy(second
, tempB
);
616 if(beforeStrength
< strength
) {
617 strength
= beforeStrength
;
625 testEquality(coll
,first
,second
);
628 testPrimary(coll
,first
,second
);
631 testSecondary(coll
,first
,second
);
634 testTertiary(coll
,first
,second
);
638 before
= (UBool
)((specs
& UCOL_TOK_BEFORE
) != 0);
640 beforeStrength
= (specs
& UCOL_TOK_BEFORE
)-1;
647 if(before
== TRUE
&& strength
!= UCOL_TOK_RESET
) { /* first and second were swapped */
652 u_strcpy(first
, second
);
655 uprv_free(src
.source
);
659 static UCollationResult
ucaTest(void *collator
, const int object
, const UChar
*source
, const int sLen
, const UChar
*target
, const int tLen
) {
660 UCollator
*UCA
= (UCollator
*)collator
;
661 return ucol_strcoll(UCA
, source
, sLen
, target
, tLen
);
665 static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
666 #if U_PLATFORM_HAS_WIN32_API
667 LCID lcid = (LCID)collator;
668 return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);
675 static UCollationResult
swampEarlier(tst_strcoll
* func
, void *collator
, int opts
,
677 const UChar
*s
, const uint32_t sLen
,
678 const UChar
*t
, const uint32_t tLen
) {
679 UChar source
[256] = {0};
680 UChar target
[256] = {0};
683 u_strcpy(source
+1, s
);
685 u_strcpy(target
+1, t
);
687 return func(collator
, opts
, source
, sLen
+1, target
, tLen
+1);
690 static UCollationResult
swampLater(tst_strcoll
* func
, void *collator
, int opts
,
692 const UChar
*s
, const uint32_t sLen
,
693 const UChar
*t
, const uint32_t tLen
) {
694 UChar source
[256] = {0};
695 UChar target
[256] = {0};
702 return func(collator
, opts
, source
, sLen
+1, target
, tLen
+1);
705 static uint32_t probeStrength(tst_strcoll
* func
, void *collator
, int opts
,
706 const UChar
*s
, const uint32_t sLen
,
707 const UChar
*t
, const uint32_t tLen
,
708 UCollationResult result
) {
709 /*UChar fPrimary = 0x6d;*/
710 /*UChar sPrimary = 0x6e;*/
711 UChar fSecondary
= 0x310d;
712 UChar sSecondary
= 0x31a3;
713 UChar fTertiary
= 0x310f;
714 UChar sTertiary
= 0x31b7;
716 UCollationResult oposite
;
717 if(result
== UCOL_EQUAL
) {
718 return UCOL_IDENTICAL
;
719 } else if(result
== UCOL_GREATER
) {
722 oposite
= UCOL_GREATER
;
725 if(swampEarlier(func
, collator
, opts
, sSecondary
, fSecondary
, s
, sLen
, t
, tLen
) == result
) {
727 } else if((swampEarlier(func
, collator
, opts
, sTertiary
, 0x310f, s
, sLen
, t
, tLen
) == result
) &&
728 (swampEarlier(func
, collator
, opts
, 0x310f, sTertiary
, s
, sLen
, t
, tLen
) == result
)) {
729 return UCOL_SECONDARY
;
730 } else if((swampLater(func
, collator
, opts
, sTertiary
, fTertiary
, s
, sLen
, t
, tLen
) == result
) &&
731 (swampLater(func
, collator
, opts
, fTertiary
, sTertiary
, s
, sLen
, t
, tLen
) == result
)) {
732 return UCOL_TERTIARY
;
733 } else if((swampLater(func
, collator
, opts
, sTertiary
, 0x310f, s
, sLen
, t
, tLen
) == oposite
) &&
734 (swampLater(func
, collator
, opts
, fTertiary
, sTertiary
, s
, sLen
, t
, tLen
) == oposite
)) {
735 return UCOL_QUATERNARY
;
737 return UCOL_IDENTICAL
;
741 static char *getRelationSymbol(UCollationResult res
, uint32_t strength
, char *buffer
) {
744 if(res
== UCOL_EQUAL
|| strength
== 0xdeadbeef) {
748 } else if(res
== UCOL_GREATER
) {
749 for(i
= 0; i
<strength
+1; i
++) {
752 buffer
[strength
+1] = '\0';
754 for(i
= 0; i
<strength
+1; i
++) {
757 buffer
[strength
+1] = '\0';
765 static void logFailure (const char *platform
, const char *test
,
766 const UChar
*source
, const uint32_t sLen
,
767 const UChar
*target
, const uint32_t tLen
,
768 UCollationResult realRes
, uint32_t realStrength
,
769 UCollationResult expRes
, uint32_t expStrength
, UBool error
) {
773 char sEsc
[256], s
[256], tEsc
[256], t
[256], b
[256], output
[512], relation
[256];
774 static int32_t maxOutputLength
= 0;
775 int32_t outputLength
;
777 *sEsc
= *tEsc
= *s
= *t
= 0;
779 log_err("Difference between expected and generated order. Run test with -v for more info\n");
780 } else if(getTestOption(VERBOSITY_OPTION
) == 0) {
783 for(i
= 0; i
<sLen
; i
++) {
784 sprintf(b
, "%04X", source
[i
]);
789 if(source
[i
] < 0x80) {
790 sprintf(b
, "(%c)", source
[i
]);
794 for(i
= 0; i
<tLen
; i
++) {
795 sprintf(b
, "%04X", target
[i
]);
800 if(target
[i
] < 0x80) {
801 sprintf(b
, "(%c)", target
[i
]);
806 strcpy(output, "[[ ");
807 strcat(output, sEsc);
808 strcat(output, getRelationSymbol(expRes, expStrength, relation));
809 strcat(output, tEsc);
811 strcat(output, " : ");
813 strcat(output, sEsc);
814 strcat(output, getRelationSymbol(realRes, realStrength, relation));
815 strcat(output, tEsc);
816 strcat(output, " ]] ");
818 log_verbose("%s", output);
822 strcpy(output
, "DIFF: ");
825 strcat(output
, " : ");
828 strcat(output
, test
);
829 strcat(output
, ": ");
831 strcat(output
, sEsc
);
832 strcat(output
, getRelationSymbol(expRes
, expStrength
, relation
));
833 strcat(output
, tEsc
);
837 strcat(output
, platform
);
838 strcat(output
, ": ");
840 strcat(output
, sEsc
);
841 strcat(output
, getRelationSymbol(realRes
, realStrength
, relation
));
842 strcat(output
, tEsc
);
844 outputLength
= (int32_t)strlen(output
);
845 if(outputLength
> maxOutputLength
) {
846 maxOutputLength
= outputLength
;
847 U_ASSERT(outputLength
< sizeof(output
));
850 log_verbose("%s\n", output
);
855 static void printOutRules(const UChar *rules) {
856 uint32_t len = u_strlen(rules);
861 fprintf(stdout, "Rules:");
863 for(i = 0; i<len; i++) {
864 if(rules[i]<0x7f && rules[i]>=0x20) {
865 toPrint = (char)rules[i];
868 fprintf(stdout, "\n&");
869 } else if(toPrint == ';') {
870 fprintf(stdout, "<<");
872 } else if(toPrint == ',') {
873 fprintf(stdout, "<<<");
876 fprintf(stdout, "%c", toPrint);
879 } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
880 fprintf(stdout, "\\u%04X", rules[i]);
884 fprintf(stdout, "\n");
894 static uint32_t testSwitch(tst_strcoll
* func
, void *collator
, int opts
, uint32_t strength
, const UChar
*first
, const UChar
*second
, const char* msg
, UBool error
) {
896 UCollationResult realResult
;
897 uint32_t realStrength
;
899 uint32_t sLen
= u_strlen(first
);
900 uint32_t tLen
= u_strlen(second
);
902 realResult
= func(collator
, opts
, first
, sLen
, second
, tLen
);
903 realStrength
= probeStrength(func
, collator
, opts
, first
, sLen
, second
, tLen
, realResult
);
905 if(strength
== UCOL_IDENTICAL
&& realResult
!= UCOL_EQUAL
) {
906 logFailure(msg
, "tailoring", first
, sLen
, second
, tLen
, realResult
, realStrength
, UCOL_EQUAL
, strength
, error
);
908 } else if(realResult
!= UCOL_LESS
|| realStrength
!= strength
) {
909 logFailure(msg
, "tailoring", first
, sLen
, second
, tLen
, realResult
, realStrength
, UCOL_LESS
, strength
, error
);
916 static void testAgainstUCA(UCollator
*coll
, UCollator
*UCA
, const char *refName
, UBool error
, UErrorCode
*status
) {
917 const UChar
*rules
= NULL
, *current
= NULL
;
919 uint32_t strength
= 0;
920 uint32_t chOffset
= 0; uint32_t chLen
= 0;
921 uint32_t exOffset
= 0; uint32_t exLen
= 0;
922 uint32_t prefixOffset
= 0; uint32_t prefixLen
= 0;
923 /* uint32_t rExpsLen = 0; */
924 uint32_t firstLen
= 0, secondLen
= 0;
925 UBool varT
= FALSE
; UBool top_
= TRUE
;
927 UBool startOfRules
= TRUE
;
933 UChar
*rulesCopy
= NULL
;
935 uint32_t UCAdiff
= 0;
936 uint32_t Windiff
= 1;
937 UParseError parseError
;
939 uprv_memset(&src
, 0, sizeof(UColTokenParser
));
942 rules
= ucol_getRules(coll
, &ruleLen
);
944 /*printOutRules(rules);*/
946 if(U_SUCCESS(*status
) && ruleLen
> 0) {
947 rulesCopy
= (UChar
*)uprv_malloc((ruleLen
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
)*sizeof(UChar
));
948 uprv_memcpy(rulesCopy
, rules
, ruleLen
*sizeof(UChar
));
949 src
.current
= src
.source
= rulesCopy
;
950 src
.end
= rulesCopy
+ruleLen
;
951 src
.extraCurrent
= src
.end
;
952 src
.extraEnd
= src
.end
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
;
953 *first
= *second
= 0;
955 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
956 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
957 while ((current
= ucol_tok_parseNextToken(&src
, startOfRules
, &parseError
,status
)) != NULL
) {
958 strength
= src
.parsedToken
.strength
;
959 chOffset
= src
.parsedToken
.charsOffset
;
960 chLen
= src
.parsedToken
.charsLen
;
961 exOffset
= src
.parsedToken
.extensionOffset
;
962 exLen
= src
.parsedToken
.extensionLen
;
963 prefixOffset
= src
.parsedToken
.prefixOffset
;
964 prefixLen
= src
.parsedToken
.prefixLen
;
965 specs
= src
.parsedToken
.flags
;
967 startOfRules
= FALSE
;
968 varT
= (UBool
)((specs
& UCOL_TOK_VARIABLE_TOP
) != 0);
969 top_
= (UBool
)((specs
& UCOL_TOK_TOP
) != 0);
971 u_strncpy(second
,src
.source
+chOffset
, chLen
);
976 u_strncat(first
, src
.source
+exOffset
, exLen
);
977 first
[firstLen
+exLen
] = 0;
981 if(strength
!= UCOL_TOK_RESET
) {
982 if((*first
<0x3400 || *first
>=0xa000) && (*second
<0x3400 || *second
>=0xa000)) {
983 UCAdiff
+= testSwitch(&ucaTest
, (void *)UCA
, 0, strength
, first
, second
, refName
, error
);
984 /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
990 u_strcpy(first
, second
);
993 if(UCAdiff
!= 0 && Windiff
!= 0) {
997 log_verbose("No immediate difference with %s!\n", refName
);
1000 log_verbose("No immediate difference with Win32!\n");
1002 uprv_free(src
.source
);
1007 * Takes two CEs (lead and continuation) and
1008 * compares them as CEs should be compared:
* primary vs. primary, secondary vs. secondary,
1010 * tertiary vs. tertiary
1012 static int32_t compareCEs(uint32_t s1
, uint32_t s2
,
1013 uint32_t t1
, uint32_t t2
) {
1014 uint32_t s
= 0, t
= 0;
1015 if(s1
== t1
&& s2
== t2
) {
1018 s
= (s1
& 0xFFFF0000)|((s2
& 0xFFFF0000)>>16);
1019 t
= (t1
& 0xFFFF0000)|((t2
& 0xFFFF0000)>>16);
1025 s
= (s1
& 0x0000FF00) | (s2
& 0x0000FF00)>>8;
1026 t
= (t1
& 0x0000FF00) | (t2
& 0x0000FF00)>>8;
1032 s
= (s1
& 0x000000FF)<<8 | (s2
& 0x000000FF);
1033 t
= (t1
& 0x000000FF)<<8 | (t2
& 0x000000FF);
1045 uint32_t startContCE
;
1047 uint32_t limitContCE
;
1048 } indirectBoundaries
;
1050 /* these values are used for finding CE values for indirect positioning. */
1051 /* Indirect positioning is a mechanism for allowing resets on symbolic */
1052 /* values. It only works for resets and you cannot tailor indirect names */
1053 /* An indirect name can define either an anchor point or a range. An */
1054 /* anchor point behaves in exactly the same way as a code point in reset */
1055 /* would, except that it cannot be tailored. A range (we currently only */
/* know for the [top] range) will explicitly set the upper bound for */
1057 /* generated CEs, thus allowing for better control over how many CEs can */
1058 /* be squeezed between in the range without performance penalty. */
1059 /* In that respect, we use [top] for tailoring of locales that use CJK */
1060 /* characters. Other indirect values are currently a pure convenience, */
1061 /* they can be used to assure that the CEs will be always positioned in */
1062 /* the same place relative to a point with known properties (e.g. first */
1063 /* primary ignorable). */
1064 static indirectBoundaries ucolIndirectBoundaries
[15];
1065 static UBool indirectBoundariesSet
= FALSE
;
1066 static void setIndirectBoundaries(uint32_t indexR
, uint32_t *start
, uint32_t *end
) {
1067 /* Set values for the top - TODO: once we have values for all the indirects, we are going */
/* to initialize here. */
1069 ucolIndirectBoundaries
[indexR
].startCE
= start
[0];
1070 ucolIndirectBoundaries
[indexR
].startContCE
= start
[1];
1072 ucolIndirectBoundaries
[indexR
].limitCE
= end
[0];
1073 ucolIndirectBoundaries
[indexR
].limitContCE
= end
[1];
1075 ucolIndirectBoundaries
[indexR
].limitCE
= 0;
1076 ucolIndirectBoundaries
[indexR
].limitContCE
= 0;
1080 static void testCEs(UCollator
*coll
, UErrorCode
*status
) {
1081 const UChar
*rules
= NULL
, *current
= NULL
;
1082 int32_t ruleLen
= 0;
1084 uint32_t strength
= 0;
1085 uint32_t maxStrength
= UCOL_IDENTICAL
;
1086 uint32_t baseCE
, baseContCE
, nextCE
, nextContCE
, currCE
, currContCE
;
1088 uint32_t lastContCE
;
1091 uint32_t chOffset
= 0; uint32_t chLen
= 0;
1092 uint32_t exOffset
= 0; uint32_t exLen
= 0;
1093 uint32_t prefixOffset
= 0; uint32_t prefixLen
= 0;
1094 uint32_t oldOffset
= 0;
1096 /* uint32_t rExpsLen = 0; */
1097 /* uint32_t firstLen = 0; */
1099 UBool varT
= FALSE
; UBool top_
= TRUE
;
1100 UBool startOfRules
= TRUE
;
1101 UBool before
= FALSE
;
1102 UColTokenParser src
;
1104 UParseError parseError
;
1105 UChar
*rulesCopy
= NULL
;
1106 collIterate
*c
= uprv_new_collIterate(status
);
1107 UCAConstants
*consts
= NULL
;
1108 uint32_t UCOL_RESET_TOP_VALUE
, /*UCOL_RESET_TOP_CONT, */
1109 UCOL_NEXT_TOP_VALUE
, UCOL_NEXT_TOP_CONT
;
1111 UCollator
*UCA
= ucol_open("root", status
);
1113 if (U_FAILURE(*status
)) {
1114 log_err("Could not open root collator %s\n", u_errorName(*status
));
1115 uprv_delete_collIterate(c
);
1119 colLoc
= ucol_getLocaleByType(coll
, ULOC_ACTUAL_LOCALE
, status
);
1120 if (U_FAILURE(*status
)) {
1121 log_err("Could not get collator name: %s\n", u_errorName(*status
));
1123 uprv_delete_collIterate(c
);
1127 uprv_memset(&src
, 0, sizeof(UColTokenParser
));
1129 consts
= (UCAConstants
*)((uint8_t *)UCA
->image
+ UCA
->image
->UCAConsts
);
1130 UCOL_RESET_TOP_VALUE
= consts
->UCA_LAST_NON_VARIABLE
[0];
1131 /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */
1132 UCOL_NEXT_TOP_VALUE
= consts
->UCA_FIRST_IMPLICIT
[0];
1133 UCOL_NEXT_TOP_CONT
= consts
->UCA_FIRST_IMPLICIT
[1];
1135 baseCE
=baseContCE
=nextCE
=nextContCE
=currCE
=currContCE
=lastCE
=lastContCE
= UCOL_NOT_FOUND
;
1139 rules
= ucol_getRules(coll
, &ruleLen
);
1141 src
.invUCA
= ucol_initInverseUCA(status
);
1143 if(indirectBoundariesSet
== FALSE
) {
1144 /* UCOL_RESET_TOP_VALUE */
1145 setIndirectBoundaries(0, consts
->UCA_LAST_NON_VARIABLE
, consts
->UCA_FIRST_IMPLICIT
);
1146 /* UCOL_FIRST_PRIMARY_IGNORABLE */
1147 setIndirectBoundaries(1, consts
->UCA_FIRST_PRIMARY_IGNORABLE
, 0);
1148 /* UCOL_LAST_PRIMARY_IGNORABLE */
1149 setIndirectBoundaries(2, consts
->UCA_LAST_PRIMARY_IGNORABLE
, 0);
1150 /* UCOL_FIRST_SECONDARY_IGNORABLE */
1151 setIndirectBoundaries(3, consts
->UCA_FIRST_SECONDARY_IGNORABLE
, 0);
1152 /* UCOL_LAST_SECONDARY_IGNORABLE */
1153 setIndirectBoundaries(4, consts
->UCA_LAST_SECONDARY_IGNORABLE
, 0);
1154 /* UCOL_FIRST_TERTIARY_IGNORABLE */
1155 setIndirectBoundaries(5, consts
->UCA_FIRST_TERTIARY_IGNORABLE
, 0);
1156 /* UCOL_LAST_TERTIARY_IGNORABLE */
1157 setIndirectBoundaries(6, consts
->UCA_LAST_TERTIARY_IGNORABLE
, 0);
1158 /* UCOL_FIRST_VARIABLE */
1159 setIndirectBoundaries(7, consts
->UCA_FIRST_VARIABLE
, 0);
1160 /* UCOL_LAST_VARIABLE */
1161 setIndirectBoundaries(8, consts
->UCA_LAST_VARIABLE
, 0);
1162 /* UCOL_FIRST_NON_VARIABLE */
1163 setIndirectBoundaries(9, consts
->UCA_FIRST_NON_VARIABLE
, 0);
1164 /* UCOL_LAST_NON_VARIABLE */
1165 setIndirectBoundaries(10, consts
->UCA_LAST_NON_VARIABLE
, consts
->UCA_FIRST_IMPLICIT
);
1166 /* UCOL_FIRST_IMPLICIT */
1167 setIndirectBoundaries(11, consts
->UCA_FIRST_IMPLICIT
, 0);
1168 /* UCOL_LAST_IMPLICIT */
1169 setIndirectBoundaries(12, consts
->UCA_LAST_IMPLICIT
, consts
->UCA_FIRST_TRAILING
);
1170 /* UCOL_FIRST_TRAILING */
1171 setIndirectBoundaries(13, consts
->UCA_FIRST_TRAILING
, 0);
1172 /* UCOL_LAST_TRAILING */
1173 setIndirectBoundaries(14, consts
->UCA_LAST_TRAILING
, 0);
1174 ucolIndirectBoundaries
[14].limitCE
= (consts
->UCA_PRIMARY_SPECIAL_MIN
<<24);
1175 indirectBoundariesSet
= TRUE
;
1179 if(U_SUCCESS(*status
) && ruleLen
> 0) {
1180 rulesCopy
= (UChar
*)uprv_malloc((ruleLen
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
)*sizeof(UChar
));
1181 uprv_memcpy(rulesCopy
, rules
, ruleLen
*sizeof(UChar
));
1182 src
.current
= src
.source
= rulesCopy
;
1183 src
.end
= rulesCopy
+ruleLen
;
1184 src
.extraCurrent
= src
.end
;
1185 src
.extraEnd
= src
.end
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
;
1187 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
1188 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
1189 while ((current
= ucol_tok_parseNextToken(&src
, startOfRules
, &parseError
,status
)) != NULL
) {
1190 strength
= src
.parsedToken
.strength
;
1191 chOffset
= src
.parsedToken
.charsOffset
;
1192 chLen
= src
.parsedToken
.charsLen
;
1193 exOffset
= src
.parsedToken
.extensionOffset
;
1194 exLen
= src
.parsedToken
.extensionLen
;
1195 prefixOffset
= src
.parsedToken
.prefixOffset
;
1196 prefixLen
= src
.parsedToken
.prefixLen
;
1197 specs
= src
.parsedToken
.flags
;
1199 startOfRules
= FALSE
;
1200 varT
= (UBool
)((specs
& UCOL_TOK_VARIABLE_TOP
) != 0);
1201 top_
= (UBool
)((specs
& UCOL_TOK_TOP
) != 0);
1203 uprv_init_collIterate(coll
, src
.source
+chOffset
, chLen
, c
, status
);
1205 currCE
= ucol_getNextCE(coll
, c
, status
);
1206 if(currCE
== 0 && UCOL_ISTHAIPREVOWEL(*(src
.source
+chOffset
))) {
1207 log_verbose("Thai prevowel detected. Will pick next CE\n");
1208 currCE
= ucol_getNextCE(coll
, c
, status
);
1211 currContCE
= ucol_getNextCE(coll
, c
, status
);
1212 if(!isContinuation(currContCE
)) {
1216 /* we need to repack CEs here */
1218 if(strength
== UCOL_TOK_RESET
) {
1219 before
= (UBool
)((specs
& UCOL_TOK_BEFORE
) != 0);
1221 int32_t tokenIndex
= src
.parsedToken
.indirectIndex
;
1223 nextCE
= baseCE
= currCE
= ucolIndirectBoundaries
[tokenIndex
].startCE
;
1224 nextContCE
= baseContCE
= currContCE
= ucolIndirectBoundaries
[tokenIndex
].startContCE
;
1226 nextCE
= baseCE
= currCE
;
1227 nextContCE
= baseContCE
= currContCE
;
1229 maxStrength
= UCOL_IDENTICAL
;
1231 if(strength
< maxStrength
) {
1232 maxStrength
= strength
;
1233 if(baseCE
== UCOL_RESET_TOP_VALUE
) {
1234 log_verbose("Resetting to [top]\n");
1235 nextCE
= UCOL_NEXT_TOP_VALUE
;
1236 nextContCE
= UCOL_NEXT_TOP_CONT
;
1238 result
= ucol_inv_getNextCE(&src
, baseCE
& 0xFFFFFF3F, baseContCE
, &nextCE
, &nextContCE
, maxStrength
);
1241 if(ucol_isTailored(coll
, *(src
.source
+oldOffset
), status
)) {
1242 log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(src
.source
+oldOffset
));
1245 log_err("%s: couldn't find the CE\n", colLoc
);
1251 currCE
&= 0xFFFFFF3F;
1252 currContCE
&= 0xFFFFFFBF;
1254 if(maxStrength
== UCOL_IDENTICAL
) {
1255 if(baseCE
!= currCE
|| baseContCE
!= currContCE
) {
1256 log_err("%s: current CE (initial strength UCOL_EQUAL)\n", colLoc
);
1259 if(strength
== UCOL_IDENTICAL
) {
1260 if(lastCE
!= currCE
|| lastContCE
!= currContCE
) {
1261 log_err("%s: current CE (initial strength UCOL_EQUAL)\n", colLoc
);
1264 if(compareCEs(currCE
, currContCE
, nextCE
, nextContCE
) > 0) {
1265 /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
1266 log_err("%s: current CE is not less than base CE\n", colLoc
);
1269 if(compareCEs(currCE
, currContCE
, lastCE
, lastContCE
) < 0) {
1270 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1271 log_err("%s: sequence of generated CEs is broken\n", colLoc
);
1275 if(compareCEs(currCE
, currContCE
, lastCE
, lastContCE
) > 0) {
1276 /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1277 log_err("%s: sequence of generated CEs is broken\n", colLoc
);
1285 oldOffset
= chOffset
;
1286 lastCE
= currCE
& 0xFFFFFF3F;
1287 lastContCE
= currContCE
& 0xFFFFFFBF;
1289 uprv_free(src
.source
);
1292 uprv_delete_collIterate(c
);
1296 /* these locales are now picked from index RB */
1297 static const char* localesToTest
[] = {
1298 "ar", "bg", "ca", "cs", "da",
1299 "el", "en_BE", "en_US_POSIX",
1300 "es", "et", "fi", "fr", "hi",
1301 "hr", "hu", "is", "iw", "ja",
1302 "ko", "lt", "lv", "mk", "mt",
1303 "nb", "nn", "nn_NO", "pl", "ro",
1304 "ru", "sh", "sk", "sl", "sq",
1305 "sr", "sv", "th", "tr", "uk",
1310 static const char* rulesToTest
[] = {
1312 "&\\u0622 < \\u0627 << \\u0671 < \\u0621",
1314 /* Cui Mins rules */
1315 "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/
1316 "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
1317 "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/
1318 "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
1319 "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/
1320 "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/
1321 "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U" /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/
1325 static void TestCollations(void) {
1326 int32_t noOfLoc
= uloc_countAvailable();
1327 int32_t i
= 0, j
= 0;
1329 UErrorCode status
= U_ZERO_ERROR
;
1335 const char *locName
= NULL
;
1336 UCollator
*coll
= NULL
;
1337 UCollator
*UCA
= ucol_open("", &status
);
1338 UColAttributeValue oldStrength
= ucol_getAttribute(UCA
, UCOL_STRENGTH
, &status
);
1339 if (U_FAILURE(status
)) {
1340 log_err_status(status
, "Could not open UCA collator %s\n", u_errorName(status
));
1343 ucol_setAttribute(UCA
, UCOL_STRENGTH
, UCOL_QUATERNARY
, &status
);
1345 for(i
= 0; i
<noOfLoc
; i
++) {
1346 status
= U_ZERO_ERROR
;
1347 locName
= uloc_getAvailable(i
);
1348 if(uprv_strcmp("ja", locName
) == 0) {
1349 log_verbose("Don't know how to test prefixes\n");
1352 if(hasCollationElements(locName
)) {
1353 nameSize
= uloc_getDisplayName(locName
, NULL
, name
, 256, &status
);
1354 for(j
= 0; j
<nameSize
; j
++) {
1355 cName
[j
] = (char)name
[j
];
1357 cName
[nameSize
] = 0;
1358 log_verbose("\nTesting locale %s (%s)\n", locName
, cName
);
1359 coll
= ucol_open(locName
, &status
);
1360 if(U_SUCCESS(status
)) {
1361 testAgainstUCA(coll
, UCA
, "UCA", FALSE
, &status
);
1364 log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName
, u_errorName(status
));
1365 status
= U_ZERO_ERROR
;
1369 ucol_setAttribute(UCA
, UCOL_STRENGTH
, oldStrength
, &status
);
1373 static void RamsRulesTest(void) {
1374 UErrorCode status
= U_ZERO_ERROR
;
1376 UCollator
*coll
= NULL
;
1379 int32_t noOfLoc
= uloc_countAvailable();
1380 const char *locName
= NULL
;
1382 log_verbose("RamsRulesTest\n");
1384 if (uprv_strcmp("km", uloc_getDefault())==0 || uprv_strcmp("km_KH", uloc_getDefault())==0) {
1385 /* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */
1389 for(i
= 0; i
<noOfLoc
; i
++) {
1390 locName
= uloc_getAvailable(i
);
1391 if(hasCollationElements(locName
)) {
1392 if (uprv_strcmp("ja", locName
)==0) {
1393 log_verbose("Don't know how to test Japanese because of prefixes\n");
1396 if (uprv_strcmp("de__PHONEBOOK", locName
)==0) {
1397 log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
1400 if (uprv_strcmp("bn", locName
)==0 ||
1401 uprv_strcmp("en_US_POSIX", locName
)==0 ||
1402 uprv_strcmp("km", locName
)==0 ||
1403 uprv_strcmp("km_KH", locName
)==0 ||
1404 uprv_strcmp("my", locName
)==0 ||
1405 uprv_strcmp("si", locName
)==0 ||
1406 uprv_strcmp("si_LK", locName
)==0 ||
1407 uprv_strcmp("zh", locName
)==0 ||
1408 uprv_strcmp("zh_Hant", locName
)==0
1410 log_verbose("Don't know how to test %s. "
1411 "TODO: Fix ticket #6040 and reenable RamsRulesTest for this locale.\n", locName
);
1414 log_verbose("Testing locale %s\n", locName
);
1415 status
= U_ZERO_ERROR
;
1416 coll
= ucol_open(locName
, &status
);
1417 if(U_SUCCESS(status
)) {
1418 if((status
!= U_USING_DEFAULT_WARNING
) && (status
!= U_USING_FALLBACK_WARNING
)) {
1419 if(coll
->image
->jamoSpecial
== TRUE
) {
1420 log_err("%s has special JAMOs\n", locName
);
1422 ucol_setAttribute(coll
, UCOL_CASE_FIRST
, UCOL_OFF
, &status
);
1423 testCollator(coll
, &status
);
1424 testCEs(coll
, &status
);
1426 log_verbose("Skipping %s: %s\n", locName
, u_errorName(status
));
1430 log_err("Could not open %s: %s\n", locName
, u_errorName(status
));
1435 for(i
= 0; i
<sizeof(rulesToTest
)/sizeof(rulesToTest
[0]); i
++) {
1436 log_verbose("Testing rule: %s\n", rulesToTest
[i
]);
1437 ruleLen
= u_unescape(rulesToTest
[i
], rule
, 2048);
1438 status
= U_ZERO_ERROR
;
1439 coll
= ucol_openRules(rule
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
1440 if(U_SUCCESS(status
)) {
1441 testCollator(coll
, &status
);
1442 testCEs(coll
, &status
);
1445 log_err_status(status
, "Could not test rule: %s: '%s'\n", u_errorName(status
), rulesToTest
[i
]);
/*
 * IsTailoredTest: exercises ucol_isTailored().
 *
 * Builds a collator from the rule "&Z < A, B, C;c < d", then logs an error for
 * every code unit of "ABCcd" that is reported as NOT tailored and for every
 * code unit of "ZabD" that IS reported as tailored.  Afterwards it opens the
 * "ja" collator and checks that U+4E9C is reported as tailored.
 *
 * NOTE(review): the declarations of `rule` and `i`, the branch structure around
 * the "Can't tailor rules" message, and any ucol_close() calls are not among
 * the lines visible here - confirm against the full source.
 */
1451 static void IsTailoredTest(void) {
1452 UErrorCode status
= U_ZERO_ERROR
;
1454 UCollator
*coll
= NULL
;
1456 UChar tailored
[2048];
1457 UChar notTailored
[2048];
1458 uint32_t ruleLen
, tailoredLen
, notTailoredLen
;
1460 log_verbose("IsTailoredTest\n");
/* The rule under test, followed by the two probe strings: characters that
 * appear as tailored items in the rule, and characters that do not. */
1462 u_uastrcpy(rule
, "&Z < A, B, C;c < d");
1463 ruleLen
= u_strlen(rule
);
1465 u_uastrcpy(tailored
, "ABCcd");
1466 tailoredLen
= u_strlen(tailored
);
1468 u_uastrcpy(notTailored
, "ZabD");
1469 notTailoredLen
= u_strlen(notTailored
);
1471 coll
= ucol_openRules(rule
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
1472 if(U_SUCCESS(status
)) {
1473 for(i
= 0; i
<tailoredLen
; i
++) {
1474 if(!ucol_isTailored(coll
, tailored
[i
], &status
)) {
1475 log_err("%i: %04X should be tailored - it is reported as not\n", i
, tailored
[i
]);
1478 for(i
= 0; i
<notTailoredLen
; i
++) {
1479 if(ucol_isTailored(coll
, notTailored
[i
], &status
)) {
1480 log_err("%i: %04X should not be tailored - it is reported as it is\n", i
, notTailored
[i
]);
1486 log_err_status(status
, "Can't tailor rules\n");
/* Second check: a locale-provided tailoring ("ja") instead of ad-hoc rules.
 * status is reset first so an earlier failure does not leak into this part.
 * NOTE(review): in the visible lines the collator returned by ucol_open() is
 * passed to ucol_isTailored() without a status check - confirm. */
1489 status
= U_ZERO_ERROR
;
1490 coll
= ucol_open("ja", &status
);
1491 if(!ucol_isTailored(coll
, 0x4E9C, &status
)) {
1492 log_err_status(status
, "0x4E9C should be tailored - it is reported as not\n");
1498 const static char chTest
[][20] = {
1501 "ca", "cb", "cx", "cy", "CZ",
1502 "c\\u030C", "C\\u030C",
1505 "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
1506 "ch", "cH", "Ch", "CH",
1507 "cha", "charly", "che", "chh", "chch", "chr",
1510 "r\\u030C", "R\\u030C",
1513 "s\\u030C", "S\\u030C",
1515 "z\\u030C", "Z\\u030C"
/*
 * TestChMove: opens the "cs" collator and, for every pair (i, j) with i < j
 * in chTest[], unescapes both entries into t1/t2 and calls doTest() expecting
 * UCOL_LESS - i.e. chTest[] is expected to be listed in strictly ascending
 * collation order for that locale.
 *
 * NOTE(review): the declaration of `size`, the closing braces and the branch
 * that owns the "Can't open collator" data error (presumably the failure side
 * of the U_SUCCESS check) are not among the lines visible here - confirm.
 */
1518 static void TestChMove(void) {
1519 UChar t1
[256] = {0};
1520 UChar t2
[256] = {0};
1522 uint32_t i
= 0, j
= 0;
1524 UErrorCode status
= U_ZERO_ERROR
;
1526 UCollator
*coll
= ucol_open("cs", &status
);
1528 if(U_SUCCESS(status
)) {
/* Number of rows in chTest[]; the nested loops below visit each unordered
 * pair exactly once with i as the earlier (expected smaller) entry. */
1529 size
= sizeof(chTest
)/sizeof(chTest
[0]);
1530 for(i
= 0; i
< size
-1; i
++) {
1531 for(j
= i
+1; j
< size
; j
++) {
1532 u_unescape(chTest
[i
], t1
, 256);
1533 u_unescape(chTest
[j
], t2
, 256);
1534 doTest(coll
, t1
, t2
, UCOL_LESS
);
1539 log_data_err("Can't open collator");
1547 const static char impTest
[][20] = {
1557 static void TestImplicitTailoring(void) {
1558 static const struct {
1560 const char *data
[10];
1563 { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 },
1564 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
1565 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
1566 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
1571 for(i
= 0; i
< sizeof(tests
)/sizeof(tests
[0]); i
++) {
1572 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
1576 UChar t1[256] = {0};
1577 UChar t2[256] = {0};
1579 const char *rule = "&\\u4e00 < a <<< A < b <<< B";
1581 uint32_t i = 0, j = 0;
1583 uint32_t ruleLen = 0;
1584 UErrorCode status = U_ZERO_ERROR;
1585 UCollator *coll = NULL;
1586 ruleLen = u_unescape(rule, t1, 256);
1588 coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1590 if(U_SUCCESS(status)) {
1591 size = sizeof(impTest)/sizeof(impTest[0]);
1592 for(i = 0; i < size-1; i++) {
1593 for(j = i+1; j < size; j++) {
1594 u_unescape(impTest[i], t1, 256);
1595 u_unescape(impTest[j], t2, 256);
1596 doTest(coll, t1, t2, UCOL_LESS);
1601 log_err("Can't open collator");
/*
 * TestFCDProblem: checks that the sequences U+0430 U+0306 U+0325 and
 * U+04D1 U+0325 compare as UCOL_EQUAL with the collator opened for the ""
 * locale, first with UCOL_NORMALIZATION_MODE set to UCOL_OFF and then with it
 * set to UCOL_ON.
 *
 * NOTE(review): in the lines visible here the status written by ucol_open()
 * and ucol_setAttribute() is never inspected, and the collator's release is
 * not shown - confirm against the full source.
 */
1607 static void TestFCDProblem(void) {
1608 UChar t1
[256] = {0};
1609 UChar t2
[256] = {0};
1611 const char *s1
= "\\u0430\\u0306\\u0325";
1612 const char *s2
= "\\u04D1\\u0325";
1614 UErrorCode status
= U_ZERO_ERROR
;
1615 UCollator
*coll
= ucol_open("", &status
);
/* Turn the \uXXXX escapes above into UTF-16 in t1/t2. */
1616 u_unescape(s1
, t1
, 256);
1617 u_unescape(s2
, t2
, 256);
/* Same comparison is run twice: normalization off, then normalization on. */
1619 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &status
);
1620 doTest(coll
, t1
, t2
, UCOL_EQUAL
);
1622 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
1623 doTest(coll
, t1
, t2
, UCOL_EQUAL
);
1629 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
1630 We're only using NFC/NFD in this test.
1632 #define NORM_BUFFER_TEST_LEN 18
1635 UChar NFC
[NORM_BUFFER_TEST_LEN
];
1636 UChar NFD
[NORM_BUFFER_TEST_LEN
];
1639 static void TestComposeDecompose(void) {
1640 /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
1641 static const UChar UNICODESET_STR
[] = {
1642 0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
1643 0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
1644 0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
1647 int32_t i
= 0, j
= 0;
1649 UErrorCode status
= U_ZERO_ERROR
;
1650 const char *locName
= NULL
;
1654 uint32_t noCases
= 0;
1655 UCollator
*coll
= NULL
;
1657 UChar comp
[NORM_BUFFER_TEST_LEN
];
1659 UCollationElements
*iter
;
1660 USet
*charsToTest
= uset_openPattern(UNICODESET_STR
, -1, &status
);
1661 int32_t charsToTestSize
;
1663 noOfLoc
= uloc_countAvailable();
1665 coll
= ucol_open("", &status
);
1666 if (U_FAILURE(status
)) {
1667 log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status
));
1670 charsToTestSize
= uset_size(charsToTest
);
1671 if (charsToTestSize
<= 0) {
1672 log_err("Set was zero. Missing data?\n");
1675 t
= (tester
**)malloc(charsToTestSize
* sizeof(tester
*));
1676 t
[0] = (tester
*)malloc(sizeof(tester
));
1677 log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize
);
1679 for(u
= 0; u
< charsToTestSize
; u
++) {
1680 UChar32 ch
= uset_charAt(charsToTest
, u
);
1682 U16_APPEND_UNSAFE(comp
, len
, ch
);
1683 nfcSize
= unorm_normalize(comp
, len
, UNORM_NFC
, 0, t
[noCases
]->NFC
, NORM_BUFFER_TEST_LEN
, &status
);
1684 nfdSize
= unorm_normalize(comp
, len
, UNORM_NFD
, 0, t
[noCases
]->NFD
, NORM_BUFFER_TEST_LEN
, &status
);
1686 if(nfcSize
!= nfdSize
|| (uprv_memcmp(t
[noCases
]->NFC
, t
[noCases
]->NFD
, nfcSize
* sizeof(UChar
)) != 0)
1687 || (len
!= nfdSize
|| (uprv_memcmp(comp
, t
[noCases
]->NFD
, nfdSize
* sizeof(UChar
)) != 0))) {
1689 if(len
!= nfdSize
|| (uprv_memcmp(comp
, t
[noCases
]->NFD
, nfdSize
* sizeof(UChar
)) != 0)) {
1690 u_strncpy(t
[noCases
]->NFC
, comp
, len
);
1691 t
[noCases
]->NFC
[len
] = 0;
1694 t
[noCases
] = (tester
*)malloc(sizeof(tester
));
1695 uprv_memset(t
[noCases
], 0, sizeof(tester
));
1698 log_verbose("Testing %d/%d of possible test cases\n", noCases
, charsToTestSize
);
1699 uset_close(charsToTest
);
1702 for(u
=0; u
<(UChar32
)noCases
; u
++) {
1703 if(!ucol_equal(coll
, t
[u
]->NFC
, -1, t
[u
]->NFD
, -1)) {
1704 log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t
[u
]->u
);
1705 doTest(coll
, t
[u
]->NFC
, t
[u
]->NFD
, UCOL_EQUAL
);
1709 for(u = 0; u < charsToTestSize; u++) {
1711 log_verbose("%08X ", u);
1713 uprv_memset(t[noCases], 0, sizeof(tester));
1716 U16_APPEND_UNSAFE(comp, len, u);
1718 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1719 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1720 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
1721 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
1727 log_verbose("Testing locales, number of cases = %i\n", noCases
);
1728 for(i
= 0; i
<noOfLoc
; i
++) {
1729 status
= U_ZERO_ERROR
;
1730 locName
= uloc_getAvailable(i
);
1731 if(hasCollationElements(locName
)) {
1734 int32_t nameSize
= uloc_getDisplayName(locName
, NULL
, name
, sizeof(cName
), &status
);
1736 for(j
= 0; j
<nameSize
; j
++) {
1737 cName
[j
] = (char)name
[j
];
1739 cName
[nameSize
] = 0;
1740 log_verbose("\nTesting locale %s (%s)\n", locName
, cName
);
1742 coll
= ucol_open(locName
, &status
);
1743 ucol_setStrength(coll
, UCOL_IDENTICAL
);
1744 iter
= ucol_openElements(coll
, t
[u
]->NFD
, u_strlen(t
[u
]->NFD
), &status
);
1746 for(u
=0; u
<(UChar32
)noCases
; u
++) {
1747 if(!ucol_equal(coll
, t
[u
]->NFC
, -1, t
[u
]->NFD
, -1)) {
1748 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t
[u
]->u
, cName
);
1749 doTest(coll
, t
[u
]->NFC
, t
[u
]->NFD
, UCOL_EQUAL
);
1750 log_verbose("Testing NFC\n");
1751 ucol_setText(iter
, t
[u
]->NFC
, u_strlen(t
[u
]->NFC
), &status
);
1753 log_verbose("Testing NFD\n");
1754 ucol_setText(iter
, t
[u
]->NFD
, u_strlen(t
[u
]->NFD
), &status
);
1758 ucol_closeElements(iter
);
1762 for(u
= 0; u
<= (UChar32
)noCases
; u
++) {
/*
 * TestEmptyRule: calls ucol_openRules() with an empty rule string - rulez
 * holds only a terminating 0 and the length argument is 0 - using UCOL_OFF
 * normalization and UCOL_TERTIARY strength, with no parse-error struct (NULL).
 *
 * NOTE(review): what happens to `coll` and `status` after the call (checks,
 * ucol_close()) is not among the lines visible here - confirm.
 */
1768 static void TestEmptyRule(void) {
1769 UErrorCode status
= U_ZERO_ERROR
;
1770 UChar rulez
[] = { 0 };
1771 UCollator
*coll
= ucol_openRules(rulez
, 0, UCOL_OFF
, UCOL_TERTIARY
,NULL
, &status
);
1776 static void TestUCARules(void) {
1777 UErrorCode status
= U_ZERO_ERROR
;
1780 uint32_t ruleLen
= 0;
1781 UCollator
*UCAfromRules
= NULL
;
1782 UCollator
*coll
= ucol_open("", &status
);
1783 if(status
== U_FILE_ACCESS_ERROR
) {
1784 log_data_err("Is your data around?\n");
1786 } else if(U_FAILURE(status
)) {
1787 log_err("Error opening collator\n");
1790 ruleLen
= ucol_getRulesEx(coll
, UCOL_FULL_RULES
, rules
, 256);
1792 log_verbose("TestUCARules\n");
1794 rules
= (UChar
*)malloc((ruleLen
+1)*sizeof(UChar
));
1795 ruleLen
= ucol_getRulesEx(coll
, UCOL_FULL_RULES
, rules
, ruleLen
);
1797 log_verbose("Rules length is %d\n", ruleLen
);
1798 UCAfromRules
= ucol_openRules(rules
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
1799 if(U_SUCCESS(status
)) {
1800 ucol_close(UCAfromRules
);
1802 log_verbose("Unable to create a collator from UCARules!\n");
1805 u_unescape(blah, b, 256);
1806 ucol_getSortKey(coll, b, 1, res, 256);
1815 /* Pinyin tonal order */
1817 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
1818 (w/macron)< (w/acute)< (w/caron)< (w/grave)
1819 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
1820 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
1821 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
1822 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
1823 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
1826 However, in testing we got the following order:
1827 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
1828 (w/acute)< (w/grave)< (w/caron)< (w/macron)
1829 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
1831 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
1832 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
1833 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
1835 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
/*
 * TestBefore: Pinyin tonal ordering expressed with [before 1] resets.
 *
 * The rule string resets before each of a, e, i, o, u and chains that vowel's
 * four toned forms with '<' relations, then chains U+01D6, U+01D8, U+01DA,
 * U+01DC and U+00FC after u.  data[] holds the test strings, five per vowel
 * group (four toned forms followed by the base letter) plus the final
 * u-diaeresis group; it is handed, together with its element count, to
 * genericRulesStarter().
 *
 * NOTE(review): genericRulesStarter() is defined elsewhere; presumably it
 * verifies that data[] is in ascending order under the given rules - confirm.
 * Also note data[] lists the base letters in upper case ("A", "E", ...) while
 * the rules reset on the lower-case letters.
 */
1838 static void TestBefore(void) {
1839 const static char *data
[] = {
1840 "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
1841 "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
1842 "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
1843 "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
1844 "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
1845 "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
1847 genericRulesStarter(
1848 "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
1849 "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
1850 "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
1851 "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
1852 "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
1853 "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc",
1854 data
, sizeof(data
)/sizeof(data
[0]));
1858 /* superseded by TestBeforePinyin */
1859 static void TestJ784(void) {
1860 const static char *data
[] = {
1861 "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
1862 "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
1863 "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
1864 "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
1865 "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
1867 "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
1869 genericLocaleStarter("zh", data
, sizeof(data
)/sizeof(data
[0]));
1874 /* superseded by the changes to the lv locale */
1875 static void TestJ831(void) {
1876 const static char *data
[] = {
1882 genericLocaleStarter("lv", data
, sizeof(data
)/sizeof(data
[0]));
1886 static void TestJ815(void) {
1887 const static char *data
[] = {
1903 genericLocaleStarter("fr", data
, sizeof(data
)/sizeof(data
[0]));
1904 genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data
, sizeof(data
)/sizeof(data
[0]));
1909 "& a < b < c < d& r < c", "& a < b < d& r < c",
1910 "& a < b < c < d& c < m", "& a < b < c < m < d",
1911 "& a < b < c < d& a < m", "& a < m < b < c < d",
1912 "& a <<< b << c < d& a < m", "& a <<< b << c < m < d",
1913 "& a < b < c < d& [before 1] c < m", "& a < b < m < c < d",
1914 "& a < b <<< c << d <<< e& [before 3] e <<< x", "& a < b <<< c << d <<< x <<< e",
1915 "& a < b <<< c << d <<< e& [before 2] e <<< x", "& a < b <<< c <<< x << d <<< e",
1916 "& a < b <<< c << d <<< e& [before 1] e <<< x", "& a <<< x < b <<< c << d <<< e",
1917 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", "& a < b <<< c << d <<< e <<< f < x < g",
1919 static void TestRedundantRules(void) {
1922 static const struct {
1924 const char *expectedRules
;
1925 const char *testdata
[8];
1926 uint32_t testdatalen
;
1928 /* this test conflicts with positioning of CODAN placeholder */
1930 "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
1934 /* this test conflicts with the [before x] syntax tightening */
1936 "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
1940 /* this test conflicts with the [before x] syntax tightening */
1942 "& a < b <<< c << d <<< e& [before 1] e <<< x",
1943 "& a <<< x < b <<< c << d <<< e",
1944 {"a", "x", "b", "c", "d", "e"}, 6
1947 "& a < b < c < d& [before 1] c < m",
1948 "& a < b < m < c < d",
1949 {"a", "b", "m", "c", "d"}, 5
1952 "& a < b <<< c << d <<< e& [before 3] e <<< x",
1953 "& a < b <<< c << d <<< x <<< e",
1954 {"a", "b", "c", "d", "x", "e"}, 6
1956 /* this test conflicts with the [before x] syntax tightening */
1958 "& a < b <<< c << d <<< e& [before 2] e <<< x",
1959 "& a < b <<< c <<< x << d <<< e",
1960 {"a", "b", "c", "x", "d", "e"},, 6
1963 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
1964 "& a < b <<< c << d <<< e <<< f < x < g",
1965 {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
1968 "& a <<< b << c < d& a < m",
1969 "& a <<< b << c < m < d",
1970 {"a", "b", "c", "m", "d"}, 5
1973 "&a<b<<b\\u0301 &z<b",
1975 {"a", "b\\u0301", "z", "b"}, 4
1988 "& a < b < c < d& r < c",
1989 "& a < b < d& r < c",
1993 "& a < b < c < d& r < c",
1994 "& a < b < d& r < c",
1998 "& a < b < c < d& c < m",
1999 "& a < b < c < m < d",
2000 {"a", "b", "c", "m", "d"}, 5
2003 "& a < b < c < d& a < m",
2004 "& a < m < b < c < d",
2005 {"a", "m", "b", "c", "d"}, 5
2010 UCollator
*credundant
= NULL
;
2011 UCollator
*cresulting
= NULL
;
2012 UErrorCode status
= U_ZERO_ERROR
;
2013 UChar rlz
[2048] = { 0 };
2016 for(i
= 0; i
<sizeof(tests
)/sizeof(tests
[0]); i
++) {
2017 log_verbose("testing rule %s, expected to be %s\n", tests
[i
].rules
, tests
[i
].expectedRules
);
2018 rlen
= u_unescape(tests
[i
].rules
, rlz
, 2048);
2020 credundant
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
,&status
);
2021 if(status
== U_FILE_ACCESS_ERROR
) {
2022 log_data_err("Is your data around?\n");
2024 } else if(U_FAILURE(status
)) {
2025 log_err("Error opening collator\n");
2029 rlen
= u_unescape(tests
[i
].expectedRules
, rlz
, 2048);
2030 cresulting
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
,&status
);
2032 testAgainstUCA(cresulting
, credundant
, "expected", TRUE
, &status
);
2034 ucol_close(credundant
);
2035 ucol_close(cresulting
);
2037 log_verbose("testing using data\n");
2039 genericRulesStarter(tests
[i
].rules
, tests
[i
].testdata
, tests
[i
].testdatalen
);
2044 static void TestExpansionSyntax(void) {
2047 const static char *rules
[] = {
2048 "&AE <<< a << b <<< c &d <<< f",
2049 "&AE <<< a <<< b << c << d < e < f <<< g",
2050 "&AE <<< B <<< C / D <<< F"
2053 const static char *expectedRules
[] = {
2054 "&A <<< a / E << b / E <<< c /E &d <<< f",
2055 "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
2056 "&A <<< B / E <<< C / ED <<< F / E"
2059 const static char *testdata
[][8] = {
2060 {"AE", "a", "b", "c"},
2061 {"AE", "a", "b", "c", "d", "e", "f", "g"},
2062 {"AE", "B", "C"} /* / ED <<< F / E"},*/
2065 const static uint32_t testdatalen
[] = {
2073 UCollator
*credundant
= NULL
;
2074 UCollator
*cresulting
= NULL
;
2075 UErrorCode status
= U_ZERO_ERROR
;
2076 UChar rlz
[2048] = { 0 };
2079 for(i
= 0; i
<sizeof(rules
)/sizeof(rules
[0]); i
++) {
2080 log_verbose("testing rule %s, expected to be %s\n", rules
[i
], expectedRules
[i
]);
2081 rlen
= u_unescape(rules
[i
], rlz
, 2048);
2083 credundant
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
2084 if(status
== U_FILE_ACCESS_ERROR
) {
2085 log_data_err("Is your data around?\n");
2087 } else if(U_FAILURE(status
)) {
2088 log_err("Error opening collator\n");
2091 rlen
= u_unescape(expectedRules
[i
], rlz
, 2048);
2092 cresulting
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
,&status
);
2094 /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
2095 /* as a hard error test, but only in information mode */
2096 testAgainstUCA(cresulting
, credundant
, "expected", FALSE
, &status
);
2098 ucol_close(credundant
);
2099 ucol_close(cresulting
);
2101 log_verbose("testing using data\n");
2103 genericRulesStarter(rules
[i
], testdata
[i
], testdatalen
[i
]);
2107 static void TestCase(void)
2109 const static UChar gRules
[MAX_TOKEN_LEN
] =
2110 /*" & 0 < 1,\u2461<a,A"*/
2111 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
2113 const static UChar testCase
[][MAX_TOKEN_LEN
] =
2115 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
2116 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
2117 /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
2118 /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
2121 const static UCollationResult caseTestResults
[][9] =
2123 { UCOL_LESS
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_EQUAL
, UCOL_LESS
},
2124 { UCOL_GREATER
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_EQUAL
, UCOL_GREATER
},
2125 { UCOL_LESS
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_GREATER
, UCOL_LESS
, UCOL_EQUAL
, UCOL_EQUAL
, UCOL_LESS
},
2126 { UCOL_GREATER
, UCOL_LESS
, UCOL_GREATER
, UCOL_EQUAL
, UCOL_LESS
, UCOL_LESS
, UCOL_EQUAL
, UCOL_EQUAL
, UCOL_GREATER
}
2129 const static UColAttributeValue caseTestAttributes
[][2] =
2131 { UCOL_LOWER_FIRST
, UCOL_OFF
},
2132 { UCOL_UPPER_FIRST
, UCOL_OFF
},
2133 { UCOL_LOWER_FIRST
, UCOL_ON
},
2134 { UCOL_UPPER_FIRST
, UCOL_ON
}
2137 UErrorCode status
= U_ZERO_ERROR
;
2138 UCollationElements
*iter
;
2139 UCollator
*myCollation
;
2140 myCollation
= ucol_open("en_US", &status
);
2142 if(U_FAILURE(status
)){
2143 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
2146 log_verbose("Testing different case settings\n");
2147 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
2149 for(k
= 0; k
<4; k
++) {
2150 ucol_setAttribute(myCollation
, UCOL_CASE_FIRST
, caseTestAttributes
[k
][0], &status
);
2151 ucol_setAttribute(myCollation
, UCOL_CASE_LEVEL
, caseTestAttributes
[k
][1], &status
);
2152 log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes
[k
][0], caseTestAttributes
[k
][1]);
2153 for (i
= 0; i
< 3 ; i
++) {
2154 for(j
= i
+1; j
<4; j
++) {
2155 doTest(myCollation
, testCase
[i
], testCase
[j
], caseTestResults
[k
][3*i
+j
-1]);
2159 ucol_close(myCollation
);
2161 myCollation
= ucol_openRules(gRules
, u_strlen(gRules
), UCOL_OFF
, UCOL_TERTIARY
,NULL
, &status
);
2162 if(U_FAILURE(status
)){
2163 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
2166 log_verbose("Testing different case settings with custom rules\n");
2167 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
2169 for(k
= 0; k
<4; k
++) {
2170 ucol_setAttribute(myCollation
, UCOL_CASE_FIRST
, caseTestAttributes
[k
][0], &status
);
2171 ucol_setAttribute(myCollation
, UCOL_CASE_LEVEL
, caseTestAttributes
[k
][1], &status
);
2172 for (i
= 0; i
< 3 ; i
++) {
2173 for(j
= i
+1; j
<4; j
++) {
2174 log_verbose("k:%d, i:%d, j:%d\n", k
, i
, j
);
2175 doTest(myCollation
, testCase
[i
], testCase
[j
], caseTestResults
[k
][3*i
+j
-1]);
2176 iter
=ucol_openElements(myCollation
, testCase
[i
], u_strlen(testCase
[i
]), &status
);
2178 ucol_closeElements(iter
);
2179 iter
=ucol_openElements(myCollation
, testCase
[j
], u_strlen(testCase
[j
]), &status
);
2181 ucol_closeElements(iter
);
2185 ucol_close(myCollation
);
2187 const static char *lowerFirst
[] = {
2203 const static char *upperFirst
[] = {
2218 log_verbose("mixed case test\n");
2219 log_verbose("lower first, case level off\n");
2220 genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst
, sizeof(lowerFirst
)/sizeof(lowerFirst
[0]));
2221 log_verbose("upper first, case level off\n");
2222 genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst
, sizeof(upperFirst
)/sizeof(upperFirst
[0]));
2223 log_verbose("lower first, case level on\n");
2224 genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst
, sizeof(lowerFirst
)/sizeof(lowerFirst
[0]));
2225 log_verbose("upper first, case level on\n");
2226 genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst
, sizeof(upperFirst
)/sizeof(upperFirst
[0]));
2231 static void TestIncrementalNormalize(void) {
2233 /*UChar baseA =0x61;*/
2235 /* UChar baseB = 0x42;*/
2236 static const UChar ccMix
[] = {0x316, 0x321, 0x300};
2237 /*UChar ccMix[] = {0x61, 0x61, 0x61};*/
2239 0x316 is combining grave accent below, cc=220
2240 0x321 is combining palatalized hook below, cc=202
2241 0x300 is combining grave accent, cc=230
2244 #define MAXSLEN 2000
2245 /*int maxSLen = 64000;*/
2250 UErrorCode status
= U_ZERO_ERROR
;
2251 UCollationResult result
;
2253 int32_t myQ
= getTestOption(QUICK_OPTION
);
2255 if(getTestOption(QUICK_OPTION
) < 0) {
2256 setTestOption(QUICK_OPTION
, 1);
2260 /* Test 1. Run very long unnormalized strings, to force overflow of*/
2261 /* most buffers along the way.*/
2262 UChar strA
[MAXSLEN
+1];
2263 UChar strB
[MAXSLEN
+1];
2265 coll
= ucol_open("en_US", &status
);
2266 if(status
== U_FILE_ACCESS_ERROR
) {
2267 log_data_err("Is your data around?\n");
2269 } else if(U_FAILURE(status
)) {
2270 log_err("Error opening collator\n");
2273 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
2275 /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
2276 /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
2277 /*for (sLen = 1000; sLen<1001; sLen++) {*/
2278 for (sLen
= 500; sLen
<501; sLen
++) {
2279 /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
2282 for (i
=1; i
<=sLen
-1; i
++) {
2283 strA
[i
] = ccMix
[i
% 3];
2284 strB
[sLen
-i
] = ccMix
[i
% 3];
2289 ucol_setStrength(coll
, UCOL_TERTIARY
); /* Do test with default strength, which runs*/
2290 doTest(coll
, strA
, strB
, UCOL_EQUAL
); /* optimized functions in the impl*/
2291 ucol_setStrength(coll
, UCOL_IDENTICAL
); /* Do again with the slow, general impl.*/
2292 doTest(coll
, strA
, strB
, UCOL_EQUAL
);
2296 setTestOption(QUICK_OPTION
, myQ
);
2299 /* Test 2: Non-normal sequence in a string that extends to the last character*/
2300 /* of the string. Checks a couple of edge cases.*/
2303 static const UChar strA
[] = {0x41, 0x41, 0x300, 0x316, 0};
2304 static const UChar strB
[] = {0x41, 0xc0, 0x316, 0};
2305 ucol_setStrength(coll
, UCOL_TERTIARY
);
2306 doTest(coll
, strA
, strB
, UCOL_EQUAL
);
2309 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/
2313 * test below used a code point from Deseret, which sorts differently
2316 /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
2317 static const UChar strA
[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
2318 static const UChar strB
[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
2319 ucol_setStrength(coll
, UCOL_TERTIARY
);
2320 doTest(coll
, strA
, strB
, UCOL_GREATER
);
2323 /* Test 4: Imbedded nulls do not terminate a string when length is specified.*/
2326 static const UChar strA
[] = {0x41, 0x00, 0x42, 0x00};
2327 static const UChar strB
[] = {0x41, 0x00, 0x00, 0x00};
2334 /* there used to be -3 here. Hmmmm.... */
2335 /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
2336 result
= ucol_strcoll(coll
, strA
, 3, strB
, 3);
2337 if (result
!= UCOL_GREATER
) {
2338 log_err("ERROR 1 in test 4\n");
2340 result
= ucol_strcoll(coll
, strA
, -1, strB
, -1);
2341 if (result
!= UCOL_EQUAL
) {
2342 log_err("ERROR 2 in test 4\n");
2345 ucol_getSortKey(coll
, strA
, 3, (uint8_t *)sortKeyA
, sizeof(sortKeyA
));
2346 ucol_getSortKey(coll
, strA
, -1, (uint8_t *)sortKeyAz
, sizeof(sortKeyAz
));
2347 ucol_getSortKey(coll
, strB
, 3, (uint8_t *)sortKeyB
, sizeof(sortKeyB
));
2348 ucol_getSortKey(coll
, strB
, -1, (uint8_t *)sortKeyBz
, sizeof(sortKeyBz
));
2350 r
= strcmp(sortKeyA
, sortKeyAz
);
2352 log_err("Error 3 in test 4\n");
2354 r
= strcmp(sortKeyA
, sortKeyB
);
2356 log_err("Error 4 in test 4\n");
2358 r
= strcmp(sortKeyAz
, sortKeyBz
);
2360 log_err("Error 5 in test 4\n");
2363 ucol_setStrength(coll
, UCOL_IDENTICAL
);
2364 ucol_getSortKey(coll
, strA
, 3, (uint8_t *)sortKeyA
, sizeof(sortKeyA
));
2365 ucol_getSortKey(coll
, strA
, -1, (uint8_t *)sortKeyAz
, sizeof(sortKeyAz
));
2366 ucol_getSortKey(coll
, strB
, 3, (uint8_t *)sortKeyB
, sizeof(sortKeyB
));
2367 ucol_getSortKey(coll
, strB
, -1, (uint8_t *)sortKeyBz
, sizeof(sortKeyBz
));
2369 r
= strcmp(sortKeyA
, sortKeyAz
);
2371 log_err("Error 6 in test 4\n");
2373 r
= strcmp(sortKeyA
, sortKeyB
);
2375 log_err("Error 7 in test 4\n");
2377 r
= strcmp(sortKeyAz
, sortKeyBz
);
2379 log_err("Error 8 in test 4\n");
2381 ucol_setStrength(coll
, UCOL_TERTIARY
);
2385 /* Test 5: Null characters in non-normal source strings.*/
2388 static const UChar strA
[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
2389 static const UChar strB
[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
2396 result
= ucol_strcoll(coll
, strA
, 6, strB
, 6);
2397 if (result
!= UCOL_GREATER
) {
2398 log_err("ERROR 1 in test 5\n");
2400 result
= ucol_strcoll(coll
, strA
, -1, strB
, -1);
2401 if (result
!= UCOL_EQUAL
) {
2402 log_err("ERROR 2 in test 5\n");
2405 ucol_getSortKey(coll
, strA
, 6, (uint8_t *)sortKeyA
, sizeof(sortKeyA
));
2406 ucol_getSortKey(coll
, strA
, -1, (uint8_t *)sortKeyAz
, sizeof(sortKeyAz
));
2407 ucol_getSortKey(coll
, strB
, 6, (uint8_t *)sortKeyB
, sizeof(sortKeyB
));
2408 ucol_getSortKey(coll
, strB
, -1, (uint8_t *)sortKeyBz
, sizeof(sortKeyBz
));
2410 r
= strcmp(sortKeyA
, sortKeyAz
);
2412 log_err("Error 3 in test 5\n");
2414 r
= strcmp(sortKeyA
, sortKeyB
);
2416 log_err("Error 4 in test 5\n");
2418 r
= strcmp(sortKeyAz
, sortKeyBz
);
2420 log_err("Error 5 in test 5\n");
2423 ucol_setStrength(coll
, UCOL_IDENTICAL
);
2424 ucol_getSortKey(coll
, strA
, 6, (uint8_t *)sortKeyA
, sizeof(sortKeyA
));
2425 ucol_getSortKey(coll
, strA
, -1, (uint8_t *)sortKeyAz
, sizeof(sortKeyAz
));
2426 ucol_getSortKey(coll
, strB
, 6, (uint8_t *)sortKeyB
, sizeof(sortKeyB
));
2427 ucol_getSortKey(coll
, strB
, -1, (uint8_t *)sortKeyBz
, sizeof(sortKeyBz
));
2429 r
= strcmp(sortKeyA
, sortKeyAz
);
2431 log_err("Error 6 in test 5\n");
2433 r
= strcmp(sortKeyA
, sortKeyB
);
2435 log_err("Error 7 in test 5\n");
2437 r
= strcmp(sortKeyAz
, sortKeyBz
);
2439 log_err("Error 8 in test 5\n");
2441 ucol_setStrength(coll
, UCOL_TERTIARY
);
2445 /* Test 6: Null character as base of a non-normal combining sequence.*/
2448 static const UChar strA
[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
2449 static const UChar strB
[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
2451 result
= ucol_strcoll(coll
, strA
, 5, strB
, 5);
2452 if (result
!= UCOL_LESS
) {
2453 log_err("Error 1 in test 6\n");
2455 result
= ucol_strcoll(coll
, strA
, -1, strB
, -1);
2456 if (result
!= UCOL_EQUAL
) {
2457 log_err("Error 2 in test 6\n");
2467 static void TestGetCaseBit(void) {
2468 static const char *caseBitData
[] = {
2469 "a", "A", "ch", "Ch", "CH",
2470 "\\uFF9E", "\\u0009"
2473 static const uint8_t results
[] = {
2474 UCOL_LOWER_CASE
, UCOL_UPPER_CASE
, UCOL_LOWER_CASE
, UCOL_MIXED_CASE
, UCOL_UPPER_CASE
,
2475 UCOL_UPPER_CASE
, UCOL_LOWER_CASE
2478 uint32_t i
, blen
= 0;
2480 UErrorCode status
= U_ZERO_ERROR
;
2481 UCollator
*UCA
= ucol_open("", &status
);
2484 for(i
= 0; i
<sizeof(results
)/sizeof(results
[0]); i
++) {
2485 blen
= u_unescape(caseBitData
[i
], b
, 256);
2486 res
= ucol_uprv_getCaseBits(UCA
, b
, blen
, &status
);
2487 if(results
[i
] != res
) {
2488 log_err("Expected case = %02X, got %02X for %04X\n", results
[i
], res
, b
[0]);
2494 static void TestHangulTailoring(void) {
2495 static const char *koreanData
[] = {
2496 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
2497 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
2498 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
2499 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
2500 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
2501 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
2505 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
2506 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
2507 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
2508 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
2509 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
2510 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
2513 UErrorCode status
= U_ZERO_ERROR
;
2514 UChar rlz
[2048] = { 0 };
2515 uint32_t rlen
= u_unescape(rules
, rlz
, 2048);
2517 UCollator
*coll
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
,NULL
, &status
);
2518 if(status
== U_FILE_ACCESS_ERROR
) {
2519 log_data_err("Is your data around?\n");
2521 } else if(U_FAILURE(status
)) {
2522 log_err("Error opening collator\n");
2526 log_verbose("Using start of korean rules\n");
2528 if(U_SUCCESS(status
)) {
2529 genericOrderingTest(coll
, koreanData
, sizeof(koreanData
)/sizeof(koreanData
[0]));
2531 log_err("Unable to open collator with rules %s\n", rules
);
2534 log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
2535 ((UCATableHeader
*)coll
->image
)->jamoSpecial
= TRUE
; /* don't try this at home */
2536 genericOrderingTest(coll
, koreanData
, sizeof(koreanData
)/sizeof(koreanData
[0]));
2540 log_verbose("Using ko__LOTUS locale\n");
2541 genericLocaleStarter("ko__LOTUS", koreanData
, sizeof(koreanData
)/sizeof(koreanData
[0]));
/*
 * TestCompressOverlap: requests sort keys for 150 code units of secstr and
 * tertstr from the collator opened with ucol_open(""), then walks the
 * secondary and tertiary parts of each key and reports an error whenever a
 * byte falls outside the range expected for compressed common weights.
 * The check is done twice: once against the "top" limits and once against
 * the "bottom" limits.
 */
2544 static void TestCompressOverlap(void) {
2547 UErrorCode status
= U_ZERO_ERROR
;
2554 coll
= ucol_open("", &status
);
2556 if (U_FAILURE(status
)) {
2557 log_err_status(status
, "Collator can't be created -> %s\n", u_errorName(status
));
/* Both strings start with a run of U+0020. */
2560 while (count
< 149) {
2561 secstr
[count
] = 0x0020; /* [06, 05, 05] */
2562 tertstr
[count
] = 0x0020;
2566 /* top down compression ----------------------------------- */
2567 secstr
[count
] = 0x0332; /* [, 87, 05] */
2568 tertstr
[count
] = 0x3000; /* [06, 05, 07] */
2570 /* no compression secstr should have 150 secondary bytes, tertstr should
2571 have 150 tertiary bytes.
2572 with correct overlapping compression, secstr should have 4 secondary
2573 bytes, tertstr should have > 2 tertiary bytes */
/* NOTE(review): the capacity passed below is the literal 250; confirm it
   matches the declared size of result. */
2574 resultlen
= ucol_getSortKey(coll
, secstr
, 150, (uint8_t *)result
, 250);
/* Skip past the first 0x01 separator byte of the key. */
2575 tempptr
= uprv_strchr(result
, 1) + 1;
2576 while (*(tempptr
+ 1) != 1) {
2577 /* the last secondary collation element is not checked since it is not
2578 part of the compression */
2579 if (*tempptr
< UCOL_COMMON_TOP2
- UCOL_TOP_COUNT2
) {
2580 log_err("Secondary compression overlapped\n");
2585 /* tertiary top/bottom/common for en_US is similar to the secondary
2586 top/bottom/common */
2587 resultlen
= ucol_getSortKey(coll
, tertstr
, 150, (uint8_t *)result
, 250);
/* Start after the last 0x01 separator byte and scan to the terminating 0. */
2588 tempptr
= uprv_strrchr(result
, 1) + 1;
2589 while (*(tempptr
+ 1) != 0) {
2590 /* the last tertiary collation element is not checked since it is not
2591 part of the compression */
2592 if (*tempptr
< coll
->tertiaryTop
- coll
->tertiaryTopCount
) {
2593 log_err("Tertiary compression overlapped\n");
2598 /* bottom up compression ------------------------------------- */
2601 resultlen
= ucol_getSortKey(coll
, secstr
, 150, (uint8_t *)result
, 250);
2602 tempptr
= uprv_strchr(result
, 1) + 1;
2603 while (*(tempptr
+ 1) != 1) {
2604 /* the last secondary collation element is not checked since it is not
2605 part of the compression */
2606 if (*tempptr
> UCOL_COMMON_BOT2
+ UCOL_BOT_COUNT2
) {
2607 log_err("Secondary compression overlapped\n");
2612 /* tertiary top/bottom/common for en_US is similar to the secondary
2613 top/bottom/common */
2614 resultlen
= ucol_getSortKey(coll
, tertstr
, 150, (uint8_t *)result
, 250);
2615 tempptr
= uprv_strrchr(result
, 1) + 1;
2616 while (*(tempptr
+ 1) != 0) {
2617 /* the last tertiary collation element is not checked since it is not
2618 part of the compression */
2619 if (*tempptr
> coll
->tertiaryBottom
+ coll
->tertiaryBottomCount
) {
2620 log_err("Tertiary compression overlapped\n");
/*
 * TestCyrillicTailoring: runs the same three-string ordering table through
 * the "root" locale and through six rule sets that reset on or tailor
 * U+0410, alone and together with U+04D0 or U+0410 U+0301.
 */
2628 static void TestCyrillicTailoring(void) {
2629 static const char *test
[] = {
2635 /* Russian overrides contractions, so this test is not valid anymore */
2636 /*genericLocaleStarter("ru", test, 3);*/
2638 genericLocaleStarter("root", test
, 3);
2639 genericRulesStarter("&\\u0410 = \\u0410", test
, 3);
2640 genericRulesStarter("&Z < \\u0410", test
, 3);
2641 genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test
, 3);
2642 genericRulesStarter("&Z < \\u0410 < \\u04d0", test
, 3);
2643 genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test
, 3);
2644 genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test
, 3);
/*
 * TestSuppressContractions: applies the rule
 * "[suppressContractions [\u0400-\u047f]]" to two ordering tables
 * (testNoCont, then testNoCont2), three strings each.
 */
2647 static void TestSuppressContractions(void) {
2649 static const char *testNoCont2
[] = {
2654 static const char *testNoCont
[] = {
2657 "\\uFF21\\u0410\\u0302"
2660 genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont
, 3);
2661 genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont2
, 3);
2664 static void TestContraction(void) {
2665 const static char *testrules
[] = {
2667 "&A = A\\u0306/\\u0306",
2670 const static UChar testdata
[][2] = {
2671 {0x0041 /* 'A' */, 0x0042 /* 'B' */},
2672 {0x0041 /* 'A' */, 0x0306 /* combining breve */},
2673 {0x0063 /* 'c' */, 0x0068 /* 'h' */}
2675 const static UChar testdata2
[][2] = {
2676 {0x0063 /* 'c' */, 0x0067 /* 'g' */},
2677 {0x0063 /* 'c' */, 0x0068 /* 'h' */},
2678 {0x0063 /* 'c' */, 0x006C /* 'l' */}
2680 const static char *testrules3
[] = {
2681 "&z < xyz &xyzw << B",
2682 "&z < xyz &xyz << B / w",
2683 "&z < ch &achm << B",
2684 "&z < ch &a << B / chm",
2685 "&\\ud800\\udc00w << B",
2686 "&\\ud800\\udc00 << B / w",
2687 "&a\\ud800\\udc00m << B",
2688 "&a << B / \\ud800\\udc00m",
2691 UErrorCode status
= U_ZERO_ERROR
;
2693 UChar rule
[256] = {0};
2697 for (i
= 0; i
< sizeof(testrules
) / sizeof(testrules
[0]); i
++) {
2698 UCollationElements
*iter1
;
2700 log_verbose("Rule %s for testing\n", testrules
[i
]);
2701 rlen
= u_unescape(testrules
[i
], rule
, 32);
2702 coll
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
2703 if (U_FAILURE(status
)) {
2704 log_err_status(status
, "Collator creation failed %s -> %s\n", testrules
[i
], u_errorName(status
));
2707 iter1
= ucol_openElements(coll
, testdata
[i
], 2, &status
);
2708 if (U_FAILURE(status
)) {
2709 log_err("Collation iterator creation failed\n");
2713 UCollationElements
*iter2
= ucol_openElements(coll
,
2717 if (U_FAILURE(status
)) {
2718 log_err("Collation iterator creation failed\n");
2721 ce
= ucol_next(iter2
, &status
);
2722 while (ce
!= UCOL_NULLORDER
) {
2723 if ((uint32_t)ucol_next(iter1
, &status
) != ce
) {
2724 log_err("Collation elements in contraction split does not match\n");
2727 ce
= ucol_next(iter2
, &status
);
2730 ucol_closeElements(iter2
);
2732 if (ucol_next(iter1
, &status
) != UCOL_NULLORDER
) {
2733 log_err("Collation elements not exhausted\n");
2736 ucol_closeElements(iter1
);
2740 rlen
= u_unescape("& a < b < c < ch < d & c = ch / h", rule
, 256);
2741 coll
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
2742 if (ucol_strcoll(coll
, testdata2
[0], 2, testdata2
[1], 2) != UCOL_LESS
) {
2743 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2744 testdata2
[0][0], testdata2
[0][1], testdata2
[1][0],
2748 if (ucol_strcoll(coll
, testdata2
[1], 2, testdata2
[2], 2) != UCOL_LESS
) {
2749 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2750 testdata2
[1][0], testdata2
[1][1], testdata2
[2][0],
2756 for (i
= 0; i
< sizeof(testrules3
) / sizeof(testrules3
[0]); i
+= 2) {
2759 UCollationElements
*iter1
,
2761 UChar ch
= 0x0042 /* 'B' */;
2763 rlen
= u_unescape(testrules3
[i
], rule
, 32);
2764 coll1
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
2765 rlen
= u_unescape(testrules3
[i
+ 1], rule
, 32);
2766 coll2
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
2767 if (U_FAILURE(status
)) {
2768 log_err("Collator creation failed %s\n", testrules
[i
]);
2771 iter1
= ucol_openElements(coll1
, &ch
, 1, &status
);
2772 iter2
= ucol_openElements(coll2
, &ch
, 1, &status
);
2773 if (U_FAILURE(status
)) {
2774 log_err("Collation iterator creation failed\n");
2777 ce
= ucol_next(iter1
, &status
);
2778 if (U_FAILURE(status
)) {
2779 log_err("Retrieving ces failed\n");
2782 while (ce
!= UCOL_NULLORDER
) {
2783 if (ce
!= (uint32_t)ucol_next(iter2
, &status
)) {
2784 log_err("CEs does not match\n");
2787 ce
= ucol_next(iter1
, &status
);
2788 if (U_FAILURE(status
)) {
2789 log_err("Retrieving ces failed\n");
2793 if (ucol_next(iter2
, &status
) != UCOL_NULLORDER
) {
2794 log_err("CEs not exhausted\n");
2797 ucol_closeElements(iter1
);
2798 ucol_closeElements(iter2
);
/*
 * TestExpansion: for every rule string in testrules, builds a collator with
 * ucol_openRules(..., UCOL_ON, UCOL_TERTIARY, ...) and uses doTest() to check
 * that each row of testdata compares UCOL_LESS to the row after it.
 */
2804 static void TestExpansion(void) {
2805 const static char *testrules
[] = {
2806 "&J << K / B & K << M",
2809 const static UChar testdata
[][3] = {
2810 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
2811 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
2812 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
2813 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
2814 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
2815 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
2818 UErrorCode status
= U_ZERO_ERROR
;
2820 UChar rule
[256] = {0};
2824 for (i
= 0; i
< sizeof(testrules
) / sizeof(testrules
[0]); i
++) {
2826 log_verbose("Rule %s for testing\n", testrules
[i
]);
2827 rlen
= u_unescape(testrules
[i
], rule
, 32);
2828 coll
= ucol_openRules(rule
, rlen
, UCOL_ON
, UCOL_TERTIARY
,NULL
, &status
);
2829 if (U_FAILURE(status
)) {
2830 log_err_status(status
, "Collator creation failed %s -> %s\n", testrules
[i
], u_errorName(status
));
/* Six rows give five adjacent pairs: (0,1) through (4,5). */
2834 for (j
= 0; j
< 5; j
++) {
2835 doTest(coll
, testdata
[j
], testdata
[j
+ 1], UCOL_LESS
);
2842 /* this test tests the current limitations of the engine */
2843 /* it always fail, so it is disabled by default */
2844 static void TestLimitations(void) {
2845 /* recursive expansions */
2847 static const char *rule
= "&a=b/c&d=c/e";
2848 static const char *tlimit01
[] = {"add","b","adf"};
2849 static const char *tlimit02
[] = {"aa","b","af"};
2850 log_verbose("recursive expansions\n");
2851 genericRulesStarter(rule
, tlimit01
, sizeof(tlimit01
)/sizeof(tlimit01
[0]));
2852 genericRulesStarter(rule
, tlimit02
, sizeof(tlimit02
)/sizeof(tlimit02
[0]));
2854 /* contractions spanning expansions */
2856 static const char *rule
= "&a<<<c/e&g<<<eh";
2857 static const char *tlimit01
[] = {"ad","c","af","f","ch","h"};
2858 static const char *tlimit02
[] = {"ad","c","ch","af","f","h"};
2859 log_verbose("contractions spanning expansions\n");
2860 genericRulesStarter(rule
, tlimit01
, sizeof(tlimit01
)/sizeof(tlimit01
[0]));
2861 genericRulesStarter(rule
, tlimit02
, sizeof(tlimit02
)/sizeof(tlimit02
[0]));
2863 /* normalization: nulls in contractions */
2865 static const char *rule
= "&a<<<\\u0000\\u0302";
2866 static const char *tlimit01
[] = {"a","\\u0000\\u0302\\u0327"};
2867 static const char *tlimit02
[] = {"\\u0000\\u0302\\u0327","a"};
2868 static const UColAttribute att
[] = { UCOL_DECOMPOSITION_MODE
};
2869 static const UColAttributeValue valOn
[] = { UCOL_ON
};
2870 static const UColAttributeValue valOff
[] = { UCOL_OFF
};
2872 log_verbose("NULL in contractions\n");
2873 genericRulesStarterWithOptions(rule
, tlimit01
, 2, att
, valOn
, 1);
2874 genericRulesStarterWithOptions(rule
, tlimit02
, 2, att
, valOn
, 1);
2875 genericRulesStarterWithOptions(rule
, tlimit01
, 2, att
, valOff
, 1);
2876 genericRulesStarterWithOptions(rule
, tlimit02
, 2, att
, valOff
, 1);
2879 /* normalization: contractions spanning normalization */
2881 static const char *rule
= "&a<<<\\u0000\\u0302";
2882 static const char *tlimit01
[] = {"a","\\u0000\\u0302\\u0327"};
2883 static const char *tlimit02
[] = {"\\u0000\\u0302\\u0327","a"};
2884 static const UColAttribute att
[] = { UCOL_DECOMPOSITION_MODE
};
2885 static const UColAttributeValue valOn
[] = { UCOL_ON
};
2886 static const UColAttributeValue valOff
[] = { UCOL_OFF
};
2888 log_verbose("contractions spanning normalization\n");
2889 genericRulesStarterWithOptions(rule
, tlimit01
, 2, att
, valOn
, 1);
2890 genericRulesStarterWithOptions(rule
, tlimit02
, 2, att
, valOn
, 1);
2891 genericRulesStarterWithOptions(rule
, tlimit01
, 2, att
, valOff
, 1);
2892 genericRulesStarterWithOptions(rule
, tlimit02
, 2, att
, valOff
, 1);
2897 /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
2898 static const char *rule
= "&\\u2010<x<[variable top]=z";
2899 /*static const char *rule3 = "&' '<x<[variable top]=z";*/
2900 static const char *tlimit01
[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
2901 static const char *tlimit02
[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
2902 static const char *tlimit03
[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
2903 static const UColAttribute att
[] = { UCOL_ALTERNATE_HANDLING
, UCOL_STRENGTH
};
2904 static const UColAttributeValue valOn
[] = { UCOL_SHIFTED
, UCOL_QUATERNARY
};
2905 static const UColAttributeValue valOff
[] = { UCOL_NON_IGNORABLE
, UCOL_TERTIARY
};
2907 log_verbose("variable top\n");
2908 genericRulesStarterWithOptions(rule
, tlimit03
, sizeof(tlimit03
)/sizeof(tlimit03
[0]), att
, valOn
, sizeof(att
)/sizeof(att
[0]));
2909 genericRulesStarterWithOptions(rule
, tlimit01
, sizeof(tlimit01
)/sizeof(tlimit01
[0]), att
, valOn
, sizeof(att
)/sizeof(att
[0]));
2910 genericRulesStarterWithOptions(rule
, tlimit02
, sizeof(tlimit02
)/sizeof(tlimit02
[0]), att
, valOn
, sizeof(att
)/sizeof(att
[0]));
2911 genericRulesStarterWithOptions(rule
, tlimit01
, sizeof(tlimit01
)/sizeof(tlimit01
[0]), att
, valOff
, sizeof(att
)/sizeof(att
[0]));
2912 genericRulesStarterWithOptions(rule
, tlimit02
, sizeof(tlimit02
)/sizeof(tlimit02
[0]), att
, valOff
, sizeof(att
)/sizeof(att
[0]));
2917 static const char *rule
= "&c<ch<<<cH<<<Ch<<<CH";
2918 static const char *tlimit01
[] = {"c","CH","Ch","cH","ch"};
2919 static const char *tlimit02
[] = {"c","CH","cH","Ch","ch"};
2920 static const UColAttribute att
[] = { UCOL_CASE_FIRST
};
2921 static const UColAttributeValue valOn
[] = { UCOL_UPPER_FIRST
};
2922 /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
2923 log_verbose("case level\n");
2924 genericRulesStarterWithOptions(rule
, tlimit01
, sizeof(tlimit01
)/sizeof(tlimit01
[0]), att
, valOn
, sizeof(att
)/sizeof(att
[0]));
2925 genericRulesStarterWithOptions(rule
, tlimit02
, sizeof(tlimit02
)/sizeof(tlimit02
[0]), att
, valOn
, sizeof(att
)/sizeof(att
[0]));
2926 /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
2927 /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
2933 static void TestBocsuCoverage(void) {
2934 UErrorCode status
= U_ZERO_ERROR
;
2935 const char *testString
= "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
2936 UChar test
[256] = {0};
2937 uint32_t tlen
= u_unescape(testString
, test
, 32);
2938 uint8_t key
[256] = {0};
2941 UCollator
*coll
= ucol_open("", &status
);
2942 if(U_SUCCESS(status
)) {
2943 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_IDENTICAL
, &status
);
2945 klen
= ucol_getSortKey(coll
, test
, tlen
, key
, 256);
2949 log_data_err("Couldn't open UCA\n");
/*
 * TestVariableTopSetting: exercises ucol_setVariableTop(),
 * ucol_getVariableTop() and ucol_restoreVariableTop() on the collator opened
 * with ucol_open("").
 *
 * Phases, as announced by the log_verbose() messages:
 *   - "Slide variable top over UCARules": the full rule string is fetched
 *     with ucol_getRulesEx() and tokenized with ucol_tok_parseNextToken();
 *     for every primary-strength token the variable top is set to the
 *     previous token's characters and the result is checked.
 *   - "Testing setting variable top to contractions": walks the table at
 *     coll->image->contractionUCACombos.
 *   - "Test restoring variable top" and "Testing calling with error set".
 *
 * The QUICK_OPTION test option is saved in myQ, forced on while the rules
 * are processed, and restored afterwards.
 */
2953 static void TestVariableTopSetting(void) {
2954 UErrorCode status
= U_ZERO_ERROR
;
2955 const UChar
*current
= NULL
;
2956 uint32_t varTopOriginal
= 0, varTop1
, varTop2
;
2957 UCollator
*coll
= ucol_open("", &status
);
2958 if(U_SUCCESS(status
)) {
2960 uint32_t strength
= 0;
2962 uint32_t chOffset
= 0;
2964 uint32_t exOffset
= 0;
2966 uint32_t oldChOffset
= 0;
2967 uint32_t oldChLen
= 0;
2968 uint32_t oldExOffset
= 0;
2969 uint32_t oldExLen
= 0;
2970 uint32_t prefixOffset
= 0;
2971 uint32_t prefixLen
= 0;
2973 UBool startOfRules
= TRUE
;
2974 UColTokenParser src
;
2977 UChar
*rulesCopy
= NULL
;
2980 UCollationResult result
;
2982 UChar first
[256] = { 0 };
2983 UChar second
[256] = { 0 };
2984 UParseError parseError
;
2985 int32_t myQ
= getTestOption(QUICK_OPTION
);
2987 uprv_memset(&src
, 0, sizeof(UColTokenParser
));
2991 if(getTestOption(QUICK_OPTION
) <= 0) {
2992 setTestOption(QUICK_OPTION
, 1);
2995 /* this test will fail when normalization is turned on */
2996 /* therefore we always turn off exhaustive mode for it */
2998 log_verbose("Slide variable top over UCARules\n");
/* First call passes capacity 0 and is used only for the returned length;
   the second call fills the allocated buffer. */
2999 rulesLen
= ucol_getRulesEx(coll
, UCOL_FULL_RULES
, rulesCopy
, 0);
3000 rulesCopy
= (UChar
*)uprv_malloc((rulesLen
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
)*sizeof(UChar
));
3001 rulesLen
= ucol_getRulesEx(coll
, UCOL_FULL_RULES
, rulesCopy
, rulesLen
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
);
3003 if(U_SUCCESS(status
) && rulesLen
> 0) {
3004 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
3005 src
.current
= src
.source
= rulesCopy
;
3006 src
.end
= rulesCopy
+rulesLen
;
3007 src
.extraCurrent
= src
.end
;
3008 src
.extraEnd
= src
.end
+UCOL_TOK_EXTRA_RULE_SPACE_SIZE
;
3010 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
3011 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
3012 while ((current
= ucol_tok_parseNextToken(&src
, startOfRules
, &parseError
,&status
)) != NULL
) {
3013 strength
= src
.parsedToken
.strength
;
3014 chOffset
= src
.parsedToken
.charsOffset
;
3015 chLen
= src
.parsedToken
.charsLen
;
3016 exOffset
= src
.parsedToken
.extensionOffset
;
3017 exLen
= src
.parsedToken
.extensionLen
;
3018 prefixOffset
= src
.parsedToken
.prefixOffset
;
3019 prefixLen
= src
.parsedToken
.prefixLen
;
3020 specs
= src
.parsedToken
.flags
;
3022 startOfRules
= FALSE
;
3024 log_verbose("%04X %d ", *(src
.source
+chOffset
), chLen
);
3026 if(strength
== UCOL_PRIMARY
) {
3027 status
= U_ZERO_ERROR
;
3028 varTopOriginal
= ucol_getVariableTop(coll
, &status
);
3029 varTop1
= ucol_setVariableTop(coll
, src
.source
+oldChOffset
, oldChLen
, &status
);
3030 if(U_FAILURE(status
)) {
3034 uint32_t CE
= UCOL_NO_MORE_CES
;
3036 /* before we start screaming, let's see if there is a problem with the rules */
3037 UErrorCode collIterateStatus
= U_ZERO_ERROR
;
3038 collIterate
*s
= uprv_new_collIterate(&collIterateStatus
);
3039 uprv_init_collIterate(coll
, src
.source
+oldChOffset
, oldChLen
, s
, &collIterateStatus
);
3041 CE
= ucol_getNextCE(coll
, s
, &status
);
/* Format the offending code units as hex for the messages below. */
3043 for(i
= 0; i
< oldChLen
; i
++) {
3044 j
= sprintf(buf
, "%04X ", *(src
.source
+oldChOffset
+i
));
3047 if(status
== U_PRIMARY_TOO_LONG_ERROR
) {
3048 log_verbose("= Expected failure for %s =", buffer
);
3050 if(uprv_collIterateAtEnd(s
)) {
3051 log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
3052 oldChOffset
, u_errorName(status
), buffer
);
3054 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
3058 uprv_delete_collIterate(s
);
3060 varTop2
= ucol_getVariableTop(coll
, &status
);
/* Only the upper 16 bits of the two values are compared. */
3061 if((varTop1
& 0xFFFF0000) != (varTop2
& 0xFFFF0000)) {
3062 log_err("cannot retrieve set varTop value!\n");
3066 if((varTop1
& 0xFFFF0000) > 0 && oldExLen
== 0) {
3068 u_strncpy(first
, src
.source
+oldChOffset
, oldChLen
);
3069 u_strncpy(first
+oldChLen
, src
.source
+chOffset
, chLen
);
3070 u_strncpy(first
+oldChLen
+chLen
, src
.source
+oldChOffset
, oldChLen
);
3071 first
[2*oldChLen
+chLen
] = 0;
3074 u_strncpy(second
, src
.source
+chOffset
, chLen
);
3076 } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
/* NOTE(review): this branch copies oldExLen units but offsets the
   destination by oldChLen, and terminates at 2*oldExLen+chLen -- confirm
   the mixed lengths are intended. */
3077 u_strncpy(second
, src
.source
+oldExOffset
, oldExLen
);
3078 u_strncpy(second
+oldChLen
, src
.source
+chOffset
, chLen
);
3079 u_strncpy(second
+oldChLen
+chLen
, src
.source
+oldExOffset
, oldExLen
);
3080 second
[2*oldExLen
+chLen
] = 0;
3082 result
= ucol_strcoll(coll
, first
, -1, second
, -1);
3083 if(result
== UCOL_EQUAL
) {
3084 doTest(coll
, first
, second
, UCOL_EQUAL
);
3086 log_verbose("Suspicious strcoll result for %04X and %04X\n", *(src
.source
+oldChOffset
), *(src
.source
+chOffset
));
/* Remember this token for the next iteration, except for reset tokens. */
3090 if(strength
!= UCOL_TOK_RESET
) {
3091 oldChOffset
= chOffset
;
3093 oldExOffset
= exOffset
;
3097 status
= U_ZERO_ERROR
;
3100 log_err("Unexpected failure getting rules %s\n", u_errorName(status
));
3103 if (U_FAILURE(status
)) {
3104 log_err("Error parsing rules %s\n", u_errorName(status
));
3107 status
= U_ZERO_ERROR
;
/* Restore the QUICK_OPTION value saved on entry. */
3110 setTestOption(QUICK_OPTION
, myQ
);
3112 log_verbose("Testing setting variable top to contractions\n");
3114 UChar
*conts
= (UChar
*)((uint8_t *)coll
->image
+ coll
->image
->contractionUCACombos
);
3115 int32_t maxUCAContractionLength
= coll
->image
->contractionUCACombosWidth
;
3116 while(*conts
!= 0) {
3118 * A continuation is NUL-terminated and NUL-padded
3119 * except if it has the maximum length.
3121 int32_t contractionLength
= maxUCAContractionLength
;
3122 while(contractionLength
> 0 && conts
[contractionLength
- 1] == 0) {
3123 --contractionLength
;
3125 if(*(conts
+1)==0) { /* pre-context */
3126 varTop1
= ucol_setVariableTop(coll
, conts
, 1, &status
);
3128 varTop1
= ucol_setVariableTop(coll
, conts
, contractionLength
, &status
);
3130 if(U_FAILURE(status
)) {
3131 if(status
== U_PRIMARY_TOO_LONG_ERROR
) {
3132 /* ucol_setVariableTop() is documented to not accept 3-byte primaries,
3133 * therefore it is not an error when it complains about them. */
3134 log_verbose("Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR\n",
3135 *conts
, *(conts
+1), *(conts
+2));
3137 log_err("Couldn't set variable top to a contraction %04X %04X %04X - %s\n",
3138 *conts
, *(conts
+1), *(conts
+2), u_errorName(status
));
3140 status
= U_ZERO_ERROR
;
/* Advance by one fixed-width table entry. */
3142 conts
+=maxUCAContractionLength
;
3145 status
= U_ZERO_ERROR
;
3151 ucol_setVariableTop(coll
, first
, -1, &status
);
3153 if(U_SUCCESS(status
)) {
3154 log_err("Invalid contraction succeded in setting variable top!\n");
3159 log_verbose("Test restoring variable top\n");
3161 status
= U_ZERO_ERROR
;
3162 ucol_restoreVariableTop(coll
, varTopOriginal
, &status
);
3163 if(varTopOriginal
!= ucol_getVariableTop(coll
, &status
)) {
3164 log_err("Couldn't restore old variable top\n");
3167 log_verbose("Testing calling with error set\n");
/* Every call below is made with a failure code already in status; the
   check afterwards requires that the code is left unchanged. */
3169 status
= U_INTERNAL_PROGRAM_ERROR
;
3170 varTop1
= ucol_setVariableTop(coll
, first
, 1, &status
);
3171 varTop2
= ucol_getVariableTop(coll
, &status
);
3172 ucol_restoreVariableTop(coll
, varTop2
, &status
);
3173 varTop1
= ucol_setVariableTop(NULL
, first
, 1, &status
);
3174 varTop2
= ucol_getVariableTop(NULL
, &status
);
3175 ucol_restoreVariableTop(NULL
, varTop2
, &status
);
3176 if(status
!= U_INTERNAL_PROGRAM_ERROR
) {
3177 log_err("Bad reaction to passed error!\n");
/* src.source is freed rather than rulesCopy because the parser may have
   reallocated the buffer (see the note above the parse loop). */
3179 uprv_free(src
.source
);
3182 log_data_err("Couldn't open UCA collator\n");
3187 static void TestNonChars(void) {
3188 static const char *test
[] = {
3189 "\\u0000", /* ignorable */
3190 "\\uFFFE", /* special merge-sort character with minimum non-ignorable weights */
3191 "\\uFDD0", "\\uFDEF",
3192 "\\U0001FFFE", "\\U0001FFFF", /* UCA 6.0: noncharacters are treated like unassigned, */
3193 "\\U0002FFFE", "\\U0002FFFF", /* not like ignorable. */
3194 "\\U0003FFFE", "\\U0003FFFF",
3195 "\\U0004FFFE", "\\U0004FFFF",
3196 "\\U0005FFFE", "\\U0005FFFF",
3197 "\\U0006FFFE", "\\U0006FFFF",
3198 "\\U0007FFFE", "\\U0007FFFF",
3199 "\\U0008FFFE", "\\U0008FFFF",
3200 "\\U0009FFFE", "\\U0009FFFF",
3201 "\\U000AFFFE", "\\U000AFFFF",
3202 "\\U000BFFFE", "\\U000BFFFF",
3203 "\\U000CFFFE", "\\U000CFFFF",
3204 "\\U000DFFFE", "\\U000DFFFF",
3205 "\\U000EFFFE", "\\U000EFFFF",
3206 "\\U000FFFFE", "\\U000FFFFF",
3207 "\\U0010FFFE", "\\U0010FFFF",
3208 "\\uFFFF" /* special character with maximum primary weight */
3210 UErrorCode status
= U_ZERO_ERROR
;
3211 UCollator
*coll
= ucol_open("en_US", &status
);
3213 log_verbose("Test non characters\n");
3215 if(U_SUCCESS(status
)) {
3216 genericOrderingTestWithResult(coll
, test
, 35, UCOL_LESS
);
3218 log_err_status(status
, "Unable to open collator\n");
/*
 * Stress test for sort-key compression: builds four strings that share a
 * long run of 'a' and differ only in the last character ('a'..'d'), for
 * every total length from 20 to 499, and checks their order with the
 * "en_US" collator.
 */
static void TestExtremeCompression(void) {
  static char *test[4];
  int32_t j = 0, i = 0;

  for(i = 0; i<4; i++) {
    test[i] = (char *)malloc(2048*sizeof(char));
    if(test[i] == NULL) {
      /* Release whatever was already allocated and give up. */
      log_err("Out of memory allocating test strings\n");
      while(--i >= 0) {
        free(test[i]);
        test[i] = NULL;
      }
      return;
    }
  }

  for(j = 20; j < 500; j++) {
    for(i = 0; i<4; i++) {
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
    genericLocaleStarter("en_US", (const char **)test, 4);
  }

  for(i = 0; i<4; i++) {
    free(test[i]);
    test[i] = NULL; /* the array is static; do not leave dangling pointers */
  }
}
3248 static void TestExtremeCompression(void) {
3249 static char *test
[4];
3250 int32_t j
= 0, i
= 0;
3251 UErrorCode status
= U_ZERO_ERROR
;
3252 UCollator
*coll
= ucol_open("en_US", status
);
3253 for(i
= 0; i
<4; i
++) {
3254 test
[i
] = (char *)malloc(2048*sizeof(char));
3256 for(j
= 10; j
< 2048; j
++) {
3257 for(i
= 0; i
<4; i
++) {
3258 uprv_memset(test
[i
], 'a', (j
-2)*sizeof(char));
3259 test
[i
][j
-1] = (char)('a'+i
);
3263 genericLocaleStarter("en_US", (const char **)test
, 4);
3265 for(j
= 10; j
< 2048; j
++) {
3266 for(i
= 0; i
<1; i
++) {
3267 uprv_memset(test
[i
], 'a', (j
-1)*sizeof(char));
3271 for(i
= 0; i
<4; i
++) {
/*
 * TestSurrogates: tailors supplementary code points (written as surrogate
 * pairs), alone and followed by a letter, and checks the expected order of
 * the 14 strings in test with genericRulesStarter().
 */
3277 static void TestSurrogates(void) {
3278 static const char *test
[] = {
3279 "z","\\ud900\\udc25", "\\ud805\\udc50",
3280 "\\ud800\\udc00y", "\\ud800\\udc00r",
3281 "\\ud800\\udc00f", "\\ud800\\udc00",
3282 "\\ud800\\udc00c", "\\ud800\\udc00b",
3283 "\\ud800\\udc00fa", "\\ud800\\udc00fb",
3288 static const char *rule
=
3289 "&z < \\ud900\\udc25 < \\ud805\\udc50"
3290 "< \\ud800\\udc00y < \\ud800\\udc00r"
3291 "< \\ud800\\udc00f << \\ud800\\udc00"
3292 "< \\ud800\\udc00fa << \\ud800\\udc00fb"
3293 "< \\ud800\\udc00a < c < b" ;
3295 genericRulesStarter(rule
, test
, 14);
3298 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */
/*
 * Each entry of tests supplies a rule string, an ordered list of strings and
 * the number of strings to check; every entry is run through
 * genericRulesStarter().
 * NOTE(review): the entry for "&z<<<\ud900\udc25|a" lists five strings but
 * gives a length of 4, so "zz" is not compared -- confirm intended.
 */
3299 static void TestPrefix(void) {
3302 static const struct {
3304 const char *data
[50];
3314 "&z<<<\\ud900\\udc25|a",
3315 {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
3319 for(i
= 0; i
<(sizeof(tests
)/sizeof(tests
[0])); i
++) {
3320 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
3324 /* This test uses data supplied by Masashiko Maedera to test the implementation */
3325 /* JIS X 4061 collation order implementation */
/*
 * The tables test1 and test2 are each checked against the "ja" locale twice:
 * first with strength set to UCOL_QUATERNARY, then with UCOL_QUATERNARY
 * strength plus UCOL_SHIFTED alternate handling.  The test3 table is
 * declared but its check is commented out.
 */
3326 static void TestNewJapanese(void) {
3328 static const char * const test1
[] = {
3329 "\\u30b7\\u30e3\\u30fc\\u30ec",
3330 "\\u30b7\\u30e3\\u30a4",
3331 "\\u30b7\\u30e4\\u30a3",
3332 "\\u30b7\\u30e3\\u30ec",
3333 "\\u3061\\u3087\\u3053",
3334 "\\u3061\\u3088\\u3053",
3335 "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
3336 "\\u3066\\u30fc\\u305f",
3337 "\\u30c6\\u30fc\\u30bf",
3338 "\\u30c6\\u30a7\\u30bf",
3339 "\\u3066\\u3048\\u305f",
3340 "\\u3067\\u30fc\\u305f",
3341 "\\u30c7\\u30fc\\u30bf",
3342 "\\u30c7\\u30a7\\u30bf",
3343 "\\u3067\\u3048\\u305f",
3344 "\\u3066\\u30fc\\u305f\\u30fc",
3345 "\\u30c6\\u30fc\\u30bf\\u30a1",
3346 "\\u30c6\\u30a7\\u30bf\\u30fc",
3347 "\\u3066\\u3047\\u305f\\u3041",
3348 "\\u3066\\u3048\\u305f\\u30fc",
3349 "\\u3067\\u30fc\\u305f\\u30fc",
3350 "\\u30c7\\u30fc\\u30bf\\u30a1",
3351 "\\u3067\\u30a7\\u305f\\u30a1",
3352 "\\u30c7\\u3047\\u30bf\\u3041",
3353 "\\u30c7\\u30a8\\u30bf\\u30a2",
3355 "\\u3073\\u3085\\u3042",
3356 "\\u3074\\u3085\\u3042",
3357 "\\u3073\\u3085\\u3042\\u30fc",
3358 "\\u30d3\\u30e5\\u30a2\\u30fc",
3359 "\\u3074\\u3085\\u3042\\u30fc",
3360 "\\u30d4\\u30e5\\u30a2\\u30fc",
3361 "\\u30d2\\u30e5\\u30a6",
3362 "\\u30d2\\u30e6\\u30a6",
3363 "\\u30d4\\u30e5\\u30a6\\u30a2",
3364 "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
3365 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
3366 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
3367 "\\u3072\\u3085\\u3093",
3368 "\\u3074\\u3085\\u3093",
3369 "\\u3075\\u30fc\\u308a",
3370 "\\u30d5\\u30fc\\u30ea",
3371 "\\u3075\\u3045\\u308a",
3372 "\\u3075\\u30a5\\u308a",
3373 "\\u3075\\u30a5\\u30ea",
3374 "\\u30d5\\u30a6\\u30ea",
3375 "\\u3076\\u30fc\\u308a",
3376 "\\u30d6\\u30fc\\u30ea",
3377 "\\u3076\\u3045\\u308a",
3378 "\\u30d6\\u30a5\\u308a",
3379 "\\u3077\\u3046\\u308a",
3380 "\\u30d7\\u30a6\\u30ea",
3381 "\\u3075\\u30fc\\u308a\\u30fc",
3382 "\\u30d5\\u30a5\\u30ea\\u30fc",
3383 "\\u3075\\u30a5\\u308a\\u30a3",
3384 "\\u30d5\\u3045\\u308a\\u3043",
3385 "\\u30d5\\u30a6\\u30ea\\u30fc",
3386 "\\u3075\\u3046\\u308a\\u3043",
3387 "\\u30d6\\u30a6\\u30ea\\u30a4",
3388 "\\u3077\\u30fc\\u308a\\u30fc",
3389 "\\u3077\\u30a5\\u308a\\u30a4",
3390 "\\u3077\\u3046\\u308a\\u30fc",
3391 "\\u30d7\\u30a6\\u30ea\\u30a4",
3407 static const char *test2
[] = {
3408 "\\u306f\\u309d", /* H\\u309d */
3409 "\\u30cf\\u30fd", /* K\\u30fd */
3410 "\\u306f\\u306f", /* HH */
3411 "\\u306f\\u30cf", /* HK */
3412 "\\u30cf\\u30cf", /* KK */
3413 "\\u306f\\u309e", /* H\\u309e */
3414 "\\u30cf\\u30fe", /* K\\u30fe */
3415 "\\u306f\\u3070", /* HH\\u309b */
3416 "\\u30cf\\u30d0", /* KK\\u309b */
3417 "\\u306f\\u3071", /* HH\\u309c */
3418 "\\u30cf\\u3071", /* KH\\u309c */
3419 "\\u30cf\\u30d1", /* KK\\u309c */
3420 "\\u3070\\u309d", /* H\\u309b\\u309d */
3421 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
3422 "\\u3070\\u306f", /* H\\u309bH */
3423 "\\u30d0\\u30cf", /* K\\u309bK */
3424 "\\u3070\\u309e", /* H\\u309b\\u309e */
3425 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
3426 "\\u3070\\u3070", /* H\\u309bH\\u309b */
3427 "\\u30d0\\u3070", /* K\\u309bH\\u309b */
3428 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
3429 "\\u3070\\u3071", /* H\\u309bH\\u309c */
3430 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
3431 "\\u3071\\u309d", /* H\\u309c\\u309d */
3432 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
3433 "\\u3071\\u306f", /* H\\u309cH */
3434 "\\u30d1\\u30cf", /* K\\u309cK */
3435 "\\u3071\\u3070", /* H\\u309cH\\u309b */
3436 "\\u3071\\u30d0", /* H\\u309cK\\u309b */
3437 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
3438 "\\u3071\\u3071", /* H\\u309cH\\u309c */
3439 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
3442 static const char *test3[] = {
3470 "\\u30b7\\u30e3\\u30fc\\u30ec",
3473 static const UColAttribute att
[] = { UCOL_STRENGTH
};
3474 static const UColAttributeValue val
[] = { UCOL_QUATERNARY
};
3476 static const UColAttribute attShifted
[] = { UCOL_STRENGTH
, UCOL_ALTERNATE_HANDLING
};
3477 static const UColAttributeValue valShifted
[] = { UCOL_QUATERNARY
, UCOL_SHIFTED
};
/* Pass 1: quaternary strength only. */
3479 genericLocaleStarterWithOptions("ja", test1
, sizeof(test1
)/sizeof(test1
[0]), att
, val
, 1);
3480 genericLocaleStarterWithOptions("ja", test2
, sizeof(test2
)/sizeof(test2
[0]), att
, val
, 1);
3481 /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
/* Pass 2: quaternary strength with shifted alternate handling. */
3482 genericLocaleStarterWithOptions("ja", test1
, sizeof(test1
)/sizeof(test1
[0]), attShifted
, valShifted
, 2);
3483 genericLocaleStarterWithOptions("ja", test2
, sizeof(test2
)/sizeof(test2
[0]), attShifted
, valShifted
, 2);
/*
 * Tailors U+7C071 to be equal to U+7C070 (both written as escaped surrogate
 * pairs in the rule) and passes the rule and the test[] strings to
 * genericRulesStarterWithResult() with UCOL_EQUAL as the result argument.
 */
3486 static void TestStrCollIdenticalPrefix(void) {
3487 const char* rule
= "&\\ud9b0\\udc70=\\ud9b0\\udc71";
3488 const char* test
[] = {
3492 genericRulesStarterWithResult(rule
, test
, sizeof(test
)/sizeof(test
[0]), UCOL_EQUAL
);
3494 /* Contractions should have all their canonically equivalent */
3495 /* strings included */
/*
 * Table-driven: each tests[] entry carries a rule string (.rules), up to ten
 * escaped strings (.data) and the number of strings in use (.len).
 */
3496 static void TestContractionClosure(void) {
3497 static const struct {
3499 const char *data
[10];
3502 { "&b=\\u00e4\\u00e4",
3503 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
3505 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
/* Run every entry through genericRulesStarterWithResult(), passing UCOL_EQUAL as the result argument. */
3510 for(i
= 0; i
<(sizeof(tests
)/sizeof(tests
[0])); i
++) {
3511 genericRulesStarterWithResult(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
, UCOL_EQUAL
);
3515 /* This test also fails */
/*
 * Runs rules that combine [before 3] with tailorings of small/half-width
 * kana through genericRulesStarter(), first from the tests[] table and then
 * for rule1/rule2 (which additionally use the '|' prefix syntax) with the
 * local test[] strings. The trailing part opens a collator from rule1 and
 * logs, in verbose mode, the collation elements of each test[] string.
 */
3516 static void TestBeforePrefixFailure(void) {
3517 static const struct {
3519 const char *data
[10];
3523 "&[before 3]\\uff41 <<< x",
3524 {"x", "\\uff41"}, 2 },
3525 { "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3526 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
3527 "&[before 3]\\u30a7<<<\\u30a9",
3528 {"\\u30a9", "\\u30a7"}, 2 },
3529 { "&[before 3]\\u30a7<<<\\u30a9"
3530 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3531 "&\\u30A8=\\u30A8=\\u3048=\\uff74",
3532 {"\\u30a9", "\\u30a7"}, 2 },
3537 for(i
= 0; i
<(sizeof(tests
)/sizeof(tests
[0])); i
++) {
3538 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
3543 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3544 "&\\u30A8=\\u30A8=\\u3048=\\uff74"
3545 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
3547 "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
3548 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3549 "&\\u30A8=\\u30A8=\\u3048=\\uff74";
3550 const char* test
[] = {
3551 "\\u30c6\\u30fc\\u30bf",
3552 "\\u30c6\\u30a7\\u30bf",
3554 genericRulesStarter(rule1
, test
, sizeof(test
)/sizeof(test
[0]));
3555 genericRulesStarter(rule2
, test
, sizeof(test
)/sizeof(test
[0]));
3556 /* this piece of code should be in some sort of verbose mode */
3557 /* it gets the collation elements for elements and prints them */
3558 /* This is useful when trying to see whether the problem is */
3560 UErrorCode status
= U_ZERO_ERROR
;
3562 UCollationElements
*it
= NULL
;
3565 uint32_t uStringLen
;
3566 UCollator
*coll
= NULL
;
3568 uStringLen
= u_unescape(rule1
, string
, 256);
3570 coll
= ucol_openRules(string
, uStringLen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
3572 /*coll = ucol_open("ja_JP_JIS", &status);*/
/* The iterator is opened on an empty text (length 0); the real text is supplied per string via ucol_setText(). */
3573 it
= ucol_openElements(coll
, string
, 0, &status
);
/* For each test string: log it, load it into the iterator and log every CE until UCOL_NULLORDER. */
3575 for(i
= 0; i
< sizeof(test
)/sizeof(test
[0]); i
++) {
3576 log_verbose("%s\n", test
[i
]);
3577 uStringLen
= u_unescape(test
[i
], string
, 256);
3578 ucol_setText(it
, string
, uStringLen
, &status
);
3580 while((CE
=ucol_next(it
, &status
)) != UCOL_NULLORDER
) {
3581 log_verbose("%08X\n", CE
);
3587 ucol_closeElements(it
);
/*
 * Unescapes rule1 (a rule that uses the '|' prefix syntax) into a UChar
 * buffer of at most 256 units and opens a collator from it with
 * ucol_openRules() using default attributes and no parse-error output.
 */
3593 static void TestPrefixCompose(void) {
3595 "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
3597 const char* test[] = {
3598 "\\u30c6\\u30fc\\u30bf",
3599 "\\u30c6\\u30a7\\u30bf",
3603 UErrorCode status
= U_ZERO_ERROR
;
3605 /*UCollationElements *it = NULL;*/
3608 uint32_t uStringLen
;
3609 UCollator
*coll
= NULL
;
3611 uStringLen
= u_unescape(rule1
, string
, 256);
3613 coll
= ucol_openRules(string
, uStringLen
, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
3621 [last variable] last variable value
3622 [last primary ignorable] largest CE for primary ignorable
3623 [last secondary ignorable] largest CE for secondary ignorable
3624 [last tertiary ignorable] largest CE for tertiary ignorable
3625 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
/*
 * Table-driven test of rule options: each tests[] entry pairs a rule string
 * built from [before N] and the [first ...]/[last ...] reset positions with a
 * list of strings (.data) and the number of strings in use (.len). Every
 * entry is handed to genericRulesStarter().
 * The *_CHAR_STRING macros name characters tied to the current UCA data and
 * have to be revisited when the UCA version changes (see the notes below).
 */
3628 static void TestRuleOptions(void) {
3629 /* values here are hardcoded and are correct for the current UCA
3630 * when the UCA changes, one might be forced to change these
3635 * These strings contain the last character before [variable top]
3636 * and the first and second characters (by primary weights) after it.
3637 * See FractionalUCA.txt. For example:
3638 [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
3639 [variable top = 0C FE]
3640 [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
3642 00B4; [0D 0C, 05, 05]
3644 * Note: Starting with UCA 6.0, the [variable top] collation element
3645 * is not the weight of any character or string,
3646 * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
3648 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
3649 #define FIRST_REGULAR_CHAR_STRING "\\u0060"
3650 #define SECOND_REGULAR_CHAR_STRING "\\u00B4"
3653 * This string has to match the character that has the [last regular] weight
3654 * which changes with each UCA version.
3655 * See the bottom of FractionalUCA.txt which says something like
3656 [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
3658 * Note: Starting with UCA 6.0, the [last regular] collation element
3659 * is not the weight of any character or string,
3660 * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
3662 #define LAST_REGULAR_CHAR_STRING "\\U0001342E"
3664 static const struct {
3666 const char *data
[10];
3669 /* - all befores here amount to zero */
3670 { "&[before 3][first tertiary ignorable]<<<a",
3671 { "\\u0000", "a"}, 2
3672 }, /* you cannot go before first tertiary ignorable */
3674 { "&[before 3][last tertiary ignorable]<<<a",
3675 { "\\u0000", "a"}, 2
3676 }, /* you cannot go before last tertiary ignorable */
3678 { "&[before 3][first secondary ignorable]<<<a",
3679 { "\\u0000", "a"}, 2
3680 }, /* you cannot go before first secondary ignorable */
3682 { "&[before 3][last secondary ignorable]<<<a",
3683 { "\\u0000", "a"}, 2
3684 }, /* you cannot go before first secondary ignorable */
3686 /* 'normal' befores */
3688 { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
3689 { "c", "b", "\\u0332", "a" }, 4
3692 /* we don't have a code point that corresponds to
3693 * the last primary ignorable
3695 { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
3696 { "\\u0332", "\\u20e3", "c", "b", "a" }, 5
3699 { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
3700 { "c", "b", "\\u0009", "a", "\\u000a" }, 5
3703 { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
3704 { LAST_VARIABLE_CHAR_STRING
, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING
}, 5
3707 { "&[first regular]<a"
3708 "&[before 1][first regular]<b",
3709 { "b", FIRST_REGULAR_CHAR_STRING
, "a", SECOND_REGULAR_CHAR_STRING
}, 4
3712 { "&[before 1][last regular]<b"
3713 "&[last regular]<a",
3714 { LAST_REGULAR_CHAR_STRING
, "b", /* [last regular] */ "a", "\\u4e00" }, 4
3717 { "&[before 1][first implicit]<b"
3718 "&[first implicit]<a",
3719 { "b", "\\u4e00", "a", "\\u4e01"}, 4
3722 { "&[before 1][last implicit]<b"
3723 "&[last implicit]<a",
3724 { "b", "\\U0010FFFD", "a" }, 3
3727 { "&[last variable]<z"
3728 "&[last primary ignorable]<x"
3729 "&[last secondary ignorable]<<y"
3730 "&[last tertiary ignorable]<<<w"
3732 {"\\ufffb", "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING
, "z", "u"}, 7
/* Run every table entry; presumably genericRulesStarter() checks that .data sorts in the listed order - confirm against the helper. */
3738 for(i
= 0; i
<(sizeof(tests
)/sizeof(tests
[0])); i
++) {
3739 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
/*
 * Passes rules containing an [optimize [...]] option (here for the range
 * U+AC00-U+D7FF) to genericRulesStarter(), one tests[] entry at a time.
 * As the comment inside the function says, this only exercises the rule
 * processing rather than verifying a specific result.
 */
3744 static void TestOptimize(void) {
3745 /* this is not really a test - just trying out
3746 * whether copying of UCA contents will fail
3747 * Cannot really test, since the functionality
3750 static const struct {
3752 const char *data
[10];
3755 /* - all befores here amount to zero */
3756 { "[optimize [\\uAC00-\\uD7FF]]",
3761 for(i
= 0; i
<(sizeof(tests
)/sizeof(tests
[0])); i
++) {
3762 genericRulesStarter(tests
[i
].rules
, tests
[i
].data
, tests
[i
].len
);
3767 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
3768 weiv ucol_strcollIter?
3769 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
3770 weiv these are the input strings?
3771 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
3772 weiv will check - could be a problem with utf-8 iterator
3773 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
3775 cycheng@ca.ibm.c... note that we have a standalone high surrogate
3776 weiv that doesn't sound right
3777 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
3778 weiv so you have two strings, you convert them to utf-8 and to utf-16BE
3779 cycheng@ca.ibm.c... yes
3780 weiv and then do the comparison
3781 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
3782 weiv utf-16 strings look like a little endian ones in the example you sent me
3783 weiv It could be a bug - let me try to test it out
3784 cycheng@ca.ibm.c... ok
3785 cycheng@ca.ibm.c... we can wait till the conf. call
3786 cycheng@ca.ibm.c... next week
3787 weiv that would be great
3789 weiv I might be wrong
3790 weiv let me play with it some more
3791 cycheng@ca.ibm.c... ok
3792 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be
3793 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
3794 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
3796 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
3798 cycheng@ca.ibm.c... the 4 strings we sent are just samples
/*
 * Compares the same two strings twice with ucol_strcollIter() on the default
 * ("") collator with normalization switched on: once through UTF-16BE
 * iterators and once through UTF-8 iterators. String 0 is the byte sequence
 * for D800 0021 (a lone lead surrogate followed by '!'), string 1 is
 * FFFC 0062. An error is logged when the two comparison results differ.
 */
3801 static void Alexis(void) {
3802 UErrorCode status
= U_ZERO_ERROR
;
3803 UCollator
*coll
= ucol_open("", &status
);
3806 const char utf16be
[2][4] = {
3807 { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
3808 { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
3811 const char utf8
[2][4] = {
3812 { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
3813 { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
3816 UCharIterator iterU161
, iterU162
;
3817 UCharIterator iterU81
, iterU82
;
3819 UCollationResult resU16
, resU8
;
/* Both encodings are 4 bytes long for each string. */
3821 uiter_setUTF16BE(&iterU161
, utf16be
[0], 4);
3822 uiter_setUTF16BE(&iterU162
, utf16be
[1], 4);
3824 uiter_setUTF8(&iterU81
, utf8
[0], 4);
3825 uiter_setUTF8(&iterU82
, utf8
[1], 4);
3827 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
3829 resU16
= ucol_strcollIter(coll
, &iterU161
, &iterU162
, &status
);
3830 resU8
= ucol_strcollIter(coll
, &iterU81
, &iterU82
, &status
);
3833 if(resU16
!= resU8
) {
3834 log_err("different results\n");
/*
 * For every string pair in pairs[]: unescape both strings to UTF-16, compare
 * them with ucol_strcoll(), then compare them again with ucol_strcollIter()
 * through UTF-16BE iterators (bytes produced by a "UTF16BE" converter) and
 * through UTF-8 iterators (bytes produced by u_strToUTF8()). The collator is
 * the default ("") one with normalization on. A difference between the
 * UTF-16 result and either iterator result is reported with log_verbose()
 * only, so it does not fail the test.
 * All buffers hold CMSCOLL_ALEXIS2_BUFFER_SIZE elements.
 */
3841 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
3842 static void Alexis2(void) {
3843 UErrorCode status
= U_ZERO_ERROR
;
3844 UChar U16Source
[CMSCOLL_ALEXIS2_BUFFER_SIZE
], U16Target
[CMSCOLL_ALEXIS2_BUFFER_SIZE
];
3845 char U16BESource
[CMSCOLL_ALEXIS2_BUFFER_SIZE
], U16BETarget
[CMSCOLL_ALEXIS2_BUFFER_SIZE
];
3846 char U8Source
[CMSCOLL_ALEXIS2_BUFFER_SIZE
], U8Target
[CMSCOLL_ALEXIS2_BUFFER_SIZE
];
3847 int32_t U16LenS
= 0, U16LenT
= 0, U16BELenS
= 0, U16BELenT
= 0, U8LenS
= 0, U8LenT
= 0;
3849 UConverter
*conv
= NULL
;
3851 UCharIterator U16BEItS
, U16BEItT
;
3852 UCharIterator U8ItS
, U8ItT
;
3854 UCollationResult resU16
, resU16BE
, resU8
;
3856 static const char* const pairs
[][2] = {
3857 { "\\ud800\\u0021", "\\uFFFC\\u0062"},
3858 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
3859 { "\\u0E40\\u0021", "\\u00A1\\u0021"},
3860 { "\\u0E40\\u0021", "\\uFE57\\u0062"},
3861 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
3862 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
3863 { "\\u0020", "\\u0020\\u0000"}
3865 5F20 (my result here)
3867 5F20 (your result here)
3873 UCollator
*coll
= ucol_open("", &status
);
3874 if(status
== U_FILE_ACCESS_ERROR
) {
3875 log_data_err("Is your data around?\n");
3877 } else if(U_FAILURE(status
)) {
3878 log_err("Error opening collator\n");
3881 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
3882 conv
= ucnv_open("UTF16BE", &status
);
3883 for(i
= 0; i
< sizeof(pairs
)/sizeof(pairs
[0]); i
++) {
3884 U16LenS
= u_unescape(pairs
[i
][0], U16Source
, CMSCOLL_ALEXIS2_BUFFER_SIZE
);
3885 U16LenT
= u_unescape(pairs
[i
][1], U16Target
, CMSCOLL_ALEXIS2_BUFFER_SIZE
);
3887 resU16
= ucol_strcoll(coll
, U16Source
, U16LenS
, U16Target
, U16LenT
);
3889 log_verbose("Result of strcoll is %i\n", resU16
);
3891 U16BELenS
= ucnv_fromUChars(conv
, U16BESource
, CMSCOLL_ALEXIS2_BUFFER_SIZE
, U16Source
, U16LenS
, &status
);
3892 U16BELenT
= ucnv_fromUChars(conv
, U16BETarget
, CMSCOLL_ALEXIS2_BUFFER_SIZE
, U16Target
, U16LenT
, &status
);
3894 /* use the original sizes, as the result from converter is in bytes */
/* NOTE(review): the ICU documentation for uiter_setUTF16BE() describes its length argument as a byte count, while U16LenS/U16LenT are UChar counts from u_unescape(); U16BELenS/U16BELenT hold the converter's byte counts - confirm which is intended. */
3895 uiter_setUTF16BE(&U16BEItS
, U16BESource
, U16LenS
);
3896 uiter_setUTF16BE(&U16BEItT
, U16BETarget
, U16LenT
);
3898 resU16BE
= ucol_strcollIter(coll
, &U16BEItS
, &U16BEItT
, &status
);
3900 log_verbose("Result of U16BE is %i\n", resU16BE
);
3902 if(resU16
!= resU16BE
) {
3903 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs
[i
][0], pairs
[i
][1]);
3906 u_strToUTF8(U8Source
, CMSCOLL_ALEXIS2_BUFFER_SIZE
, &U8LenS
, U16Source
, U16LenS
, &status
);
3907 u_strToUTF8(U8Target
, CMSCOLL_ALEXIS2_BUFFER_SIZE
, &U8LenT
, U16Target
, U16LenT
, &status
);
3909 uiter_setUTF8(&U8ItS
, U8Source
, U8LenS
);
3910 uiter_setUTF8(&U8ItT
, U8Target
, U8LenT
);
3912 resU8
= ucol_strcollIter(coll
, &U8ItS
, &U8ItT
, &status
);
3914 if(resU16
!= resU8
) {
3915 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs
[i
][0], pairs
[i
][1]);
3924 static void TestHebrewUCA(void) {
3925 UErrorCode status
= U_ZERO_ERROR
;
3926 static const char *first
[] = {
3927 "d790d6b8d79cd795d6bcd7a9",
3928 "d790d79cd79ed7a7d799d799d7a1",
3929 "d790d6b4d79ed795d6bcd7a9",
3932 char utf8String
[3][256];
3933 UChar utf16String
[3][256];
3935 int32_t i
= 0, j
= 0;
3936 int32_t sizeUTF8
[3];
3937 int32_t sizeUTF16
[3];
3939 UCollator
*coll
= ucol_open("", &status
);
3940 if (U_FAILURE(status
)) {
3941 log_err_status(status
, "Could not open UCA collation %s\n", u_errorName(status
));
3944 /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
3946 for(i
= 0; i
< sizeof(first
)/sizeof(first
[0]); i
++) {
3947 sizeUTF8
[i
] = u_parseUTF8(first
[i
], -1, utf8String
[i
], 256, &status
);
3948 u_strFromUTF8(utf16String
[i
], 256, &sizeUTF16
[i
], utf8String
[i
], sizeUTF8
[i
], &status
);
3949 log_verbose("%i: ");
3950 for(j
= 0; j
< sizeUTF16
[i
]; j
++) {
3951 /*log_verbose("\\u%04X", utf16String[i][j]);*/
3952 log_verbose("%04X", utf16String
[i
][j
]);
3956 for(i
= 0; i
< sizeof(first
)/sizeof(first
[0])-1; i
++) {
3957 for(j
= i
+ 1; j
< sizeof(first
)/sizeof(first
[0]); j
++) {
3958 doTest(coll
, utf16String
[i
], utf16String
[j
], UCOL_LESS
);
/*
 * Calls ucol_nextSortKeyPart() on the default ("") collator for strings that
 * contain unpaired surrogates (cases[]), starting each call from a zeroed
 * two-word state and asking for up to 256 key bytes. The returned length is
 * stored in pKeyLen; the visible code only logs "Done" afterwards.
 */
3966 static void TestPartialSortKeyTermination(void) {
3967 static const char* cases
[] = {
3968 "\\u1234\\u1234\\udc00",
3969 "\\udc00\\ud800\\ud800"
3972 int32_t i
= sizeof(UCollator
);
3974 UErrorCode status
= U_ZERO_ERROR
;
3976 UCollator
*coll
= ucol_open("", &status
);
3980 UChar currCase
[256];
3982 int32_t pKeyLen
= 0;
3986 for(i
= 0; i
< sizeof(cases
)/sizeof(cases
[0]); i
++) {
3987 uint32_t state
[2] = {0, 0};
3988 length
= u_unescape(cases
[i
], currCase
, 256);
3989 uiter_setString(&iter
, currCase
, length
);
3990 pKeyLen
= ucol_nextSortKeyPart(coll
, &iter
, state
, key
, 256, &status
);
3992 log_verbose("Done\n");
/*
 * For every locale in locales[]: opens a collator, sets primary strength and
 * switches the case level off, then unescapes each pair of neighbouring
 * entries cases[j-1]/cases[j] and runs doTest() on them with UCOL_EQUAL.
 * The collator is closed again before the next locale.
 * NOTE(review): no check of status after ucol_open() is visible here.
 */
3998 static void TestSettings(void) {
3999 static const char* cases
[] = {
4004 static const char* locales
[] = {
4009 UErrorCode status
= U_ZERO_ERROR
;
4011 int32_t i
= 0, j
= 0;
4013 UChar source
[256], target
[256];
4014 int32_t sLen
= 0, tLen
= 0;
4016 UCollator
*collateObject
= NULL
;
4017 for(i
= 0; i
< sizeof(locales
)/sizeof(locales
[0]); i
++) {
4018 collateObject
= ucol_open(locales
[i
], &status
);
4019 ucol_setStrength(collateObject
, UCOL_PRIMARY
);
4020 ucol_setAttribute(collateObject
, UCOL_CASE_LEVEL
, UCOL_OFF
, &status
);
4021 for(j
= 1; j
< sizeof(cases
)/sizeof(cases
[0]); j
++) {
4022 sLen
= u_unescape(cases
[j
-1], source
, 256);
4024 tLen
= u_unescape(cases
[j
], target
, 256);
4026 doTest(collateObject
, source
, target
, UCOL_EQUAL
);
4028 ucol_close(collateObject
);
/*
 * Equality checks for one pair of collators opened for the same locale.
 *  - source and target must be equal according to ucol_equals().
 *  - If source's requested locale equals its actual locale, target is
 *    replaced by a ucol_safeClone() of source, which must also be equal.
 *  - UCOL_FRENCH_COLLATION is then set on target to the opposite of source's
 *    value, after which the two must no longer be equal.
 * Each failed check is reported with log_err(). errorNo starts at 0 and is
 * presumably the value returned to the caller - confirm.
 */
4032 static int32_t TestEqualsForCollator(const char* locName
, UCollator
*source
, UCollator
*target
) {
4033 UErrorCode status
= U_ZERO_ERROR
;
4034 int32_t errorNo
= 0;
4035 /*const UChar *sourceRules = NULL;*/
4036 /*int32_t sourceRulesLen = 0;*/
4037 UColAttributeValue french
= UCOL_OFF
;
4038 int32_t cloneSize
= 0;
4040 if(!ucol_equals(source
, target
)) {
4041 log_err("Same collators, different address not equal\n");
4045 if(uprv_strcmp(ucol_getLocaleByType(source
, ULOC_REQUESTED_LOCALE
, &status
), ucol_getLocaleByType(source
, ULOC_ACTUAL_LOCALE
, &status
)) == 0) {
4046 /* currently, safeClone is implemented through getRules/openRules
4047 * so it is the same as the test below - I will comment that test out.
4050 target
= ucol_safeClone(source
, NULL
, &cloneSize
, &status
);
4051 if(U_FAILURE(status
)) {
4052 log_err("Error creating clone\n");
4056 if(!ucol_equals(source
, target
)) {
4057 log_err("Collator different from it's clone\n");
4060 french
= ucol_getAttribute(source
, UCOL_FRENCH_COLLATION
, &status
);
4061 if(french
== UCOL_ON
) {
4062 ucol_setAttribute(target
, UCOL_FRENCH_COLLATION
, UCOL_OFF
, &status
);
4064 ucol_setAttribute(target
, UCOL_FRENCH_COLLATION
, UCOL_ON
, &status
);
4066 if(U_FAILURE(status
)) {
4067 log_err("Error setting attributes\n");
4071 if(ucol_equals(source
, target
)) {
4072 log_err("Collators same even when options changed\n");
4076 /* commented out since safeClone uses exactly the same technique */
4078 sourceRules = ucol_getRules(source, &sourceRulesLen);
4079 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4080 if(U_FAILURE(status)) {
4081 log_err("Error instantiating target from rules\n");
4085 if(!ucol_equals(source, target)) {
4086 log_err("Collator different from collator that was created from the same rules\n");
/*
 * Tests ucol_equals() in three steps:
 *  1. The rules[] strings are concatenated in forward order for the source
 *     collator and in reverse order for the target collator; both are opened
 *     with ucol_openRules() and must compare equal.
 *  2. Two collators opened for "root" are checked with TestEqualsForCollator().
 *  3. The same is done for every locale reported by uloc_getAvailable().
 */
4096 static void TestEquals(void) {
4097 /* ucol_equals is not currently a public API. There is a chance that it will become
4098 * something like this, but currently it is only used by RuleBasedCollator::operator==
4100 /* test whether the two collators instantiated from the same locale are equal */
4101 UErrorCode status
= U_ZERO_ERROR
;
4102 UParseError parseError
;
4103 int32_t noOfLoc
= uloc_countAvailable();
4104 const char *locName
= NULL
;
4105 UCollator
*source
= NULL
, *target
= NULL
;
4108 const char* rules
[] = {
4109 "&l < lj <<< Lj <<< LJ",
4110 "&n < nj <<< Nj <<< NJ",
4115 const char* badRules[] = {
4117 "&n < nj <<< nJ <<< NJ",
4119 "&AE <<< \\u00c4 <<< x"
4123 UChar sourceRules
[1024], targetRules
[1024];
4124 int32_t sourceRulesSize
= 0, targetRulesSize
= 0;
4125 int32_t rulesSize
= sizeof(rules
)/sizeof(rules
[0]);
/* Append rule i to the source buffer and rule (rulesSize-i-1) to the target buffer, each bounded by the space left in its 1024-unit buffer. */
4127 for(i
= 0; i
< rulesSize
; i
++) {
4128 sourceRulesSize
+= u_unescape(rules
[i
], sourceRules
+sourceRulesSize
, 1024 - sourceRulesSize
);
4129 targetRulesSize
+= u_unescape(rules
[rulesSize
-i
-1], targetRules
+targetRulesSize
, 1024 - targetRulesSize
);
4132 source
= ucol_openRules(sourceRules
, sourceRulesSize
, UCOL_DEFAULT
, UCOL_DEFAULT
, &parseError
, &status
);
4133 if(status
== U_FILE_ACCESS_ERROR
) {
4134 log_data_err("Is your data around?\n");
4136 } else if(U_FAILURE(status
)) {
4137 log_err("Error opening collator\n");
4140 target
= ucol_openRules(targetRules
, targetRulesSize
, UCOL_DEFAULT
, UCOL_DEFAULT
, &parseError
, &status
);
4141 if(!ucol_equals(source
, target
)) {
4142 log_err("Equivalent collators not equal!\n");
4147 source
= ucol_open("root", &status
);
4148 target
= ucol_open("root", &status
);
4149 log_verbose("Testing root\n");
4150 if(!ucol_equals(source
, source
)) {
4151 log_err("Same collator not equal\n");
/* NOTE(review): locName is still NULL at this point, and the log_err() below passes it although its format string has no conversion for it. */
4153 if(TestEqualsForCollator(locName
, source
, target
)) {
4154 log_err("Errors for root\n", locName
);
4158 for(i
= 0; i
<noOfLoc
; i
++) {
4159 status
= U_ZERO_ERROR
;
4160 locName
= uloc_getAvailable(i
);
4161 /*if(hasCollationElements(locName)) {*/
4162 log_verbose("Testing equality for locale %s\n", locName
);
4163 source
= ucol_open(locName
, &status
);
4164 target
= ucol_open(locName
, &status
);
4165 if (U_FAILURE(status
)) {
4166 log_err("Error opening collator for locale %s %s\n", locName
, u_errorName(status
));
4169 if(TestEqualsForCollator(locName
, source
, target
)) {
4170 log_err("Errors for locale %s\n", locName
);
4177 static void TestJ2726(void) {
4178 UChar a
[2] = { 0x61, 0x00 }; /*"a"*/
4179 UChar aSpace
[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
4180 UChar spaceA
[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
4181 UErrorCode status
= U_ZERO_ERROR
;
4182 UCollator
*coll
= ucol_open("en", &status
);
4183 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
4184 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_PRIMARY
, &status
);
4185 doTest(coll
, a
, aSpace
, UCOL_EQUAL
);
4186 doTest(coll
, aSpace
, a
, UCOL_EQUAL
);
4187 doTest(coll
, a
, spaceA
, UCOL_EQUAL
);
4188 doTest(coll
, spaceA
, a
, UCOL_EQUAL
);
4189 doTest(coll
, spaceA
, aSpace
, UCOL_EQUAL
);
4190 doTest(coll
, aSpace
, spaceA
, UCOL_EQUAL
);
/*
 * Opens a collator from the buffer r twice with ucol_openRules():
 *  - with a rule length of 1, which is expected to fail (an error is logged
 *    if the call succeeds);
 *  - with a rule length of 0 (empty rules), which is expected to succeed
 *    (an error is logged if the call fails).
 * status is reset to U_ZERO_ERROR between the two attempts.
 */
4194 static void NullRule(void) {
4196 UErrorCode status
= U_ZERO_ERROR
;
4197 UCollator
*coll
= ucol_openRules(r
, 1, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
4198 if(U_SUCCESS(status
)) {
4199 log_err("This should have been an error!\n");
4202 status
= U_ZERO_ERROR
;
4204 coll
= ucol_openRules(r
, 0, UCOL_DEFAULT
, UCOL_DEFAULT
, NULL
, &status
);
4205 if(U_FAILURE(status
)) {
4206 log_err_status(status
, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status
));
4213 * Test for numeric collation: strings containing digit sequences are
4214 * compared with the UCOL_NUMERIC_COLLATION attribute switched on.
/*
 * Structure of this test:
 *  - The string tables basicTestStrings, thirtyTwoBitNumericStrings,
 *    longNumericStrings, foreignDigits, supplementaryDigits and evenZeroes
 *    are each passed to genericLocaleStarterWithOptions() together with the
 *    single attribute/value pair UCOL_NUMERIC_COLLATION = UCOL_ON
 *    (locale "root", or "en_US" for foreignDigits).
 *  - preZeroTestStrings is checked separately on a "root" collator that has
 *    UCOL_NUMERIC_COLLATION switched on, via genericOrderingTestWithResult()
 *    with UCOL_EQUAL as the result argument.
 */
4216 static void TestNumericCollation(void)
4218 UErrorCode status
= U_ZERO_ERROR
;
4220 const static char *basicTestStrings
[]={
4233 const static char *preZeroTestStrings
[]={
4241 "avery000000010000",
4244 const static char *thirtyTwoBitNumericStrings
[]={
4251 const static char *longNumericStrings
[]={
4252 /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
4253 In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
4254 are treated as multiple collation elements. */
4255 "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
4256 "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
4257 "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
4258 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
4259 "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
4260 "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
4261 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
4262 "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
4263 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
4264 "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
4267 const static char *supplementaryDigits
[] = {
4268 "\\uD835\\uDFCE", /* 0 */
4269 "\\uD835\\uDFCF", /* 1 */
4270 "\\uD835\\uDFD0", /* 2 */
4271 "\\uD835\\uDFD1", /* 3 */
4272 "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
4273 "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
4274 "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
4275 "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
4276 "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
4277 "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
4280 const static char *foreignDigits
[] = {
4295 const static char *evenZeroes
[] = {
4302 UColAttribute att
= UCOL_NUMERIC_COLLATION
;
4303 UColAttributeValue val
= UCOL_ON
;
4305 /* Open our collator. */
4306 UCollator
* coll
= ucol_open("root", &status
);
4307 if (U_FAILURE(status
)){
4308 log_err_status(status
, "ERROR: in using ucol_open() -> %s\n",
4309 myErrorName(status
));
4312 genericLocaleStarterWithOptions("root", basicTestStrings
, sizeof(basicTestStrings
)/sizeof(basicTestStrings
[0]), &att
, &val
, 1);
4313 genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings
, sizeof(thirtyTwoBitNumericStrings
)/sizeof(thirtyTwoBitNumericStrings
[0]), &att
, &val
, 1);
4314 genericLocaleStarterWithOptions("root", longNumericStrings
, sizeof(longNumericStrings
)/sizeof(longNumericStrings
[0]), &att
, &val
, 1);
4315 genericLocaleStarterWithOptions("en_US", foreignDigits
, sizeof(foreignDigits
)/sizeof(foreignDigits
[0]), &att
, &val
, 1);
4316 genericLocaleStarterWithOptions("root", supplementaryDigits
, sizeof(supplementaryDigits
)/sizeof(supplementaryDigits
[0]), &att
, &val
, 1);
4317 genericLocaleStarterWithOptions("root", evenZeroes
, sizeof(evenZeroes
)/sizeof(evenZeroes
[0]), &att
, &val
, 1);
4319 /* Setting up our collator to do digits. */
4320 ucol_setAttribute(coll
, UCOL_NUMERIC_COLLATION
, UCOL_ON
, &status
);
4321 if (U_FAILURE(status
)){
4322 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
4323 myErrorName(status
));
4328 Testing that prepended zeroes still yield the correct collation behavior.
4329 We expect that every element in our strings array will be equal.
4331 genericOrderingTestWithResult(coll
, preZeroTestStrings
, sizeof(preZeroTestStrings
)/sizeof(preZeroTestStrings
[0]), UCOL_EQUAL
);
/*
 * Compares U+0FB2 U+0591 U+0F71 'a' with U+0FB2 U+0F71 'a' on the default
 * ("") collator with normalization switched on. The pair is checked with
 * doTest() and again with a direct ucol_strcoll() call on the
 * NUL-terminated buffers (length -1); both expect UCOL_EQUAL. Finally the
 * same two strings are passed to genericLocaleStarterWithResult() with
 * UCOL_EQUAL.
 */
4336 static void TestTibetanConformance(void)
4338 const char* test
[] = {
4339 "\\u0FB2\\u0591\\u0F71\\u0061",
4340 "\\u0FB2\\u0F71\\u0061"
4343 UErrorCode status
= U_ZERO_ERROR
;
4344 UCollator
*coll
= ucol_open("", &status
);
4348 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
4349 if (U_SUCCESS(status
)) {
4350 u_unescape(test
[0], source
, 100);
4351 u_unescape(test
[1], target
, 100);
4352 doTest(coll
, source
, target
, UCOL_EQUAL
);
4353 result
= ucol_strcoll(coll
, source
, -1, target
, -1);
4354 log_verbose("result %d\n", result
);
4355 if (UCOL_EQUAL
!= result
) {
4356 log_err("Tibetan comparison error\n");
4361 genericLocaleStarterWithResult("", test
, 2, UCOL_EQUAL
);
/*
 * Hands two three-character Han strings that share their first character to
 * genericLocaleStarter() for the "zh__PINYIN" locale.
 */
static void TestPinyinProblem(void) {
    static const char *test[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };
    const size_t testCount = sizeof(test) / sizeof(test[0]);

    genericLocaleStarter("zh__PINYIN", test, testCount);
}
/* Largest raw value accepted by showImplicit() and iterated by TestImplicitGeneration(). */
#define TST_UCOL_MAX_INPUT 0x220001
/*
 * Bit masks applied to implicit weights. They are defined without a trailing
 * semicolon so that they can also be used inside larger expressions.
 */
#define topByte 0xFF000000
#define bottomByte 0xFF
#define fourBytes 0xFFFFFFFF
4375 static void showImplicit(UChar32 i
) {
4376 if (i
>= 0 && i
<= TST_UCOL_MAX_INPUT
) {
4377 log_verbose("%08X\t%08X\n", i
, uprv_uca_getImplicitFromRaw(i
));
/*
 * Consistency test for the implicit-weight helpers. For every raw value i in
 * 0..TST_UCOL_MAX_INPUT:
 *  - the implicit weight from uprv_uca_getImplicitFromRaw(i) must map back
 *    to i through uprv_uca_getRawFromImplicit();
 *  - every value strictly between the previous weight and the current one
 *    must be rejected by uprv_uca_getRawFromImplicit() (result -1);
 *  - uprv_uca_getRawFromCodePoint()/uprv_uca_getCodePointFromRaw() must
 *    round-trip i.
 * The last three raw values are finally logged with showImplicit().
 */
4381 static void TestImplicitGeneration(void) {
4382 UErrorCode status
= U_ZERO_ERROR
;
4385 UChar32 i
= 0, j
= 0;
4386 UChar32 roundtrip
= 0;
4387 UChar32 lastBottom
= 0;
4388 UChar32 currentBottom
= 0;
4389 UChar32 lastTop
= 0;
4390 UChar32 currentTop
= 0;
4392 UCollator
*coll
= ucol_open("root", &status
);
4393 if(U_FAILURE(status
)) {
4394 log_err_status(status
, "Couldn't open UCA -> %s\n", u_errorName(status
));
4398 uprv_uca_getRawFromImplicit(0xE20303E7);
4400 for (i
= 0; i
<= TST_UCOL_MAX_INPUT
; ++i
) {
4401 current
= uprv_uca_getImplicitFromRaw(i
) & fourBytes
;
4403 /* check that it round-trips AND that all intervening ones are illegal*/
4404 roundtrip
= uprv_uca_getRawFromImplicit(current
);
4405 if (roundtrip
!= i
) {
4406 log_err("No roundtrip %08X\n", i
);
4409 for (j
= last
+ 1; j
< current
; ++j
) {
4410 roundtrip
= uprv_uca_getRawFromImplicit(j
);
4411 /* raise an error if it *doesn't* find an error*/
4412 if (roundtrip
!= -1) {
4413 log_err("Fails to recognize illegal %08X\n", j
);
4417 /* now do other consistency checks*/
/* Split the previous and current weights into their lowest and highest bytes. */
4418 lastBottom
= last
& bottomByte
;
4419 currentBottom
= current
& bottomByte
;
4420 lastTop
= last
& topByte
;
4421 currentTop
= current
& topByte
;
4423 /* print out some values for spot-checking*/
4424 if (lastTop
!= currentTop
|| i
== 0x10000 || i
== 0x110000) {
4434 if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i
)) != i
) {
4435 log_err("No raw <-> code point roundtrip for 0x%08X\n", i
);
4438 showImplicit(TST_UCOL_MAX_INPUT
-2);
4439 showImplicit(TST_UCOL_MAX_INPUT
-1);
4440 showImplicit(TST_UCOL_MAX_INPUT
);
4445 * Iterate through the given iterator, checking to see that all the strings
4446 * in the expected array are present.
4447 * @param expected array of strings we expect to see, or NULL
4448 * @param expectedCount number of elements of expected, or 0
4450 static int32_t checkUEnumeration(const char* msg
,
4452 const char** expected
,
4453 int32_t expectedCount
) {
4454 UErrorCode ec
= U_ZERO_ERROR
;
4455 int32_t i
= 0, n
, j
, bit
;
/* Bit j of seenMask is tested for expected[j] further down.  `bit` starts at 1
 * and is shifted left once per expected entry, so expectedCount has to stay
 * below 31 to keep that int32_t shift from reaching the sign bit. */
4456 int32_t seenMask
= 0;
4458 U_ASSERT(expectedCount
>= 0 && expectedCount
< 31); /* [sic] 31 not 32 */
4459 n
= uenum_count(iter
, &ec
);
4460 if (!assertSuccess("count", &ec
)) return -1;
4461 log_verbose("%s = [", msg
);
4463 const char* s
= uenum_next(iter
, NULL
, &ec
);
4464 if (!assertSuccess("snext", &ec
) || s
== NULL
) break;
4465 if (i
!= 0) log_verbose(",");
4466 log_verbose("%s", s
);
4467 /* check expected list */
4468 for (j
=0, bit
=1; j
<expectedCount
; ++j
, bit
<<=1) {
4469 if ((seenMask
&bit
) == 0 &&
4470 uprv_strcmp(s
, expected
[j
]) == 0) {
4476 log_verbose("] (%d)\n", i
);
4477 assertTrue("count verified", i
==n
);
4478 /* did we see all expected strings? */
4479 for (j
=0, bit
=1; j
<expectedCount
; ++j
, bit
<<=1) {
4480 if ((seenMask
&bit
)!=0) {
4481 log_verbose("Ok: \"%s\" seen\n", expected
[j
]);
4483 log_err("FAIL: \"%s\" not seen\n", expected
[j
]);
4490 * Test new API added for separate collation tree.
/*
 * Runs the enumerations returned by ucol_openAvailableLocales(),
 * ucol_getKeywords() and ucol_getKeywordValues() through
 * checkUEnumeration(), then checks ucol_getFunctionalEquivalent() for the
 * locales "de" and "de_DE".
 */
4492 static void TestSeparateTrees(void) {
4493 UErrorCode ec
= U_ZERO_ERROR
;
4494 UEnumeration
*e
= NULL
;
4499 static const char* AVAIL
[] = { "en", "de" };
4501 static const char* KW
[] = { "collation" };
4503 static const char* KWVAL
[] = { "phonebook", "stroke" };
4505 #if !UCONFIG_NO_SERVICE
4506 e
= ucol_openAvailableLocales(&ec
);
4508 assertSuccess("ucol_openAvailableLocales", &ec
);
4509 assertTrue("ucol_openAvailableLocales!=0", e
!=0);
4510 n
= checkUEnumeration("ucol_openAvailableLocales", e
, AVAIL
, LEN(AVAIL
));
4511 /* Don't need to check n because we check list */
4514 log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec
));
4518 e
= ucol_getKeywords(&ec
);
4520 assertSuccess("ucol_getKeywords", &ec
);
4521 assertTrue("ucol_getKeywords!=0", e
!=0);
4522 n
= checkUEnumeration("ucol_getKeywords", e
, KW
, LEN(KW
));
4523 /* Don't need to check n because we check list */
4526 log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec
));
4529 e
= ucol_getKeywordValues(KW
[0], &ec
);
4531 assertSuccess("ucol_getKeywordValues", &ec
);
4532 assertTrue("ucol_getKeywordValues!=0", e
!=0);
4533 n
= checkUEnumeration("ucol_getKeywordValues", e
, KWVAL
, LEN(KWVAL
));
4534 /* Don't need to check n because we check list */
4537 log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec
));
4540 /* Try setting a warning before calling ucol_getKeywordValues */
4541 ec
= U_USING_FALLBACK_WARNING
;
4542 e
= ucol_getKeywordValues(KW
[0], &ec
);
4543 if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec
)) {
4544 assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e
!=0);
4545 n
= checkUEnumeration("ucol_getKeywordValues [with warning code set]", e
, KWVAL
, LEN(KWVAL
));
4546 /* Don't need to check n because we check list */
/* Reference prototype (the calls that follow pass the keyword before the locale):
 * U_DRAFT int32_t U_EXPORT2
 * ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
 *                              const char* keyword, const char* locale,
 *                              UBool* isAvailable, UErrorCode* status);
 */
4557 n
= ucol_getFunctionalEquivalent(loc
, sizeof(loc
), "collation", "de",
4559 if (assertSuccess("getFunctionalEquivalent", &ec
)) {
4560 assertEquals("getFunctionalEquivalent(de)", "de", loc
);
4561 assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
4562 isAvailable
== TRUE
);
4565 n
= ucol_getFunctionalEquivalent(loc
, sizeof(loc
), "collation", "de_DE",
4567 if (assertSuccess("getFunctionalEquivalent", &ec
)) {
4568 assertEquals("getFunctionalEquivalent(de_DE)", "de", loc
);
4569 assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE",
4570 isAvailable
== TRUE
);
4574 /* supersedes TestJ784: the same string lists are passed both to genericRulesStarter() with the [before 2] rules below and to genericLocaleStarter() for "zh" */
4575 static void TestBeforePinyin(void) {
4576 const static char rules
[] = {
4577 "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
4578 "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
4579 "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
4580 "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
4581 "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
4582 "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"
4585 const static char *test
[] = {
4596 const static char *test2
[] = {
4629 genericRulesStarter(rules
, test
, sizeof(test
)/sizeof(test
[0]));
4630 genericLocaleStarter("zh", test
, sizeof(test
)/sizeof(test
[0]));
4631 genericRulesStarter(rules
, test2
, sizeof(test2
)/sizeof(test2
[0]));
4632 genericLocaleStarter("zh", test2
, sizeof(test2
)/sizeof(test2
[0]));
/*
 * Checks which relation strengths ucol_openRules() accepts after a
 * [before N] reset.  Per the table, [before 1] is expected to succeed only
 * with '<', [before 2] only with '<<' and [before 3] only with '<<<'; every
 * other combination, and the malformed "[before I]", is expected to yield
 * U_INVALID_FORMAT_ERROR.
 */
4635 static void TestBeforeTightening(void) {
4636 static const struct {
4638 UErrorCode expectedStatus
;
4640 { "&[before 1]a<x", U_ZERO_ERROR
},
4641 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR
},
4642 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR
},
4643 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR
},
4644 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR
},
4645 { "&[before 2]a<<x",U_ZERO_ERROR
},
4646 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR
},
4647 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR
},
4648 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR
},
4649 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR
},
4650 { "&[before 3]a<<<x",U_ZERO_ERROR
},
4651 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR
},
4652 { "&[before I]a = x",U_INVALID_FORMAT_ERROR
}
4657 UErrorCode status
= U_ZERO_ERROR
;
4658 UChar rlz
[RULE_BUFFER_LEN
] = { 0 };
4661 UCollator
*coll
= NULL
;
4664 for(i
= 0; i
< sizeof(tests
)/sizeof(tests
[0]); i
++) {
4665 rlen
= u_unescape(tests
[i
].rules
, rlz
, RULE_BUFFER_LEN
);
4666 coll
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
,NULL
, &status
);
4667 if(status
!= tests
[i
].expectedStatus
) {
4668 log_err_status(status
, "Opening a collator with rules %s returned error code %s, expected %s\n",
4669 tests
[i
].rules
, u_errorName(status
), u_errorName(tests
[i
].expectedStatus
));
4672 status
= U_ZERO_ERROR
;
/*
&[before 1] a < x <<< X << q <<< Q < z
assert: m <<< M < x <<< X << q <<< Q < z < a < n

&[before 2] a << x <<< X << q <<< Q < z
assert: m <<< M < x <<< X << q <<< Q << a < z < n

&[before 3] a <<< x <<< X << q <<< Q < z
assert: m <<< M < x <<< X <<< a << q <<< Q < z < n

&[before 1] a < x <<< X << q <<< Q < z
assert: x <<< X << q <<< Q < z < m <<< M << a < n

&[before 2] a << x <<< X << q <<< Q < z
assert: m <<< M << x <<< X << q <<< Q << a < z < n

&[before 3] a <<< x <<< X << q <<< Q < z
assert: m <<< M << x <<< X <<< a << q <<< Q < z < n

&[before 1] a < x <<< X << q <<< Q < z
assert: x <<< X << q <<< Q < z < n < m <<< a <<< M

&[before 2] a << x <<< X << q <<< Q < z
assert: x <<< X << q <<< Q << m <<< a <<< M < z < n

&[before 3] a <<< x <<< X << q <<< Q < z
assert: m <<< x <<< X <<< a <<< M << q <<< Q < z < n

&[before 1] s < x <<< X << q <<< Q < z
assert: r <<< R < x <<< X << q <<< Q < z < s < n

&[before 2] s << x <<< X << q <<< Q < z
assert: r <<< R < x <<< X << q <<< Q << s < z < n

&[before 3] s <<< x <<< X << q <<< Q < z
assert: r <<< R < x <<< X <<< s << q <<< Q < z < n

&[before 1] \u24DC < x <<< X << q <<< Q < z
assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M

&[before 2] \u24DC << x <<< X << q <<< Q < z
assert: x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n

&[before 3] \u24DC <<< x <<< X << q <<< Q < z
assert: m <<< x <<< X <<< \u24DC <<< M << q <<< Q < z < n
*/
4739 /* requires features not yet supported */
/*
 * Each table entry pairs a rule string containing a [before N] reset with a
 * list of up to 16 strings (`order`) and the number of entries in use; the
 * three values are handed to genericRulesStarter().
 */
4740 static void TestMoreBefore(void) {
4741 static const struct {
4743 const char* order
[16];
4746 { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
4747 { "m","M","x","X","q","Q","z","a","n" }, 9},
4748 { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
4749 { "m","M","x","X","q","Q","a","z","n" }, 9},
4750 { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
4751 { "m","M","x","X","a","q","Q","z","n" }, 9},
4752 { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
4753 { "x","X","q","Q","z","m","M","a","n" }, 9},
4754 { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
4755 { "m","M","x","X","q","Q","a","z","n" }, 9},
4756 { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
4757 { "m","M","x","X","a","q","Q","z","n" }, 9},
4758 { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
4759 { "x","X","q","Q","z","n","m","a","M" }, 9},
4760 { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
4761 { "x","X","q","Q","m","a","M","z","n" }, 9},
4762 { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
4763 { "m","x","X","a","M","q","Q","z","n" }, 9},
4764 { "&[before 1] s < x <<< X << q <<< Q < z",
4765 { "r","R","x","X","q","Q","z","s","n" }, 9},
4766 { "&[before 2] s << x <<< X << q <<< Q < z",
4767 { "r","R","x","X","q","Q","s","z","n" }, 9},
4768 { "&[before 3] s <<< x <<< X << q <<< Q < z",
4769 { "r","R","x","X","s","q","Q","z","n" }, 9},
4770 { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
4771 { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
4772 { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
4773 { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
4774 { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
4775 { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
4780 for(i
= 0; i
< sizeof(tests
)/sizeof(tests
[0]); i
++) {
4781 genericRulesStarter(tests
[i
].rules
, tests
[i
].order
, tests
[i
].size
);
/*
 * Builds a collator from a rule that tailors U+0000 after 'a' and expects
 * the one-unit string {0x0001} to compare UCOL_LESS against the one-unit
 * string {0x0000}.
 */
4786 static void TestTailorNULL( void ) {
4787 const static char* rule
= "&a <<< '\\u0000'";
4788 UErrorCode status
= U_ZERO_ERROR
;
4789 UChar rlz
[RULE_BUFFER_LEN
] = { 0 };
4791 UChar a
= 1, null
= 0;
4792 UCollationResult res
= UCOL_EQUAL
;
4794 UCollator
*coll
= NULL
;
4797 rlen
= u_unescape(rule
, rlz
, RULE_BUFFER_LEN
);
4798 coll
= ucol_openRules(rlz
, rlen
, UCOL_DEFAULT
, UCOL_DEFAULT
,NULL
, &status
);
4800 if(U_FAILURE(status
)) {
4801 log_err_status(status
, "Could not open default collator! -> %s\n", u_errorName(status
));
4803 res
= ucol_strcoll(coll
, &a
, 1, &null
, 1);
4805 if(res
!= UCOL_LESS
) {
4806 log_err("NULL was not tailored properly!\n");
4814 TestUpperFirstQuaternary(void)
4816 const char* tests
[] = { "B", "b", "Bb", "bB" };
4817 UColAttribute att
[] = { UCOL_STRENGTH
, UCOL_CASE_FIRST
};
4818 UColAttributeValue attVals
[] = { UCOL_QUATERNARY
, UCOL_UPPER_FIRST
};
4819 genericLocaleStarterWithOptions("root", tests
, sizeof(tests
)/sizeof(tests
[0]), att
, attVals
, sizeof(att
)/sizeof(att
[0]));
/* U+00E2 (a with circumflex) + "T" versus "a" + "T": this pair is passed below with strength = primary and case level on, expecting UCOL_EQUAL */
4825 const char* tests
[] = { "\\u00e2T", "aT" };
4826 UColAttribute att
[] = { UCOL_STRENGTH
, UCOL_CASE_LEVEL
};
4827 UColAttributeValue attVals
[] = { UCOL_PRIMARY
, UCOL_ON
};
4828 const char* tests2
[] = { "a", "A" };
4829 const char* rule
= "&[first tertiary ignorable]=A=a";
4830 UColAttribute att2
[] = { UCOL_CASE_LEVEL
};
4831 UColAttributeValue attVals2
[] = { UCOL_ON
};
4832 /* Test whether we correctly ignore primary ignorables on case level when */
4833 /* we have only primary & case level */
4834 genericLocaleStarterWithOptionsAndResult("root", tests
, sizeof(tests
)/sizeof(tests
[0]), att
, attVals
, sizeof(att
)/sizeof(att
[0]), UCOL_EQUAL
);
4835 /* Test whether ICU4J will make case level for sortkeys that have primary strength */
4836 /* and case level */
4837 genericLocaleStarterWithOptions("root", tests2
, sizeof(tests2
)/sizeof(tests2
[0]), att
, attVals
, sizeof(att
)/sizeof(att
[0]));
4838 /* Test whether completely ignorable letters have case level info (they shouldn't) */
4839 genericRulesStarterWithOptionsAndResult(rule
, tests2
, sizeof(tests2
)/sizeof(tests2
[0]), att2
, attVals2
, sizeof(att2
)/sizeof(att2
[0]), UCOL_EQUAL
);
4845 static const char *test
= "this is a test string";
4847 int32_t ustr_length
= u_unescape(test
, ustr
, 256);
4848 unsigned char sortkey
[256];
4849 int32_t sortkey_length
;
4850 UErrorCode status
= U_ZERO_ERROR
;
4851 static UCollator
*coll
= NULL
;
4852 coll
= ucol_open("root", &status
);
4853 if(U_FAILURE(status
)) {
4854 log_err_status(status
, "Couldn't open UCA -> %s\n", u_errorName(status
));
4857 ucol_setStrength(coll
, UCOL_PRIMARY
);
4858 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_PRIMARY
, &status
);
4859 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
4860 if (U_FAILURE(status
)) {
4861 log_err("Failed setting atributes\n");
4864 sortkey_length
= ucol_getSortKey(coll
, ustr
, ustr_length
, NULL
, 0);
4865 if (sortkey_length
> 256) return;
4867 /* we mark the position where the null byte should be written in advance */
4868 sortkey
[sortkey_length
-1] = 0xAA;
4870 /* we set the buffer size one byte higher than needed */
4871 sortkey_length
= ucol_getSortKey(coll
, ustr
, ustr_length
, sortkey
,
4874 /* no error occurs (for me) */
4875 if (sortkey
[sortkey_length
-1] == 0xAA) {
4876 log_err("Hit bug at first try\n");
4879 /* we mark the position where the null byte should be written again */
4880 sortkey
[sortkey_length
-1] = 0xAA;
4882 /* this time we set the buffer size to the exact amount needed */
4883 sortkey_length
= ucol_getSortKey(coll
, ustr
, ustr_length
, sortkey
,
4886 /* now the trailing null byte is not written */
4887 if (sortkey
[sortkey_length
-1] == 0xAA) {
4888 log_err("Hit bug at second try\n");
4894 /* Regression test for Thai partial sort key problem */
/* The two strings differ only in their seventh code unit (U+0E47 versus U+0E48). */
4898 const static char *test
[] = {
4899 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
4900 "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
4903 genericLocaleStarter("th", test
, sizeof(test
)/sizeof(test
[0]));
/* NOTE(review): the rule resets at [first secondary ignorable] and tailors "a" after it;
 * presumably genericRulesStarter() then expects "a" to sort before "y" - confirm against the helper. */
4909 const static char *test
[] = { "a", "y" };
4910 const char* rules
= "&Ny << Y &[first secondary ignorable] <<< a";
4911 genericRulesStarter(rules
, test
, sizeof(test
)/sizeof(test
[0]));
4917 UErrorCode status
= U_ZERO_ERROR
;
4919 UCollator
*coll
=NULL
;
4920 uint8_t resColl
[100], expColl
[100];
4921 int32_t rLen
, tLen
, ruleLen
, sLen
, kLen
;
4922 UChar rule
[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &b<0x1FF3-omega with Ypogegrammeni (0x62 is 'b', not 'a') */
4923 UChar rule2
[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/
4924 UChar rule3
[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0}; /* &z<a+e with circumflex.*/
4925 static const UChar tData
[][20]={
4927 {0x0041, 0x0323, 0x0302, 0},
4928 {0x1EA0, 0x0302, 0},
4929 {0x00C2, 0x0323, 0},
4930 {0x1ED8, 0}, /* O with dot and circumflex */
4931 {0x1ECC, 0x0302, 0},
4933 {0x1EA1, 0x0306, 0},
4935 static const UChar tailorData
[][20]={
4936 {0x1FA2, 0}, /* Omega with 3 combining marks */
4937 {0x03C9, 0x0313, 0x0300, 0x0345, 0},
4938 {0x1FF3, 0x0313, 0x0300, 0},
4939 {0x1F60, 0x0300, 0x0345, 0},
4940 {0x1F62, 0x0345, 0},
4941 {0x1FA0, 0x0300, 0},
4943 static const UChar tailorData2
[][20]={
4944 {0x1E63, 0x030C, 0}, /* s with dot below + caron */
4945 {0x0073, 0x0323, 0x030C, 0},
4946 {0x0073, 0x030C, 0x0323, 0},
4948 static const UChar tailorData3
[][20]={
4949 {0x007a, 0}, /* z */
4950 {0x0061, 0x0065, 0}, /* a + e */
4951 {0x0061, 0x00ea, 0}, /* a + e with circumflex */
4952 {0x0061, 0x1EC7, 0}, /* a+ e with dot below and circumflex */
4953 {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
4954 {0x0061, 0x00EA, 0x0323, 0}, /* a + e with circumflex + combining dot below */
4955 {0x00EA, 0x0323, 0}, /* e with circumflex + combining dot below */
4956 {0x00EA, 0}, /* e with circumflex */
4959 /* Test Vietnamese sort. */
4960 coll
= ucol_open("vi", &status
);
4961 if(U_FAILURE(status
)) {
4962 log_err_status(status
, "Couldn't open collator -> %s\n", u_errorName(status
));
4965 log_verbose("\n\nVI collation:");
4966 if ( !ucol_equal(coll
, tData
[0], u_strlen(tData
[0]), tData
[2], u_strlen(tData
[2])) ) {
4967 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4969 if ( !ucol_equal(coll
, tData
[0], u_strlen(tData
[0]), tData
[3], u_strlen(tData
[3])) ) {
4970 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4972 if ( !ucol_equal(coll
, tData
[5], u_strlen(tData
[5]), tData
[4], u_strlen(tData
[4])) ) {
4973 log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
4975 if ( !ucol_equal(coll
, tData
[7], u_strlen(tData
[7]), tData
[6], u_strlen(tData
[6])) ) {
4976 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
4979 for (j
=0; j
<8; j
++) {
4980 tLen
= u_strlen(tData
[j
]);
4981 log_verbose("\n Data :%s \tlen: %d key: ", tData
[j
], tLen
);
4982 rLen
= ucol_getSortKey(coll
, tData
[j
], tLen
, resColl
, 100);
4983 for(i
= 0; i
<rLen
; i
++) {
4984 log_verbose(" %02X", resColl
[i
]);
4990 /* Test Romanian sort. */
4991 coll
= ucol_open("ro", &status
);
4992 log_verbose("\n\nRO collation:");
4993 if ( !ucol_equal(coll
, tData
[0], u_strlen(tData
[0]), tData
[1], u_strlen(tData
[1])) ) {
4994 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4996 if ( !ucol_equal(coll
, tData
[4], u_strlen(tData
[4]), tData
[5], u_strlen(tData
[5])) ) {
4997 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4999 if ( !ucol_equal(coll
, tData
[6], u_strlen(tData
[6]), tData
[7], u_strlen(tData
[7])) ) {
5000 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
5003 for (j
=4; j
<8; j
++) {
5004 tLen
= u_strlen(tData
[j
]);
5005 log_verbose("\n Data :%s \tlen: %d key: ", tData
[j
], tLen
);
5006 rLen
= ucol_getSortKey(coll
, tData
[j
], tLen
, resColl
, 100);
5007 for(i
= 0; i
<rLen
; i
++) {
5008 log_verbose(" %02X", resColl
[i
]);
5013 /* Test the precomposed Greek character with 3 combining marks. */
5014 log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
5015 ruleLen
= u_strlen(rule
);
5016 coll
= ucol_openRules(rule
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
5017 if (U_FAILURE(status
)) {
5018 log_err("ucol_openRules failed with %s\n", u_errorName(status
));
5021 sLen
= u_strlen(tailorData
[0]);
5022 for (j
=1; j
<6; j
++) {
5023 tLen
= u_strlen(tailorData
[j
]);
5024 if ( !ucol_equal(coll
, tailorData
[0], sLen
, tailorData
[j
], tLen
)) {
5025 log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j
, tailorData
[j
]);
5028 /* Test getSortKey. */
5029 tLen
= u_strlen(tailorData
[0]);
5030 kLen
=ucol_getSortKey(coll
, tailorData
[0], tLen
, expColl
, 100);
5031 for (j
=0; j
<6; j
++) {
5032 tLen
= u_strlen(tailorData
[j
]);
5033 rLen
= ucol_getSortKey(coll
, tailorData
[j
], tLen
, resColl
, 100);
5034 if ( kLen
!=rLen
|| uprv_memcmp(expColl
, resColl
, rLen
*sizeof(uint8_t))!=0 ) {
5035 log_err("\n Data[%d] :%s \tlen: %d key: ", j
, tailorData
[j
], tLen
);
5036 for(i
= 0; i
<rLen
; i
++) {
5037 log_err(" %02X", resColl
[i
]);
5043 log_verbose("\n\nTailoring test for s with caron:");
5044 ruleLen
= u_strlen(rule2
);
5045 coll
= ucol_openRules(rule2
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
5046 tLen
= u_strlen(tailorData2
[0]);
5047 kLen
=ucol_getSortKey(coll
, tailorData2
[0], tLen
, expColl
, 100);
5048 for (j
=1; j
<3; j
++) {
5049 tLen
= u_strlen(tailorData2
[j
]);
5050 rLen
= ucol_getSortKey(coll
, tailorData2
[j
], tLen
, resColl
, 100);
5051 if ( kLen
!=rLen
|| uprv_memcmp(expColl
, resColl
, rLen
*sizeof(uint8_t))!=0 ) {
5052 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j
, tailorData
[j
], tLen
);
5053 for(i
= 0; i
<rLen
; i
++) {
5054 log_err(" %02X", resColl
[i
]);
5060 log_verbose("\n\nTailoring test for &z< ae with circumflex:");
5061 ruleLen
= u_strlen(rule3
);
5062 coll
= ucol_openRules(rule3
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
5063 tLen
= u_strlen(tailorData3
[3]);
5064 kLen
=ucol_getSortKey(coll
, tailorData3
[3], tLen
, expColl
, 100);
5065 for (j
=4; j
<6; j
++) {
5066 tLen
= u_strlen(tailorData3
[j
]);
5067 rLen
= ucol_getSortKey(coll
, tailorData3
[j
], tLen
, resColl
, 100);
5069 if ( kLen
!=rLen
|| uprv_memcmp(expColl
, resColl
, rLen
*sizeof(uint8_t))!=0 ) {
5070 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j
, tailorData
[j
], tLen
);
5071 for(i
= 0; i
<rLen
; i
++) {
5072 log_err(" %02X", resColl
[i
]);
5076 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j
, tailorData
[j
], tLen
);
5077 for(i
= 0; i
<rLen
; i
++) {
5078 log_verbose(" %02X", resColl
[i
]);
/*
 * Tailors 'a' and 'b' relative to [last/first primary ignorable] (rule1) and
 * [last/first secondary ignorable] (rule2), then compares the sort keys of
 * the test strings byte-for-byte with hard-coded expected keys.
 */
5085 TestTailor6179(void)
5087 UErrorCode status
= U_ZERO_ERROR
;
5089 UCollator
*coll
=NULL
;
5090 uint8_t resColl
[100];
5091 int32_t rLen
, tLen
, ruleLen
;
5092 /* &[last primary ignorable]<< a &[first primary ignorable]<<b */
5093 static const UChar rule1
[]={
5094 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
5095 0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
5096 0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
5097 0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
5098 /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
5099 static const UChar rule2
[]={
5100 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
5101 0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
5102 0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
5103 0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
5104 0x3C,0x3C,0x20,0x62,0};
5106 static const UChar tData1
[][4]={
5111 static const UChar tData2
[][4]={
5118 * These values from FractionalUCA.txt will change,
5119 * and need to be updated here.
5121 static const uint8_t firstPrimaryIgnCE
[]={1, 0x88, 1, 5, 0};
5122 static const uint8_t lastPrimaryIgnCE
[]={1, 0xE3, 1, 5, 0};
5123 static const uint8_t firstSecondaryIgnCE
[]={1, 1, 0xbf, 0x04, 0};
5124 static const uint8_t lastSecondaryIgnCE
[]={1, 1, 0xbf, 0x04, 0};
5126 /* Test [Last Primary ignorable] */
5128 log_verbose("Tailoring test: &[last primary ignorable]<<a &[first primary ignorable]<<b\n");
5129 ruleLen
= u_strlen(rule1
);
5130 coll
= ucol_openRules(rule1
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
5131 if (U_FAILURE(status
)) {
5132 log_err_status(status
, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status
));
5135 tLen
= u_strlen(tData1
[0]);
5136 rLen
= ucol_getSortKey(coll
, tData1
[0], tLen
, resColl
, 100);
5137 if (rLen
!= LEN(lastPrimaryIgnCE
) || uprv_memcmp(resColl
, lastPrimaryIgnCE
, rLen
) != 0) {
5138 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData1
[0], rLen
);
5139 for(i
= 0; i
<rLen
; i
++) {
5140 log_err(" %02X", resColl
[i
]);
5144 tLen
= u_strlen(tData1
[1]);
5145 rLen
= ucol_getSortKey(coll
, tData1
[1], tLen
, resColl
, 100);
5146 if (rLen
!= LEN(firstPrimaryIgnCE
) || uprv_memcmp(resColl
, firstPrimaryIgnCE
, rLen
) != 0) {
5147 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData1
[1], rLen
);
5148 for(i
= 0; i
<rLen
; i
++) {
5149 log_err(" %02X", resColl
[i
]);
5156 /* Test [Last Secondary ignorable] */
5157 log_verbose("Tailoring test: &[last secondary ignorable]<<<a &[first secondary ignorable]<<<b\n");
5158 ruleLen
= u_strlen(rule1
);
5159 coll
= ucol_openRules(rule2
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
5160 if (U_FAILURE(status
)) {
5161 log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status
));
5164 tLen
= u_strlen(tData2
[0]);
5165 rLen
= ucol_getSortKey(coll
, tData2
[0], tLen
, resColl
, 100);
5166 if (rLen
!= LEN(lastSecondaryIgnCE
) || uprv_memcmp(resColl
, lastSecondaryIgnCE
, rLen
) != 0) {
5167 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData2
[0], rLen
);
5168 for(i
= 0; i
<rLen
; i
++) {
5169 log_err(" %02X", resColl
[i
]);
5173 if(isICUVersionAtLeast(50, 0, 0)) { /* TODO: debug & fix, see ticket #8982 */
5174 tLen
= u_strlen(tData2
[1]);
5175 rLen
= ucol_getSortKey(coll
, tData2
[1], tLen
, resColl
, 100);
5176 if (rLen
!= LEN(firstSecondaryIgnCE
) || uprv_memcmp(resColl
, firstSecondaryIgnCE
, rLen
) != 0) {
5177 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData2
[1], rLen
);
5178 for(i
= 0; i
<rLen
; i
++) {
5179 log_err(" %02X", resColl
[i
]);
/*
 * Computes sort keys for the tData1 strings under four collators ("en",
 * "ja", and two rule-based tailorings involving the middle dot) and reports
 * an error when a key compares lower than the key of the previous entry.
 */
5188 TestUCAPrecontext(void)
5190 UErrorCode status
= U_ZERO_ERROR
;
5192 UCollator
*coll
=NULL
;
5193 uint8_t resColl
[100], prevColl
[100];
5194 int32_t rLen
, tLen
, ruleLen
;
5195 UChar rule1
[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
5196 UChar rule2
[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
5197 /* & L middle-dot << a (0x4C is uppercase 'L'); a is an expansion. */
5199 UChar tData1
[][20]={
5200 { 0xb7, 0}, /* standalone middle dot(0xb7) */
5201 { 0x387, 0}, /* standalone middle dot(0x387) */
5204 { 0x4C, 0x0332, 0}, /* L (0x4C) with [first primary ignorable] */
5205 { 0x6C, 0xb7, 0}, /* l with middle dot(0xb7) */
5206 { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
5207 { 0x4C, 0xb7, 0}, /* L with middle dot(0xb7) */
5208 { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
5209 { 0x6C, 0x61, 0x387, 0}, /* la with middle dot(0x387) */
5210 { 0x4C, 0x61, 0xb7, 0}, /* La with middle dot(0xb7) */
5213 log_verbose("\n\nEN collation:");
5214 coll
= ucol_open("en", &status
);
5215 if (U_FAILURE(status
)) {
5216 log_err_status(status
, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status
));
5219 for (j
=0; j
<11; j
++) {
5220 tLen
= u_strlen(tData1
[j
]);
5221 rLen
= ucol_getSortKey(coll
, tData1
[j
], tLen
, resColl
, 100);
5222 if ((j
>0) && (strcmp((char *)resColl
, (char *)prevColl
)<0)) {
5223 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5226 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j
, tData1
[j
], rLen
);
5227 for(i
= 0; i
<rLen
; i
++) {
5228 log_verbose(" %02X", resColl
[i
]);
5230 uprv_memcpy(prevColl
, resColl
, sizeof(uint8_t)*(rLen
+1));
5235 log_verbose("\n\nJA collation:");
5236 coll
= ucol_open("ja", &status
);
5237 if (U_FAILURE(status
)) {
5238 log_err("Tailoring test: &z <<a|- failed!");
5241 for (j
=0; j
<11; j
++) {
5242 tLen
= u_strlen(tData1
[j
]);
5243 rLen
= ucol_getSortKey(coll
, tData1
[j
], tLen
, resColl
, 100);
5244 if ((j
>0) && (strcmp((char *)resColl
, (char *)prevColl
)<0)) {
5245 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5248 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j
, tData1
[j
], rLen
);
5249 for(i
= 0; i
<rLen
; i
++) {
5250 log_verbose(" %02X", resColl
[i
]);
5252 uprv_memcpy(prevColl
, resColl
, sizeof(uint8_t)*(rLen
+1));
5257 log_verbose("\n\nTailoring test: & middle dot < a ");
5258 ruleLen
= u_strlen(rule1
);
5259 coll
= ucol_openRules(rule1
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
5260 if (U_FAILURE(status
)) {
5261 log_err("Tailoring test: & middle dot < a failed!");
5264 for (j
=0; j
<11; j
++) {
5265 tLen
= u_strlen(tData1
[j
]);
5266 rLen
= ucol_getSortKey(coll
, tData1
[j
], tLen
, resColl
, 100);
5267 if ((j
>0) && (strcmp((char *)resColl
, (char *)prevColl
)<0)) {
5268 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5271 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j
, tData1
[j
], rLen
);
5272 for(i
= 0; i
<rLen
; i
++) {
5273 log_verbose(" %02X", resColl
[i
]);
5275 uprv_memcpy(prevColl
, resColl
, sizeof(uint8_t)*(rLen
+1));
5280 log_verbose("\n\nTailoring test: & l middle-dot << a ");
5281 ruleLen
= u_strlen(rule2
);
5282 coll
= ucol_openRules(rule2
, ruleLen
, UCOL_OFF
, UCOL_TERTIARY
, NULL
,&status
);
5283 if (U_FAILURE(status
)) {
5284 log_err("Tailoring test: & l middle-dot << a failed!");
5287 for (j
=0; j
<11; j
++) {
5288 tLen
= u_strlen(tData1
[j
]);
5289 rLen
= ucol_getSortKey(coll
, tData1
[j
], tLen
, resColl
, 100);
5290 if ((j
>0) && (j
!=3) && (strcmp((char *)resColl
, (char *)prevColl
)<0)) {
5291 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5294 if ((j
==3)&&(strcmp((char *)resColl
, (char *)prevColl
)>0)) {
5295 log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
5298 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j
, tData1
[j
], rLen
);
5299 for(i
= 0; i
<rLen
; i
++) {
5300 log_verbose(" %02X", resColl
[i
]);
5302 uprv_memcpy(prevColl
, resColl
, sizeof(uint8_t)*(rLen
+1));
5308 TestOutOfBuffer5468(void)
5310 static const char *test
= "\\u4e00";
5312 int32_t ustr_length
= u_unescape(test
, ustr
, 256);
5313 unsigned char shortKeyBuf
[1];
5314 int32_t sortkey_length
;
5315 UErrorCode status
= U_ZERO_ERROR
;
5316 static UCollator
*coll
= NULL
;
5318 coll
= ucol_open("root", &status
);
5319 if(U_FAILURE(status
)) {
5320 log_err_status(status
, "Couldn't open UCA -> %s\n", u_errorName(status
));
5323 ucol_setStrength(coll
, UCOL_PRIMARY
);
5324 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_PRIMARY
, &status
);
5325 ucol_setAttribute(coll
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
5326 if (U_FAILURE(status
)) {
5327 log_err("Failed setting atributes\n");
/* Deliberately pass a 1-byte buffer: the check that follows still expects a
 * returned length of 4 for this input, i.e. more than shortKeyBuf can hold. */
5331 sortkey_length
= ucol_getSortKey(coll
, ustr
, ustr_length
, shortKeyBuf
, sizeof(shortKeyBuf
));
5332 if (sortkey_length
!= 4) {
5333 log_err("expecting length of sortKey is 4 got:%d ", sortkey_length
);
5335 log_verbose("length of sortKey is %d", sortkey_length
);
5339 #define TSKC_DATA_SIZE 5
5340 #define TSKC_BUF_SIZE 50
5342 TestSortKeyConsistency(void)
5344 UErrorCode icuRC
= U_ZERO_ERROR
;
5346 UChar data
[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
5348 uint8_t bufFull
[TSKC_DATA_SIZE
][TSKC_BUF_SIZE
];
5349 uint8_t bufPart
[TSKC_DATA_SIZE
][TSKC_BUF_SIZE
];
5352 ucol
= ucol_openFromShortString("LEN_S4", FALSE
, NULL
, &icuRC
);
5353 if (U_FAILURE(icuRC
))
5355 log_err_status(icuRC
, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC
));
5359 for (i
= 0; i
< TSKC_DATA_SIZE
; i
++)
5361 UCharIterator uiter
;
5362 uint32_t state
[2] = { 0, 0 };
5363 int32_t dataLen
= i
+1;
5364 for (j
=0; j
<TSKC_BUF_SIZE
; j
++)
5365 bufFull
[i
][j
] = bufPart
[i
][j
] = 0;
5368 ucol_getSortKey(ucol
, data
, dataLen
, bufFull
[i
], TSKC_BUF_SIZE
);
5370 /* Partial sort key */
5371 uiter_setString(&uiter
, data
, dataLen
);
5372 ucol_nextSortKeyPart(ucol
, &uiter
, state
, bufPart
[i
], TSKC_BUF_SIZE
, &icuRC
);
5373 if (U_FAILURE(icuRC
))
5375 log_err("ucol_nextSortKeyPart failed\n");
5380 for (i2
=0; i2
<i
; i2
++)
5382 UBool fullMatch
= TRUE
;
5383 UBool partMatch
= TRUE
;
5384 for (j
=0; j
<TSKC_BUF_SIZE
; j
++)
/* NOTE(review): despite their names, these flags remain TRUE only while every
 * byte seen so far DIFFERS ('!=') between entry i and entry i2 - confirm
 * whether '==' was intended before relying on this comparison. */
5386 fullMatch
= fullMatch
&& (bufFull
[i
][j
] != bufFull
[i2
][j
]);
5387 partMatch
= partMatch
&& (bufPart
[i
][j
] != bufPart
[i2
][j
]);
5389 if (fullMatch
!= partMatch
) {
5390 log_err(fullMatch
? "full key was consistent, but partial key changed\n"
5391 : "partial key was consistent, but full key changed\n");
5398 /*=============================================*/
/*
 * Opens a collator from the short string below and asks
 * ucol_nextSortKeyPart() for a sort key part of a two-unit text; the test
 * reports an error if the returned length equals the full 32-byte buffer
 * size.
 */
5403 static void TestCroatianSortKey(void) {
5404 const char* collString
= "LHR_AN_CX_EX_FX_HX_NX_S3";
5405 UErrorCode status
= U_ZERO_ERROR
;
/* 'D' followed by 0xD81A, a lead-surrogate code unit (range D800..DBFF) with no trail unit after it */
5409 static const UChar text
[] = { 0x0044, 0xD81A };
5411 size_t length
= sizeof(text
)/sizeof(*text
);
5413 uint8_t textSortKey
[32];
5414 size_t lenSortKey
= 32;
5415 size_t actualSortKeyLen
;
5416 uint32_t uStateInfo
[2] = { 0, 0 };
5418 ucol
= ucol_openFromShortString(collString
, FALSE
, NULL
, &status
);
5419 if (U_FAILURE(status
)) {
5420 log_err_status(status
, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status
));
5424 uiter_setString(&iter
, text
, length
);
5426 actualSortKeyLen
= ucol_nextSortKeyPart(
5427 ucol
, &iter
, (uint32_t*)uStateInfo
,
5428 textSortKey
, lenSortKey
, &status
5431 if (actualSortKeyLen
== lenSortKey
) {
5432 log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
5439 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
5440 * they are both Hiragana and Katakana
5442 #define SORTKEYLEN 50
5443 static void TestHiragana(void) {
5444 UErrorCode status
= U_ZERO_ERROR
;
5446 UCollationResult strcollresult
;
5447 UChar data1
[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
5448 UChar data2
[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
5449 int32_t data1Len
= sizeof(data1
)/sizeof(*data1
);
5450 int32_t data2Len
= sizeof(data2
)/sizeof(*data2
);
5452 uint8_t sortKey1
[SORTKEYLEN
];
5453 uint8_t sortKey2
[SORTKEYLEN
];
5455 UCharIterator uiter1
;
5456 UCharIterator uiter2
;
5457 uint32_t state1
[2] = { 0, 0 };
5458 uint32_t state2
[2] = { 0, 0 };
5462 ucol
= ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE
, NULL
,
5464 if (U_FAILURE(status
)) {
5465 log_err_status(status
, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status
));
5469 /* Start of full sort keys */
5470 /* Full sort key1 */
5471 keySize1
= ucol_getSortKey(ucol
, data1
, data1Len
, sortKey1
, SORTKEYLEN
);
5472 /* Full sort key2 */
5473 keySize2
= ucol_getSortKey(ucol
, data2
, data2Len
, sortKey2
, SORTKEYLEN
);
5474 if (keySize1
== keySize2
) {
5475 for (i
= 0; i
< keySize1
; i
++) {
5476 if (sortKey1
[i
] != sortKey2
[i
]) {
5477 log_err("Full sort keys are different. Should be equal.");
5481 log_err("Full sort keys sizes doesn't match: %d %d", keySize1
, keySize2
);
5483 /* End of full sort keys */
5485 /* Start of partial sort keys */
5486 /* Partial sort key1 */
5487 uiter_setString(&uiter1
, data1
, data1Len
);
5488 keySize1
= ucol_nextSortKeyPart(ucol
, &uiter1
, state1
, sortKey1
, SORTKEYLEN
, &status
);
5489 /* Partial sort key2 */
5490 uiter_setString(&uiter2
, data2
, data2Len
);
5491 keySize2
= ucol_nextSortKeyPart(ucol
, &uiter2
, state2
, sortKey2
, SORTKEYLEN
, &status
);
5492 if (U_SUCCESS(status
) && keySize1
== keySize2
) {
5493 for (j
= 0; j
< keySize1
; j
++) {
5494 if (sortKey1
[j
] != sortKey2
[j
]) {
5495 log_err("Partial sort keys are different. Should be equal");
5499 log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status
), keySize1
, keySize2
);
5501 /* End of partial sort keys */
5503 /* Start of strcoll */
5504 /* Use ucol_strcoll() to determine ordering */
5505 strcollresult
= ucol_strcoll(ucol
, data1
, data1Len
, data2
, data2Len
);
5506 if (strcollresult
!= UCOL_EQUAL
) {
5507 log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
5513 /* Convenient struct for running collation tests */
5515 const UChar source
[MAX_TOKEN_LEN
]; /* String on left */
5516 const UChar target
[MAX_TOKEN_LEN
]; /* String on right */
5517 UCollationResult result
; /* -1, 0 or +1, depending on collation */
5521 * Utility function to test one collation test case.
5522 * @param testcases Array of test cases.
5523 * @param n_testcases Size of the array testcases.
5524 * @param str_rules Array of rules. These rules should be specifying the same rule in different formats.
5525 * @param n_rules Size of the array str_rules.
5527 static void doTestOneTestCase(const OneTestCase testcases
[],
5529 const char* str_rules
[],
5532 int rule_no
, testcase_no
;
5535 UErrorCode status
= U_ZERO_ERROR
;
5536 UParseError parse_error
;
5537 UCollator
*myCollation
;
5539 for (rule_no
= 0; rule_no
< n_rules
; ++rule_no
) {
5541 length
= u_unescape(str_rules
[rule_no
], rule
, 500);
5543 log_err("ERROR: The rule cannot be unescaped: %s\n");
5546 myCollation
= ucol_openRules(rule
, length
, UCOL_ON
, UCOL_TERTIARY
, &parse_error
, &status
);
5547 if(U_FAILURE(status
)){
5548 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
5551 log_verbose("Testing the <<* syntax\n");
5552 ucol_setAttribute(myCollation
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
5553 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
5554 for (testcase_no
= 0; testcase_no
< n_testcases
; ++testcase_no
) {
5556 testcases
[testcase_no
].source
,
5557 testcases
[testcase_no
].target
,
5558 testcases
[testcase_no
].result
5561 ucol_close(myCollation
);
5565 const static OneTestCase rangeTestcases
[] = {
5566 { {0x0061}, {0x0062}, UCOL_LESS
}, /* "a" < "b" */
5567 { {0x0062}, {0x0063}, UCOL_LESS
}, /* "b" < "c" */
5568 { {0x0061}, {0x0063}, UCOL_LESS
}, /* "a" < "c" */
5570 { {0x0062}, {0x006b}, UCOL_LESS
}, /* "b" << "k" */
5571 { {0x006b}, {0x006c}, UCOL_LESS
}, /* "k" << "l" */
5572 { {0x0062}, {0x006c}, UCOL_LESS
}, /* "b" << "l" */
5573 { {0x0061}, {0x006c}, UCOL_LESS
}, /* "a" < "l" */
5574 { {0x0061}, {0x006d}, UCOL_LESS
}, /* "a" < "m" */
5576 { {0x0079}, {0x006d}, UCOL_LESS
}, /* "y" < "f" */
5577 { {0x0079}, {0x0067}, UCOL_LESS
}, /* "y" < "g" */
5578 { {0x0061}, {0x0068}, UCOL_LESS
}, /* "y" < "h" */
5579 { {0x0061}, {0x0065}, UCOL_LESS
}, /* "g" < "e" */
5581 { {0x0061}, {0x0031}, UCOL_EQUAL
}, /* "a" = "1" */
5582 { {0x0061}, {0x0032}, UCOL_EQUAL
}, /* "a" = "2" */
5583 { {0x0061}, {0x0033}, UCOL_EQUAL
}, /* "a" = "3" */
5584 { {0x0061}, {0x0066}, UCOL_LESS
}, /* "a" < "f" */
5585 { {0x006c, 0x0061}, {0x006b, 0x0062}, UCOL_LESS
}, /* "la" < "123" */
5586 { {0x0061, 0x0061, 0x0061}, {0x0031, 0x0032, 0x0033}, UCOL_EQUAL
}, /* "aaa" = "123" */
5587 { {0x0062}, {0x007a}, UCOL_LESS
}, /* "b" < "z" */
5588 { {0x0061, 0x007a, 0x0062}, {0x0032, 0x0079, 0x006d}, UCOL_LESS
}, /* "azm" = "2yc" */
5591 static int nRangeTestcases
= LEN(rangeTestcases
);
5593 const static OneTestCase rangeTestcasesSupplemental
[] = {
5594 { {0xfffe}, {0xffff}, UCOL_LESS
}, /* U+FFFE < U+FFFF */
5595 { {0xffff}, {0xd800, 0xdc00}, UCOL_LESS
}, /* U+FFFF < U+10000 */
5596 { {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL_LESS
}, /* U+10000 < U+10001 */
5597 { {0xfffe}, {0xd800, 0xdc01}, UCOL_LESS
}, /* U+FFFE < U+10001 */
5598 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS
}, /* U+10000 < U+10001 */
5599 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL_LESS
}, /* U+10000 < U+10001 */
5600 { {0xfffe}, {0xd800, 0xdc02}, UCOL_LESS
}, /* U+FFFE < U+10001 */
5603 static int nRangeTestcasesSupplemental
= LEN(rangeTestcasesSupplemental
);
5605 const static OneTestCase rangeTestcasesQwerty
[] = {
5606 { {0x0071}, {0x0077}, UCOL_LESS
}, /* "q" < "w" */
5607 { {0x0077}, {0x0065}, UCOL_LESS
}, /* "w" < "e" */
5609 { {0x0079}, {0x0075}, UCOL_LESS
}, /* "y" < "u" */
5610 { {0x0071}, {0x0075}, UCOL_LESS
}, /* "q" << "u" */
5612 { {0x0074}, {0x0069}, UCOL_LESS
}, /* "t" << "i" */
5613 { {0x006f}, {0x0070}, UCOL_LESS
}, /* "o" << "p" */
5615 { {0x0079}, {0x0065}, UCOL_LESS
}, /* "y" < "e" */
5616 { {0x0069}, {0x0075}, UCOL_LESS
}, /* "i" < "u" */
5618 { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
5619 {0x0077, 0x0065, 0x0072, 0x0065}, UCOL_LESS
}, /* "quest" < "were" */
5620 { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
5621 {0x0071, 0x0075, 0x0065, 0x0073, 0x0074}, UCOL_LESS
}, /* "quack" < "quest" */
5624 static int nRangeTestcasesQwerty
= LEN(rangeTestcasesQwerty
);
5626 static void TestSameStrengthList(void)
5628 const char* strRules
[] = {
5630 "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3",
5633 "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
5635 doTestOneTestCase(rangeTestcases
, nRangeTestcases
, strRules
, LEN(strRules
));
5638 static void TestSameStrengthListQuoted(void)
5640 const char* strRules
[] = {
5641 /* Lists with quoted characters */
5642 "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
5643 "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
5645 "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
5646 "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
5648 "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz &y<*fghe &a=*\\u0031\\u0032\\u0033",
5649 "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
5651 doTestOneTestCase(rangeTestcases
, nRangeTestcases
, strRules
, LEN(strRules
));
5654 static void TestSameStrengthListSupplemental(void)
5656 const char* strRules
[] = {
5657 "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002",
5658 "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
5659 "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002",
5660 "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
5662 doTestOneTestCase(rangeTestcasesSupplemental
, nRangeTestcasesSupplemental
, strRules
, LEN(strRules
));
5665 static void TestSameStrengthListQwerty(void)
5667 const char* strRules
[] = {
5668 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
5669 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
5670 "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
5671 "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
5672 "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
5674 /* Quoted characters also will work if two quoted characters are not consecutive. */
5675 "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
5677 /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
5678 /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
5681 doTestOneTestCase(rangeTestcasesQwerty
, nRangeTestcasesQwerty
, strRules
, LEN(strRules
));
5684 static void TestSameStrengthListQuotedQwerty(void)
5686 const char* strRules
[] = {
5687 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */
5688 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */
5689 "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'", /* Lists with quotes */
5691 /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
5692 /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
5694 doTestOneTestCase(rangeTestcasesQwerty
, nRangeTestcasesQwerty
, strRules
, LEN(strRules
));
5697 static void TestSameStrengthListRanges(void)
5699 const char* strRules
[] = {
5700 "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
5702 doTestOneTestCase(rangeTestcases
, nRangeTestcases
, strRules
, LEN(strRules
));
5705 static void TestSameStrengthListSupplementalRanges(void)
5707 const char* strRules
[] = {
5708 "&\\ufffe<*\\uffff-\\U00010002",
5710 doTestOneTestCase(rangeTestcasesSupplemental
, nRangeTestcasesSupplemental
, strRules
, LEN(strRules
));
/*
 * Checks that rule-syntax characters (; + , - & *) can themselves be
 * tailored when quoted: every rule in strRules is passed, together with the
 * expectations in specialCharacterStrings, to doTestOneTestCase().
 */
5713 static void TestSpecialCharacters(void)
/* Rule spellings under test. NOTE(review): only one entry is visible in this
 * view; further entries may exist in the complete file. */
5715 const char* strRules
[] = {
5717 "&';'<'+'<','<'-'<'&'<'*'",
/* Expected pairwise ordering of the quoted characters. */
5726 const static OneTestCase specialCharacterStrings
[] = {
5727 { {0x003b}, {0x002b}, UCOL_LESS
}, /* ; < + */
5728 { {0x002b}, {0x002c}, UCOL_LESS
}, /* + < , */
5729 { {0x002c}, {0x002d}, UCOL_LESS
}, /* , < - */
5730 { {0x002d}, {0x0026}, UCOL_LESS
}, /* - < & */
/* Build a collator per rule and run all expectations against it. */
5732 doTestOneTestCase(specialCharacterStrings
, LEN(specialCharacterStrings
), strRules
, LEN(strRules
));
5735 static void TestPrivateUseCharacters(void)
5737 const char* strRules
[] = {
5739 "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
5740 "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
5743 const static OneTestCase privateUseCharacterStrings
[] = {
5744 { {0x5ea7}, {0xe2d8}, UCOL_LESS
},
5745 { {0xe2d8}, {0xe2d9}, UCOL_LESS
},
5746 { {0xe2d9}, {0xe2da}, UCOL_LESS
},
5747 { {0xe2da}, {0xe2db}, UCOL_LESS
},
5748 { {0xe2db}, {0xe2dc}, UCOL_LESS
},
5749 { {0xe2dc}, {0x4e8d}, UCOL_LESS
},
5751 doTestOneTestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), strRules
, LEN(strRules
));
5754 static void TestPrivateUseCharactersInList(void)
5756 const char* strRules
[] = {
5758 "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
5759 /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
5760 "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
5763 const static OneTestCase privateUseCharacterStrings
[] = {
5764 { {0x5ea7}, {0xe2d8}, UCOL_LESS
},
5765 { {0xe2d8}, {0xe2d9}, UCOL_LESS
},
5766 { {0xe2d9}, {0xe2da}, UCOL_LESS
},
5767 { {0xe2da}, {0xe2db}, UCOL_LESS
},
5768 { {0xe2db}, {0xe2dc}, UCOL_LESS
},
5769 { {0xe2dc}, {0x4e8d}, UCOL_LESS
},
5771 doTestOneTestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), strRules
, LEN(strRules
));
5774 static void TestPrivateUseCharactersInRange(void)
5776 const char* strRules
[] = {
5778 "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
5779 "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
5780 /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
5783 const static OneTestCase privateUseCharacterStrings
[] = {
5784 { {0x5ea7}, {0xe2d8}, UCOL_LESS
},
5785 { {0xe2d8}, {0xe2d9}, UCOL_LESS
},
5786 { {0xe2d9}, {0xe2da}, UCOL_LESS
},
5787 { {0xe2da}, {0xe2db}, UCOL_LESS
},
5788 { {0xe2db}, {0xe2dc}, UCOL_LESS
},
5789 { {0xe2dc}, {0x4e8d}, UCOL_LESS
},
5791 doTestOneTestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), strRules
, LEN(strRules
));
/*
 * Negative test: each entry of invalidRules is unescaped and handed to
 * ucol_openRules(); an error is logged whenever the call does NOT fail.
 */
5794 static void TestInvalidListsAndRanges(void)
/* Malformed rule strings, grouped by the kind of mistake they contain. */
5796 const char* invalidRules
[] = {
5797 /* Range not in starred expression */
5798 "&\\ufffe<\\uffff-\\U00010002",
5800 /* Range without start */
5803 /* Range without end */
5806 /* More than one hyphen */
5809 /* Range in the wrong order */
5815 UErrorCode status
= U_ZERO_ERROR
;
5816 UParseError parse_error
;
5817 int n_rules
= LEN(invalidRules
);
5820 UCollator
*myCollation
;
5822 for (rule_no
= 0; rule_no
< n_rules
; ++rule_no
) {
/* Convert the escaped C string into UTF-16 (at most 500 units). */
5824 length
= u_unescape(invalidRules
[rule_no
], rule
, 500);
/* NOTE(review): the format below contains %s but no argument is passed. */
5826 log_err("ERROR: The rule cannot be unescaped: %s\n");
5829 myCollation
= ucol_openRules(rule
, length
, UCOL_ON
, UCOL_TERTIARY
, &parse_error
, &status
);
/* Success is the failure case here: the rule was supposed to be rejected.
 * NOTE(review): no ucol_close(myCollation) is visible for that case. */
5830 if(!U_FAILURE(status
)){
5831 log_err("ERROR: Could not cause a failure as expected: \n");
/* Clear the (expected) error before trying the next rule. */
5833 status
= U_ZERO_ERROR
;
5838 * This test ensures that characters placed before a character in a different script have the same lead byte
5839 * in their collation key before and after script reordering.
5841 static void TestBeforeRuleWithScriptReordering(void)
5844 UErrorCode status
= U_ZERO_ERROR
;
5845 UCollator
*myCollation
;
5846 char srules
[500] = "&[before 1]\\u03b1 < \\u0e01";
5848 uint32_t rulesLength
= 0;
5849 int32_t reorderCodes
[1] = {USCRIPT_GREEK
};
5850 UCollationResult collResult
;
5852 uint8_t baseKey
[256];
5853 uint32_t baseKeyLength
;
5854 uint8_t beforeKey
[256];
5855 uint32_t beforeKeyLength
;
5857 UChar base
[] = { 0x03b1 }; /* base */
5858 int32_t baseLen
= sizeof(base
)/sizeof(*base
);
5860 UChar before
[] = { 0x0e01 }; /* ko kai */
5861 int32_t beforeLen
= sizeof(before
)/sizeof(*before
);
5863 /*UChar *data[] = { before, base };
5864 genericRulesStarter(srules, data, 2);*/
5866 log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
5869 /* build collator */
5870 log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
5872 rulesLength
= u_unescape(srules
, rules
, LEN(rules
));
5873 myCollation
= ucol_openRules(rules
, rulesLength
, UCOL_ON
, UCOL_TERTIARY
, &error
, &status
);
5874 if(U_FAILURE(status
)) {
5875 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
5879 /* check collation results - before rule applied but not script reordering */
5880 collResult
= ucol_strcoll(myCollation
, base
, baseLen
, before
, beforeLen
);
5881 if (collResult
!= UCOL_GREATER
) {
5882 log_err("Collation result not correct before script reordering = %d\n", collResult
);
5885 /* check the lead byte of the collation keys before script reordering */
5886 baseKeyLength
= ucol_getSortKey(myCollation
, base
, baseLen
, baseKey
, 256);
5887 beforeKeyLength
= ucol_getSortKey(myCollation
, before
, beforeLen
, beforeKey
, 256);
5888 if (baseKey
[0] != beforeKey
[0]) {
5889 log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey
[0], beforeKey
[0]);
5892 /* reorder the scripts */
5893 ucol_setReorderCodes(myCollation
, reorderCodes
, 1, &status
);
5894 if(U_FAILURE(status
)) {
5895 log_err_status(status
, "ERROR: while setting script order: %s\n", myErrorName(status
));
5899 /* check collation results - before rule applied and after script reordering */
5900 collResult
= ucol_strcoll(myCollation
, base
, baseLen
, before
, beforeLen
);
5901 if (collResult
!= UCOL_GREATER
) {
5902 log_err("Collation result not correct after script reordering = %d\n", collResult
);
5905 /* check the lead byte of the collation keys after script reordering */
5906 ucol_getSortKey(myCollation
, base
, baseLen
, baseKey
, 256);
5907 ucol_getSortKey(myCollation
, before
, beforeLen
, beforeKey
, 256);
5908 if (baseKey
[0] != beforeKey
[0]) {
5909 log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey
[0], beforeKey
[0]);
5912 ucol_close(myCollation
);
5916 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
5918 static void TestNonLeadBytesDuringCollationReordering(void)
5920 UErrorCode status
= U_ZERO_ERROR
;
5921 UCollator
*myCollation
;
5922 int32_t reorderCodes
[1] = {USCRIPT_GREEK
};
5924 uint8_t baseKey
[256];
5925 uint32_t baseKeyLength
;
5926 uint8_t reorderKey
[256];
5927 uint32_t reorderKeyLength
;
5929 UChar testString
[] = { 0x03b1, 0x03b2, 0x03b3 };
5934 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5936 /* build collator tertiary */
5937 myCollation
= ucol_open("", &status
);
5938 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
5939 if(U_FAILURE(status
)) {
5940 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
5943 baseKeyLength
= ucol_getSortKey(myCollation
, testString
, LEN(testString
), baseKey
, 256);
5945 ucol_setReorderCodes(myCollation
, reorderCodes
, LEN(reorderCodes
), &status
);
5946 if(U_FAILURE(status
)) {
5947 log_err_status(status
, "ERROR: setting reorder codes: %s\n", myErrorName(status
));
5950 reorderKeyLength
= ucol_getSortKey(myCollation
, testString
, LEN(testString
), reorderKey
, 256);
5952 if (baseKeyLength
!= reorderKeyLength
) {
5953 log_err("Key lengths not the same during reordering.\n");
5957 for (i
= 1; i
< baseKeyLength
; i
++) {
5958 if (baseKey
[i
] != reorderKey
[i
]) {
5959 log_err("Collation key bytes not the same at position %d.\n", i
);
5963 ucol_close(myCollation
);
5965 /* build collator quaternary */
5966 myCollation
= ucol_open("", &status
);
5967 ucol_setStrength(myCollation
, UCOL_QUATERNARY
);
5968 if(U_FAILURE(status
)) {
5969 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
5972 baseKeyLength
= ucol_getSortKey(myCollation
, testString
, LEN(testString
), baseKey
, 256);
5974 ucol_setReorderCodes(myCollation
, reorderCodes
, LEN(reorderCodes
), &status
);
5975 if(U_FAILURE(status
)) {
5976 log_err_status(status
, "ERROR: setting reorder codes: %s\n", myErrorName(status
));
5979 reorderKeyLength
= ucol_getSortKey(myCollation
, testString
, LEN(testString
), reorderKey
, 256);
5981 if (baseKeyLength
!= reorderKeyLength
) {
5982 log_err("Key lengths not the same during reordering.\n");
5986 for (i
= 1; i
< baseKeyLength
; i
++) {
5987 if (baseKey
[i
] != reorderKey
[i
]) {
5988 log_err("Collation key bytes not the same at position %d.\n", i
);
5992 ucol_close(myCollation
);
5996 * Test reordering API.
5998 static void TestReorderingAPI(void)
6000 UErrorCode status
= U_ZERO_ERROR
;
6001 UCollator
*myCollation
;
6002 int32_t reorderCodes
[3] = {USCRIPT_GREEK
, USCRIPT_HAN
, UCOL_REORDER_CODE_PUNCTUATION
};
6003 int32_t duplicateReorderCodes
[] = {USCRIPT_CUNEIFORM
, USCRIPT_GREEK
, UCOL_REORDER_CODE_CURRENCY
, USCRIPT_EGYPTIAN_HIEROGLYPHS
};
6004 int32_t reorderCodesStartingWithDefault
[] = {UCOL_REORDER_CODE_DEFAULT
, USCRIPT_GREEK
, USCRIPT_HAN
, UCOL_REORDER_CODE_PUNCTUATION
};
6005 UCollationResult collResult
;
6006 int32_t retrievedReorderCodesLength
;
6007 int32_t retrievedReorderCodes
[10];
6008 UChar greekString
[] = { 0x03b1 };
6009 UChar punctuationString
[] = { 0x203e };
6012 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6014 /* build collator tertiary */
6015 myCollation
= ucol_open("", &status
);
6016 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
6017 if(U_FAILURE(status
)) {
6018 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
6022 /* set the reorderding */
6023 ucol_setReorderCodes(myCollation
, reorderCodes
, LEN(reorderCodes
), &status
);
6024 if (U_FAILURE(status
)) {
6025 log_err_status(status
, "ERROR: setting reorder codes: %s\n", myErrorName(status
));
6029 /* get the reordering */
6030 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, NULL
, 0, &status
);
6031 if (status
!= U_BUFFER_OVERFLOW_ERROR
) {
6032 log_err_status(status
, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status
));
6035 status
= U_ZERO_ERROR
;
6036 if (retrievedReorderCodesLength
!= LEN(reorderCodes
)) {
6037 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, LEN(reorderCodes
));
6040 /* now let's really get it */
6041 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, retrievedReorderCodes
, LEN(retrievedReorderCodes
), &status
);
6042 if (U_FAILURE(status
)) {
6043 log_err_status(status
, "ERROR: getting reorder codes: %s\n", myErrorName(status
));
6046 if (retrievedReorderCodesLength
!= LEN(reorderCodes
)) {
6047 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, LEN(reorderCodes
));
6050 for (loopIndex
= 0; loopIndex
< retrievedReorderCodesLength
; loopIndex
++) {
6051 if (retrievedReorderCodes
[loopIndex
] != reorderCodes
[loopIndex
]) {
6052 log_err_status(status
, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex
);
6056 collResult
= ucol_strcoll(myCollation
, greekString
, LEN(greekString
), punctuationString
, LEN(punctuationString
));
6057 if (collResult
!= UCOL_LESS
) {
6058 log_err_status(status
, "ERROR: collation result should have been UCOL_LESS\n");
6062 /* clear the reordering */
6063 ucol_setReorderCodes(myCollation
, NULL
, 0, &status
);
6064 if (U_FAILURE(status
)) {
6065 log_err_status(status
, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status
));
6069 /* get the reordering again */
6070 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, NULL
, 0, &status
);
6071 if (retrievedReorderCodesLength
!= 0) {
6072 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, 0);
6076 collResult
= ucol_strcoll(myCollation
, greekString
, LEN(greekString
), punctuationString
, LEN(punctuationString
));
6077 if (collResult
!= UCOL_GREATER
) {
6078 log_err_status(status
, "ERROR: collation result should have been UCOL_GREATER\n");
6082 /* test for error condition on duplicate reorder codes */
6083 ucol_setReorderCodes(myCollation
, duplicateReorderCodes
, LEN(duplicateReorderCodes
), &status
);
6084 if (!U_FAILURE(status
)) {
6085 log_err_status(status
, "ERROR: setting duplicate reorder codes did not generate a failure\n");
6089 status
= U_ZERO_ERROR
;
6090 /* test for reorder codes after a reset code */
6091 ucol_setReorderCodes(myCollation
, reorderCodesStartingWithDefault
, LEN(reorderCodesStartingWithDefault
), &status
);
6092 if (!U_FAILURE(status
)) {
6093 log_err_status(status
, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
6097 ucol_close(myCollation
);
6101 * Test reordering API.
6103 static void TestReorderingAPIWithRuleCreatedCollator(void)
6105 UErrorCode status
= U_ZERO_ERROR
;
6106 UCollator
*myCollation
;
6108 int32_t rulesReorderCodes
[2] = {USCRIPT_HAN
, USCRIPT_GREEK
};
6109 int32_t reorderCodes
[3] = {USCRIPT_GREEK
, USCRIPT_HAN
, UCOL_REORDER_CODE_PUNCTUATION
};
6110 UCollationResult collResult
;
6111 int32_t retrievedReorderCodesLength
;
6112 int32_t retrievedReorderCodes
[10];
6113 UChar greekString
[] = { 0x03b1 };
6114 UChar punctuationString
[] = { 0x203e };
6115 UChar hanString
[] = { 0x65E5, 0x672C };
6118 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6120 /* build collator from rules */
6121 u_uastrcpy(rules
, "[reorder Hani Grek]");
6122 myCollation
= ucol_openRules(rules
, u_strlen(rules
), UCOL_DEFAULT
, UCOL_TERTIARY
, NULL
, &status
);
6123 if(U_FAILURE(status
)) {
6124 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
6128 /* get the reordering */
6129 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, retrievedReorderCodes
, LEN(retrievedReorderCodes
), &status
);
6130 if (U_FAILURE(status
)) {
6131 log_err_status(status
, "ERROR: getting reorder codes: %s\n", myErrorName(status
));
6134 if (retrievedReorderCodesLength
!= LEN(rulesReorderCodes
)) {
6135 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, LEN(rulesReorderCodes
));
6138 for (loopIndex
= 0; loopIndex
< retrievedReorderCodesLength
; loopIndex
++) {
6139 if (retrievedReorderCodes
[loopIndex
] != rulesReorderCodes
[loopIndex
]) {
6140 log_err_status(status
, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex
);
6144 collResult
= ucol_strcoll(myCollation
, greekString
, LEN(greekString
), hanString
, LEN(hanString
));
6145 if (collResult
!= UCOL_GREATER
) {
6146 log_err_status(status
, "ERROR: collation result should have been UCOL_LESS\n");
6151 /* set the reorderding */
6152 ucol_setReorderCodes(myCollation
, reorderCodes
, LEN(reorderCodes
), &status
);
6153 if (U_FAILURE(status
)) {
6154 log_err_status(status
, "ERROR: setting reorder codes: %s\n", myErrorName(status
));
6158 /* get the reordering */
6159 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, NULL
, 0, &status
);
6160 if (status
!= U_BUFFER_OVERFLOW_ERROR
) {
6161 log_err_status(status
, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status
));
6164 status
= U_ZERO_ERROR
;
6165 if (retrievedReorderCodesLength
!= LEN(reorderCodes
)) {
6166 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, LEN(reorderCodes
));
6169 /* now let's really get it */
6170 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, retrievedReorderCodes
, LEN(retrievedReorderCodes
), &status
);
6171 if (U_FAILURE(status
)) {
6172 log_err_status(status
, "ERROR: getting reorder codes: %s\n", myErrorName(status
));
6175 if (retrievedReorderCodesLength
!= LEN(reorderCodes
)) {
6176 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, LEN(reorderCodes
));
6179 for (loopIndex
= 0; loopIndex
< retrievedReorderCodesLength
; loopIndex
++) {
6180 if (retrievedReorderCodes
[loopIndex
] != reorderCodes
[loopIndex
]) {
6181 log_err_status(status
, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex
);
6185 collResult
= ucol_strcoll(myCollation
, greekString
, LEN(greekString
), punctuationString
, LEN(punctuationString
));
6186 if (collResult
!= UCOL_LESS
) {
6187 log_err_status(status
, "ERROR: collation result should have been UCOL_LESS\n");
6191 /* clear the reordering */
6192 ucol_setReorderCodes(myCollation
, NULL
, 0, &status
);
6193 if (U_FAILURE(status
)) {
6194 log_err_status(status
, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status
));
6198 /* get the reordering again */
6199 retrievedReorderCodesLength
= ucol_getReorderCodes(myCollation
, NULL
, 0, &status
);
6200 if (retrievedReorderCodesLength
!= 0) {
6201 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, 0);
6205 collResult
= ucol_strcoll(myCollation
, greekString
, LEN(greekString
), punctuationString
, LEN(punctuationString
));
6206 if (collResult
!= UCOL_GREATER
) {
6207 log_err_status(status
, "ERROR: collation result should have been UCOL_GREATER\n");
6211 ucol_close(myCollation
);
/*
 * qsort() comparator for arrays of int32_t codes, ascending order.
 *
 * @param a Pointer to the first int32_t.
 * @param b Pointer to the second int32_t.
 * @return Negative, zero or positive when *a is less than, equal to or
 *         greater than *b.
 */
static int compareUScriptCodes(const void * a, const void * b)
{
    const int32_t lhs = *(const int32_t *)a;
    const int32_t rhs = *(const int32_t *)b;

    /* Compare instead of subtracting: a difference can overflow int32_t. */
    return (lhs > rhs) - (lhs < rhs);
}
/*
 * Calls ucol_getEquivalentReorderCodes() for USCRIPT_GOTHIC and for
 * USCRIPT_SHAVIAN and compares each returned list, element by element,
 * with the sorted contents of equivalentScriptsResult.
 */
6219 static void TestEquivalentReorderingScripts(void) {
6220 UErrorCode status
= U_ZERO_ERROR
;
/* Output buffer for the API call; capacity is 50 codes. */
6221 int32_t equivalentScripts
[50];
6222 int32_t equivalentScriptsLength
;
/* Expected script codes. NOTE(review): only part of this initializer is
 * visible in this view. */
6224 int32_t equivalentScriptsResult
[] = {
6238 USCRIPT_OLD_SOUTH_ARABIAN
,
6240 USCRIPT_IMPERIAL_ARAMAIC
,
6241 USCRIPT_INSCRIPTIONAL_PARTHIAN
,
6242 USCRIPT_INSCRIPTIONAL_PAHLAVI
,
6244 USCRIPT_OLD_PERSIAN
,
6246 USCRIPT_EGYPTIAN_HIEROGLYPHS
,
6247 USCRIPT_PHONETIC_POLLARD
,
6248 USCRIPT_SORA_SOMPENG
,
6249 USCRIPT_MEROITIC_CURSIVE
,
6250 USCRIPT_MEROITIC_HIEROGLYPHS
/* Sort the expectation numerically so it can be compared index by index
 * with the API result below. */
6253 qsort(equivalentScriptsResult
, LEN(equivalentScriptsResult
), sizeof(int32_t), compareUScriptCodes
);
6255 /* UScript.GOTHIC */
6256 equivalentScriptsLength
= ucol_getEquivalentReorderCodes(USCRIPT_GOTHIC
, equivalentScripts
, LEN(equivalentScripts
), &status
);
6257 if (U_FAILURE(status
)) {
6258 log_err_status(status
, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status
));
/* Diagnostic dump of the retrieved codes to stdout.
 * NOTE(review): presumably wrapped in a disabled preprocessor block that is
 * not visible here - confirm against the complete file. */
6262 fprintf(stdout, "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
6263 fprintf(stdout, "equivalentScriptsLength = %d\n", equivalentScriptsLength);
6264 for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
6265 fprintf(stdout, "%d = %x\n", loopIndex, equivalentScripts[loopIndex]);
/* NOTE(review): LEN() is a sizeof ratio (size_t); it is compared with an
 * int32_t and printed with %d below. */
6268 if (equivalentScriptsLength
!= LEN(equivalentScriptsResult
)) {
6269 log_err_status(status
, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult
), equivalentScriptsLength
);
6272 for (loopIndex
= 0; loopIndex
< equivalentScriptsLength
; loopIndex
++) {
6273 if (equivalentScriptsResult
[loopIndex
] != equivalentScripts
[loopIndex
]) {
6274 log_err_status(status
, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult
[loopIndex
], equivalentScripts
[loopIndex
]);
/* Second query: the same expectation list is used for USCRIPT_SHAVIAN. */
6279 /* UScript.SHAVIAN */
6280 equivalentScriptsLength
= ucol_getEquivalentReorderCodes(USCRIPT_SHAVIAN
, equivalentScripts
, LEN(equivalentScripts
), &status
);
6281 if (U_FAILURE(status
)) {
6282 log_err_status(status
, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status
));
6285 if (equivalentScriptsLength
!= LEN(equivalentScriptsResult
)) {
6286 log_err_status(status
, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult
), equivalentScriptsLength
);
6289 for (loopIndex
= 0; loopIndex
< equivalentScriptsLength
; loopIndex
++) {
6290 if (equivalentScriptsResult
[loopIndex
] != equivalentScripts
[loopIndex
]) {
6291 log_err_status(status
, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult
[loopIndex
], equivalentScripts
[loopIndex
]);
6297 static void TestReorderingAcrossCloning(void)
6299 UErrorCode status
= U_ZERO_ERROR
;
6300 UCollator
*myCollation
;
6301 int32_t reorderCodes
[3] = {USCRIPT_GREEK
, USCRIPT_HAN
, UCOL_REORDER_CODE_PUNCTUATION
};
6302 UCollator
*clonedCollation
;
6304 int32_t retrievedReorderCodesLength
;
6305 int32_t retrievedReorderCodes
[10];
6308 log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6310 /* build collator tertiary */
6311 myCollation
= ucol_open("", &status
);
6312 ucol_setStrength(myCollation
, UCOL_TERTIARY
);
6313 if(U_FAILURE(status
)) {
6314 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
6318 /* set the reorderding */
6319 ucol_setReorderCodes(myCollation
, reorderCodes
, LEN(reorderCodes
), &status
);
6320 if (U_FAILURE(status
)) {
6321 log_err_status(status
, "ERROR: setting reorder codes: %s\n", myErrorName(status
));
6325 /* clone the collator */
6326 clonedCollation
= ucol_safeClone(myCollation
, NULL
, &bufferSize
, &status
);
6327 if (U_FAILURE(status
)) {
6328 log_err_status(status
, "ERROR: cloning collator: %s\n", myErrorName(status
));
6332 /* get the reordering */
6333 retrievedReorderCodesLength
= ucol_getReorderCodes(clonedCollation
, retrievedReorderCodes
, LEN(retrievedReorderCodes
), &status
);
6334 if (U_FAILURE(status
)) {
6335 log_err_status(status
, "ERROR: getting reorder codes: %s\n", myErrorName(status
));
6338 if (retrievedReorderCodesLength
!= LEN(reorderCodes
)) {
6339 log_err_status(status
, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength
, LEN(reorderCodes
));
6342 for (loopIndex
= 0; loopIndex
< retrievedReorderCodesLength
; loopIndex
++) {
6343 if (retrievedReorderCodes
[loopIndex
] != reorderCodes
[loopIndex
]) {
6344 log_err_status(status
, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex
);
6349 /*uprv_free(buffer);*/
6350 ucol_close(myCollation
);
6351 ucol_close(clonedCollation
);
6355 * Utility function to test one collation reordering test case set.
6356 * @param testcases Array of test cases.
6357 * @param n_testcases Size of the array testcases.
6358 * @param reorderTokens Array of reordering codes.
6359 * @param reorderTokensLen Size of the array reorderTokens.
6361 static void doTestOneReorderingAPITestCase(const OneTestCase testCases
[], uint32_t testCasesLen
, const int32_t reorderTokens
[], int32_t reorderTokensLen
)
6363 uint32_t testCaseNum
;
6364 UErrorCode status
= U_ZERO_ERROR
;
6365 UCollator
*myCollation
;
6367 myCollation
= ucol_open("", &status
);
6368 if (U_FAILURE(status
)) {
6369 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
6372 ucol_setReorderCodes(myCollation
, reorderTokens
, reorderTokensLen
, &status
);
6373 if(U_FAILURE(status
)) {
6374 log_err_status(status
, "ERROR: while setting script order: %s\n", myErrorName(status
));
6378 for (testCaseNum
= 0; testCaseNum
< testCasesLen
; ++testCaseNum
) {
6380 testCases
[testCaseNum
].source
,
6381 testCases
[testCaseNum
].target
,
6382 testCases
[testCaseNum
].result
6385 ucol_close(myCollation
);
6388 static void TestGreekFirstReorder(void)
6390 const char* strRules
[] = {
6394 const int32_t apiRules
[] = {
6398 const static OneTestCase privateUseCharacterStrings
[] = {
6399 { {0x0391}, {0x0391}, UCOL_EQUAL
},
6400 { {0x0041}, {0x0391}, UCOL_GREATER
},
6401 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER
},
6402 { {0x0060}, {0x0391}, UCOL_LESS
},
6403 { {0x0391}, {0xe2dc}, UCOL_LESS
},
6404 { {0x0391}, {0x0060}, UCOL_GREATER
},
6407 /* Test rules creation */
6408 doTestOneTestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), strRules
, LEN(strRules
));
6410 /* Test collation reordering API */
6411 doTestOneReorderingAPITestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), apiRules
, LEN(apiRules
));
6414 static void TestGreekLastReorder(void)
6416 const char* strRules
[] = {
6417 "[reorder Zzzz Grek]"
6420 const int32_t apiRules
[] = {
6421 USCRIPT_UNKNOWN
, USCRIPT_GREEK
6424 const static OneTestCase privateUseCharacterStrings
[] = {
6425 { {0x0391}, {0x0391}, UCOL_EQUAL
},
6426 { {0x0041}, {0x0391}, UCOL_LESS
},
6427 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS
},
6428 { {0x0060}, {0x0391}, UCOL_LESS
},
6429 { {0x0391}, {0xe2dc}, UCOL_GREATER
},
6432 /* Test rules creation */
6433 doTestOneTestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), strRules
, LEN(strRules
));
6435 /* Test collation reordering API */
6436 doTestOneReorderingAPITestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), apiRules
, LEN(apiRules
));
6439 static void TestNonScriptReorder(void)
6441 const char* strRules
[] = {
6442 "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
6445 const int32_t apiRules
[] = {
6446 USCRIPT_GREEK
, UCOL_REORDER_CODE_SYMBOL
, UCOL_REORDER_CODE_DIGIT
, USCRIPT_LATIN
,
6447 UCOL_REORDER_CODE_PUNCTUATION
, UCOL_REORDER_CODE_SPACE
, USCRIPT_UNKNOWN
,
6448 UCOL_REORDER_CODE_CURRENCY
6451 const static OneTestCase privateUseCharacterStrings
[] = {
6452 { {0x0391}, {0x0041}, UCOL_LESS
},
6453 { {0x0041}, {0x0391}, UCOL_GREATER
},
6454 { {0x0060}, {0x0041}, UCOL_LESS
},
6455 { {0x0060}, {0x0391}, UCOL_GREATER
},
6456 { {0x0024}, {0x0041}, UCOL_GREATER
},
6459 /* Test rules creation */
6460 doTestOneTestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), strRules
, LEN(strRules
));
6462 /* Test collation reordering API */
6463 doTestOneReorderingAPITestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), apiRules
, LEN(apiRules
));
6466 static void TestHaniReorder(void)
6468 const char* strRules
[] = {
6471 const int32_t apiRules
[] = {
6475 const static OneTestCase privateUseCharacterStrings
[] = {
6476 { {0x4e00}, {0x0041}, UCOL_LESS
},
6477 { {0x4e00}, {0x0060}, UCOL_GREATER
},
6478 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS
},
6479 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER
},
6480 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS
},
6481 { {0xfa27}, {0x0041}, UCOL_LESS
},
6482 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS
},
6485 /* Test rules creation */
6486 doTestOneTestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), strRules
, LEN(strRules
));
6488 /* Test collation reordering API */
6489 doTestOneReorderingAPITestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), apiRules
, LEN(apiRules
));
6492 static void TestHaniReorderWithOtherRules(void)
6494 const char* strRules
[] = {
6495 "[reorder Hani] &b<a"
6497 const int32_t apiRules
[] = {
6501 const static OneTestCase privateUseCharacterStrings
[] = {
6502 { {0x4e00}, {0x0041}, UCOL_LESS
},
6503 { {0x4e00}, {0x0060}, UCOL_GREATER
},
6504 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS
},
6505 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER
},
6506 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS
},
6507 { {0xfa27}, {0x0041}, UCOL_LESS
},
6508 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS
},
6509 { {0x0062}, {0x0061}, UCOL_LESS
},
6512 /* Test rules creation */
6513 doTestOneTestCase(privateUseCharacterStrings
, LEN(privateUseCharacterStrings
), strRules
, LEN(strRules
));
6516 static void TestMultipleReorder(void)
6518 const char* strRules
[] = {
6519 "[reorder Grek Zzzz DIGIT Latn Hani]"
6522 const int32_t apiRules
[] = {
6523 USCRIPT_GREEK
, USCRIPT_UNKNOWN
, UCOL_REORDER_CODE_DIGIT
, USCRIPT_LATIN
, USCRIPT_HAN
6526 const static OneTestCase collationTestCases
[] = {
6527 { {0x0391}, {0x0041}, UCOL_LESS
},
6528 { {0x0031}, {0x0041}, UCOL_LESS
},
6529 { {0x0041}, {0x4e00}, UCOL_LESS
},
6532 /* Test rules creation */
6533 doTestOneTestCase(collationTestCases
, LEN(collationTestCases
), strRules
, LEN(strRules
));
6535 /* Test collation reordering API */
6536 doTestOneReorderingAPITestCase(collationTestCases
, LEN(collationTestCases
), apiRules
, LEN(apiRules
));
6540 * Test that covers issue reported in ticket 8814
6542 static void TestReorderWithNumericCollation()
6544 UErrorCode status
= U_ZERO_ERROR
;
6545 UCollator
*myCollation
;
6546 UCollator
*myReorderCollation
;
6547 int32_t reorderCodes
[] = {UCOL_REORDER_CODE_SPACE
, UCOL_REORDER_CODE_PUNCTUATION
, UCOL_REORDER_CODE_SYMBOL
, UCOL_REORDER_CODE_DIGIT
, USCRIPT_GREEK
,USCRIPT_LATIN
, USCRIPT_HEBREW
, UCOL_REORDER_CODE_OTHERS
};
6548 /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
6549 UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
6550 UChar fortyS
[] = { 0x0053 };
6551 UChar fortyThreeP
[] = { 0x0050 };
6552 uint8_t fortyS_sortKey
[128];
6553 int32_t fortyS_sortKey_Length
;
6554 uint8_t fortyThreeP_sortKey
[128];
6555 int32_t fortyThreeP_sortKey_Length
;
6556 uint8_t fortyS_sortKey_reorder
[128];
6557 int32_t fortyS_sortKey_reorder_Length
;
6558 uint8_t fortyThreeP_sortKey_reorder
[128];
6559 int32_t fortyThreeP_sortKey_reorder_Length
;
6560 UCollationResult collResult
;
6561 UCollationResult collResultReorder
;
6564 log_verbose("Testing reordering with and without numeric collation\n");
6566 /* build collator tertiary with numeric */
6567 myCollation
= ucol_open("", &status
);
6569 ucol_setStrength(myCollation, UCOL_TERTIARY);
6571 ucol_setAttribute(myCollation
, UCOL_NUMERIC_COLLATION
, UCOL_ON
, &status
);
6572 if(U_FAILURE(status
)) {
6573 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
6577 /* build collator tertiary with numeric and reordering */
6578 myReorderCollation
= ucol_open("", &status
);
6580 ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
6582 ucol_setAttribute(myReorderCollation
, UCOL_NUMERIC_COLLATION
, UCOL_ON
, &status
);
6583 ucol_setReorderCodes(myReorderCollation
, reorderCodes
, LEN(reorderCodes
), &status
);
6584 if(U_FAILURE(status
)) {
6585 log_err_status(status
, "ERROR: in creation of collator: %s\n", myErrorName(status
));
6589 fortyS_sortKey_Length
= ucol_getSortKey(myCollation
, fortyS
, LEN(fortyS
), fortyS_sortKey
, 128);
6590 fortyThreeP_sortKey_Length
= ucol_getSortKey(myCollation
, fortyThreeP
, LEN(fortyThreeP
), fortyThreeP_sortKey
, 128);
6591 fortyS_sortKey_reorder_Length
= ucol_getSortKey(myReorderCollation
, fortyS
, LEN(fortyS
), fortyS_sortKey_reorder
, 128);
6592 fortyThreeP_sortKey_reorder_Length
= ucol_getSortKey(myReorderCollation
, fortyThreeP
, LEN(fortyThreeP
), fortyThreeP_sortKey_reorder
, 128);
6594 if (fortyS_sortKey_Length
< 0 || fortyThreeP_sortKey_Length
< 0 || fortyS_sortKey_reorder_Length
< 0 || fortyThreeP_sortKey_reorder_Length
< 0) {
6595 log_err_status(status
, "ERROR: couldn't generate sort keys\n");
6598 collResult
= ucol_strcoll(myCollation
, fortyS
, LEN(fortyS
), fortyThreeP
, LEN(fortyThreeP
));
6599 collResultReorder
= ucol_strcoll(myReorderCollation
, fortyS
, LEN(fortyS
), fortyThreeP
, LEN(fortyThreeP
));
6601 fprintf(stderr, "\tcollResult = %x\n", collResult);
6602 fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
6603 fprintf(stderr, "\nfortyS\n");
6604 for (i = 0; i < fortyS_sortKey_Length; i++) {
6605 fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
6607 fprintf(stderr, "\nfortyThreeP\n");
6608 for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
6609 fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
6612 if (collResult
!= collResultReorder
) {
6613 log_err_status(status
, "ERROR: collation results should have been the same.\n");
6617 ucol_close(myCollation
);
6618 ucol_close(myReorderCollation
);
/**
 * Compares two zero-terminated byte strings (such as sort keys) byte by byte.
 * @param a First zero-terminated byte string.
 * @param b Second zero-terminated byte string.
 * @return 0 if both strings are identical up to and including the
 *         terminator, -1 if the first differing byte is smaller in a,
 *         1 otherwise.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
    while (*a == *b) {
        /* Both bytes are equal; stop at the shared terminator instead of running past it. */
        if (*a == 0) {
            return 0;
        }
        ++a;
        ++b;
    }
    return (*a < *b) ? -1 : 1;
}
6631 static void TestImportRulesDeWithPhonebook(void)
6633 const char* normalRules
[] = {
6634 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
6635 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
6636 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
6638 const OneTestCase normalTests
[] = {
6639 { {0x00e6}, {0x00c6}, UCOL_LESS
},
6640 { {0x00fc}, {0x00dc}, UCOL_GREATER
},
6643 const char* importRules
[] = {
6644 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
6645 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
6646 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
6648 const OneTestCase importTests
[] = {
6649 { {0x00e6}, {0x00c6}, UCOL_LESS
},
6650 { {0x00fc}, {0x00dc}, UCOL_LESS
},
6653 doTestOneTestCase(normalTests
, LEN(normalTests
), normalRules
, LEN(normalRules
));
6654 doTestOneTestCase(importTests
, LEN(importTests
), importRules
, LEN(importRules
));
6657 static void TestImportRulesFiWithEor(void)
6660 const char* defaultRules
[] = {
6661 "&a<b", /* Dummy rule. */
6664 const OneTestCase defaultTests
[] = {
6665 { {0x0110}, {0x00F0}, UCOL_LESS
},
6666 { {0x00a3}, {0x00a5}, UCOL_LESS
},
6667 { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS
},
6670 /* European Ordering rules: ignore currency characters. */
6671 const char* eorRules
[] = {
6672 "[import root-u-co-eor]",
6675 const OneTestCase eorTests
[] = {
6676 { {0x0110}, {0x00F0}, UCOL_LESS
},
6677 { {0x00a3}, {0x00a5}, UCOL_EQUAL
},
6678 { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL
},
6681 const char* fiStdRules
[] = {
6682 "[import fi-u-co-standard]",
6685 const OneTestCase fiStdTests
[] = {
6686 { {0x0110}, {0x00F0}, UCOL_GREATER
},
6687 { {0x00a3}, {0x00a5}, UCOL_LESS
},
6688 { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS
},
6691 /* Both European Ordering Rules and Fi Standard Rules. */
6692 const char* eorFiStdRules
[] = {
6693 "[import root-u-co-eor][import fi-u-co-standard]",
6696 /* This is essentially same as the one before once fi.txt is updated with import. */
6697 const char* fiEorRules
[] = {
6698 "[import fi-u-co-eor]",
6701 const OneTestCase fiEorTests
[] = {
6702 { {0x0110}, {0x00F0}, UCOL_GREATER
},
6703 { {0x00a3}, {0x00a5}, UCOL_EQUAL
},
6704 { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL
},
6707 doTestOneTestCase(defaultTests
, LEN(defaultTests
), defaultRules
, LEN(defaultRules
));
6708 doTestOneTestCase(eorTests
, LEN(eorTests
), eorRules
, LEN(eorRules
));
6709 doTestOneTestCase(fiStdTests
, LEN(fiStdTests
), fiStdRules
, LEN(fiStdRules
));
6710 doTestOneTestCase(fiEorTests
, LEN(fiEorTests
), eorFiStdRules
, LEN(eorFiStdRules
));
6712 /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
6715 "[import root-u-co-eor][import fi-u-co-standard]"
6720 /* doTestOneTestCase(fiEorTests, LEN(fiEorTests), fiEorRules, LEN(fiEorRules)); */
6726 * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
6727 * the resource files are built with -includeUnihanColl option.
6728 * TODO: Uncomment this function and make it work when unihan rules are built by default.
6730 static void TestImportRulesCJKWithUnihan(void)
6733 const char* defaultRules
[] = {
6734 "&a<b", /* Dummy rule. */
6737 const OneTestCase defaultTests
[] = {
6738 { {0x3402}, {0x4e1e}, UCOL_GREATER
},
6741 /* European Ordering rules: ignore currency characters. */
6742 const char* unihanRules
[] = {
6743 "[import ko-u-co-unihan]",
6746 const OneTestCase unihanTests
[] = {
6747 { {0x3402}, {0x4e1e}, UCOL_LESS
},
6750 doTestOneTestCase(defaultTests
, LEN(defaultTests
), defaultRules
, LEN(defaultRules
));
6751 doTestOneTestCase(unihanTests
, LEN(unihanTests
), unihanRules
, LEN(unihanRules
));
6756 static void TestImport(void)
6760 UCollator
* viescoll
;
6761 UCollator
* importviescoll
;
6763 UErrorCode status
= U_ZERO_ERROR
;
6765 int32_t viruleslength
;
6767 int32_t esruleslength
;
6769 int32_t viesruleslength
;
6770 char srules
[500] = "[import vi][import es]";
6772 uint32_t length
= 0;
6785 USet
* importTailoredSet
;
6788 vicoll
= ucol_open("vi", &status
);
6789 if(U_FAILURE(status
)){
6790 log_err_status(status
, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status
));
6794 virules
= (UChar
*) ucol_getRules(vicoll
, &viruleslength
);
6795 escoll
= ucol_open("es", &status
);
6796 esrules
= (UChar
*) ucol_getRules(escoll
, &esruleslength
);
6797 viesrules
= (UChar
*)uprv_malloc((viruleslength
+esruleslength
+1)*sizeof(UChar
*));
6799 u_strcat(viesrules
, virules
);
6800 u_strcat(viesrules
, esrules
);
6801 viesruleslength
= viruleslength
+ esruleslength
;
6802 viescoll
= ucol_openRules(viesrules
, viesruleslength
, UCOL_ON
, UCOL_TERTIARY
, &error
, &status
);
6804 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
6805 length
= u_unescape(srules
, rules
, 500);
6806 importviescoll
= ucol_openRules(rules
, length
, UCOL_ON
, UCOL_TERTIARY
, &error
, &status
);
6807 if(U_FAILURE(status
)){
6808 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
6812 tailoredSet
= ucol_getTailoredSet(viescoll
, &status
);
6813 importTailoredSet
= ucol_getTailoredSet(importviescoll
, &status
);
6815 if(!uset_equals(tailoredSet
, importTailoredSet
)){
6816 log_err("Tailored sets not equal");
6819 uset_close(importTailoredSet
);
6821 itemCount
= uset_getItemCount(tailoredSet
);
6823 for( i
= 0; i
< itemCount
; i
++){
6824 strLength
= uset_getItem(tailoredSet
, i
, &start
, &end
, str
, 500, &status
);
6826 for (; start
<= end
; start
++){
6828 U16_APPEND(str
, k
, 500, start
, b
);
6829 ucol_getSortKey(viescoll
, str
, 1, sk1
, 500);
6830 ucol_getSortKey(importviescoll
, str
, 1, sk2
, 500);
6831 if(compare_uint8_t_arrays(sk1
, sk2
) != 0){
6832 log_err("Sort key for %s not equal\n", str
);
6837 ucol_getSortKey(viescoll
, str
, strLength
, sk1
, 500);
6838 ucol_getSortKey(importviescoll
, str
, strLength
, sk2
, 500);
6839 if(compare_uint8_t_arrays(sk1
, sk2
) != 0){
6840 log_err("ZZSort key for %s not equal\n", str
);
6847 uset_close(tailoredSet
);
6849 uprv_free(viesrules
);
6853 ucol_close(viescoll
);
6854 ucol_close(importviescoll
);
6857 static void TestImportWithType(void)
6861 UCollator
* videcoll
;
6862 UCollator
* importvidecoll
;
6864 UErrorCode status
= U_ZERO_ERROR
;
6865 const UChar
* virules
;
6866 int32_t viruleslength
;
6867 const UChar
* derules
;
6868 int32_t deruleslength
;
6870 int32_t videruleslength
;
6871 const char srules
[500] = "[import vi][import de-u-co-phonebk]";
6873 uint32_t length
= 0;
6885 USet
* importTailoredSet
;
6887 vicoll
= ucol_open("vi", &status
);
6888 if(U_FAILURE(status
)){
6889 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
6892 virules
= ucol_getRules(vicoll
, &viruleslength
);
6893 /* decoll = ucol_open("de@collation=phonebook", &status); */
6894 decoll
= ucol_open("de-u-co-phonebk", &status
);
6895 if(U_FAILURE(status
)){
6896 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
6901 derules
= ucol_getRules(decoll
, &deruleslength
);
6902 viderules
= (UChar
*)uprv_malloc((viruleslength
+deruleslength
+1)*sizeof(UChar
*));
6904 u_strcat(viderules
, virules
);
6905 u_strcat(viderules
, derules
);
6906 videruleslength
= viruleslength
+ deruleslength
;
6907 videcoll
= ucol_openRules(viderules
, videruleslength
, UCOL_ON
, UCOL_TERTIARY
, &error
, &status
);
6909 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
6910 length
= u_unescape(srules
, rules
, 500);
6911 importvidecoll
= ucol_openRules(rules
, length
, UCOL_ON
, UCOL_TERTIARY
, &error
, &status
);
6912 if(U_FAILURE(status
)){
6913 log_err_status(status
, "ERROR: in creation of rule based collator: %s\n", myErrorName(status
));
6917 tailoredSet
= ucol_getTailoredSet(videcoll
, &status
);
6918 importTailoredSet
= ucol_getTailoredSet(importvidecoll
, &status
);
6920 if(!uset_equals(tailoredSet
, importTailoredSet
)){
6921 log_err("Tailored sets not equal");
6924 uset_close(importTailoredSet
);
6926 itemCount
= uset_getItemCount(tailoredSet
);
6928 for( i
= 0; i
< itemCount
; i
++){
6929 strLength
= uset_getItem(tailoredSet
, i
, &start
, &end
, str
, 500, &status
);
6931 for (; start
<= end
; start
++){
6933 U16_APPEND_UNSAFE(str
, k
, start
);
6934 ucol_getSortKey(videcoll
, str
, 1, sk1
, 500);
6935 ucol_getSortKey(importvidecoll
, str
, 1, sk2
, 500);
6936 if(compare_uint8_t_arrays(sk1
, sk2
) != 0){
6937 log_err("Sort key for %s not equal\n", str
);
6942 ucol_getSortKey(videcoll
, str
, strLength
, sk1
, 500);
6943 ucol_getSortKey(importvidecoll
, str
, strLength
, sk2
, 500);
6944 if(compare_uint8_t_arrays(sk1
, sk2
) != 0){
6945 log_err("Sort key for %s not equal\n", str
);
6952 uset_close(tailoredSet
);
6954 uprv_free(viderules
);
6956 ucol_close(videcoll
);
6957 ucol_close(importvidecoll
);
6962 /* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
6963 static const UChar longUpperStr1
[]= { /* 155 chars */
6964 0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
6965 0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
6966 0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
6967 0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
6968 0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
6969 0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
6970 0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
6971 0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
6972 0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
6973 0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
6976 /* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */
6977 static const UChar longUpperStr2
[]= { /* 125 chars, > 128 collation elements */
6978 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
6979 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
6980 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
6981 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
6982 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
6985 /* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */
6986 static const UChar longUpperStr3
[]= { /* 324 chars */
6987 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6988 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6989 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6990 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6991 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6992 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6993 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6994 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6995 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6996 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6997 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
6998 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
7001 #define MY_ARRAY_LEN(array) (sizeof(array)/sizeof(array[0]))
7004 const UChar
* longUpperStrPtr
;
7005 int32_t longUpperStrLen
;
7008 /* String pointers must be in reverse collation order of the corresponding strings */
7009 static const LongUpperStrItem longUpperStrItems
[] = {
7010 { longUpperStr1
, MY_ARRAY_LEN(longUpperStr1
) },
7011 { longUpperStr2
, MY_ARRAY_LEN(longUpperStr2
) },
7012 { longUpperStr3
, MY_ARRAY_LEN(longUpperStr3
) },
7016 enum { kCollKeyLenMax
= 800 }; /* longest expected is 749, but may change with collation changes */
7018 /* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
7019 static void TestCaseLevelBufferOverflow(void)
7021 UErrorCode status
= U_ZERO_ERROR
;
7022 UCollator
* ucol
= ucol_open("root", &status
);
7023 if ( U_SUCCESS(status
) ) {
7024 ucol_setAttribute(ucol
, UCOL_CASE_LEVEL
, UCOL_ON
, &status
);
7025 if ( U_SUCCESS(status
) ) {
7026 const LongUpperStrItem
* itemPtr
;
7027 uint8_t sortKeyA
[kCollKeyLenMax
], sortKeyB
[kCollKeyLenMax
];
7028 for ( itemPtr
= longUpperStrItems
; itemPtr
->longUpperStrPtr
!= NULL
; itemPtr
++ ) {
7030 if (itemPtr
> longUpperStrItems
) {
7031 uprv_strcpy((char *)sortKeyB
, (char *)sortKeyA
);
7033 sortKeyLen
= ucol_getSortKey(ucol
, itemPtr
->longUpperStrPtr
, itemPtr
->longUpperStrLen
, sortKeyA
, kCollKeyLenMax
);
7034 if (sortKeyLen
<= 0 || sortKeyLen
> kCollKeyLenMax
) {
7035 log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen
);
7038 if ( itemPtr
> longUpperStrItems
) {
7039 int compareResult
= uprv_strcmp((char *)sortKeyA
, (char *)sortKeyB
);
7040 if (compareResult
>= 0) {
7041 log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult
);
7046 log_err_status(status
, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status
));
7050 log_err_status(status
, "ERROR in ucol_open for root: %s\n", myErrorName(status
));
7055 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
7057 void addMiscCollTest(TestNode
** root
)
7059 TEST(TestRuleOptions
);
7060 TEST(TestBeforePrefixFailure
);
7061 TEST(TestContractionClosure
);
7062 TEST(TestPrefixCompose
);
7063 TEST(TestStrCollIdenticalPrefix
);
7065 TEST(TestNewJapanese
);
7066 /*TEST(TestLimitations);*/
7068 TEST(TestExtremeCompression
);
7069 TEST(TestSurrogates
);
7070 TEST(TestVariableTopSetting
);
7071 TEST(TestBocsuCoverage
);
7072 TEST(TestCyrillicTailoring
);
7074 TEST(IncompleteCntTest
);
7075 TEST(BlackBirdTest
);
7077 TEST(BillFairmanTest
);
7078 TEST(RamsRulesTest
);
7079 TEST(IsTailoredTest
);
7080 TEST(TestCollations
);
7082 TEST(TestImplicitTailoring
);
7083 TEST(TestFCDProblem
);
7084 TEST(TestEmptyRule
);
7085 /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
7087 /*TEST(TestJ831);*/ /* we changed lv locale */
7089 TEST(TestRedundantRules
);
7090 TEST(TestExpansionSyntax
);
7091 TEST(TestHangulTailoring
);
7093 TEST(TestIncrementalNormalize
);
7094 TEST(TestComposeDecompose
);
7095 TEST(TestCompressOverlap
);
7096 TEST(TestContraction
);
7097 TEST(TestExpansion
);
7098 /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
7099 /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
7101 TEST(TestSuppressContractions
);
7103 TEST(TestHebrewUCA
);
7104 TEST(TestPartialSortKeyTermination
);
7109 TEST(TestNumericCollation
);
7110 TEST(TestTibetanConformance
);
7111 TEST(TestPinyinProblem
);
7112 TEST(TestImplicitGeneration
);
7113 TEST(TestSeparateTrees
);
7114 TEST(TestBeforePinyin
);
7115 TEST(TestBeforeTightening
);
7116 /*TEST(TestMoreBefore);*/
7117 TEST(TestTailorNULL
);
7118 TEST(TestUpperFirstQuaternary
);
7124 TEST(TestSortKeyConsistency
);
7125 TEST(TestVI5913
); /* VI, RO tailored rules */
7126 TEST(TestCroatianSortKey
);
7127 TEST(TestTailor6179
);
7128 TEST(TestUCAPrecontext
);
7129 TEST(TestOutOfBuffer5468
);
7130 TEST(TestSameStrengthList
);
7132 TEST(TestSameStrengthListQuoted
);
7133 TEST(TestSameStrengthListSupplemental
);
7134 TEST(TestSameStrengthListQwerty
);
7135 TEST(TestSameStrengthListQuotedQwerty
);
7136 TEST(TestSameStrengthListRanges
);
7137 TEST(TestSameStrengthListSupplementalRanges
);
7138 TEST(TestSpecialCharacters
);
7139 TEST(TestPrivateUseCharacters
);
7140 TEST(TestPrivateUseCharactersInList
);
7141 TEST(TestPrivateUseCharactersInRange
);
7142 TEST(TestInvalidListsAndRanges
);
7143 TEST(TestImportRulesDeWithPhonebook
);
7144 /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
7145 /* TEST(TestImportRulesCJKWithUnihan); */
7147 TEST(TestImportWithType
);
7149 TEST(TestBeforeRuleWithScriptReordering
);
7150 TEST(TestNonLeadBytesDuringCollationReordering
);
7151 TEST(TestReorderingAPI
);
7152 TEST(TestReorderingAPIWithRuleCreatedCollator
);
7153 TEST(TestEquivalentReorderingScripts
);
7154 TEST(TestGreekFirstReorder
);
7155 TEST(TestGreekLastReorder
);
7156 TEST(TestNonScriptReorder
);
7157 TEST(TestHaniReorder
);
7158 TEST(TestHaniReorderWithOtherRules
);
7159 TEST(TestMultipleReorder
);
7160 TEST(TestReorderingAcrossCloning
);
7161 /* test for ticket 8814 - disabled until resolved */
7162 /*TEST(TestReorderWithNumericCollation);*/
7164 TEST(TestCaseLevelBufferOverflow
);
7167 #endif /* #if !UCONFIG_NO_COLLATION */